This commit is contained in:
van
2026-05-09 22:57:53 +08:00
parent f4697481fa
commit 30ff4077fe
14 changed files with 744 additions and 55 deletions

View File

@@ -0,0 +1,59 @@
package com.ruoyi.jarvis.util;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * Splits WeCom (WeChat Work) text into pieces measured in UTF-8 bytes, so that each
 * piece fits the size limit shared by application messages and passive replies.
 */
public final class WeComUtf8ChunkUtil {

    /** Official upper bound for a WeCom text {@code content} field: roughly 2048 UTF-8 bytes. */
    public static final int WE_COM_TEXT_MAX_UTF8_BYTES = 2048;

    private WeComUtf8ChunkUtil() {
    }

    /**
     * Splits {@code text} into consecutive chunks whose UTF-8 encoded size does not exceed
     * {@code maxUtf8Bytes}.
     *
     * <p>The split never happens inside a code point: a supplementary (non-BMP) character,
     * which occupies two {@code char}s, always stays whole. As a consequence, a single code
     * point that is wider than {@code maxUtf8Bytes} is emitted as a chunk of its own and that
     * chunk exceeds the limit. Concatenating the returned chunks in order yields {@code text}.
     *
     * @param text         the text to split; may be {@code null}
     * @param maxUtf8Bytes the maximum UTF-8 size of a chunk, in bytes; must be at least 1
     *                     (only checked when {@code text} is non-empty)
     * @return an immutable single-element list holding {@code ""} when {@code text} is
     *         {@code null} or empty; otherwise a new mutable list of the chunks, in order
     * @throws IllegalArgumentException if {@code text} is non-empty and
     *                                  {@code maxUtf8Bytes} is less than 1
     */
    public static List<String> splitUtf8Chunks(String text, int maxUtf8Bytes) {
        if (text == null || text.isEmpty()) {
            return Collections.singletonList("");
        }
        if (maxUtf8Bytes < 1) {
            throw new IllegalArgumentException("maxUtf8Bytes must be >= 1");
        }

        final int length = text.length();
        final List<String> chunks = new ArrayList<>();
        int pos = 0;
        while (pos < length) {
            final int chunkStart = pos;
            int usedBytes = 0;
            // Greedily take whole code points while they still fit into the byte budget.
            while (pos < length) {
                final int codePoint = text.codePointAt(pos);
                final int width = utf8Length(codePoint);
                // Written as a subtraction so a limit close to Integer.MAX_VALUE cannot overflow.
                if (width > maxUtf8Bytes - usedBytes) {
                    break;
                }
                usedBytes += width;
                pos += Character.charCount(codePoint);
            }
            if (pos == chunkStart) {
                // Not even one code point fits: emit it alone so the loop always advances.
                pos += Character.charCount(text.codePointAt(pos));
            }
            chunks.add(text.substring(chunkStart, pos));
        }
        return chunks;
    }

    /**
     * Returns the number of bytes {@code String.getBytes(StandardCharsets.UTF_8)} produces
     * for a single code point, without allocating anything.
     */
    private static int utf8Length(int codePoint) {
        if (codePoint < 0x80) {
            return 1;
        }
        if (codePoint < 0x800) {
            return 2;
        }
        if (codePoint >= Character.MIN_SURROGATE && codePoint <= Character.MAX_SURROGATE) {
            // codePointAt yields the bare surrogate when it is unpaired; the UTF-8 encoder
            // replaces such a char with a single '?' byte.
            return 1;
        }
        if (codePoint < 0x10000) {
            return 3;
        }
        return 4;
    }
}