Skip to content

Commit a1acdcb

Browse files
committed
feat: add double width char checker
1 parent ea60000 commit a1acdcb

File tree

1 file changed

+30
-0
lines changed

1 file changed

+30
-0
lines changed

tools/double-width-char.js

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
// Inclusive [first, last] code-point ranges of the Han ideograph blocks located
// outside the Basic Multilingual Plane. Every code point here is above U+FFFF,
// so each character occupies two UTF-16 code units (a surrogate pair).
const SUPPLEMENTARY_HAN_RANGES = Object.freeze([
  [0x20000, 0x2a6df], // CJK Unified Ideographs Extension B
  [0x2a700, 0x2b73f], // CJK Unified Ideographs Extension C
  [0x2b740, 0x2b81f], // CJK Unified Ideographs Extension D
  [0x2b820, 0x2ceaf], // CJK Unified Ideographs Extension E
  [0x2ceb0, 0x2ebef], // CJK Unified Ideographs Extension F
  [0x2ebf0, 0x2ee5f], // CJK Unified Ideographs Extension I
  [0x2f800, 0x2fa1f], // CJK Compatibility Ideographs Supplement
  [0x30000, 0x3134f], // CJK Unified Ideographs Extension G
  [0x31350, 0x323af], // CJK Unified Ideographs Extension H
]);

/**
 * Collects the distinct Han characters in `str` whose JavaScript string length
 * is greater than one, i.e. ideographs encoded as a UTF-16 surrogate pair.
 *
 * @param {string} str - Text to scan.
 * @returns {string[]} Unique matching characters, in order of first appearance.
 */
function findLengthGreaterThanOneHanChars(str) {
  const result = new Set();

  // for...of walks the string by code point, so a surrogate pair is yielded as
  // one two-unit string instead of two separate halves.
  for (const char of str) {
    // A length of 1 means a BMP character (or a lone surrogate); skip those.
    if (char.length < 2) {
      continue;
    }

    const codePoint = char.codePointAt(0);
    const isSupplementaryHan = SUPPLEMENTARY_HAN_RANGES.some(
      ([first, last]) => codePoint >= first && codePoint <= last,
    );
    if (isSupplementaryHan) {
      result.add(char);
    }
  }

  return [...result];
}

// Demo: a sample string mixing common Han characters with supplementary-plane ones.
const testStr = "汉字𠀀𠂢𠃌常见字和补充区字混合𠄀𠆢";
// Note: despite the label, this prints the string's length in UTF-16 code
// units, not the string itself; characters outside the BMP count as 2.
console.log("测试字符串:", testStr.length);
const found = findLengthGreaterThanOneHanChars(testStr);

// Print the distinct characters reported by the checker.
console.log(found);

0 commit comments

Comments
 (0)