Skip to content

Commit be33439

Browse files
committed
Handle more edge-cases
Closes #58
1 parent 94ad7e1 commit be33439

File tree

2 files changed

+85
-1
lines changed

2 files changed

+85
-1
lines changed

index.js

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import emojiRegex from 'emoji-regex';
44

55
const segmenter = new Intl.Segmenter();
66

7+
const defaultIgnorableCodePointRegex = /^\p{Default_Ignorable_Code_Point}$/u;
8+
79
export default function stringWidth(string, options = {}) {
810
if (typeof string !== 'string' || string.length === 0) {
911
return 0;
@@ -33,8 +35,37 @@ export default function stringWidth(string, options = {}) {
3335
continue;
3436
}
3537

38+
// Ignore zero-width characters
39+
if (
40+
(codePoint >= 0x20_0B && codePoint <= 0x20_0F) // Zero-width space, non-joiner, joiner, left-to-right mark, right-to-left mark
41+
|| codePoint === 0xFE_FF // Zero-width no-break space
42+
) {
43+
continue;
44+
}
45+
3646
// Ignore combining characters
37-
if (codePoint >= 0x3_00 && codePoint <= 0x3_6F) {
47+
if (
48+
(codePoint >= 0x3_00 && codePoint <= 0x3_6F) // Combining diacritical marks
49+
|| (codePoint >= 0x1A_B0 && codePoint <= 0x1A_FF) // Combining diacritical marks extended
50+
|| (codePoint >= 0x1D_C0 && codePoint <= 0x1D_FF) // Combining diacritical marks supplement
51+
|| (codePoint >= 0x20_D0 && codePoint <= 0x20_FF) // Combining diacritical marks for symbols
52+
|| (codePoint >= 0xFE_20 && codePoint <= 0xFE_2F) // Combining half marks
53+
) {
54+
continue;
55+
}
56+
57+
// Ignore lone (unpaired) surrogates; valid surrogate pairs form a single code point outside this range
58+
if (codePoint >= 0xD8_00 && codePoint <= 0xDF_FF) {
59+
continue;
60+
}
61+
62+
// Ignore variation selectors
63+
if (codePoint >= 0xFE_00 && codePoint <= 0xFE_0F) {
64+
continue;
65+
}
66+
67+
// This regex also covers some of the ranges checked above, but the explicit range checks are kept for performance reasons.
68+
if (defaultIgnorableCodePointRegex.test(character)) {
3869
continue;
3970
}
4071

test.js

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ test('ignores control characters', t => {
3737

3838
test('handles combining characters', t => {
3939
t.is(stringWidth('x\u0300'), 1);
40+
t.is(stringWidth('\u0300\u0301'), 0);
41+
t.is(stringWidth('e\u0301e'), 2);
42+
t.is(stringWidth('x\u036F'), 1);
43+
t.is(stringWidth('\u036F\u036F'), 0);
4044
});
4145

4246
test('handles ZWJ characters', t => {
@@ -45,3 +49,52 @@ test('handles ZWJ characters', t => {
4549
t.is(stringWidth('👩‍👩‍👦‍👦'), 2);
4650
t.is(stringWidth('👨‍❤️‍💋‍👨'), 2);
4751
});
52+
53+
test('handles zero-width characters', t => {
54+
t.is(stringWidth('\u200B'), 0);
55+
t.is(stringWidth('x\u200Bx'), 2);
56+
t.is(stringWidth('\u200C'), 0);
57+
t.is(stringWidth('x\u200Cx'), 2);
58+
t.is(stringWidth('\u200D'), 0);
59+
t.is(stringWidth('x\u200Dx'), 2);
60+
t.is(stringWidth('\uFEFF'), 0);
61+
t.is(stringWidth('x\uFEFFx'), 2);
62+
});
63+
64+
test('handles surrogate pairs', t => {
65+
t.is(stringWidth('\uD83D\uDE00'), 2); // 😀
66+
t.is(stringWidth('A\uD83D\uDE00B'), 4);
67+
});
68+
69+
test('handles variation selectors', t => {
70+
t.is(stringWidth('\u{1F1E6}\uFE0F'), 1); // Regional indicator symbol A with variation selector
71+
t.is(stringWidth('A\uFE0F'), 1);
72+
t.is(stringWidth('\uFE0F'), 0);
73+
});
74+
75+
test('handles edge cases', t => {
76+
t.is(stringWidth(''), 0);
77+
t.is(stringWidth('\u200B\u200B'), 0);
78+
t.is(stringWidth('x\u200Bx\u200B'), 2);
79+
t.is(stringWidth('x\u0300x\u0300'), 2);
80+
t.is(stringWidth('\uD83D\uDE00\uFE0F'), 2); // 😀 with variation selector
81+
t.is(stringWidth('\uD83D\uDC69\u200D\uD83C\uDF93'), 2); // 👩‍🎓
82+
t.is(stringWidth('x\u1AB0x\u1AB0'), 2); // Combining diacritical marks extended
83+
t.is(stringWidth('x\u1DC0x\u1DC0'), 2); // Combining diacritical marks supplement
84+
t.is(stringWidth('x\u20D0x\u20D0'), 2); // Combining diacritical marks for symbols
85+
t.is(stringWidth('x\uFE20x\uFE20'), 2); // Combining half marks
86+
});
87+
88+
test('ignores default ignorable code points', t => {
89+
t.is(stringWidth('\u2060'), 0); // Word joiner
90+
t.is(stringWidth('\u2061'), 0); // Function application
91+
t.is(stringWidth('\u2062'), 0); // Invisible times
92+
t.is(stringWidth('\u2063'), 0); // Invisible separator
93+
t.is(stringWidth('\u2064'), 0); // Invisible plus
94+
t.is(stringWidth('\uFEFF'), 0); // Zero-width no-break space
95+
t.is(stringWidth('x\u2060x'), 2);
96+
t.is(stringWidth('x\u2061x'), 2);
97+
t.is(stringWidth('x\u2062x'), 2);
98+
t.is(stringWidth('x\u2063x'), 2);
99+
t.is(stringWidth('x\u2064x'), 2);
100+
});

0 commit comments

Comments
 (0)