|
1 | | -import stripAnsi from 'strip-ansi'; |
2 | | - |
3 | 1 | /** |
4 | 2 | Logic: |
5 | 3 | - Segment graphemes to match how terminals render clusters. |
|
9 | 7 | 3. Otherwise use East Asian Width of the cluster's first visible code point, and add widths for trailing Halfwidth/Fullwidth Forms within the same cluster (e.g., dakuten/handakuten/prolonged sound mark). |
10 | 8 | */ |
11 | 9 |
|
| 10 | +import stripAnsi from 'strip-ansi'; |
| 11 | + |
12 | 12 | const segmenter = new Intl.Segmenter(); |
13 | 13 |
|
14 | 14 | // Whole-cluster zero-width |
@@ -96,6 +96,7 @@ function trailingHalfwidthWidth(segment, ambiguousAsWide) { |
96 | 96 | return extra; |
97 | 97 | } |
98 | 98 |
|
| 99 | +// eslint-disable-next-line complexity |
99 | 100 | export default function stringWidth(input, options = {}) { |
100 | 101 | if (typeof input !== 'string' || input.length === 0) { |
101 | 102 | return 0; |
@@ -134,9 +135,89 @@ export default function stringWidth(input, options = {}) { |
134 | 135 |
|
135 | 136 | const ambiguousAsWide = !ambiguousIsNarrow; |
136 | 137 |
|
| 138 | + // Try per-codepoint iteration first — avoids Intl.Segmenter overhead (~2–4µs per call). |
| 139 | + // Bail to segmenter only when we encounter characters that form multi-codepoint grapheme clusters |
| 140 | + // (emoji ZWJ sequences, flags, skin tones, keycaps, tag sequences). |
137 | 141 | let width = 0; |
| 142 | + let useSegmenter = false; |
| 143 | + |
| 144 | + for (const character of string) { |
| 145 | + const codePoint = character.codePointAt(0); |
| 146 | + |
| 147 | + // These characters join with adjacent codepoints into multi-codepoint grapheme clusters, |
| 148 | + // changing the combined width. Fall back to Intl.Segmenter for correctness. |
| 149 | + if ( |
| 150 | + codePoint === 0x20_0D // ZWJ — joins emoji sequences (e.g., 👩👩👧👦) |
| 151 | + || codePoint === 0xFE_0F // VS16 — emoji presentation (e.g., ❤️ vs ❤) |
| 152 | + || codePoint === 0x20_E3 // Combining Enclosing Keycap (e.g., 1️⃣) |
| 153 | + || (codePoint >= 0x1_F1_E6 && codePoint <= 0x1_F1_FF) // Regional Indicators (flags, e.g., 🇺🇸) |
| 154 | + || (codePoint >= 0x1_F3_FB && codePoint <= 0x1_F3_FF) // Skin Tone Modifiers |
| 155 | + || (codePoint >= 0xE_00_20 && codePoint <= 0xE_00_7F) // Tag characters (subdivision flags) |
| 156 | + ) { |
| 157 | + useSegmenter = true; |
| 158 | + break; |
| 159 | + } |
| 160 | + |
| 161 | + // Wide/fullwidth characters (CJK, compatibility forms) are always visible — skip zero-width regex |
| 162 | + if (isDoubleWidth(codePoint)) { |
| 163 | + width += 2; |
| 164 | + continue; |
| 165 | + } |
| 166 | + |
| 167 | + if (ambiguousAsWide && isAmbiguous(codePoint)) { |
| 168 | + width += 2; |
| 169 | + continue; |
| 170 | + } |
| 171 | + |
| 172 | + // Printable ASCII (0x20–0x7E) is always width 1 and never zero-width |
| 173 | + if (codePoint >= 0x20 && codePoint < 0x7F) { |
| 174 | + width += 1; |
| 175 | + continue; |
| 176 | + } |
| 177 | + |
| 178 | + // Latin1 through Spacing Modifier Letters (0xA0–0x2FF, except soft hyphen 0xAD) |
| 179 | + // are all visible width-1 when ambiguous-as-narrow (default) |
| 180 | + if (!ambiguousAsWide && codePoint >= 0xA0 && codePoint < 0x3_00 && codePoint !== 0xAD) { |
| 181 | + width += 1; |
| 182 | + continue; |
| 183 | + } |
| 184 | + |
| 185 | + // Remaining: check if zero-width (Control, Format, Mark, Default_Ignorable) |
| 186 | + if (isZeroWidthCluster(character)) { |
| 187 | + continue; |
| 188 | + } |
| 189 | + |
| 190 | + width += (ambiguousAsWide && isAmbiguous(codePoint)) ? 2 : 1; |
| 191 | + } |
| 192 | + |
| 193 | + if (!useSegmenter) { |
| 194 | + return width; |
| 195 | + } |
| 196 | + |
| 197 | + // Slow path: use Intl.Segmenter for strings with multi-codepoint grapheme clusters |
| 198 | + width = 0; |
138 | 199 |
|
139 | 200 | for (const {segment} of segmenter.segment(string)) { |
| 201 | + // Single BMP codepoint — skip regex tests for known-width characters |
| 202 | + if (segment.length === 1) { |
| 203 | + const codePoint = segment.codePointAt(0); |
| 204 | + |
| 205 | + if (isDoubleWidth(codePoint)) { |
| 206 | + width += 2; |
| 207 | + continue; |
| 208 | + } |
| 209 | + |
| 210 | + if (codePoint >= 0x20 && codePoint < 0x7F) { |
| 211 | + width += 1; |
| 212 | + continue; |
| 213 | + } |
| 214 | + |
| 215 | + if (!ambiguousAsWide && codePoint >= 0xA0 && codePoint < 0x3_00 && codePoint !== 0xAD) { |
| 216 | + width += 1; |
| 217 | + continue; |
| 218 | + } |
| 219 | + } |
| 220 | + |
140 | 221 | // Zero-width / non-printing clusters |
141 | 222 | if (isZeroWidthCluster(segment)) { |
142 | 223 | continue; |
|
0 commit comments