Skip to content

Commit 7d0108f

Browse files
committed
perf: use codepoint iteration for non-emoji text
Detect grapheme-clustering characters (ZWJ, VS16, regional indicators, skin tones, keycaps, tags) in a pre-scan. When none are found, use for...of code point iteration instead of Intl.Segmenter. Skip the zero-width regex test for code points known to be visible (wide, ASCII, Latin-1).
1 parent ad63a94 commit 7d0108f

File tree

1 file changed

+83
-2
lines changed

1 file changed

+83
-2
lines changed

index.js

Lines changed: 83 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import stripAnsi from 'strip-ansi';
2-
31
/**
42
Logic:
53
- Segment graphemes to match how terminals render clusters.
@@ -9,6 +7,8 @@ Logic:
97
3. Otherwise use East Asian Width of the cluster's first visible code point, and add widths for trailing Halfwidth/Fullwidth Forms within the same cluster (e.g., dakuten/handakuten/prolonged sound mark).
108
*/
119

10+
import stripAnsi from 'strip-ansi';
11+
1212
const segmenter = new Intl.Segmenter();
1313

1414
// Whole-cluster zero-width
@@ -96,6 +96,7 @@ function trailingHalfwidthWidth(segment, ambiguousAsWide) {
9696
return extra;
9797
}
9898

99+
// eslint-disable-next-line complexity
99100
export default function stringWidth(input, options = {}) {
100101
if (typeof input !== 'string' || input.length === 0) {
101102
return 0;
@@ -134,9 +135,89 @@ export default function stringWidth(input, options = {}) {
134135

135136
const ambiguousAsWide = !ambiguousIsNarrow;
136137

138+
// Try per-codepoint iteration first — avoids Intl.Segmenter overhead (~2–4µs per call).
139+
// Bail to segmenter only when we encounter characters that form multi-codepoint grapheme clusters
140+
// (emoji ZWJ sequences, flags, skin tones, keycaps, tag sequences).
137141
let width = 0;
142+
let useSegmenter = false;
143+
144+
for (const character of string) {
145+
const codePoint = character.codePointAt(0);
146+
147+
// These characters join with adjacent codepoints into multi-codepoint grapheme clusters,
148+
// changing the combined width. Fall back to Intl.Segmenter for correctness.
149+
if (
150+
codePoint === 0x20_0D // ZWJ — joins emoji sequences (e.g., 👩‍👩‍👧‍👦)
151+
|| codePoint === 0xFE_0F // VS16 — emoji presentation (e.g., ❤️ vs ❤)
152+
|| codePoint === 0x20_E3 // Combining Enclosing Keycap (e.g., 1️⃣)
153+
|| (codePoint >= 0x1_F1_E6 && codePoint <= 0x1_F1_FF) // Regional Indicators (flags, e.g., 🇺🇸)
154+
|| (codePoint >= 0x1_F3_FB && codePoint <= 0x1_F3_FF) // Skin Tone Modifiers
155+
|| (codePoint >= 0xE_00_20 && codePoint <= 0xE_00_7F) // Tag characters (subdivision flags)
156+
) {
157+
useSegmenter = true;
158+
break;
159+
}
160+
161+
// Wide/fullwidth characters (CJK, compatibility forms) are always visible — skip zero-width regex
162+
if (isDoubleWidth(codePoint)) {
163+
width += 2;
164+
continue;
165+
}
166+
167+
if (ambiguousAsWide && isAmbiguous(codePoint)) {
168+
width += 2;
169+
continue;
170+
}
171+
172+
// Printable ASCII (0x20–0x7E) is always width 1 and never zero-width
173+
if (codePoint >= 0x20 && codePoint < 0x7F) {
174+
width += 1;
175+
continue;
176+
}
177+
178+
// Latin1 through Spacing Modifier Letters (0xA0–0x2FF, except soft hyphen 0xAD)
179+
// are all visible width-1 when ambiguous-as-narrow (default)
180+
if (!ambiguousAsWide && codePoint >= 0xA0 && codePoint < 0x3_00 && codePoint !== 0xAD) {
181+
width += 1;
182+
continue;
183+
}
184+
185+
// Remaining: check if zero-width (Control, Format, Mark, Default_Ignorable)
186+
if (isZeroWidthCluster(character)) {
187+
continue;
188+
}
189+
190+
width += (ambiguousAsWide && isAmbiguous(codePoint)) ? 2 : 1;
191+
}
192+
193+
if (!useSegmenter) {
194+
return width;
195+
}
196+
197+
// Slow path: use Intl.Segmenter for strings with multi-codepoint grapheme clusters
198+
width = 0;
138199

139200
for (const {segment} of segmenter.segment(string)) {
201+
// Single BMP codepoint — skip regex tests for known-width characters
202+
if (segment.length === 1) {
203+
const codePoint = segment.codePointAt(0);
204+
205+
if (isDoubleWidth(codePoint)) {
206+
width += 2;
207+
continue;
208+
}
209+
210+
if (codePoint >= 0x20 && codePoint < 0x7F) {
211+
width += 1;
212+
continue;
213+
}
214+
215+
if (!ambiguousAsWide && codePoint >= 0xA0 && codePoint < 0x3_00 && codePoint !== 0xAD) {
216+
width += 1;
217+
continue;
218+
}
219+
}
220+
140221
// Zero-width / non-printing clusters
141222
if (isZeroWidthCluster(segment)) {
142223
continue;

0 commit comments

Comments
 (0)