Skip to content

Commit ad63a94

Browse files
committed
perf: inline East Asian Width with packed arrays
Replace get-east-asian-width dependency with inline flat [start, end] range arrays and O(log n) binary search. Merged isFullWidth + isWide into one sorted array (123 ranges).
1 parent fe77250 commit ad63a94

File tree

1 file changed

+61
-6
lines changed

1 file changed

+61
-6
lines changed

index.js

Lines changed: 61 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import stripAnsi from 'strip-ansi';
2-
import {eastAsianWidth} from 'get-east-asian-width';
32

43
/**
54
Logic:
@@ -21,6 +20,61 @@ const leadingNonPrintingRegex = /^[\p{Default_Ignorable_Code_Point}\p{Control}\p
2120
// RGI emoji sequences
2221
const rgiEmojiRegex = /^\p{RGI_Emoji}$/v;
2322

23+
/**
 * Binary search on a sorted flat array of inclusive [start, end] pairs.
 *
 * `ranges[2 * k]` is the start and `ranges[2 * k + 1]` the end of pair k.
 * The pairs are expected to be in ascending order, as binary search requires.
 *
 * @param {number[]} ranges - Flat array of start/end pairs.
 * @param {number} x - Value (code point) to look up.
 * @returns {boolean} `true` if x falls within any range, otherwise `false`.
 */
function isInRange(ranges, x) {
	// `undefined` and `NaN` compare false against everything, so without this
	// guard they would skip both branches below and be reported as "in range".
	if (typeof x !== 'number' || Number.isNaN(x)) {
		return false;
	}

	let low = 0;
	// Number of pairs minus one; an empty array gives -1 and skips the loop.
	// eslint-disable-next-line no-bitwise
	let high = (ranges.length >>> 1) - 1;

	while (low <= high) {
		// eslint-disable-next-line no-bitwise
		const mid = (low + high) >>> 1;
		const i = mid * 2;

		if (x < ranges[i]) {
			// x lies before this pair's start: continue in the lower half
			high = mid - 1;
		} else if (x > ranges[i + 1]) {
			// x lies after this pair's end: continue in the upper half
			low = mid + 1;
		} else {
			// start <= x <= end
			return true;
		}
	}

	return false;
}
44+
45+
// East Asian Width: Wide (W) + Fullwidth (F) — sorted [start, end] pairs.
46+
// Generated from Unicode EastAsianWidth.txt. Includes CJK ideographs, Hangul, Katakana, emoji, fullwidth forms.
47+
// prettier-ignore
48+
// Layout: flat list of numbers read two at a time by isInRange — the element at an even index
// is a range start and the following element is that range's end (both bounds inclusive).
// The first start, 4352 (U+1100), is the threshold isDoubleWidth uses for its early exit.
const wideRanges = [4352, 4447, 8986, 8987, 9001, 9002, 9193, 9196, 9200, 9200, 9203, 9203, 9725, 9726, 9748, 9749, 9776, 9783, 9800, 9811, 9855, 9855, 9866, 9871, 9875, 9875, 9889, 9889, 9898, 9899, 9917, 9918, 9924, 9925, 9934, 9934, 9940, 9940, 9962, 9962, 9970, 9971, 9973, 9973, 9978, 9978, 9981, 9981, 9989, 9989, 9994, 9995, 10_024, 10_024, 10_060, 10_060, 10_062, 10_062, 10_067, 10_069, 10_071, 10_071, 10_133, 10_135, 10_160, 10_160, 10_175, 10_175, 11_035, 11_036, 11_088, 11_088, 11_093, 11_093, 11_904, 11_929, 11_931, 12_019, 12_032, 12_245, 12_272, 12_350, 12_353, 12_438, 12_441, 12_543, 12_549, 12_591, 12_593, 12_686, 12_688, 12_773, 12_783, 12_830, 12_832, 12_871, 12_880, 42_124, 42_128, 42_182, 43_360, 43_388, 44_032, 55_203, 63_744, 64_255, 65_040, 65_049, 65_072, 65_106, 65_108, 65_126, 65_128, 65_131, 65_281, 65_376, 65_504, 65_510, 94_176, 94_180, 94_192, 94_198, 94_208, 101_589, 101_631, 101_662, 101_760, 101_874, 110_576, 110_579, 110_581, 110_587, 110_589, 110_590, 110_592, 110_882, 110_898, 110_898, 110_928, 110_930, 110_933, 110_933, 110_948, 110_951, 110_960, 111_355, 119_552, 119_638, 119_648, 119_670, 126_980, 126_980, 127_183, 127_183, 127_374, 127_374, 127_377, 127_386, 127_488, 127_490, 127_504, 127_547, 127_552, 127_560, 127_568, 127_569, 127_584, 127_589, 127_744, 127_776, 127_789, 127_797, 127_799, 127_868, 127_870, 127_891, 127_904, 127_946, 127_951, 127_955, 127_968, 127_984, 127_988, 127_988, 127_992, 128_062, 128_064, 128_064, 128_066, 128_252, 128_255, 128_317, 128_331, 128_334, 128_336, 128_359, 128_378, 128_378, 128_405, 128_406, 128_420, 128_420, 128_507, 128_591, 128_640, 128_709, 128_716, 128_716, 128_720, 128_722, 128_725, 128_728, 128_732, 128_735, 128_747, 128_748, 128_756, 128_764, 128_992, 129_003, 129_008, 129_008, 129_292, 129_338, 129_340, 129_349, 129_351, 129_535, 129_648, 129_660, 129_664, 129_674, 129_678, 129_734, 129_736, 129_736, 129_741, 129_756, 129_759, 129_770, 129_775, 129_784, 131_072, 196_605, 196_608, 
262_141];
49+
50+
// East Asian Width: Ambiguous (A) — characters that are wide in CJK contexts but narrow in Western.
51+
// Only checked when ambiguousAsWide option is set.
52+
// prettier-ignore
53+
// Layout: flat list of numbers read two at a time by isInRange — the element at an even index
// is a range start and the following element is that range's end (both bounds inclusive).
// Looked up through isAmbiguous, which eastAsianWidth only calls when ambiguousAsWide is truthy.
const ambiguousRanges = [161, 161, 164, 164, 167, 168, 170, 170, 173, 174, 176, 180, 182, 186, 188, 191, 198, 198, 208, 208, 215, 216, 222, 225, 230, 230, 232, 234, 236, 237, 240, 240, 242, 243, 247, 250, 252, 252, 254, 254, 257, 257, 273, 273, 275, 275, 283, 283, 294, 295, 299, 299, 305, 307, 312, 312, 319, 322, 324, 324, 328, 331, 333, 333, 338, 339, 358, 359, 363, 363, 462, 462, 464, 464, 466, 466, 468, 468, 470, 470, 472, 472, 474, 474, 476, 476, 593, 593, 609, 609, 708, 708, 711, 711, 713, 715, 717, 717, 720, 720, 728, 731, 733, 733, 735, 735, 768, 879, 913, 929, 931, 937, 945, 961, 963, 969, 1025, 1025, 1040, 1103, 1105, 1105, 8208, 8208, 8211, 8214, 8216, 8217, 8220, 8221, 8224, 8226, 8228, 8231, 8240, 8240, 8242, 8243, 8245, 8245, 8251, 8251, 8254, 8254, 8308, 8308, 8319, 8319, 8321, 8324, 8364, 8364, 8451, 8451, 8453, 8453, 8457, 8457, 8467, 8467, 8470, 8470, 8481, 8482, 8486, 8486, 8491, 8491, 8531, 8532, 8539, 8542, 8544, 8555, 8560, 8569, 8585, 8585, 8592, 8601, 8632, 8633, 8658, 8658, 8660, 8660, 8679, 8679, 8704, 8704, 8706, 8707, 8711, 8712, 8715, 8715, 8719, 8719, 8721, 8721, 8725, 8725, 8730, 8730, 8733, 8736, 8739, 8739, 8741, 8741, 8743, 8748, 8750, 8750, 8756, 8759, 8764, 8765, 8776, 8776, 8780, 8780, 8786, 8786, 8800, 8801, 8804, 8807, 8810, 8811, 8814, 8815, 8834, 8835, 8838, 8839, 8853, 8853, 8857, 8857, 8869, 8869, 8895, 8895, 8978, 8978, 9312, 9449, 9451, 9547, 9552, 9587, 9600, 9615, 9618, 9621, 9632, 9633, 9635, 9641, 9650, 9651, 9654, 9655, 9660, 9661, 9664, 9665, 9670, 9672, 9675, 9675, 9678, 9681, 9698, 9701, 9711, 9711, 9733, 9734, 9737, 9737, 9742, 9743, 9756, 9756, 9758, 9758, 9792, 9792, 9794, 9794, 9824, 9825, 9827, 9829, 9831, 9834, 9836, 9837, 9839, 9839, 9886, 9887, 9919, 9919, 9926, 9933, 9935, 9939, 9941, 9953, 9955, 9955, 9960, 9961, 9963, 9969, 9972, 9972, 9974, 9977, 9979, 9980, 9982, 9983, 10_045, 10_045, 10_102, 10_111, 11_094, 11_097, 12_872, 12_879, 57_344, 63_743, 65_024, 65_039, 65_533, 65_533, 127_232, 127_242, 
127_248, 127_277, 127_280, 127_337, 127_344, 127_373, 127_375, 127_376, 127_387, 127_404, 917_760, 917_999, 983_040, 1_048_573, 1_048_576, 1_114_109];
54+
55+
/**
 * Tells whether a code point is covered by the `wideRanges` table
 * (East Asian Width Wide or Fullwidth).
 *
 * @param {number} x - Code point to classify.
 * @returns {boolean} Whether x falls inside one of the `wideRanges` pairs.
 */
function isDoubleWidth(x) {
	// 0x1100 (4352) is the first start value in wideRanges, so any x that is
	// not at or above it cannot match and the binary search is skipped.
	const reachesFirstWideStart = x >= 0x1100;
	return reachesFirstWideStart ? isInRange(wideRanges, x) : false;
}
59+
60+
/**
 * Tells whether a code point is covered by the `ambiguousRanges` table
 * (East Asian Width Ambiguous).
 *
 * @param {number} x - Code point to classify.
 * @returns {boolean} The result of looking x up in `ambiguousRanges`.
 */
function isAmbiguous(x) {
	const listedAsAmbiguous = isInRange(ambiguousRanges, x);
	return listedAsAmbiguous;
}
63+
64+
/**
 * Returns the display width of a code point in columns.
 *
 * Zero-width characters are handled separately by the regex, so this only
 * ever answers 1 or 2.
 *
 * @param {number} codePoint - Code point to measure.
 * @param {boolean} ambiguousAsWide - When truthy, code points in the ambiguous table also count as 2.
 * @returns {1|2} 2 for wide/fullwidth (and ambiguous when enabled), otherwise 1.
 */
function eastAsianWidth(codePoint, ambiguousAsWide) {
	// The wide/fullwidth table is consulted first; the ambiguous table is only
	// searched when that lookup fails and the caller opted in.
	const occupiesTwoColumns = isDoubleWidth(codePoint) || (ambiguousAsWide && isAmbiguous(codePoint));
	return occupiesTwoColumns ? 2 : 1;
}
77+
2478
/**
 * Strips from `segment` whatever `leadingNonPrintingRegex` matches (the
 * pattern is anchored to the start of the string) and returns the remainder.
 *
 * @param {string} segment - Segment to trim.
 * @returns {string} The segment with the matched leading portion removed.
 */
function baseVisible(segment) {
	const withoutLeadingNonPrinting = segment.replace(leadingNonPrintingRegex, '');
	return withoutLeadingNonPrinting;
}
@@ -29,12 +83,12 @@ function isZeroWidthCluster(segment) {
2983
return zeroWidthClusterRegex.test(segment);
3084
}
3185

32-
function trailingHalfwidthWidth(segment, eastAsianWidthOptions) {
86+
function trailingHalfwidthWidth(segment, ambiguousAsWide) {
3387
let extra = 0;
3488
if (segment.length > 1) {
3589
for (const char of segment.slice(1)) {
3690
if (char >= '\uFF00' && char <= '\uFFEF') {
37-
extra += eastAsianWidth(char.codePointAt(0), eastAsianWidthOptions);
91+
extra += eastAsianWidth(char.codePointAt(0), ambiguousAsWide);
3892
}
3993
}
4094
}
@@ -78,8 +132,9 @@ export default function stringWidth(input, options = {}) {
78132
return string.length;
79133
}
80134

135+
const ambiguousAsWide = !ambiguousIsNarrow;
136+
81137
let width = 0;
82-
const eastAsianWidthOptions = {ambiguousAsWide: !ambiguousIsNarrow};
83138

84139
for (const {segment} of segmenter.segment(string)) {
85140
// Zero-width / non-printing clusters
@@ -95,10 +150,10 @@ export default function stringWidth(input, options = {}) {
95150

96151
// Everything else: EAW of the cluster's first visible scalar
97152
const codePoint = baseVisible(segment).codePointAt(0);
98-
width += eastAsianWidth(codePoint, eastAsianWidthOptions);
153+
width += eastAsianWidth(codePoint, ambiguousAsWide);
99154

100155
// Add width for trailing Halfwidth and Fullwidth Forms (e.g., ゙, ゚, ー)
101-
width += trailingHalfwidthWidth(segment, eastAsianWidthOptions);
156+
width += trailingHalfwidthWidth(segment, ambiguousAsWide);
102157
}
103158

104159
return width;

0 commit comments

Comments
 (0)