Skip to content

Commit 9a00965

Browse files
byroot and mame committed
Use a table to check for escaping needs
This performs noticeably better than the boolean logic.

Before:

```
== Encoding twitter.json (466906 bytes)
ruby 3.4.0preview2 (2024-10-07 master 32c733f57b) +YJIT +PRISM [arm64-darwin23]
Warming up --------------------------------------
                json   189.000 i/100ms
                  oj   228.000 i/100ms
           rapidjson   108.000 i/100ms
Calculating -------------------------------------
                json      1.903k (± 1.2%) i/s  (525.55 μs/i) -      9.639k in   5.066521s
                  oj      2.306k (± 1.3%) i/s  (433.71 μs/i) -     11.628k in   5.044096s
           rapidjson      1.069k (± 2.4%) i/s  (935.38 μs/i) -      5.400k in   5.053794s

Comparison:
                json:     1902.8 i/s
                  oj:     2305.7 i/s - 1.21x  faster
           rapidjson:     1069.1 i/s - 1.78x  slower
```

After:

```
== Encoding twitter.json (466906 bytes)
ruby 3.4.0preview2 (2024-10-07 master 32c733f57b) +YJIT +PRISM [arm64-darwin23]
Warming up --------------------------------------
                json   224.000 i/100ms
                  oj   230.000 i/100ms
           rapidjson   107.000 i/100ms
Calculating -------------------------------------
                json      2.254k (± 1.6%) i/s  (443.69 μs/i) -     11.424k in   5.069999s
                  oj      2.318k (± 1.4%) i/s  (431.32 μs/i) -     11.730k in   5.060421s
           rapidjson      1.081k (± 1.9%) i/s  (925.05 μs/i) -      5.457k in   5.049738s

Comparison:
                json:     2253.8 i/s
                  oj:     2318.5 i/s - same-ish: difference falls within error
           rapidjson:     1081.0 i/s - 2.08x  slower
```

The escape table is taken directly from Mame's PR.

Co-Authored-By: Yusuke Endoh <mame@ruby-lang.org>
1 parent 2aefa41 commit 9a00965

File tree

1 file changed

+25
-12
lines changed

1 file changed

+25
-12
lines changed

ext/json/ext/generator/generator.c

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,29 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE in_string, bool out_
117117
RB_GC_GUARD(in_string);
118118
}
119119

120-
static void convert_ASCII_to_JSON(FBuffer *out_buffer, VALUE str, bool out_script_safe)
120+
/*
 * Escape lookup table for the default (non-script-safe) mode, indexed by
 * byte value: a non-zero entry means the byte must be escaped in JSON output.
 * Each row covers 32 consecutive byte values.
 *
 * Flagged entries: 0x00-0x1F, '"' (0x22), '\\' (0x5C) and 0x80-0xFF.
 * The forward slash (0x2F) is NOT flagged in this table; only
 * script_safe_escape_table flags it.
 *
 * NOTE(review): the boolean check this table replaces did not flag bytes
 * >= 0x80. The only caller visible here passes ENC_CODERANGE_7BIT strings,
 * so those entries are presumably never consulted -- confirm.
 */
static const bool escape_table[256] = {
121+
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x00-0x1F: all flagged */
122+
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x20-0x3F: '"' only (0x2F is left unescaped here) */
123+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, /* 0x40-0x5F: '\\' */
124+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x60-0x7F: none flagged */
125+
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x80-0x9F: all flagged */
126+
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xA0-0xBF: all flagged */
127+
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xC0-0xDF: all flagged */
128+
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 /* 0xE0-0xFF: all flagged */
129+
};
130+
131+
/*
 * Escape lookup table used when state->script_safe is set, indexed by byte
 * value: a non-zero entry means the byte must be escaped in JSON output.
 * Each row covers 32 consecutive byte values.
 *
 * Identical to escape_table except that the forward slash (0x2F) is also
 * flagged. Flagged entries: 0x00-0x1F, '"' (0x22), 0x2F, '\\' (0x5C) and
 * 0x80-0xFF.
 */
static const bool script_safe_escape_table[256] = {
132+
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x00-0x1F: all flagged */
133+
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x20-0x3F: '"' and '/' */
134+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, /* 0x40-0x5F: '\\' */
135+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x60-0x7F: none flagged */
136+
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0x80-0x9F: all flagged */
137+
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xA0-0xBF: all flagged */
138+
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xC0-0xDF: all flagged */
139+
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 /* 0xE0-0xFF: all flagged */
140+
};
141+
142+
static void convert_ASCII_to_JSON(FBuffer *out_buffer, VALUE str, const bool escape_table[256])
121143
{
122144
const char *hexdig = "0123456789abcdef";
123145
char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
@@ -129,17 +151,8 @@ static void convert_ASCII_to_JSON(FBuffer *out_buffer, VALUE str, bool out_scrip
129151

130152
for (pos = 0; pos < len;) {
131153
unsigned char ch = ptr[pos];
132-
bool should_escape;
133-
134-
/* JSON policy */
135-
should_escape =
136-
(ch < 0x20) ||
137-
(ch == '"') ||
138-
(ch == '\\') ||
139-
(out_script_safe && (ch == '/'));
140-
141154
/* JSON encoding */
142-
if (should_escape) {
155+
if (escape_table[ch]) {
143156
if (pos > beg) {
144157
fbuffer_append(out_buffer, &ptr[beg], pos - beg);
145158
}
@@ -717,7 +730,7 @@ static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_S
717730

718731
switch(rb_enc_str_coderange(obj)) {
719732
case ENC_CODERANGE_7BIT:
720-
convert_ASCII_to_JSON(buffer, obj, state->script_safe);
733+
convert_ASCII_to_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table);
721734
break;
722735
case ENC_CODERANGE_VALID:
723736
if (RB_UNLIKELY(state->ascii_only)) {

0 commit comments

Comments
 (0)