@@ -25,14 +25,13 @@ static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend;
25
25
* Everything else (should be UTF-8) is just passed through and
26
26
* appended to the result.
27
27
*/
28
- static void convert_UTF8_to_JSON (FBuffer * out_buffer , VALUE in_string , bool out_ascii_only , bool out_script_safe )
28
+ static void convert_UTF8_to_JSON (FBuffer * out_buffer , VALUE in_string , bool out_script_safe )
29
29
{
30
30
const char * hexdig = "0123456789abcdef" ;
31
31
char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
32
32
33
33
const char * in_utf8_str = RSTRING_PTR (in_string );
34
34
unsigned long in_utf8_len = RSTRING_LEN (in_string );
35
- bool in_is_ascii_only = rb_enc_str_asciionly_p (in_string );
36
35
37
36
unsigned long beg = 0 , pos ;
38
37
@@ -42,46 +41,196 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE in_string, bool out_
42
41
bool should_escape ;
43
42
44
43
/* UTF-8 decoding */
45
- if (in_is_ascii_only ) {
46
- ch = in_utf8_str [pos ];
47
- ch_len = 1 ;
48
- } else {
49
- short i ;
50
- if ((in_utf8_str [pos ] & 0x80 ) == 0x00 ) { ch_len = 1 ; ch = in_utf8_str [pos ]; } /* leading 1 bit is 0b0 */
51
- else if ((in_utf8_str [pos ] & 0xE0 ) == 0xC0 ) { ch_len = 2 ; ch = in_utf8_str [pos ] & 0x1F ; } /* leading 3 bits are 0b110 */
52
- else if ((in_utf8_str [pos ] & 0xF0 ) == 0xE0 ) { ch_len = 3 ; ch = in_utf8_str [pos ] & 0x0F ; } /* leading 4 bits are 0b1110 */
53
- else if ((in_utf8_str [pos ] & 0xF8 ) == 0xF0 ) { ch_len = 4 ; ch = in_utf8_str [pos ] & 0x07 ; } /* leading 5 bits are 0b11110 */
54
- else
55
- rb_raise (rb_path2class ("JSON::GeneratorError" ),
56
- "source sequence is illegal/malformed utf-8" );
57
- if ((pos + ch_len ) > in_utf8_len )
58
- rb_raise (rb_path2class ("JSON::GeneratorError" ),
59
- "partial character in source, but hit end" );
60
- for (i = 1 ; i < ch_len ; i ++ ) {
61
- if ((in_utf8_str [pos + i ] & 0xC0 ) != 0x80 ) /* leading 2 bits should be 0b10 */
62
- rb_raise (rb_path2class ("JSON::GeneratorError" ),
63
- "source sequence is illegal/malformed utf-8" );
64
- ch = (ch <<6 ) | (in_utf8_str [pos + i ] & 0x3F );
44
+ short i ;
45
+ if ((in_utf8_str [pos ] & 0x80 ) == 0x00 ) { ch_len = 1 ; ch = in_utf8_str [pos ]; } /* leading 1 bit is 0b0 */
46
+ else if ((in_utf8_str [pos ] & 0xE0 ) == 0xC0 ) { ch_len = 2 ; ch = in_utf8_str [pos ] & 0x1F ; } /* leading 3 bits are 0b110 */
47
+ else if ((in_utf8_str [pos ] & 0xF0 ) == 0xE0 ) { ch_len = 3 ; ch = in_utf8_str [pos ] & 0x0F ; } /* leading 4 bits are 0b1110 */
48
+ else if ((in_utf8_str [pos ] & 0xF8 ) == 0xF0 ) { ch_len = 4 ; ch = in_utf8_str [pos ] & 0x07 ; } /* leading 5 bits are 0b11110 */
49
+ else {
50
+ rb_raise (rb_path2class ("JSON::GeneratorError" ), "source sequence is illegal/malformed utf-8" );
51
+ }
52
+
53
+ for (i = 1 ; i < ch_len ; i ++ ) {
54
+ ch = (ch <<6 ) | (in_utf8_str [pos + i ] & 0x3F );
55
+ }
56
+
57
+ /* JSON policy */
58
+ should_escape =
59
+ (ch < 0x20 ) ||
60
+ (ch == '"' ) ||
61
+ (ch == '\\' ) ||
62
+ (out_script_safe && (ch == '/' )) ||
63
+ (out_script_safe && (ch == 0x2028 )) ||
64
+ (out_script_safe && (ch == 0x2029 ));
65
+
66
+ /* JSON encoding */
67
+ if (should_escape ) {
68
+ if (pos > beg ) {
69
+ fbuffer_append (out_buffer , & in_utf8_str [beg ], pos - beg );
70
+ }
71
+
72
+ beg = pos + ch_len ;
73
+ switch (ch ) {
74
+ case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
75
+ case '\\' : fbuffer_append (out_buffer , "\\\\" , 2 ); break ;
76
+ case '/' : fbuffer_append (out_buffer , "\\/" , 2 ); break ;
77
+ case '\b' : fbuffer_append (out_buffer , "\\b" , 2 ); break ;
78
+ case '\f' : fbuffer_append (out_buffer , "\\f" , 2 ); break ;
79
+ case '\n' : fbuffer_append (out_buffer , "\\n" , 2 ); break ;
80
+ case '\r' : fbuffer_append (out_buffer , "\\r" , 2 ); break ;
81
+ case '\t' : fbuffer_append (out_buffer , "\\t" , 2 ); break ;
82
+ default :
83
+ if (ch <= 0xFFFF ) {
84
+ scratch [2 ] = hexdig [ch >> 12 ];
85
+ scratch [3 ] = hexdig [(ch >> 8 ) & 0xf ];
86
+ scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
87
+ scratch [5 ] = hexdig [ch & 0xf ];
88
+ fbuffer_append (out_buffer , scratch , 6 );
89
+ } else {
90
+ uint16_t hi , lo ;
91
+ ch -= 0x10000 ;
92
+ hi = 0xD800 + (uint16_t )(ch >> 10 );
93
+ lo = 0xDC00 + (uint16_t )(ch & 0x3FF );
94
+
95
+ scratch [2 ] = hexdig [hi >> 12 ];
96
+ scratch [3 ] = hexdig [(hi >> 8 ) & 0xf ];
97
+ scratch [4 ] = hexdig [(hi >> 4 ) & 0xf ];
98
+ scratch [5 ] = hexdig [hi & 0xf ];
99
+
100
+ scratch [8 ] = hexdig [lo >> 12 ];
101
+ scratch [9 ] = hexdig [(lo >> 8 ) & 0xf ];
102
+ scratch [10 ] = hexdig [(lo >> 4 ) & 0xf ];
103
+ scratch [11 ] = hexdig [lo & 0xf ];
104
+
105
+ fbuffer_append (out_buffer , scratch , 12 );
106
+ }
107
+ }
108
+ }
109
+
110
+ pos += ch_len ;
111
+ }
112
+
113
+ if (beg < in_utf8_len ) {
114
+ fbuffer_append (out_buffer , & in_utf8_str [beg ], in_utf8_len - beg );
115
+ }
116
+
117
+ RB_GC_GUARD (in_string );
118
+ }
119
+
120
/*
 * Per-byte lookup table: a non-zero entry means the byte has to be written
 * as a JSON escape sequence instead of being copied through verbatim.
 *
 * Flagged entries: the control bytes 0x00-0x1F, the double quote (0x22),
 * the backslash (0x5C) and every byte with the high bit set (0x80-0xFF).
 * All remaining bytes, including 0x7F, are left unflagged.
 */
static const bool escape_table[256] = {
    /* 0x00-0x0F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10-0x1F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20-0x2F */ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x22 is the double quote */
    /* 0x30-0x3F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x40-0x4F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50-0x5F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, /* 0x5C is the backslash */
    /* 0x60-0x6F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70-0x7F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x80-0x8F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x90-0x9F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xA0-0xAF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xB0-0xBF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xC0-0xCF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xD0-0xDF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xE0-0xEF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xF0-0xFF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
130
+
131
/*
 * Per-byte lookup table for script-safe output: a non-zero entry means the
 * byte has to be written as a JSON escape sequence.
 *
 * Flagged entries: the control bytes 0x00-0x1F, the double quote (0x22),
 * the forward slash (0x2F), the backslash (0x5C) and every byte with the
 * high bit set (0x80-0xFF). All remaining bytes, including 0x7F, are left
 * unflagged.
 */
static const bool script_safe_escape_table[256] = {
    /* 0x00-0x0F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10-0x1F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20-0x2F */ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x22 double quote, 0x2F slash */
    /* 0x30-0x3F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x40-0x4F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50-0x5F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, /* 0x5C is the backslash */
    /* 0x60-0x6F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70-0x7F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x80-0x8F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x90-0x9F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xA0-0xAF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xB0-0xBF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xC0-0xCF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xD0-0xDF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xE0-0xEF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xF0-0xFF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
141
+
142
+ static void convert_ASCII_to_JSON (FBuffer * out_buffer , VALUE str , const bool escape_table [256 ])
143
+ {
144
+ const char * hexdig = "0123456789abcdef" ;
145
+ char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
146
+
147
+ const char * ptr = RSTRING_PTR (str );
148
+ unsigned long len = RSTRING_LEN (str );
149
+
150
+ unsigned long beg = 0 , pos ;
151
+
152
+ for (pos = 0 ; pos < len ;) {
153
+ unsigned char ch = ptr [pos ];
154
+ /* JSON encoding */
155
+ if (escape_table [ch ]) {
156
+ if (pos > beg ) {
157
+ fbuffer_append (out_buffer , & ptr [beg ], pos - beg );
158
+ }
159
+
160
+ beg = pos + 1 ;
161
+ switch (ch ) {
162
+ case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
163
+ case '\\' : fbuffer_append (out_buffer , "\\\\" , 2 ); break ;
164
+ case '/' : fbuffer_append (out_buffer , "\\/" , 2 ); break ;
165
+ case '\b' : fbuffer_append (out_buffer , "\\b" , 2 ); break ;
166
+ case '\f' : fbuffer_append (out_buffer , "\\f" , 2 ); break ;
167
+ case '\n' : fbuffer_append (out_buffer , "\\n" , 2 ); break ;
168
+ case '\r' : fbuffer_append (out_buffer , "\\r" , 2 ); break ;
169
+ case '\t' : fbuffer_append (out_buffer , "\\t" , 2 ); break ;
170
+ default :
171
+ scratch [2 ] = hexdig [ch >> 12 ];
172
+ scratch [3 ] = hexdig [(ch >> 8 ) & 0xf ];
173
+ scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
174
+ scratch [5 ] = hexdig [ch & 0xf ];
175
+ fbuffer_append (out_buffer , scratch , 6 );
65
176
}
66
- if (ch > 0x10FFFF )
67
- rb_raise (rb_path2class ("JSON::GeneratorError" ),
68
- "source sequence is illegal/malformed utf-8" );
177
+ }
178
+
179
+ pos ++ ;
180
+ }
181
+
182
+ if (beg < len ) {
183
+ fbuffer_append (out_buffer , & ptr [beg ], len - beg );
184
+ }
185
+
186
+ RB_GC_GUARD (str );
187
+ }
188
+
189
+ static void convert_UTF8_to_ASCII_only_JSON (FBuffer * out_buffer , VALUE in_string , bool out_script_safe )
190
+ {
191
+ const char * hexdig = "0123456789abcdef" ;
192
+ char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
193
+
194
+ const char * in_utf8_str = RSTRING_PTR (in_string );
195
+ unsigned long in_utf8_len = RSTRING_LEN (in_string );
196
+
197
+ unsigned long beg = 0 , pos ;
198
+
199
+ for (pos = 0 ; pos < in_utf8_len ;) {
200
+ uint32_t ch ;
201
+ short ch_len ;
202
+ bool should_escape ;
203
+
204
+ /* UTF-8 decoding */
205
+ short i ;
206
+ if ((in_utf8_str [pos ] & 0x80 ) == 0x00 ) { ch_len = 1 ; ch = in_utf8_str [pos ]; } /* leading 1 bit is 0b0 */
207
+ else if ((in_utf8_str [pos ] & 0xE0 ) == 0xC0 ) { ch_len = 2 ; ch = in_utf8_str [pos ] & 0x1F ; } /* leading 3 bits are 0b110 */
208
+ else if ((in_utf8_str [pos ] & 0xF0 ) == 0xE0 ) { ch_len = 3 ; ch = in_utf8_str [pos ] & 0x0F ; } /* leading 4 bits are 0b1110 */
209
+ else if ((in_utf8_str [pos ] & 0xF8 ) == 0xF0 ) { ch_len = 4 ; ch = in_utf8_str [pos ] & 0x07 ; } /* leading 5 bits are 0b11110 */
210
+ else {
211
+ rb_raise (rb_path2class ("JSON::GeneratorError" ), "source sequence is illegal/malformed utf-8" );
212
+ }
213
+
214
+ for (i = 1 ; i < ch_len ; i ++ ) {
215
+ ch = (ch <<6 ) | (in_utf8_str [pos + i ] & 0x3F );
69
216
}
70
217
71
218
/* JSON policy */
72
219
should_escape =
73
220
(ch < 0x20 ) ||
74
221
(ch == '"' ) ||
75
222
(ch == '\\' ) ||
76
- (out_ascii_only && ( ch > 0x7F ) ) ||
223
+ (ch > 0x7F ) ||
77
224
(out_script_safe && (ch == '/' )) ||
78
225
(out_script_safe && (ch == 0x2028 )) ||
79
226
(out_script_safe && (ch == 0x2029 ));
80
227
81
228
/* JSON encoding */
82
229
if (should_escape ) {
83
- if (pos > beg )
230
+ if (pos > beg ) {
84
231
fbuffer_append (out_buffer , & in_utf8_str [beg ], pos - beg );
232
+ }
233
+
85
234
beg = pos + ch_len ;
86
235
switch (ch ) {
87
236
case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
@@ -122,8 +271,11 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE in_string, bool out_
122
271
123
272
pos += ch_len ;
124
273
}
125
- if (beg < in_utf8_len )
274
+
275
+ if (beg < in_utf8_len ) {
126
276
fbuffer_append (out_buffer , & in_utf8_str [beg ], in_utf8_len - beg );
277
+ }
278
+
127
279
RB_GC_GUARD (in_string );
128
280
}
129
281
@@ -570,11 +722,27 @@ static int enc_utf8_compatible_p(int enc_idx)
570
722
571
723
/*
 * Write the Ruby string `obj` to `buffer` as a double-quoted JSON string.
 *
 * buffer - destination buffer.
 * Vstate - not used by this function.
 * state  - generator state; only `script_safe` and `ascii_only` are read.
 * obj    - string to serialise. When enc_utf8_compatible_p() rejects its
 *          encoding it is first converted with rb_str_export_to_enc().
 *
 * The escaping routine is picked from the string's coderange:
 *   - 7-bit strings take the table-driven byte scanner;
 *   - valid multi-byte strings take one of the UTF-8 converters, depending
 *     on `state->ascii_only`;
 *   - any other coderange raises JSON::GeneratorError.
 */
static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
{
    int coderange;

    if (!enc_utf8_compatible_p(RB_ENCODING_GET(obj))) {
        obj = rb_str_export_to_enc(obj, rb_utf8_encoding());
    }

    /* the opening quote is emitted only once the encoding conversion is done */
    fbuffer_append_char(buffer, '"');

    coderange = rb_enc_str_coderange(obj);
    if (coderange == ENC_CODERANGE_7BIT) {
        const bool *table = state->script_safe ? script_safe_escape_table : escape_table;
        convert_ASCII_to_JSON(buffer, obj, table);
    } else if (coderange == ENC_CODERANGE_VALID) {
        if (RB_UNLIKELY(state->ascii_only)) {
            convert_UTF8_to_ASCII_only_JSON(buffer, obj, state->script_safe);
        } else {
            convert_UTF8_to_JSON(buffer, obj, state->script_safe);
        }
    } else {
        rb_raise(rb_path2class("JSON::GeneratorError"), "source sequence is illegal/malformed utf-8");
    }

    fbuffer_append_char(buffer, '"');
}
580
748
0 commit comments