@@ -25,14 +25,13 @@ static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend;
25
25
* Everything else (should be UTF-8) is just passed through and
26
26
* appended to the result.
27
27
*/
28
- static void convert_UTF8_to_JSON (FBuffer * out_buffer , VALUE in_string , bool out_ascii_only , bool out_script_safe )
28
+ static void convert_UTF8_to_JSON (FBuffer * out_buffer , VALUE in_string , bool out_script_safe )
29
29
{
30
30
const char * hexdig = "0123456789abcdef" ;
31
31
char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
32
32
33
33
const char * in_utf8_str = RSTRING_PTR (in_string );
34
34
unsigned long in_utf8_len = RSTRING_LEN (in_string );
35
- bool in_is_ascii_only = rb_enc_str_asciionly_p (in_string );
36
35
37
36
unsigned long beg = 0 , pos ;
38
37
@@ -42,46 +41,183 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE in_string, bool out_
42
41
bool should_escape ;
43
42
44
43
/* UTF-8 decoding */
45
- if (in_is_ascii_only ) {
46
- ch = in_utf8_str [pos ];
47
- ch_len = 1 ;
48
- } else {
49
- short i ;
50
- if ((in_utf8_str [pos ] & 0x80 ) == 0x00 ) { ch_len = 1 ; ch = in_utf8_str [pos ]; } /* leading 1 bit is 0b0 */
51
- else if ((in_utf8_str [pos ] & 0xE0 ) == 0xC0 ) { ch_len = 2 ; ch = in_utf8_str [pos ] & 0x1F ; } /* leading 3 bits are 0b110 */
52
- else if ((in_utf8_str [pos ] & 0xF0 ) == 0xE0 ) { ch_len = 3 ; ch = in_utf8_str [pos ] & 0x0F ; } /* leading 4 bits are 0b1110 */
53
- else if ((in_utf8_str [pos ] & 0xF8 ) == 0xF0 ) { ch_len = 4 ; ch = in_utf8_str [pos ] & 0x07 ; } /* leading 5 bits are 0b11110 */
54
- else
55
- rb_raise (rb_path2class ("JSON::GeneratorError" ),
56
- "source sequence is illegal/malformed utf-8" );
57
- if ((pos + ch_len ) > in_utf8_len )
58
- rb_raise (rb_path2class ("JSON::GeneratorError" ),
59
- "partial character in source, but hit end" );
60
- for (i = 1 ; i < ch_len ; i ++ ) {
61
- if ((in_utf8_str [pos + i ] & 0xC0 ) != 0x80 ) /* leading 2 bits should be 0b10 */
62
- rb_raise (rb_path2class ("JSON::GeneratorError" ),
63
- "source sequence is illegal/malformed utf-8" );
64
- ch = (ch <<6 ) | (in_utf8_str [pos + i ] & 0x3F );
44
+ short i ;
45
+ if ((in_utf8_str [pos ] & 0x80 ) == 0x00 ) { ch_len = 1 ; ch = in_utf8_str [pos ]; } /* leading 1 bit is 0b0 */
46
+ else if ((in_utf8_str [pos ] & 0xE0 ) == 0xC0 ) { ch_len = 2 ; ch = in_utf8_str [pos ] & 0x1F ; } /* leading 3 bits are 0b110 */
47
+ else if ((in_utf8_str [pos ] & 0xF0 ) == 0xE0 ) { ch_len = 3 ; ch = in_utf8_str [pos ] & 0x0F ; } /* leading 4 bits are 0b1110 */
48
+ else if ((in_utf8_str [pos ] & 0xF8 ) == 0xF0 ) { ch_len = 4 ; ch = in_utf8_str [pos ] & 0x07 ; } /* leading 5 bits are 0b11110 */
49
+ else {
50
+ rb_raise (rb_path2class ("JSON::GeneratorError" ), "source sequence is illegal/malformed utf-8" );
51
+ }
52
+
53
+ for (i = 1 ; i < ch_len ; i ++ ) {
54
+ ch = (ch <<6 ) | (in_utf8_str [pos + i ] & 0x3F );
55
+ }
56
+
57
+ /* JSON policy */
58
+ should_escape =
59
+ (ch < 0x20 ) ||
60
+ (ch == '"' ) ||
61
+ (ch == '\\' ) ||
62
+ (out_script_safe && (ch == '/' )) ||
63
+ (out_script_safe && (ch == 0x2028 )) ||
64
+ (out_script_safe && (ch == 0x2029 ));
65
+
66
+ /* JSON encoding */
67
+ if (should_escape ) {
68
+ if (pos > beg ) {
69
+ fbuffer_append (out_buffer , & in_utf8_str [beg ], pos - beg );
70
+ }
71
+
72
+ beg = pos + ch_len ;
73
+ switch (ch ) {
74
+ case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
75
+ case '\\' : fbuffer_append (out_buffer , "\\\\" , 2 ); break ;
76
+ case '/' : fbuffer_append (out_buffer , "\\/" , 2 ); break ;
77
+ case '\b' : fbuffer_append (out_buffer , "\\b" , 2 ); break ;
78
+ case '\f' : fbuffer_append (out_buffer , "\\f" , 2 ); break ;
79
+ case '\n' : fbuffer_append (out_buffer , "\\n" , 2 ); break ;
80
+ case '\r' : fbuffer_append (out_buffer , "\\r" , 2 ); break ;
81
+ case '\t' : fbuffer_append (out_buffer , "\\t" , 2 ); break ;
82
+ default :
83
+ if (ch <= 0xFFFF ) {
84
+ scratch [2 ] = hexdig [ch >> 12 ];
85
+ scratch [3 ] = hexdig [(ch >> 8 ) & 0xf ];
86
+ scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
87
+ scratch [5 ] = hexdig [ch & 0xf ];
88
+ fbuffer_append (out_buffer , scratch , 6 );
89
+ } else {
90
+ uint16_t hi , lo ;
91
+ ch -= 0x10000 ;
92
+ hi = 0xD800 + (uint16_t )(ch >> 10 );
93
+ lo = 0xDC00 + (uint16_t )(ch & 0x3FF );
94
+
95
+ scratch [2 ] = hexdig [hi >> 12 ];
96
+ scratch [3 ] = hexdig [(hi >> 8 ) & 0xf ];
97
+ scratch [4 ] = hexdig [(hi >> 4 ) & 0xf ];
98
+ scratch [5 ] = hexdig [hi & 0xf ];
99
+
100
+ scratch [8 ] = hexdig [lo >> 12 ];
101
+ scratch [9 ] = hexdig [(lo >> 8 ) & 0xf ];
102
+ scratch [10 ] = hexdig [(lo >> 4 ) & 0xf ];
103
+ scratch [11 ] = hexdig [lo & 0xf ];
104
+
105
+ fbuffer_append (out_buffer , scratch , 12 );
106
+ }
65
107
}
66
- if (ch > 0x10FFFF )
67
- rb_raise (rb_path2class ("JSON::GeneratorError" ),
68
- "source sequence is illegal/malformed utf-8" );
69
108
}
70
109
110
+ pos += ch_len ;
111
+ }
112
+
113
+ if (beg < in_utf8_len ) {
114
+ fbuffer_append (out_buffer , & in_utf8_str [beg ], in_utf8_len - beg );
115
+ }
116
+
117
+ RB_GC_GUARD (in_string );
118
+ }
119
+
120
+ static void convert_ASCII_to_JSON (FBuffer * out_buffer , VALUE str , bool out_script_safe )
121
+ {
122
+ const char * hexdig = "0123456789abcdef" ;
123
+ char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
124
+
125
+ const char * ptr = RSTRING_PTR (str );
126
+ unsigned long len = RSTRING_LEN (str );
127
+
128
+ unsigned long beg = 0 , pos ;
129
+
130
+ for (pos = 0 ; pos < len ;) {
131
+ unsigned char ch = ptr [pos ];
132
+ bool should_escape ;
133
+
71
134
/* JSON policy */
72
135
should_escape =
73
136
(ch < 0x20 ) ||
74
137
(ch == '"' ) ||
75
138
(ch == '\\' ) ||
76
- (out_ascii_only && (ch > 0x7F )) ||
139
+ (out_script_safe && (ch == '/' ));
140
+
141
+ /* JSON encoding */
142
+ if (should_escape ) {
143
+ if (pos > beg ) {
144
+ fbuffer_append (out_buffer , & ptr [beg ], pos - beg );
145
+ }
146
+
147
+ beg = pos + 1 ;
148
+ switch (ch ) {
149
+ case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
150
+ case '\\' : fbuffer_append (out_buffer , "\\\\" , 2 ); break ;
151
+ case '/' : fbuffer_append (out_buffer , "\\/" , 2 ); break ;
152
+ case '\b' : fbuffer_append (out_buffer , "\\b" , 2 ); break ;
153
+ case '\f' : fbuffer_append (out_buffer , "\\f" , 2 ); break ;
154
+ case '\n' : fbuffer_append (out_buffer , "\\n" , 2 ); break ;
155
+ case '\r' : fbuffer_append (out_buffer , "\\r" , 2 ); break ;
156
+ case '\t' : fbuffer_append (out_buffer , "\\t" , 2 ); break ;
157
+ default :
158
+ scratch [2 ] = hexdig [ch >> 12 ];
159
+ scratch [3 ] = hexdig [(ch >> 8 ) & 0xf ];
160
+ scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
161
+ scratch [5 ] = hexdig [ch & 0xf ];
162
+ fbuffer_append (out_buffer , scratch , 6 );
163
+ }
164
+ }
165
+
166
+ pos ++ ;
167
+ }
168
+
169
+ if (beg < len ) {
170
+ fbuffer_append (out_buffer , & ptr [beg ], len - beg );
171
+ }
172
+
173
+ RB_GC_GUARD (str );
174
+ }
175
+
176
+ static void convert_UTF8_to_ASCII_only_JSON (FBuffer * out_buffer , VALUE in_string , bool out_script_safe )
177
+ {
178
+ const char * hexdig = "0123456789abcdef" ;
179
+ char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
180
+
181
+ const char * in_utf8_str = RSTRING_PTR (in_string );
182
+ unsigned long in_utf8_len = RSTRING_LEN (in_string );
183
+
184
+ unsigned long beg = 0 , pos ;
185
+
186
+ for (pos = 0 ; pos < in_utf8_len ;) {
187
+ uint32_t ch ;
188
+ short ch_len ;
189
+ bool should_escape ;
190
+
191
+ /* UTF-8 decoding */
192
+ short i ;
193
+ if ((in_utf8_str [pos ] & 0x80 ) == 0x00 ) { ch_len = 1 ; ch = in_utf8_str [pos ]; } /* leading 1 bit is 0b0 */
194
+ else if ((in_utf8_str [pos ] & 0xE0 ) == 0xC0 ) { ch_len = 2 ; ch = in_utf8_str [pos ] & 0x1F ; } /* leading 3 bits are 0b110 */
195
+ else if ((in_utf8_str [pos ] & 0xF0 ) == 0xE0 ) { ch_len = 3 ; ch = in_utf8_str [pos ] & 0x0F ; } /* leading 4 bits are 0b1110 */
196
+ else if ((in_utf8_str [pos ] & 0xF8 ) == 0xF0 ) { ch_len = 4 ; ch = in_utf8_str [pos ] & 0x07 ; } /* leading 5 bits are 0b11110 */
197
+ else {
198
+ rb_raise (rb_path2class ("JSON::GeneratorError" ), "source sequence is illegal/malformed utf-8" );
199
+ }
200
+
201
+ for (i = 1 ; i < ch_len ; i ++ ) {
202
+ ch = (ch <<6 ) | (in_utf8_str [pos + i ] & 0x3F );
203
+ }
204
+
205
+ /* JSON policy */
206
+ should_escape =
207
+ (ch < 0x20 ) ||
208
+ (ch == '"' ) ||
209
+ (ch == '\\' ) ||
210
+ (ch > 0x7F ) ||
77
211
(out_script_safe && (ch == '/' )) ||
78
212
(out_script_safe && (ch == 0x2028 )) ||
79
213
(out_script_safe && (ch == 0x2029 ));
80
214
81
215
/* JSON encoding */
82
216
if (should_escape ) {
83
- if (pos > beg )
217
+ if (pos > beg ) {
84
218
fbuffer_append (out_buffer , & in_utf8_str [beg ], pos - beg );
219
+ }
220
+
85
221
beg = pos + ch_len ;
86
222
switch (ch ) {
87
223
case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
@@ -122,8 +258,11 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE in_string, bool out_
122
258
123
259
pos += ch_len ;
124
260
}
125
- if (beg < in_utf8_len )
261
+
262
+ if (beg < in_utf8_len ) {
126
263
fbuffer_append (out_buffer , & in_utf8_str [beg ], in_utf8_len - beg );
264
+ }
265
+
127
266
RB_GC_GUARD (in_string );
128
267
}
129
268
@@ -570,11 +709,27 @@ static int enc_utf8_compatible_p(int enc_idx)
570
709
571
710
/*
 * Write obj to buffer as a double-quoted JSON string.
 *
 * If enc_utf8_compatible_p() rejects the string's encoding, the string is
 * first converted with rb_str_export_to_enc() to UTF-8. The escaping routine
 * is then chosen from the string's code range:
 *   - 7-bit:  convert_ASCII_to_JSON
 *   - valid:  convert_UTF8_to_ASCII_only_JSON when state->ascii_only is set,
 *             convert_UTF8_to_JSON otherwise
 *   - any other code range raises JSON::GeneratorError.
 *
 * The opening quote is appended before the code range is examined, so it is
 * already in the buffer when the error is raised.
 */
static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
{
    int coderange;

    if (!enc_utf8_compatible_p(RB_ENCODING_GET(obj))) {
        obj = rb_str_export_to_enc(obj, rb_utf8_encoding());
    }

    fbuffer_append_char(buffer, '"');

    coderange = rb_enc_str_coderange(obj);
    if (coderange == ENC_CODERANGE_7BIT) {
        convert_ASCII_to_JSON(buffer, obj, state->script_safe);
    } else if (coderange == ENC_CODERANGE_VALID) {
        if (RB_UNLIKELY(state->ascii_only)) {
            convert_UTF8_to_ASCII_only_JSON(buffer, obj, state->script_safe);
        } else {
            convert_UTF8_to_JSON(buffer, obj, state->script_safe);
        }
    } else {
        rb_raise(rb_path2class("JSON::GeneratorError"), "source sequence is illegal/malformed utf-8");
    }

    fbuffer_append_char(buffer, '"');
}
580
735
0 commit comments