1818
1919import java .io .IOException ;
2020import java .io .OutputStream ;
21+ import java .nio .charset .StandardCharsets ;
2122
2223/**
2324 * An encoder that reads from the given source and outputs its representation
@@ -46,6 +47,15 @@ final class StringEncoder extends ByteListTranscoder {
4647 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 9 , 0 , 0 , 0 , // '\\'
4748 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
4849 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
50+
51+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
52+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
53+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
54+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
55+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
56+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
57+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
58+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
4959 };
5060
5161 static final byte [] ASCII_ONLY_ESCAPE_TABLE = {
@@ -97,6 +107,8 @@ final class StringEncoder extends ByteListTranscoder {
97107 //First byte of a 4+ byte code point
98108 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 5 , 5 , 5 , 5 , 6 , 6 , 9 , 9 ,
99109 };
110+ private static final byte [] BACKSLASH_U2028 = "\\ u2028" .getBytes (StandardCharsets .US_ASCII );
111+ private static final byte [] BACKSLASH_U2029 = "\\ u2029" .getBytes (StandardCharsets .US_ASCII );
100112
101113 private final boolean asciiOnly , scriptSafe ;
102114
@@ -143,10 +155,12 @@ void generate(ThreadContext context, RubyString object, OutputStream buffer) thr
143155 append ('"' );
144156 switch (object .scanForCodeRange ()) {
145157 case StringSupport .CR_7BIT :
146- encodeASCII (context , byteList , buffer );
147- break ;
148158 case StringSupport .CR_VALID :
149- encode (context , byteList , buffer );
159+ if (asciiOnly ) {
160+ encodeASCII (byteList , scriptSafe ? SCRIPT_SAFE_ESCAPE_TABLE : ASCII_ONLY_ESCAPE_TABLE );
161+ } else {
162+ encode (byteList , scriptSafe ? SCRIPT_SAFE_ESCAPE_TABLE : ESCAPE_TABLE );
163+ }
150164 break ;
151165 default :
152166 throw Utils .buildGeneratorError (context , object , "source sequence is illegal/malformed utf-8" ).toThrowable ();
@@ -178,15 +192,85 @@ static RubyString ensureValidEncoding(ThreadContext context, RubyString str) {
178192 return str ;
179193 }
180194
181- void encode (ThreadContext context , ByteList src , OutputStream out ) throws IOException {
182- while (hasNext ()) {
183- handleChar (readUtf8Char (context ));
195+ // C: convert_UTF8_to_JSON
196+ void encode (ByteList src , byte [] escape_table ) throws IOException {
197+ byte [] hexdig = HEX ;
198+ byte [] scratch = aux ;
199+
200+ byte [] ptrBytes = src .unsafeBytes ();
201+ int ptr = src .begin ();
202+ int len = src .realSize ();
203+
204+ int beg = 0 ;
205+ int pos = 0 ;
206+
207+ while (pos < len ) {
208+ int ch = Byte .toUnsignedInt (ptrBytes [ptr + pos ]);
209+ int ch_len = escape_table [ch ];
210+ /* JSON encoding */
211+
212+ if (ch_len > 0 ) {
213+ switch (ch_len ) {
214+ case 9 : {
215+ beg = pos = flushPos (pos , beg , ptrBytes , ptr , 1 );
216+ switch (ch ) {
217+ case '"' : appendEscape (BACKSLASH_DOUBLEQUOTE ); break ;
218+ case '\\' : appendEscape (BACKSLASH_BACKSLASH ); break ;
219+ case '/' : appendEscape (BACKSLASH_FORWARDSLASH ); break ;
220+ case '\b' : appendEscape (BACKSLASH_B ); break ;
221+ case '\f' : appendEscape (BACKSLASH_F ); break ;
222+ case '\n' : appendEscape (BACKSLASH_N ); break ;
223+ case '\r' : appendEscape (BACKSLASH_R ); break ;
224+ case '\t' : appendEscape (BACKSLASH_T ); break ;
225+ default : {
226+ scratch [2 ] = '0' ;
227+ scratch [3 ] = '0' ;
228+ scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
229+ scratch [5 ] = hexdig [ch & 0xf ];
230+ append (scratch , 0 , 6 );
231+ break ;
232+ }
233+ }
234+ break ;
235+ }
236+ case 11 : {
237+ int b2 = Byte .toUnsignedInt (ptrBytes [ptr + pos + 1 ]);
238+ if (b2 == 0x80 ) {
239+ int b3 = Byte .toUnsignedInt (ptrBytes [ptr + pos + 2 ]);
240+ if (b3 == 0xA8 ) {
241+ beg = pos = flushPos (pos , beg , ptrBytes , ptr , 3 );
242+ append (BACKSLASH_U2028 , 0 , 6 );
243+ break ;
244+ } else if (b3 == 0xA9 ) {
245+ beg = pos = flushPos (pos , beg , ptrBytes , ptr , 3 );
246+ append (BACKSLASH_U2029 , 0 , 6 );
247+ break ;
248+ }
249+ }
250+ ch_len = 3 ;
251+ // fallthrough
252+ }
253+ default :
254+ pos += ch_len ;
255+ break ;
256+ }
257+ } else {
258+ pos ++;
259+ }
184260 }
261+
262+ if (beg < len ) {
263+ append (ptrBytes , ptr + beg , len - beg );
264+ }
265+ }
266+
267+ private int flushPos (int pos , int beg , byte [] ptrBytes , int ptr , int size ) throws IOException {
268+ if (pos > beg ) { append (ptrBytes , ptr + beg , pos - beg ); }
269+ return pos + size ;
185270 }
186271
187272 // C: convert_UTF8_to_ASCII_only_JSON
188- void encodeASCII (ThreadContext context , ByteList src , OutputStream out ) throws IOException {
189- byte [] escape_table = scriptSafe ? SCRIPT_SAFE_ESCAPE_TABLE : ASCII_ONLY_ESCAPE_TABLE ;
273+ void encodeASCII (ByteList src , byte [] escape_table ) throws IOException {
190274 byte [] hexdig = HEX ;
191275 byte [] scratch = aux ;
192276
@@ -198,13 +282,13 @@ void encodeASCII(ThreadContext context, ByteList src, OutputStream out) throws I
198282 int pos = 0 ;
199283
200284 while (pos < len ) {
201- byte ch = ptrBytes [ptr + pos ];
285+ int ch = Byte . toUnsignedInt ( ptrBytes [ptr + pos ]) ;
202286 int ch_len = escape_table [ch ];
203287
204288 if (ch_len != 0 ) {
205289 switch (ch_len ) {
206290 case 9 : {
207- if ( pos > beg ) { append ( ptrBytes , ptr + beg , pos - beg ); } pos += 1 ; beg = pos ; // FLUSH_POS
291+ beg = pos = flushPos ( pos , beg , ptrBytes , ptr , 1 );
208292 switch (ch ) {
209293 case '"' : appendEscape (BACKSLASH_DOUBLEQUOTE ); break ;
210294 case '\\' : appendEscape (BACKSLASH_BACKSLASH ); break ;
@@ -245,7 +329,7 @@ void encodeASCII(ThreadContext context, ByteList src, OutputStream out) throws I
245329 wchar = (wchar << 6 ) | (ptrBytes [ptr + pos +i ] & 0x3F );
246330 }
247331
248- if ( pos > beg ) { append ( ptrBytes , ptr + beg , pos - beg ); } pos += ch_len ; beg = pos ; // FLUSH_POS
332+ beg = pos = flushPos ( pos , beg , ptrBytes , ptr , ch_len );
249333
250334 if (wchar <= 0xFFFF ) {
251335 scratch [2 ] = hexdig [wchar >> 12 ];
0 commit comments