@@ -112,17 +112,16 @@ typedef struct _search_state {
112
112
FBuffer * buffer ;
113
113
114
114
#ifdef ENABLE_SIMD
115
- const char * returned_from ;
116
- unsigned char maybe_matches [ 16 ] ;
115
+ const char * chunk_base ;
116
+ uint8_t has_matches ;
117
117
118
118
#ifdef HAVE_SIMD_NEON
119
119
uint64_t matches_mask ;
120
- const char * chunk_base ;
121
- uint8_t has_matches ;
120
+ #elif HAVE_SIMD_SSE2
121
+ uint16_t matches_mask ;
122
+ #else
123
+ #error "Unknown SIMD Implementation."
122
124
#endif /* HAVE_SIMD_NEON */
123
-
124
- unsigned long current_match_index ;
125
- unsigned long maybe_match_length ;
126
125
#endif /* ENABLE_SIMD */
127
126
} search_state ;
128
127
@@ -263,29 +262,12 @@ static struct _simd_state simd_state;
263
262
#endif /* ENABLE_SIMD */
264
263
265
264
#ifdef ENABLE_SIMD
266
-
267
- static inline unsigned char search_escape_basic_simd_next_match (search_state * search ) {
268
- for (; search -> current_match_index < search -> maybe_match_length && search -> ptr < search -> end ; ) {
269
- unsigned char ch_len = search -> maybe_matches [search -> current_match_index ];
270
-
271
- if (RB_UNLIKELY (ch_len )) {
272
- search -> returned_from = search -> ptr ;
273
- search_flush (search );
274
- return 1 ;
275
- } else {
276
- search -> ptr ++ ;
277
- search -> current_match_index ++ ;
278
- }
279
- }
280
- return 0 ;
281
- }
282
-
283
265
#ifdef HAVE_SIMD_NEON
284
266
285
- static inline unsigned char neon_mask_next_match (search_state * search ) {
267
+ static inline unsigned char neon_next_match (search_state * search ) {
286
268
uint64_t mask = search -> matches_mask ;
287
269
if (mask > 0 ) {
288
- uint32_t index = trailing_zeros (mask ) >> 2 ;
270
+ uint32_t index = trailing_zeros64 (mask ) >> 2 ;
289
271
290
272
// It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
291
273
// If we want to use a similar approach for full escaping we'll need to ensure:
@@ -330,7 +312,7 @@ static inline unsigned char search_escape_basic_neon_advance_lut(search_state *s
330
312
search -> matches_mask = neon_match_mask (vceqq_u8 (result , vdupq_n_u8 (9 )));
331
313
search -> has_matches = 1 ;
332
314
search -> chunk_base = search -> ptr ;
333
- return neon_mask_next_match (search );
315
+ return neon_next_match (search );
334
316
}
335
317
336
318
// There are fewer than 16 bytes left.
@@ -436,7 +418,7 @@ static unsigned char search_escape_basic_neon_advance_rules(search_state *search
436
418
search -> matches_mask = neon_match_mask (needs_escape );
437
419
search -> has_matches = 1 ;
438
420
search -> chunk_base = search -> ptr ;
439
- return neon_mask_next_match (search );
421
+ return neon_next_match (search );
440
422
}
441
423
442
424
// There are fewer than 16 bytes left.
@@ -477,11 +459,11 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
477
459
if (RB_UNLIKELY (search -> has_matches )) {
478
460
// There are more matches if search->matches_mask > 0.
479
461
if (search -> matches_mask > 0 ) {
480
- if (RB_LIKELY (neon_mask_next_match (search ))) {
462
+ if (RB_LIKELY (neon_next_match (search ))) {
481
463
return 1 ;
482
464
}
483
465
} else {
484
- // neon_mask_next_match will only advance search->ptr up to the last matching character.
466
+ // neon_next_match will only advance search->ptr up to the last matching character.
485
467
// Skip over any characters in the last chunk that occur after the last match.
486
468
search -> has_matches = 0 ;
487
469
search -> ptr = search -> chunk_base + sizeof (uint8x16_t );
@@ -512,6 +494,26 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
512
494
// #define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
513
495
// #define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
514
496
497
+ static inline unsigned char sse2_next_match (search_state * search ) {
498
+ int mask = search -> matches_mask ;
499
+ if (mask > 0 ) {
500
+ int index = trailing_zeros (mask );
501
+
502
+ // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
503
+ // If we want to use a similar approach for full escaping we'll need to ensure:
504
+ // search->chunk_base + index >= search->ptr
505
+ // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
506
+ // is one byte after the previous match then:
507
+ // search->chunk_base + index == search->ptr
508
+ search -> ptr = search -> chunk_base + index ;
509
+ mask &= mask - 1 ;
510
+ search -> matches_mask = mask ;
511
+ search_flush (search );
512
+ return 1 ;
513
+ }
514
+ return 0 ;
515
+ }
516
+
515
517
#ifdef __GNUC__
516
518
#pragma GCC push_options
517
519
#pragma GCC target ("sse2")
@@ -545,11 +547,17 @@ static inline __m128i sse2_update(__m128i chunk) {
545
547
__attribute__((target ("sse2" )))
546
548
#endif /* __clang__ */
547
549
static unsigned char search_escape_basic_sse2 (search_state * search ) {
548
- if (RB_UNLIKELY (search -> returned_from != NULL )) {
549
- search -> current_match_index += (search -> ptr - search -> returned_from );
550
- search -> returned_from = NULL ;
551
- if (RB_UNLIKELY (search_escape_basic_simd_next_match (search ))) {
552
- return 1 ;
550
+ if (RB_UNLIKELY (search -> has_matches )) {
551
+ // There are more matches if search->matches_mask > 0.
552
+ if (search -> matches_mask > 0 ) {
553
+ if (RB_LIKELY (sse2_next_match (search ))) {
554
+ return 1 ;
555
+ }
556
+ } else {
557
+ // sse2_next_match will only advance search->ptr up to the last matching character.
558
+ // Skip over any characters in the last chunk that occur after the last match.
559
+ search -> has_matches = 0 ;
560
+ search -> ptr = search -> chunk_base + sizeof (__m128i );
553
561
}
554
562
}
555
563
@@ -564,12 +572,10 @@ static unsigned char search_escape_basic_sse2(search_state *search) {
564
572
continue ;
565
573
}
566
574
567
- // It doesn't matter the value of each byte in 'maybe_matches' as long as a match is non-zero.
568
- _mm_storeu_si128 ((__m128i * )search -> maybe_matches , needs_escape );
569
-
570
- search -> current_match_index = 0 ;
571
- search -> maybe_match_length = sizeof (__m128i );
572
- return search_escape_basic_simd_next_match (search );
575
+ search -> has_matches = 1 ;
576
+ search -> matches_mask = needs_escape_mask ;
577
+ search -> chunk_base = search -> ptr ;
578
+ return sse2_next_match (search );
573
579
}
574
580
575
581
// There are fewer than 16 bytes left.
@@ -1368,12 +1374,9 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
1368
1374
search .end = search .ptr + len ;
1369
1375
1370
1376
#ifdef ENABLE_SIMD
1371
- search .current_match_index = 0 ;
1372
- search .returned_from = NULL ;
1373
- #ifdef HAVE_NEON_SIMD
1374
1377
search .matches_mask = 0 ;
1375
1378
search .has_matches = 0 ;
1376
- #endif /* HAVE_NEON_SIMD */
1379
+ search . chunk_base = NULL ;
1377
1380
#endif /* ENABLE_SIMD */
1378
1381
1379
1382
switch (rb_enc_str_coderange (obj )) {
0 commit comments