@@ -112,17 +112,16 @@ typedef struct _search_state {
112112 FBuffer * buffer ;
113113
114114#ifdef ENABLE_SIMD
115- const char * returned_from ;
116- unsigned char maybe_matches [ 16 ] ;
115+ const char * chunk_base ;
116+ uint8_t has_matches ;
117117
118118#ifdef HAVE_SIMD_NEON
119119 uint64_t matches_mask ;
120- const char * chunk_base ;
121- uint8_t has_matches ;
120+ #elif HAVE_SIMD_SSE2
121+ uint16_t matches_mask ;
122+ #else
123+ #error "Unknown SIMD Implementation."
122124#endif /* HAVE_SIMD_NEON */
123-
124- unsigned long current_match_index ;
125- unsigned long maybe_match_length ;
126125#endif /* ENABLE_SIMD */
127126} search_state ;
128127
@@ -263,29 +262,12 @@ static struct _simd_state simd_state;
263262#endif /* ENABLE_SIMD */
264263
265264#ifdef ENABLE_SIMD
266-
267- static inline unsigned char search_escape_basic_simd_next_match (search_state * search ) {
268- for (; search -> current_match_index < search -> maybe_match_length && search -> ptr < search -> end ; ) {
269- unsigned char ch_len = search -> maybe_matches [search -> current_match_index ];
270-
271- if (RB_UNLIKELY (ch_len )) {
272- search -> returned_from = search -> ptr ;
273- search_flush (search );
274- return 1 ;
275- } else {
276- search -> ptr ++ ;
277- search -> current_match_index ++ ;
278- }
279- }
280- return 0 ;
281- }
282-
283265#ifdef HAVE_SIMD_NEON
284266
285- static inline unsigned char neon_mask_next_match (search_state * search ) {
267+ static inline unsigned char neon_next_match (search_state * search ) {
286268 uint64_t mask = search -> matches_mask ;
287269 if (mask > 0 ) {
288- uint32_t index = trailing_zeros (mask ) >> 2 ;
270+ uint32_t index = trailing_zeros64 (mask ) >> 2 ;
289271
290272 // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
291273 // If we want to use a similar approach for full escaping we'll need to ensure:
@@ -330,7 +312,7 @@ static inline unsigned char search_escape_basic_neon_advance_lut(search_state *s
330312 search -> matches_mask = neon_match_mask (vceqq_u8 (result , vdupq_n_u8 (9 )));
331313 search -> has_matches = 1 ;
332314 search -> chunk_base = search -> ptr ;
333- return neon_mask_next_match (search );
315+ return neon_next_match (search );
334316 }
335317
336318 // There are fewer than 16 bytes left.
@@ -436,7 +418,7 @@ static unsigned char search_escape_basic_neon_advance_rules(search_state *search
436418 search -> matches_mask = neon_match_mask (needs_escape );
437419 search -> has_matches = 1 ;
438420 search -> chunk_base = search -> ptr ;
439- return neon_mask_next_match (search );
421+ return neon_next_match (search );
440422 }
441423
442424 // There are fewer than 16 bytes left.
@@ -477,11 +459,11 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
477459 if (RB_UNLIKELY (search -> has_matches )) {
478460 // There are more matches if search->matches_mask > 0.
479461 if (search -> matches_mask > 0 ) {
480- if (RB_LIKELY (neon_mask_next_match (search ))) {
462+ if (RB_LIKELY (neon_next_match (search ))) {
481463 return 1 ;
482464 }
483465 } else {
484- // neon_mask_next_match will only advance search->ptr up to the last matching character.
466+ // neon_next_match will only advance search->ptr up to the last matching character.
485467 // Skip over any characters in the last chunk that occur after the last match.
486468 search -> has_matches = 0 ;
487469 search -> ptr = search -> chunk_base + sizeof (uint8x16_t );
@@ -512,6 +494,26 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
512494// #define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
513495// #define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
514496
497+ static inline unsigned char sse2_next_match (search_state * search ) {
498+ int mask = search -> matches_mask ;
499+ if (mask > 0 ) {
500+ int index = trailing_zeros (mask );
501+
502+ // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
503+ // If we want to use a similar approach for full escaping we'll need to ensure:
504+ // search->chunk_base + index >= search->ptr
505+ // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
506+ // is one byte after the previous match then:
507+ // search->chunk_base + index == search->ptr
508+ search -> ptr = search -> chunk_base + index ;
509+ mask &= mask - 1 ;
510+ search -> matches_mask = mask ;
511+ search_flush (search );
512+ return 1 ;
513+ }
514+ return 0 ;
515+ }
516+
515517#ifdef __GNUC__
516518#pragma GCC push_options
517519#pragma GCC target ("sse2")
@@ -545,11 +547,17 @@ static inline __m128i sse2_update(__m128i chunk) {
545547__attribute__((target ("sse2" )))
546548#endif /* __clang__ */
547549static unsigned char search_escape_basic_sse2 (search_state * search ) {
548- if (RB_UNLIKELY (search -> returned_from != NULL )) {
549- search -> current_match_index += (search -> ptr - search -> returned_from );
550- search -> returned_from = NULL ;
551- if (RB_UNLIKELY (search_escape_basic_simd_next_match (search ))) {
552- return 1 ;
550+ if (RB_UNLIKELY (search -> has_matches )) {
551+ // There are more matches if search->matches_mask > 0.
552+ if (search -> matches_mask > 0 ) {
553+ if (RB_LIKELY (sse2_next_match (search ))) {
554+ return 1 ;
555+ }
556+ } else {
557+ // sse2_next_match will only advance search->ptr up to the last matching character.
558+ // Skip over any characters in the last chunk that occur after the last match.
559+ search -> has_matches = 0 ;
560+ search -> ptr = search -> chunk_base + sizeof (__m128i );
553561 }
554562 }
555563
@@ -564,12 +572,10 @@ static unsigned char search_escape_basic_sse2(search_state *search) {
564572 continue ;
565573 }
566574
567- // It doesn't matter the value of each byte in 'maybe_matches' as long as a match is non-zero.
568- _mm_storeu_si128 ((__m128i * )search -> maybe_matches , needs_escape );
569-
570- search -> current_match_index = 0 ;
571- search -> maybe_match_length = sizeof (__m128i );
572- return search_escape_basic_simd_next_match (search );
575+ search -> has_matches = 1 ;
576+ search -> matches_mask = needs_escape_mask ;
577+ search -> chunk_base = search -> ptr ;
578+ return sse2_next_match (search );
573579 }
574580
575581 // There are fewer than 16 bytes left.
@@ -1368,12 +1374,9 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
13681374 search .end = search .ptr + len ;
13691375
13701376#ifdef ENABLE_SIMD
1371- search .current_match_index = 0 ;
1372- search .returned_from = NULL ;
1373- #ifdef HAVE_NEON_SIMD
13741377 search .matches_mask = 0 ;
13751378 search .has_matches = 0 ;
1376- #endif /* HAVE_NEON_SIMD */
1379+ search . chunk_base = NULL ;
13771380#endif /* ENABLE_SIMD */
13781381
13791382 switch (rb_enc_str_coderange (obj )) {
0 commit comments