Skip to content

Commit d4f5bf7

Browse files
committed
Use a maches mask to determine the location of the maching characters in the SSE2 implementation.
1 parent 13b2c4f commit d4f5bf7

File tree

2 files changed

+63
-46
lines changed

2 files changed

+63
-46
lines changed

ext/json/ext/generator/generator.c

Lines changed: 48 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -112,17 +112,16 @@ typedef struct _search_state {
112112
FBuffer *buffer;
113113

114114
#ifdef ENABLE_SIMD
115-
const char *returned_from;
116-
unsigned char maybe_matches[16];
115+
const char *chunk_base;
116+
uint8_t has_matches;
117117

118118
#ifdef HAVE_SIMD_NEON
119119
uint64_t matches_mask;
120-
const char *chunk_base;
121-
uint8_t has_matches;
120+
#elif HAVE_SIMD_SSE2
121+
uint16_t matches_mask;
122+
#else
123+
#error "Unknown SIMD Implementation."
122124
#endif /* HAVE_SIMD_NEON */
123-
124-
unsigned long current_match_index;
125-
unsigned long maybe_match_length;
126125
#endif /* ENABLE_SIMD */
127126
} search_state;
128127

@@ -263,29 +262,12 @@ static struct _simd_state simd_state;
263262
#endif /* ENABLE_SIMD */
264263

265264
#ifdef ENABLE_SIMD
266-
267-
static inline unsigned char search_escape_basic_simd_next_match(search_state *search) {
268-
for(; search->current_match_index < search->maybe_match_length && search->ptr < search->end; ) {
269-
unsigned char ch_len = search->maybe_matches[search->current_match_index];
270-
271-
if (RB_UNLIKELY(ch_len)) {
272-
search->returned_from = search->ptr;
273-
search_flush(search);
274-
return 1;
275-
} else {
276-
search->ptr++;
277-
search->current_match_index++;
278-
}
279-
}
280-
return 0;
281-
}
282-
283265
#ifdef HAVE_SIMD_NEON
284266

285-
static inline unsigned char neon_mask_next_match(search_state *search) {
267+
static inline unsigned char neon_next_match(search_state *search) {
286268
uint64_t mask = search->matches_mask;
287269
if (mask > 0) {
288-
uint32_t index = trailing_zeros(mask) >> 2;
270+
uint32_t index = trailing_zeros64(mask) >> 2;
289271

290272
// It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
291273
// If we want to use a similar approach for full escaping we'll need to ensure:
@@ -330,7 +312,7 @@ static inline unsigned char search_escape_basic_neon_advance_lut(search_state *s
330312
search->matches_mask = neon_match_mask(vceqq_u8(result, vdupq_n_u8(9)));
331313
search->has_matches = 1;
332314
search->chunk_base = search->ptr;
333-
return neon_mask_next_match(search);
315+
return neon_next_match(search);
334316
}
335317

336318
// There are fewer than 16 bytes left.
@@ -436,7 +418,7 @@ static unsigned char search_escape_basic_neon_advance_rules(search_state *search
436418
search->matches_mask = neon_match_mask(needs_escape);
437419
search->has_matches = 1;
438420
search->chunk_base = search->ptr;
439-
return neon_mask_next_match(search);
421+
return neon_next_match(search);
440422
}
441423

442424
// There are fewer than 16 bytes left.
@@ -477,11 +459,11 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
477459
if (RB_UNLIKELY(search->has_matches)) {
478460
// There are more matches if search->matches_mask > 0.
479461
if (search->matches_mask > 0) {
480-
if (RB_LIKELY(neon_mask_next_match(search))) {
462+
if (RB_LIKELY(neon_next_match(search))) {
481463
return 1;
482464
}
483465
} else {
484-
// neon_mask_next_match will only advance search->ptr up to the last matching character.
466+
// neon_next_match will only advance search->ptr up to the last matching character.
485467
// Skip over any characters in the last chunk that occur after the last match.
486468
search->has_matches = 0;
487469
search->ptr = search->chunk_base+sizeof(uint8x16_t);
@@ -512,6 +494,26 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
512494
// #define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
513495
// #define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
514496

497+
static inline unsigned char sse2_next_match(search_state *search) {
498+
int mask = search->matches_mask;
499+
if (mask > 0) {
500+
int index = trailing_zeros(mask);
501+
502+
// It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
503+
// If we want to use a similar approach for full escaping we'll need to ensure:
504+
// search->chunk_base + index >= search->ptr
505+
// However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
506+
// is one byte after the previous match then:
507+
// search->chunk_base + index == search->ptr
508+
search->ptr = search->chunk_base + index;
509+
mask &= mask - 1;
510+
search->matches_mask = mask;
511+
search_flush(search);
512+
return 1;
513+
}
514+
return 0;
515+
}
516+
515517
#ifdef __GNUC__
516518
#pragma GCC push_options
517519
#pragma GCC target ("sse2")
@@ -545,11 +547,17 @@ static inline __m128i sse2_update(__m128i chunk) {
545547
__attribute__((target("sse2")))
546548
#endif /* __clang__ */
547549
static unsigned char search_escape_basic_sse2(search_state *search) {
548-
if (RB_UNLIKELY(search->returned_from != NULL)) {
549-
search->current_match_index += (search->ptr - search->returned_from);
550-
search->returned_from = NULL;
551-
if (RB_UNLIKELY(search_escape_basic_simd_next_match(search))) {
552-
return 1;
550+
if (RB_UNLIKELY(search->has_matches)) {
551+
// There are more matches if search->matches_mask > 0.
552+
if (search->matches_mask > 0) {
553+
if (RB_LIKELY(sse2_next_match(search))) {
554+
return 1;
555+
}
556+
} else {
557+
// sse2_next_match will only advance search->ptr up to the last matching character.
558+
// Skip over any characters in the last chunk that occur after the last match.
559+
search->has_matches = 0;
560+
search->ptr = search->chunk_base+sizeof(__m128i);
553561
}
554562
}
555563

@@ -564,12 +572,10 @@ static unsigned char search_escape_basic_sse2(search_state *search) {
564572
continue;
565573
}
566574

567-
// It doesn't matter the value of each byte in 'maybe_matches' as long as a match is non-zero.
568-
_mm_storeu_si128((__m128i *)search->maybe_matches, needs_escape);
569-
570-
search->current_match_index = 0;
571-
search->maybe_match_length = sizeof(__m128i);
572-
return search_escape_basic_simd_next_match(search);
575+
search->has_matches = 1;
576+
search->matches_mask = needs_escape_mask;
577+
search->chunk_base = search->ptr;
578+
return sse2_next_match(search);
573579
}
574580

575581
// There are fewer than 16 bytes left.
@@ -1368,12 +1374,9 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
13681374
search.end = search.ptr + len;
13691375

13701376
#ifdef ENABLE_SIMD
1371-
search.current_match_index = 0;
1372-
search.returned_from = NULL;
1373-
#ifdef HAVE_NEON_SIMD
13741377
search.matches_mask = 0;
13751378
search.has_matches = 0;
1376-
#endif /* HAVE_NEON_SIMD */
1379+
search.chunk_base = NULL;
13771380
#endif /* ENABLE_SIMD */
13781381

13791382
switch(rb_enc_str_coderange(obj)) {

ext/json/ext/generator/simd.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ typedef enum {
2020
#define HAVE_BUILTIN_CTZLL 0
2121
#endif
2222

23-
static inline uint32_t trailing_zeros(uint64_t input) {
23+
static inline uint32_t trailing_zeros64(uint64_t input) {
2424
#if HAVE_BUILTIN_CTZLL
2525
return __builtin_ctzll(input);
2626
#else
@@ -34,6 +34,20 @@ static inline uint32_t trailing_zeros(uint64_t input) {
3434
#endif
3535
}
3636

37+
static inline int trailing_zeros(int input) {
38+
#if HAVE_BUILTIN_CTZLL
39+
return __builtin_ctz(input);
40+
#else
41+
int trailing_zeros = 0;
42+
int temp = input;
43+
while ((temp & 1) == 0 && temp > 0) {
44+
trailing_zeros++;
45+
temp >>= 1;
46+
}
47+
return trailing_zeros;
48+
#endif
49+
}
50+
3751
#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
3852
#include <arm_neon.h>
3953

0 commit comments

Comments
 (0)