Skip to content

Commit 85734e7

Browse files
committed
Simplify the SIMD interface further
1 parent e50b5df commit 85734e7

File tree

1 file changed

+11
-17
lines changed

1 file changed

+11
-17
lines changed

ext/json/ext/generator/generator.c

Lines changed: 11 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -316,7 +316,7 @@ static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
316316
return mask & 0x8888888888888888ull;
317317
}
318318

319-
static inline FORCE_INLINE uint8x16_t neon_rules_update(const char *ptr)
319+
static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
320320
{
321321
uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
322322

@@ -329,7 +329,7 @@ static inline FORCE_INLINE uint8x16_t neon_rules_update(const char *ptr)
329329
uint8x16_t has_dblquote = vceqq_u8(chunk, dblquote);
330330
uint8x16_t needs_escape = vorrq_u8(too_low, vorrq_u8(has_backslash, has_dblquote));
331331

332-
return needs_escape;
332+
return neon_match_mask(needs_escape);
333333
}
334334

335335
static inline unsigned char search_escape_basic_neon(search_state *search)
@@ -387,8 +387,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
387387
* have at least one byte that needs to be escaped.
388388
*/
389389
while (search->ptr + sizeof(uint8x16_t) <= search->end) {
390-
uint8x16_t needs_escape = neon_rules_update(search->ptr);
391-
uint64_t mask = neon_match_mask(needs_escape);
390+
uint64_t mask = neon_rules_update(search->ptr);
392391

393392
if (!mask) {
394393
search->ptr += sizeof(uint8x16_t);
@@ -406,8 +405,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
406405
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
407406
char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
408407

409-
uint8x16_t needs_escape = neon_rules_update(s);
410-
uint64_t mask = neon_match_mask(needs_escape);
408+
uint64_t mask = neon_rules_update(s);
411409

412410
if (!mask) {
413411
// Nothing to escape, ensure search_flush doesn't do anything by setting
@@ -418,7 +416,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
418416
return 0;
419417
}
420418

421-
search->matches_mask = neon_match_mask(needs_escape);
419+
search->matches_mask = mask;
422420
search->has_matches = true;
423421
search->chunk_end = search->end;
424422
search->chunk_base = search->ptr;
@@ -465,8 +463,10 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
465463
#define TARGET_SSE2
466464
#endif
467465

468-
static inline TARGET_SSE2 FORCE_INLINE __m128i sse2_update(__m128i chunk)
466+
static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
469467
{
468+
__m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
469+
470470
const __m128i lower_bound = _mm_set1_epi8(' ');
471471
const __m128i backslash = _mm_set1_epi8('\\');
472472
const __m128i dblquote = _mm_set1_epi8('\"');
@@ -475,7 +475,7 @@ static inline TARGET_SSE2 FORCE_INLINE __m128i sse2_update(__m128i chunk)
475475
__m128i has_backslash = _mm_cmpeq_epi8(chunk, backslash);
476476
__m128i has_dblquote = _mm_cmpeq_epi8(chunk, dblquote);
477477
__m128i needs_escape = _mm_or_si128(too_low, _mm_or_si128(has_backslash, has_dblquote));
478-
return needs_escape;
478+
return _mm_movemask_epi8(needs_escape);
479479
}
480480

481481
static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
@@ -497,10 +497,7 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
497497
}
498498

499499
while (search->ptr + sizeof(__m128i) <= search->end) {
500-
__m128i chunk = _mm_loadu_si128((__m128i const*)search->ptr);
501-
__m128i needs_escape = sse2_update(chunk);
502-
503-
int needs_escape_mask = _mm_movemask_epi8(needs_escape);
500+
int needs_escape_mask = sse2_update(search->ptr);
504501

505502
if (needs_escape_mask == 0) {
506503
search->ptr += sizeof(__m128i);
@@ -518,10 +515,7 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
518515
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
519516
char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
520517

521-
__m128i chunk = _mm_loadu_si128((__m128i const *) s);
522-
__m128i needs_escape = sse2_update(chunk);
523-
524-
int needs_escape_mask = _mm_movemask_epi8(needs_escape);
518+
int needs_escape_mask = sse2_update(s);
525519

526520
if (needs_escape_mask == 0) {
527521
// Nothing to escape, ensure search_flush doesn't do anything by setting

0 commit comments

Comments
 (0)