@@ -249,6 +249,7 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
249
249
#ifdef ENABLE_SIMD
250
250
251
251
#ifdef HAVE_SIMD_NEON
252
+ #ifdef USE_NEON_LUT
252
253
struct _simd_state {
253
254
254
255
struct {
@@ -257,6 +258,7 @@ struct _simd_state {
257
258
};
258
259
259
260
static struct _simd_state simd_state ;
261
+ #endif /* USE_NEON_LUT */
260
262
#endif /* HAVE_SIMD_NEON */
261
263
#endif /* ENABLE_SIMD */
262
264
@@ -307,6 +309,7 @@ static inline uint64_t neon_match_mask(uint8x16_t matches) {
307
309
return mask & 0x8888888888888888ull ;
308
310
}
309
311
312
+ #ifdef USE_NEON_LUT
310
313
static inline uint8x16_t neon_lut_update (uint8x16_t chunk ) {
311
314
uint8x16_t tmp1 = vqtbl4q_u8 (simd_state .neon .escape_table_basic [0 ], chunk );
312
315
uint8x16_t tmp2 = vqtbl4q_u8 (simd_state .neon .escape_table_basic [1 ], veorq_u8 (chunk , vdupq_n_u8 (0x40 )));
@@ -362,6 +365,8 @@ static inline unsigned char search_escape_basic_neon_advance_lut(search_state *s
362
365
return 0 ;
363
366
}
364
367
368
+ #else
369
+
365
370
static inline uint8x16_t neon_rules_update (uint8x16_t chunk ) {
366
371
const uint8x16_t lower_bound = vdupq_n_u8 (' ' );
367
372
const uint8x16_t backslash = vdupq_n_u8 ('\\' );
@@ -465,6 +470,7 @@ static unsigned char search_escape_basic_neon_advance_rules(search_state *search
465
470
466
471
return 0 ;
467
472
}
473
+ #endif /* USE_NEON_LUT */
468
474
469
475
static inline unsigned char search_escape_basic_neon (search_state * search )
470
476
{
@@ -481,18 +487,15 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
481
487
search -> ptr = search -> chunk_base + sizeof (uint8x16_t );
482
488
}
483
489
}
484
-
485
- // TODO Pick an implementation or make them configurable. Right now it looks like the "rules" based approach
486
- // might be a bit faster.
487
-
488
- // if (search_escape_basic_neon_advance_lut(search)) {
489
- // return 1;
490
- // }
491
-
490
+ #ifdef USE_NEON_LUT
491
+ if (search_escape_basic_neon_advance_lut (search )) {
492
+ return 1 ;
493
+ }
494
+ #else
492
495
if (search_escape_basic_neon_advance_rules (search )) {
493
496
return 1 ;
494
497
}
495
-
498
+ #endif /* USE_NEON_LUT */
496
499
if (search -> ptr < search -> end ) {
497
500
return search_escape_basic (search );
498
501
}
@@ -1535,10 +1538,12 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc)
1535
1538
#ifdef ENABLE_SIMD
1536
1539
1537
1540
#ifdef HAVE_SIMD_NEON
1541
+ #ifdef USE_NEON_LUT
1538
1542
static void initialize_simd_neon (void ) {
1539
1543
simd_state .neon .escape_table_basic [0 ] = load_uint8x16_4 (escape_table_basic );
1540
1544
simd_state .neon .escape_table_basic [1 ] = load_uint8x16_4 (escape_table_basic + 64 );
1541
1545
}
1546
+ #endif /* USE_NEON_LUT */
1542
1547
#endif /* HAVE_SIMD_NEON */
1543
1548
1544
1549
#endif
@@ -2206,7 +2211,9 @@ void Init_generator(void)
2206
2211
#ifdef HAVE_SIMD_NEON
2207
2212
case SIMD_NEON :
2208
2213
/* Initialize ARM Neon SIMD Implementation. */
2214
+ #ifdef USE_NEON_LUT
2209
2215
initialize_simd_neon ();
2216
+ #endif /* USE_NEON_LUT */
2210
2217
search_escape_basic_impl = search_escape_basic_neon ;
2211
2218
break ;
2212
2219
#endif /* HAVE_SIMD_NEON */
0 commit comments