Skip to content

Commit 045115a

Browse files
committed
Make the Neon implementation configurable based on a build parameter.
1 parent 4759254 commit 045115a

File tree

2 files changed

+20
-9
lines changed

2 files changed

+20
-9
lines changed

ext/json/ext/generator/extconf.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
}
2020
SRC
2121
$defs.push("-DENABLE_SIMD")
22+
23+
if enable_config('generator-use-neon-lut', default=false)
24+
$defs.push('-DUSE_NEON_LUT')
25+
end
2226
end
2327
end
2428

ext/json/ext/generator/generator.c

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
249249
#ifdef ENABLE_SIMD
250250

251251
#ifdef HAVE_SIMD_NEON
252+
#ifdef USE_NEON_LUT
252253
struct _simd_state {
253254

254255
struct {
@@ -257,6 +258,7 @@ struct _simd_state {
257258
};
258259

259260
static struct _simd_state simd_state;
261+
#endif /* USE_NEON_LUT */
260262
#endif /* HAVE_SIMD_NEON */
261263
#endif /* ENABLE_SIMD */
262264

@@ -307,6 +309,7 @@ static inline uint64_t neon_match_mask(uint8x16_t matches) {
307309
return mask & 0x8888888888888888ull;
308310
}
309311

312+
#ifdef USE_NEON_LUT
310313
static inline uint8x16_t neon_lut_update(uint8x16_t chunk) {
311314
uint8x16_t tmp1 = vqtbl4q_u8(simd_state.neon.escape_table_basic[0], chunk);
312315
uint8x16_t tmp2 = vqtbl4q_u8(simd_state.neon.escape_table_basic[1], veorq_u8(chunk, vdupq_n_u8(0x40)));
@@ -362,6 +365,8 @@ static inline unsigned char search_escape_basic_neon_advance_lut(search_state *s
362365
return 0;
363366
}
364367

368+
#else
369+
365370
static inline uint8x16_t neon_rules_update(uint8x16_t chunk) {
366371
const uint8x16_t lower_bound = vdupq_n_u8(' ');
367372
const uint8x16_t backslash = vdupq_n_u8('\\');
@@ -465,6 +470,7 @@ static unsigned char search_escape_basic_neon_advance_rules(search_state *search
465470

466471
return 0;
467472
}
473+
#endif /* USE_NEON_LUT */
468474

469475
static inline unsigned char search_escape_basic_neon(search_state *search)
470476
{
@@ -481,18 +487,15 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
481487
search->ptr = search->chunk_base+sizeof(uint8x16_t);
482488
}
483489
}
484-
485-
// TODO Pick an implementation or make them configurable. Right now it looks like the "rules" based approach
486-
// might be a bit faster.
487-
488-
// if (search_escape_basic_neon_advance_lut(search)) {
489-
// return 1;
490-
// }
491-
490+
#ifdef USE_NEON_LUT
491+
if (search_escape_basic_neon_advance_lut(search)) {
492+
return 1;
493+
}
494+
#else
492495
if (search_escape_basic_neon_advance_rules(search)) {
493496
return 1;
494497
}
495-
498+
#endif /* USE_NEON_LUT */
496499
if (search->ptr < search->end) {
497500
return search_escape_basic(search);
498501
}
@@ -1535,10 +1538,12 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc)
15351538
#ifdef ENABLE_SIMD
15361539

15371540
#ifdef HAVE_SIMD_NEON
1541+
#ifdef USE_NEON_LUT
15381542
static void initialize_simd_neon(void) {
15391543
simd_state.neon.escape_table_basic[0] = load_uint8x16_4(escape_table_basic);
15401544
simd_state.neon.escape_table_basic[1] = load_uint8x16_4(escape_table_basic+64);
15411545
}
1546+
#endif /* USE_NEON_LUT */
15421547
#endif /* HAVE_SIMD_NEON */
15431548

15441549
#endif
@@ -2206,7 +2211,9 @@ void Init_generator(void)
22062211
#ifdef HAVE_SIMD_NEON
22072212
case SIMD_NEON:
22082213
/* Initialize ARM Neon SIMD Implementation. */
2214+
#ifdef USE_NEON_LUT
22092215
initialize_simd_neon();
2216+
#endif /* USE_NEON_LUT */
22102217
search_escape_basic_impl = search_escape_basic_neon;
22112218
break;
22122219
#endif /* HAVE_SIMD_NEON */

0 commit comments

Comments
 (0)