|
133 | 133 | This callback is not called when an overrun is detected in
|
134 | 134 | during_frag. */
|
135 | 135 |
|
136 |
| -#if !FD_HAS_SSE |
137 |
| -#error "fd_stem requires SSE" |
138 |
| -#endif |
139 |
| - |
140 | 136 | #if !FD_HAS_ALLOCA
|
141 | 137 | #error "fd_stem requires alloca"
|
142 | 138 | #endif
|
@@ -533,20 +529,18 @@ STEM_(run1)( ulong in_cnt,
|
533 | 529 | ulong this_in_seq = this_in->seq;
|
534 | 530 | fd_frag_meta_t const * this_in_mline = this_in->mline; /* Already at appropriate line for this_in_seq */
|
535 | 531 |
|
| 532 | +#if FD_HAS_SSE |
536 | 533 | __m128i seq_sig = fd_frag_meta_seq_sig_query( this_in_mline );
|
537 |
| - #if FD_USING_CLANG |
538 |
| - /* TODO: Clang optimizes extremely aggressively which breaks the |
539 |
| - atomicity expected by seq_sig_query. In particular, it replaces |
540 |
| - the sequence query with a second load (immediately following |
541 |
| - vector load). The signature query a few lines down is still an |
542 |
| - extract from the vector which then means that effectively the |
543 |
| - signature is loaded before the sequence number. |
544 |
| - Adding this clobber of the vector prevents this optimization by |
545 |
| - forcing the seq query to be an extract, but we probably want a |
546 |
| - better long term solution. */ |
547 |
| - __asm__( "" : "+x"(seq_sig) ); |
548 |
| - #endif |
549 | 534 | ulong seq_found = fd_frag_meta_sse0_seq( seq_sig );
|
| 535 | + ulong sig = fd_frag_meta_sse0_sig( seq_sig ); |
| 536 | +#else |
| 537 | + /* Without SSE, seq and sig might be read from different frags (due
| 538 | + to overrun), which can result in before_frag and during_frag being
| 539 | + issued with incorrect arguments, but not after_frag. */
| 540 | + ulong seq_found = FD_VOLATILE_CONST( this_in_mline->seq ); |
| 541 | + ulong sig = FD_VOLATILE_CONST( this_in_mline->sig ); |
| 542 | +#endif |
| 543 | + (void)sig; |
550 | 544 |
|
551 | 545 | long diff = fd_seq_diff( this_in_seq, seq_found );
|
552 | 546 | if( FD_UNLIKELY( diff ) ) { /* Caught up or overrun, optimize for new frag case */
|
@@ -575,7 +569,6 @@ STEM_(run1)( ulong in_cnt,
|
575 | 569 | continue;
|
576 | 570 | }
|
577 | 571 |
|
578 |
| - ulong sig = fd_frag_meta_sse0_sig( seq_sig ); (void)sig; |
579 | 572 | #ifdef STEM_CALLBACK_BEFORE_FRAG
|
580 | 573 | int filter = STEM_CALLBACK_BEFORE_FRAG( ctx, (ulong)this_in->idx, seq_found, sig );
|
581 | 574 | if( FD_UNLIKELY( filter<0 ) ) {
|
|
0 commit comments