Skip to content

Commit 92410ec

Browse files
riptl authored and ripatel-fd committed
Support fd_stem without x86 SSE
- Make fd_frag_meta_seq_sig_query atomic (replace _mm_load_si128 with a volatile load)
- Add a seq_sig_query fallback for architectures without 128-bit atomic loads
1 parent bc4ef87 commit 92410ec

File tree

2 files changed

+11
-18
lines changed

2 files changed

+11
-18
lines changed

src/disco/stem/fd_stem.c

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,6 @@
133133
This callback is not called when an overrun is detected in
134134
during_frag. */
135135

136-
#if !FD_HAS_SSE
137-
#error "fd_stem requires SSE"
138-
#endif
139-
140136
#if !FD_HAS_ALLOCA
141137
#error "fd_stem requires alloca"
142138
#endif
@@ -533,20 +529,18 @@ STEM_(run1)( ulong in_cnt,
533529
ulong this_in_seq = this_in->seq;
534530
fd_frag_meta_t const * this_in_mline = this_in->mline; /* Already at appropriate line for this_in_seq */
535531

532+
#if FD_HAS_SSE
536533
__m128i seq_sig = fd_frag_meta_seq_sig_query( this_in_mline );
537-
#if FD_USING_CLANG
538-
/* TODO: Clang optimizes extremely aggressively which breaks the
539-
atomicity expected by seq_sig_query. In particular, it replaces
540-
the sequence query with a second load (immediately following
541-
vector load). The signature query a few lines down is still an
542-
extract from the vector which then means that effectively the
543-
signature is loaded before the sequence number.
544-
Adding this clobber of the vector prevents this optimization by
545-
forcing the seq query to be an extract, but we probably want a
546-
better long term solution. */
547-
__asm__( "" : "+x"(seq_sig) );
548-
#endif
549534
ulong seq_found = fd_frag_meta_sse0_seq( seq_sig );
535+
ulong sig = fd_frag_meta_sse0_sig( seq_sig );
536+
#else
537+
/* Without SSE, seq and sig might be read from different frags (due
538+
to overrun), which results in a before_frag and during_frag being
539+
issued with incorrect arguments, but not after_frag. */
540+
ulong seq_found = FD_VOLATILE_CONST( this_in_mline->seq );
541+
ulong sig = FD_VOLATILE_CONST( this_in_mline->sig );
542+
#endif
543+
(void)sig;
550544

551545
long diff = fd_seq_diff( this_in_seq, seq_found );
552546
if( FD_UNLIKELY( diff ) ) { /* Caught up or overrun, optimize for new frag case */
@@ -575,7 +569,6 @@ STEM_(run1)( ulong in_cnt,
575569
continue;
576570
}
577571

578-
ulong sig = fd_frag_meta_sse0_sig( seq_sig ); (void)sig;
579572
#ifdef STEM_CALLBACK_BEFORE_FRAG
580573
int filter = STEM_CALLBACK_BEFORE_FRAG( ctx, (ulong)this_in->idx, seq_found, sig );
581574
if( FD_UNLIKELY( filter<0 ) ) {

src/tango/fd_tango_base.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ fd_frag_meta_seq_query( fd_frag_meta_t const * meta ) { /* Assumed non-NULL */
283283
static inline __m128i
284284
fd_frag_meta_seq_sig_query( fd_frag_meta_t const * meta ) { /* Assumed non-NULL */
285285
FD_COMPILER_MFENCE();
286-
__m128i sse0 = _mm_load_si128( &meta->sse0 );
286+
__m128i sse0 = FD_VOLATILE_CONST( meta->sse0 );
287287
FD_COMPILER_MFENCE();
288288
return sse0;
289289
}

0 commit comments

Comments
 (0)