From d4f11891c8d92bc8895e4480740eda2abb86faeb Mon Sep 17 00:00:00 2001 From: Richard Patel Date: Thu, 24 Apr 2025 20:17:53 +0000 Subject: [PATCH] snapshot: non-temporal memcpy --- src/flamenco/snapshot/fd_snapshot.c | 1 + src/flamenco/snapshot/fd_snapshot_restore.c | 73 ++++++++++++++++++- src/flamenco/snapshot/fd_snapshot_restore.h | 9 +++ .../snapshot/fd_snapshot_restore_private.h | 3 +- 4 files changed, 84 insertions(+), 2 deletions(-) diff --git a/src/flamenco/snapshot/fd_snapshot.c b/src/flamenco/snapshot/fd_snapshot.c index ede299965b..e9e6d6349a 100644 --- a/src/flamenco/snapshot/fd_snapshot.c +++ b/src/flamenco/snapshot/fd_snapshot.c @@ -225,6 +225,7 @@ void fd_snapshot_load_accounts( fd_snapshot_load_ctx_t * ctx ) { /* Now, that the manifest is done being read in. Read in the rest of the accounts. */ + fd_snapshot_enable_nt_copy( ctx->restore ); for(;;) { int err = fd_snapshot_loader_advance( ctx->loader ); if( err==-1 ) break; /* We have finished loading in the snapshot. */ diff --git a/src/flamenco/snapshot/fd_snapshot_restore.c b/src/flamenco/snapshot/fd_snapshot_restore.c index 90023790e6..1155313124 100644 --- a/src/flamenco/snapshot/fd_snapshot_restore.c +++ b/src/flamenco/snapshot/fd_snapshot_restore.c @@ -10,6 +10,10 @@ #include /* strncmp */ #include /* getrandom */ +#if FD_HAS_AVX512 || FD_HAS_AVX +#include +#endif + /* Snapshot Restore Buffer Handling ***********************************/ static void @@ -582,6 +586,72 @@ fd_snapshot_read_account_hdr_chunk( fd_snapshot_restore_t * restore, return end; } +void +fd_snapshot_enable_nt_copy( fd_snapshot_restore_t * restore ) { + restore->use_nt_copy = 1; +} + +static void +fd_snapshot_nt_memcpy( uchar * restrict dst, + uchar const * restrict src, + ulong sz ) { +#if FD_HAS_AVX512 || FD_HAS_AVX + if( sz<1024UL ) goto tail; + + /* Head copy */ + ulong dst_align = (ulong)fd_ulong_align_up( (ulong)dst, 64UL ); + ulong head_sz = dst_align - (ulong)dst; + fd_memcpy( dst, src, head_sz ); + src += head_sz; + dst += head_sz; + sz -= head_sz; + + /* NT copy */ + while( sz>=512 ) { +# if FD_HAS_AVX512 + _mm512_stream_si512( (void *)( dst+ 0UL ), _mm512_loadu_si512( (void const *)( src+ 0UL ) ) ); + _mm512_stream_si512( (void *)( dst+ 64UL ), _mm512_loadu_si512( (void const *)( src+ 64UL ) ) ); + _mm512_stream_si512( (void *)( dst+128UL ), _mm512_loadu_si512( (void const *)( src+128UL ) ) ); + _mm512_stream_si512( (void *)( dst+192UL ), _mm512_loadu_si512( (void const *)( src+192UL ) ) ); + _mm512_stream_si512( (void *)( dst+256UL ), _mm512_loadu_si512( (void const *)( src+256UL ) ) ); + _mm512_stream_si512( (void *)( dst+320UL ), _mm512_loadu_si512( (void const *)( src+320UL ) ) ); + _mm512_stream_si512( (void *)( dst+384UL ), _mm512_loadu_si512( (void const *)( src+384UL ) ) ); + _mm512_stream_si512( (void *)( dst+448UL ), _mm512_loadu_si512( (void const *)( src+448UL ) ) ); +# elif FD_HAS_AVX + _mm256_stream_si256( (void *)( dst+ 0UL ), _mm256_loadu_si256( (void const *)( src+ 0UL ) ) ); + _mm256_stream_si256( (void *)( dst+ 32UL ), _mm256_loadu_si256( (void const *)( src+ 32UL ) ) ); + _mm256_stream_si256( (void *)( dst+ 64UL ), _mm256_loadu_si256( (void const *)( src+ 64UL ) ) ); + _mm256_stream_si256( (void *)( dst+ 96UL ), _mm256_loadu_si256( (void const *)( src+ 96UL ) ) ); + _mm256_stream_si256( (void *)( dst+128UL ), _mm256_loadu_si256( (void const *)( src+128UL ) ) ); + _mm256_stream_si256( (void *)( dst+160UL ), _mm256_loadu_si256( (void const *)( src+160UL ) ) ); + _mm256_stream_si256( (void *)( dst+192UL ), _mm256_loadu_si256( (void const *)( src+192UL ) ) ); + _mm256_stream_si256( (void *)( dst+224UL ), _mm256_loadu_si256( (void const *)( src+224UL ) ) ); + _mm256_stream_si256( (void *)( dst+256UL ), _mm256_loadu_si256( (void const *)( src+256UL ) ) ); + _mm256_stream_si256( (void *)( dst+288UL ), _mm256_loadu_si256( (void const *)( src+288UL ) ) ); + _mm256_stream_si256( (void *)( dst+320UL ), _mm256_loadu_si256( (void const *)( src+320UL ) ) ); + _mm256_stream_si256( (void *)( dst+352UL ), _mm256_loadu_si256( (void const *)( src+352UL ) ) ); + _mm256_stream_si256( (void *)( dst+384UL ), _mm256_loadu_si256( (void const *)( src+384UL ) ) ); + _mm256_stream_si256( (void *)( dst+416UL ), _mm256_loadu_si256( (void const *)( src+416UL ) ) ); + _mm256_stream_si256( (void *)( dst+448UL ), _mm256_loadu_si256( (void const *)( src+448UL ) ) ); + _mm256_stream_si256( (void *)( dst+480UL ), _mm256_loadu_si256( (void const *)( src+480UL ) ) ); +# else +# error "Unsupported AVX/AVX512 configuration" +# endif + src += 512; + dst += 512; + sz -= 512; + } + + _mm_sfence(); +#endif + + /* Tail copy */ +tail: + if( FD_LIKELY( sz ) ) { + fd_memcpy( dst, src, sz ); + } +} + /* fd_snapshot_read_account_chunk reads partial account content. */ static uchar const * @@ -591,7 +661,8 @@ fd_snapshot_read_account_chunk( fd_snapshot_restore_t * restore, ulong data_sz = fd_ulong_min( restore->acc_sz, bufsz ); if( FD_LIKELY( restore->acc_data ) ) { - fd_memcpy( restore->acc_data, buf, data_sz ); + if( restore->use_nt_copy ) fd_snapshot_nt_memcpy( restore->acc_data, buf, data_sz ); + else fd_memcpy ( restore->acc_data, buf, data_sz ); restore->acc_data += data_sz; } if( FD_UNLIKELY( data_sz > restore->accv_sz ) ) diff --git a/src/flamenco/snapshot/fd_snapshot_restore.h b/src/flamenco/snapshot/fd_snapshot_restore.h index aa0d4e1d18..a1fbb9cb05 100644 --- a/src/flamenco/snapshot/fd_snapshot_restore.h +++ b/src/flamenco/snapshot/fd_snapshot_restore.h @@ -150,6 +150,15 @@ fd_snapshot_restore_chunk( void * restore, ulong fd_snapshot_restore_get_slot( fd_snapshot_restore_t * restore ); +/* fd_snapshot_enable_nt_copy enables non-temporal stores for account + data if the target has AVX2 or AVX512. This should only be enabled + if multiple gigabytes worth of account data are going to be restored + without reads to that same data until after the restore operation + completes. */ + +void +fd_snapshot_enable_nt_copy( fd_snapshot_restore_t * restore ); + extern fd_tar_read_vtable_t const fd_snapshot_restore_tar_vt; FD_PROTOTYPES_END diff --git a/src/flamenco/snapshot/fd_snapshot_restore_private.h b/src/flamenco/snapshot/fd_snapshot_restore_private.h index 1aa3a60b95..e6b5c9cc7e 100644 --- a/src/flamenco/snapshot/fd_snapshot_restore_private.h +++ b/src/flamenco/snapshot/fd_snapshot_restore_private.h @@ -70,8 +70,9 @@ struct fd_snapshot_restore { uchar state; uchar manifest_done; + uchar use_nt_copy : 1; uchar status_cache_done : 1; - uchar failed : 1; + uchar failed : 1; /* Buffer params. This buffer is used to gather file content into a contiguous byte array. Currently in use for the manifest and the