Skip to content

Commit 7a64f18

Browse files
committed
parameterize sub-block size
1 parent e4b8aec commit 7a64f18

File tree

1 file changed

+7
-6
lines changed

1 file changed

+7
-6
lines changed

verify/simd/many_facts.test.cpp

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -28,10 +28,11 @@ void facts_inplace(vector<int> &args) {
2828
}
2929
uint64_t b2x32 = (1ULL << 32) % mod;
3030
uint64_t fact = 1;
31-
for(uint64_t b = 0; b <= limit; b += 4 * block) {
32-
u64x4 cur[4];
33-
static array<u64x4, block / 4> prods[4];
34-
for(int z = 0; z < 4; z++) {
31+
const int K = 4;
32+
for(uint64_t b = 0; b <= limit; b += K * block) {
33+
u64x4 cur[K];
34+
static array<u64x4, block / 4> prods[K];
35+
for(int z = 0; z < K; z++) {
3536
for(int j = 0; j < 4; j++) {
3637
cur[z][j] = b + z * block + j * block / 4;
3738
prods[z][0][j] = cur[z][j] + !(b || z || j);
@@ -42,13 +43,13 @@ void facts_inplace(vector<int> &args) {
4243
}
4344
}
4445
for(int i = 1; i < block / 4; i++) {
45-
for(int z = 0; z < 4; z++) {
46+
for(int z = 0; z < K; z++) {
4647
cur[z] += b2x32;
4748
cur[z] = cur[z] >= mod ? cur[z] - mod : cur[z];
4849
prods[z][i] = montgomery_mul(prods[z][i - 1], cur[z], mod4, imod4);
4950
}
5051
}
51-
for(int z = 0; z < 4; z++) {
52+
for(int z = 0; z < K; z++) {
5253
uint64_t bl = b + z * block;
5354
for(auto i: args_per_block[bl / block]) {
5455
size_t x = args[i];

0 commit comments

Comments
 (0)