Skip to content

Commit fb687b7

Browse files
committed
Use a scalar prefixsum in fwbw for now due to numeric instability
1 parent bc0f9cb commit fb687b7

File tree

1 file changed

+18
-5
lines changed

1 file changed

+18
-5
lines changed

src/alignment/Fwbw.cpp

Lines changed: 18 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -44,13 +44,26 @@ inline void calculate_max4(float& max, float& term1, float& term2, float& term3,
4444
}
4545

4646
// Inclusive prefix sum over the float lanes of `a`: for each lane that is
// accumulated, the result holds the sum of that input lane and all lanes
// before it.
//
// The vector is written to a scratch array, the running sum is built one
// element at a time, and the array is loaded back into a vector. Elements
// 4..7 are only accumulated when AVX2 is defined.
// NOTE(review): assumes simd_float carries 8 floats under AVX2 and 4
// otherwise - confirm against the simd header.
//
// The commented-out shift-and-add lines are the previous SIMD version; the
// commit message says the scalar form is used "for now due to numeric
// instability".
inline simd_float simdf32_prefixsum(simd_float a) {
47-
a = simdf32_add(a, simdi_i2fcast(simdi8_shiftl(simdf_f2icast(a), 4)));
48-
a = simdf32_add(a, simdi_i2fcast(simdi8_shiftl(simdf_f2icast(a), 8)));
49-
47+
// a = simdf32_add(a, simdi_i2fcast(simdi8_shiftl(simdf_f2icast(a), 4)));
48+
// a = simdf32_add(a, simdi_i2fcast(simdi8_shiftl(simdf_f2icast(a), 8)));
49+
// #ifdef AVX2
50+
// a = simdf32_add(a, simdi_i2fcast(simdi8_shiftl(simdf_f2icast(a), 16)));
51+
// #endif
52+
// return a;
53+
float buf[8]; // 8 floats: large enough for the AVX2 path, which touches buf[0..7]
54+
simdf32_storeu(buf, a); // copy the vector lanes into buf (presumably an unaligned store - confirm)
55+
56+
buf[1] += buf[0]; // serial running sum: each element adds the already-accumulated one before it
57+
buf[2] += buf[1];
58+
buf[3] += buf[2];
5059
#ifdef AVX2
51-
a = simdf32_add(a, simdi_i2fcast(simdi8_shiftl(simdf_f2icast(a), 16)));
60+
buf[4] += buf[3]; // continue the same running sum into elements 4..7
61+
buf[5] += buf[4];
62+
buf[6] += buf[5];
63+
buf[7] += buf[6];
5264
#endif
53-
return a;
65+
66+
return simdf32_loadu(buf); // load the accumulated values back into a vector
5467
}
5568

5669
// FwBwAligner Constructor for general case: use profile scoring matrix

0 commit comments

Comments
 (0)