@@ -32,5 +32,30 @@ namespace cp_algo {
32
32
[[gnu::always_inline]] inline uint64_t read_bits64 (char const * p) {
33
33
return read_bits (p) | (uint64_t (read_bits (p + 32 )) << 32 );
34
34
}
35
+
36
+ [[gnu::target(" avx2" ), gnu::always_inline]] inline void write_bits (char *p, uint32_t bits) {
37
+ auto bytes = u32x8 () + bits;
38
+ static constexpr u8x32 shuffler = {
39
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
40
+ 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
41
+ 2 , 2 , 2 , 2 , 2 , 2 , 2 , 2 ,
42
+ 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3
43
+ };
44
+ auto shuffled = u8x32 (_mm256_shuffle_epi8 (__m256i () + bits, __m256i (shuffler)));
45
+ static constexpr u8x32 mask = {
46
+ 1 , 2 , 4 , 8 , 16 , 32 , 64 , 128 ,
47
+ 1 , 2 , 4 , 8 , 16 , 32 , 64 , 128 ,
48
+ 1 , 2 , 4 , 8 , 16 , 32 , 64 , 128 ,
49
+ 1 , 2 , 4 , 8 , 16 , 32 , 64 , 128
50
+ };
51
+ u8x32 to_save = (shuffled & mask) ? ' 1' : ' 0' ;
52
+ for (int z = 0 ; z < 32 ; z++) {
53
+ p[z] = to_save[z];
54
+ }
55
+ }
56
+ [[gnu::target(" avx2" ), gnu::always_inline]] inline void write_bits64 (char *p, uint64_t bits) {
57
+ write_bits (p, uint32_t (bits));
58
+ write_bits (p + 32 , uint32_t (bits >> 32 ));
59
+ }
35
60
}
36
61
#endif // CP_ALGO_UTIL_BIT_HPP
0 commit comments