|
22 | 22 | #include <photon/common/alog.h> |
23 | 23 | #include <photon/fs/filesystem.h> |
24 | 24 | #include <photon/common/utility.h> |
| 25 | +#ifdef __x86_64__ |
| 26 | +#include <immintrin.h> |
| 27 | +#endif |
25 | 28 | using namespace std; |
26 | 29 |
|
27 | 30 | namespace LSMT { |
@@ -54,6 +57,132 @@ static inline size_t copy_n(IT begin, IT end, uint64_t end_offset, SegmentMappin |
54 | 57 |
|
55 | 58 | static bool verify_mapping_order(const SegmentMapping *pmappings, size_t n); |
56 | 59 |
|
| 60 | +bool is_avx512f_supported() { |
| 61 | +#if defined(__x86_64__) |
| 62 | + __builtin_cpu_init(); |
| 63 | + return __builtin_cpu_supports("avx512f"); |
| 64 | +#else |
| 65 | + return false; |
| 66 | +#endif |
| 67 | +} |
| 68 | + |
| 69 | +const static uint32_t ORDER = 8; |
| 70 | +const static uint32_t MAX_LEVEL = 10; |
| 71 | +static constexpr uint32_t NODES_PER_LEVEL[MAX_LEVEL] = {8, 72, 648, 5832, 52488, 472392, 4251528, 38263752, 344373768, 3099363912}; |
| 72 | +static constexpr uint32_t LEVEL_START_ID[MAX_LEVEL] = {0, 8, 80, 728, 6560, 59048, 531440, 4782968, 43046720, 387420488}; |
| 73 | + |
| 74 | + |
| 75 | +struct DefaultInnerSearch { |
| 76 | + static uint32_t inner_search(const uint64_t *base, uint64_t x) { |
| 77 | + return std::upper_bound(base, base + 8, x) - base; |
| 78 | + } |
| 79 | +}; |
| 80 | + |
| 81 | +#ifdef __x86_64__ |
| 82 | +struct Avx512InnerSearch { |
| 83 | +#ifdef __clang__ |
| 84 | +#pragma clang attribute push (__attribute__((target("avx512f"))), apply_to=function) |
| 85 | +#else // __GNUC__ |
| 86 | +#pragma GCC push_options |
| 87 | +#pragma GCC target ("avx512f") |
| 88 | +#endif |
| 89 | + |
| 90 | + static uint32_t inner_search(const uint64_t *base, uint64_t x) { |
| 91 | + __m512i vx = _mm512_set1_epi64(x); |
| 92 | + __m512i data = _mm512_load_si512(base); |
| 93 | + uint8_t mask = _mm512_cmp_epu64_mask(vx, data, _MM_CMPINT_GE); |
| 94 | + return __builtin_popcount(mask); |
| 95 | + } |
| 96 | + |
| 97 | +#ifdef __clang__ |
| 98 | +#pragma clang attribute pop |
| 99 | +#else // __GNUC__ |
| 100 | +#pragma GCC pop_options |
| 101 | +#endif |
| 102 | +}; |
| 103 | +#else // __x86_64__ |
| 104 | +using Avx512InnerSearch = DefaultInnerSearch; |
| 105 | +#endif |
| 106 | + |
| 107 | +class LinearizedBptree { |
| 108 | +public: |
| 109 | + uint64_t N; |
| 110 | + uint64_t *node = nullptr; |
| 111 | + int32_t DEPTH = -1; |
| 112 | + |
| 113 | + LinearizedBptree() {} |
| 114 | + |
| 115 | + ~LinearizedBptree() { |
| 116 | + free(node); |
| 117 | + } |
| 118 | + |
| 119 | + int build(const vector<SegmentMapping> &mapping) { |
| 120 | + if (mapping.empty()) { |
| 121 | + LOG_ERROR_RETURN(EINVAL, -1, "linearized bptree not used: empty mapping"); |
| 122 | + } |
| 123 | + if (mapping[0].offset != 0) { |
| 124 | + // In a real file system, mapping offset starts from 0. skip for some ut. |
| 125 | + LOG_ERROR_RETURN(EINVAL, -1, "linearized bptree not used: invalid start offset"); |
| 126 | + } |
| 127 | + size_t mapping_size = mapping.size(); |
| 128 | + for (uint32_t i = 0; i < MAX_LEVEL; i++) |
| 129 | + if (NODES_PER_LEVEL[i] >= mapping_size) { |
| 130 | + DEPTH = i+1; |
| 131 | + break; |
| 132 | + } |
| 133 | + if (DEPTH == -1) { |
| 134 | + LOG_ERROR_RETURN(EINVAL, -1, "linearized bptree not used: too many mappings"); |
| 135 | + } |
| 136 | + |
| 137 | + N = (LEVEL_START_ID[DEPTH-1] + mapping_size + ORDER - 1) / ORDER * ORDER; |
| 138 | + LOG_INFO("building Linearized B+tree ", VALUE(DEPTH), VALUE(mapping_size), VALUE(N)); |
| 139 | + auto ret = posix_memalign((void**)&node, 64, N*sizeof(uint64_t)); |
| 140 | + if (ret != 0) { |
| 141 | + LOG_ERRNO_RETURN(ENOBUFS, -1, "linearized bptree not used: failed to alloc memory"); |
| 142 | + } |
| 143 | + |
| 144 | + uint32_t leaf_start = LEVEL_START_ID[DEPTH - 1]; |
| 145 | + uint32_t leaf_size = NODES_PER_LEVEL[DEPTH - 1]; |
| 146 | + |
| 147 | + uint32_t p = leaf_start; |
| 148 | + |
| 149 | + for (auto &mp : mapping) |
| 150 | + node[p++] = mp.offset; |
| 151 | + |
| 152 | + while (p < N) |
| 153 | + node[p++] = -1; |
| 154 | + |
| 155 | + auto G = ORDER; |
| 156 | + for (auto level = DEPTH-1; level > 0; level--) { |
| 157 | + auto pos = LEVEL_START_ID[level - 1]; |
| 158 | + for (uint32_t i = 0; i < leaf_size; i += G * (ORDER + 1)) { |
| 159 | + for (uint32_t j = 1; j <= ORDER; j++) { |
| 160 | + uint32_t lower_id = leaf_start + i + G * j; |
| 161 | + node[pos++] = (lower_id < N) ? node[lower_id] : -1; |
| 162 | + } |
| 163 | + } |
| 164 | + G *= (ORDER + 1); |
| 165 | + } |
| 166 | + LOG_INFO("building Linearized B+tree done"); |
| 167 | + return 0; |
| 168 | + } |
| 169 | + |
| 170 | + template<typename InnerSearchImpl> |
| 171 | + uint32_t search(const uint64_t x) const { |
| 172 | + uint32_t res = 0; |
| 173 | +#pragma GCC unroll 20 |
| 174 | + for (int i = DEPTH; i > 1; --i) { |
| 175 | + auto node_base = node + res; |
| 176 | + uint32_t c = InnerSearchImpl::inner_search(node_base, x); |
| 177 | + res = (ORDER+1)*res + (c+1)*ORDER; |
| 178 | + } |
| 179 | + auto node_base = node + res; |
| 180 | + res += InnerSearchImpl::inner_search(node_base, x); |
| 181 | + res = res - 1 - LEVEL_START_ID[DEPTH-1]; |
| 182 | + return res; |
| 183 | + } |
| 184 | +}; |
| 185 | + |
57 | 186 | class Index : public IMemoryIndex { |
58 | 187 | public: |
59 | 188 | bool ownership = false; |
@@ -157,6 +286,64 @@ class Index : public IMemoryIndex { |
157 | 286 | UNIMPLEMENTED_POINTER(IMemoryIndex *make_read_only_index() const override); |
158 | 287 | }; |
159 | 288 |
|
| 289 | +class IndexLBPT : public Index { |
| 290 | +public: |
| 291 | + LinearizedBptree *lbpt = nullptr; |
| 292 | + |
| 293 | + ~IndexLBPT() { |
| 294 | + safe_delete(lbpt); |
| 295 | + } |
| 296 | + |
| 297 | + IndexLBPT(vector<SegmentMapping> &&m, uint64_t vsize, LinearizedBptree *lbpt) |
| 298 | + : Index(std::move(m), vsize), lbpt(lbpt) { |
| 299 | + } |
| 300 | + |
| 301 | + size_t lookup(Segment s, SegmentMapping *pm, size_t n) const override { |
| 302 | + if (s.length == 0) |
| 303 | + return 0; |
| 304 | + auto lb = pbegin + lbpt->search<DefaultInnerSearch>(s.offset);; |
| 305 | + if (lb->end() <= s.offset) |
| 306 | + lb++; |
| 307 | + |
| 308 | + auto m = copy_n(lb, pend, s.end(), pm, n); |
| 309 | + trim_edge_mappings(pm, m, s); |
| 310 | + return m; |
| 311 | + } |
| 312 | +}; |
| 313 | + |
| 314 | +class IndexLBPTAcc : public IndexLBPT { |
| 315 | +public: |
| 316 | + IndexLBPTAcc(vector<SegmentMapping> &&m, uint64_t vsize, LinearizedBptree *lbpt) |
| 317 | + : IndexLBPT(std::move(m), vsize, lbpt) { |
| 318 | + } |
| 319 | + |
| 320 | + size_t lookup(Segment s, SegmentMapping *pm, size_t n) const override { |
| 321 | + if (s.length == 0) |
| 322 | + return 0; |
| 323 | + auto lb = pbegin + lbpt->search<Avx512InnerSearch>(s.offset); |
| 324 | + if (lb->end() <= s.offset) |
| 325 | + lb++; |
| 326 | + auto m = copy_n(lb, pend, s.end(), pm, n); |
| 327 | + trim_edge_mappings(pm, m, s); |
| 328 | + return m; |
| 329 | + } |
| 330 | +}; |
| 331 | + |
| 332 | +static inline Index* new_index_with_lineriazed_bptree(vector<SegmentMapping> &&m, uint64_t vsize = 0) { |
| 333 | + auto tree = new LinearizedBptree(); |
| 334 | + if (tree->build(m) < 0) { |
| 335 | + delete tree; |
| 336 | + LOG_WARN("failed to build linearized b+tree, failover to binary search"); |
| 337 | + return new Index(std::move(m), vsize); |
| 338 | + } |
| 339 | + |
| 340 | + if (is_avx512f_supported()) { |
| 341 | + LOG_INFO("using accelerated search for linearized b+tree"); |
| 342 | + return new IndexLBPTAcc(std::move(m), vsize, tree); |
| 343 | + } |
| 344 | + return new IndexLBPT(std::move(m), vsize, tree); |
| 345 | +} |
| 346 | + |
160 | 347 | class LevelIndex : public Index { |
161 | 348 | public: |
162 | 349 | vector<vector<uint64_t>> level_mapping; |
@@ -694,6 +881,7 @@ IMemoryIndex *merge_memory_indexes(const IMemoryIndex **pindexes, size_t n) { |
694 | 881 | auto pi = (const Index **)pindexes; |
695 | 882 | mapping.reserve(pi[0]->size()); |
696 | 883 | merge_indexes(0, mapping, pi, n, 0, UINT64_MAX); |
697 | | - return new Index(std::move(mapping), pindexes[0]->vsize()); |
| 884 | + |
| 885 | + return new_index_with_lineriazed_bptree(std::move(mapping), pindexes[0]->vsize()); |
698 | 886 | } |
699 | 887 | } // namespace LSMT |
0 commit comments