Skip to content

Commit 5472a0e

Browse files
authored
Merge pull request #381 from liulanzheng/main
add linearized b+tree in lsmt
2 parents bcd84a9 + 28144f8 commit 5472a0e

File tree

3 files changed

+194
-4
lines changed

3 files changed

+194
-4
lines changed

.github/workflows/cmake.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ jobs:
5151
working-directory: ${{github.workspace}}/build
5252
shell: bash
5353
run: |
54+
lscpu
5455
sudo make install
5556
sudo cp ${{github.workspace}}/src/example_config/overlaybd-registryv2.json /etc/overlaybd/overlaybd.json
5657
sudo systemctl enable /opt/overlaybd/overlaybd-tcmu.service
@@ -87,6 +88,7 @@ jobs:
8788
ls obd_mp
8889
sudo diff -r --exclude "lost+found" test_data obd_mp
8990
sudo umount obd_mp
91+
cat /var/log/overlaybd.log
9092
- name: E2E Test turboOCIv1
9193
working-directory: ${{github.workspace}}/build
9294
shell: bash

src/overlaybd/lsmt/index.cpp

Lines changed: 189 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
#include <photon/common/alog.h>
2323
#include <photon/fs/filesystem.h>
2424
#include <photon/common/utility.h>
25+
#ifdef __x86_64__
26+
#include <immintrin.h>
27+
#endif
2528
using namespace std;
2629

2730
namespace LSMT {
@@ -54,6 +57,132 @@ static inline size_t copy_n(IT begin, IT end, uint64_t end_offset, SegmentMappin
5457

5558
static bool verify_mapping_order(const SegmentMapping *pmappings, size_t n);
5659

60+
bool is_avx512f_supported() {
61+
#if defined(__x86_64__)
62+
__builtin_cpu_init();
63+
return __builtin_cpu_supports("avx512f");
64+
#else
65+
return false;
66+
#endif
67+
}
68+
69+
const static uint32_t ORDER = 8;
70+
const static uint32_t MAX_LEVEL = 10;
71+
static constexpr uint32_t NODES_PER_LEVEL[MAX_LEVEL] = {8, 72, 648, 5832, 52488, 472392, 4251528, 38263752, 344373768, 3099363912};
72+
static constexpr uint32_t LEVEL_START_ID[MAX_LEVEL] = {0, 8, 80, 728, 6560, 59048, 531440, 4782968, 43046720, 387420488};
73+
74+
75+
struct DefaultInnerSearch {
76+
static uint32_t inner_search(const uint64_t *base, uint64_t x) {
77+
return std::upper_bound(base, base + 8, x) - base;
78+
}
79+
};
80+
81+
#ifdef __x86_64__
82+
struct Avx512InnerSearch {
83+
#ifdef __clang__
84+
#pragma clang attribute push (__attribute__((target("avx512f"))), apply_to=function)
85+
#else // __GNUC__
86+
#pragma GCC push_options
87+
#pragma GCC target ("avx512f")
88+
#endif
89+
90+
static uint32_t inner_search(const uint64_t *base, uint64_t x) {
91+
__m512i vx = _mm512_set1_epi64(x);
92+
__m512i data = _mm512_load_si512(base);
93+
uint8_t mask = _mm512_cmp_epu64_mask(vx, data, _MM_CMPINT_GE);
94+
return __builtin_popcount(mask);
95+
}
96+
97+
#ifdef __clang__
98+
#pragma clang attribute pop
99+
#else // __GNUC__
100+
#pragma GCC pop_options
101+
#endif
102+
};
103+
#else // __x86_64__
104+
using Avx512InnerSearch = DefaultInnerSearch;
105+
#endif
106+
107+
class LinearizedBptree {
108+
public:
109+
uint64_t N;
110+
uint64_t *node = nullptr;
111+
int32_t DEPTH = -1;
112+
113+
LinearizedBptree() {}
114+
115+
~LinearizedBptree() {
116+
free(node);
117+
}
118+
119+
int build(const vector<SegmentMapping> &mapping) {
120+
if (mapping.empty()) {
121+
LOG_ERROR_RETURN(EINVAL, -1, "linearized bptree not used: empty mapping");
122+
}
123+
if (mapping[0].offset != 0) {
124+
// In a real file system, mapping offset starts from 0. skip for some ut.
125+
LOG_ERROR_RETURN(EINVAL, -1, "linearized bptree not used: invalid start offset");
126+
}
127+
size_t mapping_size = mapping.size();
128+
for (uint32_t i = 0; i < MAX_LEVEL; i++)
129+
if (NODES_PER_LEVEL[i] >= mapping_size) {
130+
DEPTH = i+1;
131+
break;
132+
}
133+
if (DEPTH == -1) {
134+
LOG_ERROR_RETURN(EINVAL, -1, "linearized bptree not used: too many mappings");
135+
}
136+
137+
N = (LEVEL_START_ID[DEPTH-1] + mapping_size + ORDER - 1) / ORDER * ORDER;
138+
LOG_INFO("building Linearized B+tree ", VALUE(DEPTH), VALUE(mapping_size), VALUE(N));
139+
auto ret = posix_memalign((void**)&node, 64, N*sizeof(uint64_t));
140+
if (ret != 0) {
141+
LOG_ERRNO_RETURN(ENOBUFS, -1, "linearized bptree not used: failed to alloc memory");
142+
}
143+
144+
uint32_t leaf_start = LEVEL_START_ID[DEPTH - 1];
145+
uint32_t leaf_size = NODES_PER_LEVEL[DEPTH - 1];
146+
147+
uint32_t p = leaf_start;
148+
149+
for (auto &mp : mapping)
150+
node[p++] = mp.offset;
151+
152+
while (p < N)
153+
node[p++] = -1;
154+
155+
auto G = ORDER;
156+
for (auto level = DEPTH-1; level > 0; level--) {
157+
auto pos = LEVEL_START_ID[level - 1];
158+
for (uint32_t i = 0; i < leaf_size; i += G * (ORDER + 1)) {
159+
for (uint32_t j = 1; j <= ORDER; j++) {
160+
uint32_t lower_id = leaf_start + i + G * j;
161+
node[pos++] = (lower_id < N) ? node[lower_id] : -1;
162+
}
163+
}
164+
G *= (ORDER + 1);
165+
}
166+
LOG_INFO("building Linearized B+tree done");
167+
return 0;
168+
}
169+
170+
template<typename InnerSearchImpl>
171+
uint32_t search(const uint64_t x) const {
172+
uint32_t res = 0;
173+
#pragma GCC unroll 20
174+
for (int i = DEPTH; i > 1; --i) {
175+
auto node_base = node + res;
176+
uint32_t c = InnerSearchImpl::inner_search(node_base, x);
177+
res = (ORDER+1)*res + (c+1)*ORDER;
178+
}
179+
auto node_base = node + res;
180+
res += InnerSearchImpl::inner_search(node_base, x);
181+
res = res - 1 - LEVEL_START_ID[DEPTH-1];
182+
return res;
183+
}
184+
};
185+
57186
class Index : public IMemoryIndex {
58187
public:
59188
bool ownership = false;
@@ -157,6 +286,64 @@ class Index : public IMemoryIndex {
157286
UNIMPLEMENTED_POINTER(IMemoryIndex *make_read_only_index() const override);
158287
};
159288

289+
class IndexLBPT : public Index {
290+
public:
291+
LinearizedBptree *lbpt = nullptr;
292+
293+
~IndexLBPT() {
294+
safe_delete(lbpt);
295+
}
296+
297+
IndexLBPT(vector<SegmentMapping> &&m, uint64_t vsize, LinearizedBptree *lbpt)
298+
: Index(std::move(m), vsize), lbpt(lbpt) {
299+
}
300+
301+
size_t lookup(Segment s, SegmentMapping *pm, size_t n) const override {
302+
if (s.length == 0)
303+
return 0;
304+
auto lb = pbegin + lbpt->search<DefaultInnerSearch>(s.offset);;
305+
if (lb->end() <= s.offset)
306+
lb++;
307+
308+
auto m = copy_n(lb, pend, s.end(), pm, n);
309+
trim_edge_mappings(pm, m, s);
310+
return m;
311+
}
312+
};
313+
314+
class IndexLBPTAcc : public IndexLBPT {
315+
public:
316+
IndexLBPTAcc(vector<SegmentMapping> &&m, uint64_t vsize, LinearizedBptree *lbpt)
317+
: IndexLBPT(std::move(m), vsize, lbpt) {
318+
}
319+
320+
size_t lookup(Segment s, SegmentMapping *pm, size_t n) const override {
321+
if (s.length == 0)
322+
return 0;
323+
auto lb = pbegin + lbpt->search<Avx512InnerSearch>(s.offset);
324+
if (lb->end() <= s.offset)
325+
lb++;
326+
auto m = copy_n(lb, pend, s.end(), pm, n);
327+
trim_edge_mappings(pm, m, s);
328+
return m;
329+
}
330+
};
331+
332+
static inline Index* new_index_with_lineriazed_bptree(vector<SegmentMapping> &&m, uint64_t vsize = 0) {
333+
auto tree = new LinearizedBptree();
334+
if (tree->build(m) < 0) {
335+
delete tree;
336+
LOG_WARN("failed to build linearized b+tree, failover to binary search");
337+
return new Index(std::move(m), vsize);
338+
}
339+
340+
if (is_avx512f_supported()) {
341+
LOG_INFO("using accelerated search for linearized b+tree");
342+
return new IndexLBPTAcc(std::move(m), vsize, tree);
343+
}
344+
return new IndexLBPT(std::move(m), vsize, tree);
345+
}
346+
160347
class LevelIndex : public Index {
161348
public:
162349
vector<vector<uint64_t>> level_mapping;
@@ -694,6 +881,7 @@ IMemoryIndex *merge_memory_indexes(const IMemoryIndex **pindexes, size_t n) {
694881
auto pi = (const Index **)pindexes;
695882
mapping.reserve(pi[0]->size());
696883
merge_indexes(0, mapping, pi, n, 0, UINT64_MAX);
697-
return new Index(std::move(mapping), pindexes[0]->vsize());
884+
885+
return new_index_with_lineriazed_bptree(std::move(mapping), pindexes[0]->vsize());
698886
}
699887
} // namespace LSMT

src/overlaybd/lsmt/index.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ IMemoryIndex -> IMemoryIndex0 -> IComboIndex -> Index0 ( set<SegmentMap> ) -> Co
2929
#include <sys/types.h>
3030

3131
namespace LSMT {
32-
struct Segment { // 48 + 18 == 64
33-
uint64_t offset : 50; // offset (0.5 PB if in sector)
34-
uint32_t length : 14; // length (8MB if in sector)
32+
struct Segment {
33+
uint64_t offset : 50;
34+
uint32_t length : 14;
3535
const static uint64_t MAX_OFFSET = (1UL << 50) - 1;
3636
const static uint32_t MAX_LENGTH = (1 << 14) - 1;
3737
const static uint64_t INVALID_OFFSET = MAX_OFFSET;

0 commit comments

Comments
 (0)