Skip to content

Commit 13efaf0

Browse files
authored
feat: Conversion to conventional bitset (#448)
* Conversion to conventional bitset implemented. * Minor cleaning. * Minor fixes * Adding bitset to amalgamation. * Various fixes * Adding C++ guards. * Various fixes * Make free functions safe for NULL ptr * Various fixes * Correcting typo in previous commit (duplicated line) * Corrected new typo * We renamed "hamming" to "roaring_hamming" * We are adopting roaring_trailing_zeroes * Let us modernize our Visual Studio 16 2019 builds. * Switching to windows-2019 * Various fixes... to the AVX-512 kernel, which is unexpected.
1 parent c69b17c commit 13efaf0

23 files changed

+1444
-203
lines changed

.github/workflows/vs16-arm-ci.yml

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,21 @@ permissions:
88
jobs:
99
ci:
1010
name: windows-vs16
11-
runs-on: windows-latest
11+
runs-on: windows-2019
1212
strategy:
1313
fail-fast: false
1414
matrix:
1515
include:
16-
- {arch: ARM}
17-
- {arch: ARM64}
16+
- {gen: Visual Studio 16 2019, arch: ARM}
17+
- {gen: Visual Studio 16 2019, arch: ARM64}
1818
steps:
1919
- name: checkout
2020
uses: actions/checkout@v2
21-
- name: Use cmake
21+
- name: Configure
2222
run: |
23-
cmake -A ${{ matrix.arch }} -DCMAKE_CROSSCOMPILING=1 -B build &&
24-
cmake --build build --verbose
23+
mkdir build
24+
cd build && cmake -G "${{matrix.gen}}" -A ${{matrix.arch}} ..
25+
- name: Build
26+
run: cmake --build build --config Release
27+
- name: Build Debug
28+
run: cmake --build build --config Debug

.github/workflows/vs16-ci.yml

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,21 +8,29 @@ permissions:
88
jobs:
99
ci:
1010
name: windows-vs16
11-
runs-on: windows-latest
11+
runs-on: windows-2019
12+
strategy:
13+
fail-fast: false
14+
matrix:
15+
include:
16+
- {gen: Visual Studio 16 2019, arch: Win32}
17+
- {gen: Visual Studio 16 2019, arch: x64}
1218
steps:
13-
- uses: actions/checkout@v2
14-
- name: 'Run CMake with VS16'
15-
uses: lukka/run-cmake@v2
16-
with:
17-
cmakeListsOrSettingsJson: CMakeListsTxtAdvanced
18-
cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt'
19-
buildDirectory: "${{ github.workspace }}/../../_temp/windows"
20-
cmakeBuildType: Release
21-
buildWithCMake: true
22-
cmakeGenerator: VS16Win64
23-
cmakeAppendedArgs: -DROARING_BUILD_STATIC=ON
24-
buildWithCMakeArgs: --config Release
25-
26-
- name: 'Run CTest'
27-
run: ctest --verbose
28-
working-directory: "${{ github.workspace }}/../../_temp/windows"
19+
- name: checkout
20+
uses: actions/checkout@v2
21+
- name: Configure
22+
run: |
23+
mkdir build
24+
cd build && cmake -G "${{matrix.gen}}" -A ${{matrix.arch}} ..
25+
- name: Build
26+
run: cmake --build build --config Release
27+
- name: Run basic tests
28+
run: |
29+
cd build
30+
ctest -C Release --output-on-failure
31+
- name: Build Debug
32+
run: cmake --build build --config Debug
33+
- name: Run basic tests in Debug
34+
run: |
35+
cd build
36+
ctest -C Debug --output-on-failure

README.md

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,77 @@ int main() {
349349
}
350350
```
351351
352+
# Conventional bitsets (C)
353+
354+
We support conventional (uncompressed) bitsets as part of the library.
355+
356+
Simple example:
357+
358+
```C
359+
bitset_t * b = bitset_create();
360+
bitset_set(b,10);
361+
bitset_get(b,10);// returns true
362+
bitset_free(b); // frees memory
363+
```
364+
365+
More advanced example:
366+
367+
```C
368+
bitset_t *b = bitset_create();
369+
for (int k = 0; k < 1000; ++k) {
370+
bitset_set(b, 3 * k);
371+
}
372+
// We have bitset_count(b) == 1000.
373+
// We have bitset_get(b, 3) is true
374+
// You can iterate through the values:
375+
size_t k = 0;
376+
for (size_t i = 0; bitset_next_set_bit(b, &i); i++) {
377+
// You will have i == k
378+
k += 3;
379+
}
380+
// We support a wide range of operations on two bitsets such as
381+
// bitset_inplace_symmetric_difference(b1,b2);
382+
// bitset_inplace_symmetric_difference(b1,b2);
383+
// bitset_inplace_difference(b1,b2);// should make no difference
384+
// bitset_inplace_union(b1,b2);
385+
// bitset_inplace_intersection(b1,b2);
386+
// bitsets_disjoint
387+
// bitsets_intersect
388+
```
389+
390+
In some instances, you may want to convert a Roaring bitmap into a conventional (uncompressed) bitset.
391+
Indeed, bitsets have advantages such as higher query performance in some cases. The following code
392+
illustrates how you may do so:
393+
394+
```C
395+
roaring_bitmap_t *r1 = roaring_bitmap_create();
396+
for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) {
397+
roaring_bitmap_add(r1, i);
398+
}
399+
for (uint32_t i = 100000; i < 500000; i+= 100) {
400+
roaring_bitmap_add(r1, i);
401+
}
402+
roaring_bitmap_add_range(r1, 500000, 600000);
403+
bitset_t * bitset = bitset_create();
404+
bool success = roaring_bitmap_to_bitset(r1, bitset);
405+
assert(success); // could fail due to memory allocation.
406+
assert(bitset_count(bitset) == roaring_bitmap_get_cardinality(r1));
407+
// You can then query the bitset:
408+
for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) {
409+
assert(bitset_get(bitset,i));
410+
}
411+
for (uint32_t i = 100000; i < 500000; i+= 100) {
412+
assert(bitset_get(bitset,i));
413+
}
414+
// you must free the memory:
415+
bitset_free(bitset);
416+
roaring_bitmap_free(r1);
417+
```
418+
419+
You should be aware that a conventional bitset (`bitset_t *`) may use much more
420+
memory than a Roaring bitmap in some cases. You should run benchmarks to determine
421+
whether the conversion to a bitset has performance benefits in your case.
422+
352423
# Example (C++)
353424

354425

amalgamation.sh

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ DEMOCPP="amalgamation_demo.cpp"
3131
ALL_PUBLIC_H="
3232
$SCRIPTPATH/include/roaring/roaring_version.h
3333
$SCRIPTPATH/include/roaring/roaring_types.h
34+
$SCRIPTPATH/include/roaring/portability.h
35+
$SCRIPTPATH/include/roaring/bitset/bitset.h
3436
$SCRIPTPATH/include/roaring/roaring.h
3537
$SCRIPTPATH/include/roaring/memory.h
3638
"
@@ -47,7 +49,6 @@ $SCRIPTPATH/cpp/roaring64map.hh
4749
# need to be in this order.
4850
#
4951
ALL_PRIVATE_H="
50-
$SCRIPTPATH/include/roaring/portability.h
5152
$SCRIPTPATH/include/roaring/isadetection.h
5253
$SCRIPTPATH/include/roaring/containers/perfparameters.h
5354
$SCRIPTPATH/include/roaring/containers/container_defs.h
@@ -165,13 +166,21 @@ echo "Creating ${DEMOC}..."
165166

166167
cat <<< '
167168
#include <stdio.h>
169+
#include <stdlib.h>
168170
#include "roaring.c"
169171
int main() {
170172
roaring_bitmap_t *r1 = roaring_bitmap_create();
171173
for (uint32_t i = 100; i < 1000; i++) roaring_bitmap_add(r1, i);
172174
printf("cardinality = %d\n", (int) roaring_bitmap_get_cardinality(r1));
173175
roaring_bitmap_free(r1);
174-
return 0;
176+
177+
bitset_t *b = bitset_create();
178+
for (int k = 0; k < 1000; ++k) {
179+
bitset_set(b, 3 * k);
180+
}
181+
printf("%zu \n", bitset_count(b));
182+
bitset_free(b);
183+
return EXIT_SUCCESS;
175184
}
176185
'
177186
} > "${DEMOC}"
@@ -241,10 +250,10 @@ CPPBIN=${DEMOCPP%%.*}
241250
echo "The interface is found in the file 'include/roaring/roaring.h'."
242251
newline
243252
echo "For C, try:"
244-
echo "cc -march=native -O3 -std=c11 -o ${CBIN} ${DEMOC} && ./${CBIN} "
253+
echo "cc -O3 -std=c11 -o ${CBIN} ${DEMOC} && ./${CBIN} "
245254
newline
246255
echo "For C++, try:"
247-
echo "c++ -march=native -O3 -std=c++11 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} "
256+
echo "c++ -O3 -std=c++11 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} "
248257

249258
lowercase(){
250259
echo "$1" | tr 'A-Z' 'a-z'
@@ -256,8 +265,8 @@ newline
256265
echo "You can build a shared library with the following command:"
257266

258267
if [ $OS == "darwin" ]; then
259-
echo "cc -march=native -O3 -std=c11 -shared -o libroaring.dylib -fPIC roaring.c"
268+
echo "cc -O3 -std=c11 -shared -o libroaring.dylib -fPIC roaring.c"
260269
else
261-
echo "cc -march=native -O3 -std=c11 -shared -o libroaring.so -fPIC roaring.c"
270+
echo "cc -O3 -std=c11 -shared -o libroaring.so -fPIC roaring.c"
262271
fi
263272

0 commit comments

Comments
 (0)