From 49dd13190689c399235f58b7bf2aa0d69e7990e3 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Tue, 15 Nov 2022 13:25:56 -0500 Subject: [PATCH] Add roaring_bitmap_memory_size_in_bytes(), with C++ interfaces The new roaring_bitmap_memory_size_in_bytes() function returns the number of in-memory bytes currently used by this Roaring bitmap. Add getMemorySizeInBytes() methods to the C++ Roaring and Roaring64Map classes. Note that the Roaring64Map result is somewhat guesswork since we can't accurately compute the memory used by the STL std::map implementation. --- cpp/roaring.hh | 8 +++++++- cpp/roaring64map.hh | 20 ++++++++++++++++++++ include/roaring/containers/array.h | 8 ++++++++ include/roaring/containers/bitset.h | 9 +++++++++ include/roaring/containers/containers.h | 21 +++++++++++++++++++++ include/roaring/containers/run.h | 8 ++++++++ include/roaring/roaring.h | 5 +++++ include/roaring/roaring_array.h | 5 +++++ src/roaring.c | 4 ++++ src/roaring_array.c | 12 ++++++++++++ 10 files changed, 99 insertions(+), 1 deletion(-) diff --git a/cpp/roaring.hh b/cpp/roaring.hh index 3853ae7b6..57a0d6c04 100644 --- a/cpp/roaring.hh +++ b/cpp/roaring.hh @@ -529,7 +529,6 @@ public: /** * Read a bitmap from a serialized version, reading no more than maxbytes * bytes. This is meant to be compatible with the Java and Go versions. - * */ static Roaring readSafe(const char *buf, size_t maxbytes) { roaring_bitmap_t * r = @@ -540,6 +539,13 @@ public: return Roaring(r); } + /** + * Bytes of memory used by this bitmap. 
+ */ + size_t getMemorySizeInBytes() const { + return api::roaring_bitmap_memory_size_in_bytes(&roaring); + } + /** * How many bytes are required to serialize this bitmap (meant to be * compatible with Java and Go versions) diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh index 1654aef53..791d37def 100644 --- a/cpp/roaring64map.hh +++ b/cpp/roaring64map.hh @@ -1051,6 +1051,26 @@ public: return result; } + /** + * Return the number of bytes of memory used by this bitmap + */ + size_t getMemorySizeInBytes() const { + // Figuring out how much memory is used by a std::map is guesswork. + // A common red/black tree implementation has 3 pointers plus 2 ints + // per element, plus the size of the pair. The size of the Roaring + // struct is included in roarings.getMemorySizeInBytes() so remove it. + constexpr size_t perEntry = 3 * sizeof(void*) + 2 * sizeof(int) + sizeof(std::pair<const uint32_t, Roaring>) - sizeof(Roaring); + + return std::accumulate( + roarings.cbegin(), roarings.cend(), + sizeof(*this), + [=](size_t previous, + const std::pair<const uint32_t, Roaring> &map_entry) { + // add bytes used by each Roaring std::map entry + return previous + perEntry + map_entry.second.getMemorySizeInBytes(); + }); + } + /** * Return the number of bytes required to serialize this bitmap (meant to * be compatible with Java and Go versions) diff --git a/include/roaring/containers/array.h b/include/roaring/containers/array.h index 758703569..1b8c20c19 100644 --- a/include/roaring/containers/array.h +++ b/include/roaring/containers/array.h @@ -202,6 +202,14 @@ int32_t array_container_write(const array_container_t *container, char *buf); int32_t array_container_read(int32_t cardinality, array_container_t *container, const char *buf); +/** + * Return the size in bytes of the memory used by the container. 
+ */ +static inline size_t array_container_memory_size_in_bytes( + const array_container_t *container) { + return sizeof(*container) + (container->capacity * sizeof(container->array[0])); +} + /** * Return the serialized size in bytes of a container (see * bitset_container_write) diff --git a/include/roaring/containers/bitset.h b/include/roaring/containers/bitset.h index 2c9e53061..9744e9726 100644 --- a/include/roaring/containers/bitset.h +++ b/include/roaring/containers/bitset.h @@ -448,6 +448,15 @@ int32_t bitset_container_write(const bitset_container_t *container, char *buf); */ int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container, const char *buf); + +/** + * Return the size in bytes of the memory used by the container. + */ +static inline size_t bitset_container_memory_size_in_bytes( + const bitset_container_t *container) { + return sizeof(*container) + BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); +} + /** * Return the serialized size in bytes of a container (see * bitset_container_write). 
diff --git a/include/roaring/containers/containers.h b/include/roaring/containers/containers.h index ce8f86283..24392ce55 100644 --- a/include/roaring/containers/containers.h +++ b/include/roaring/containers/containers.h @@ -401,6 +401,27 @@ static inline int32_t container_write( return 0; // unreached } +/** + * Get the size in bytes of memory used by the container, requires a + * typecode + */ +static inline size_t container_memory_size_in_bytes( + const container_t *c, uint8_t typecode +){ + c = container_unwrap_shared(c, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE: + return bitset_container_memory_size_in_bytes(const_CAST_bitset(c)); + case ARRAY_CONTAINER_TYPE: + return array_container_memory_size_in_bytes(const_CAST_array(c)); + case RUN_CONTAINER_TYPE: + return run_container_memory_size_in_bytes(const_CAST_run(c)); + } + assert(false); + __builtin_unreachable(); + return 0; // unreached +} + /** * Get the container size in bytes under portable serialization (see * container_write), requires a diff --git a/include/roaring/containers/run.h b/include/roaring/containers/run.h index 793fc01d8..a7f1ac1f5 100644 --- a/include/roaring/containers/run.h +++ b/include/roaring/containers/run.h @@ -474,6 +474,14 @@ int32_t run_container_write(const run_container_t *container, char *buf); int32_t run_container_read(int32_t cardinality, run_container_t *container, const char *buf); +/** + * Return the size in bytes of the memory used by the container. + */ +static inline size_t run_container_memory_size_in_bytes( + const run_container_t *container) { + return sizeof(*container) + (container->capacity * sizeof(container->runs[0])); +} + /** * Return the serialized size in bytes of a container (see run_container_write). * This is meant to be compatible with the Java and Go versions of Roaring. 
diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h index 415152445..7a3ba6c23 100644 --- a/include/roaring/roaring.h +++ b/include/roaring/roaring.h @@ -462,6 +462,11 @@ bool roaring_bitmap_run_optimize(roaring_bitmap_t *r); */ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r); +/** + * Return the number of bytes of memory used by the bitmap. + */ +size_t roaring_bitmap_memory_size_in_bytes(const roaring_bitmap_t *r); + /** * Write the bitmap to an output pointer, this output buffer should refer to * at least `roaring_bitmap_size_in_bytes(r)` allocated bytes. diff --git a/include/roaring/roaring_array.h b/include/roaring/roaring_array.h index 24ce7cad2..9c115b983 100644 --- a/include/roaring/roaring_array.h +++ b/include/roaring/roaring_array.h @@ -202,6 +202,11 @@ void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans); bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans); +/** + * Return the number of bytes of memory used by the bitmap. + */ +size_t ra_memory_size_in_bytes(const roaring_array_t *ra); + /** * write a bitmap to a buffer. 
This is meant to be compatible with * the diff --git a/src/roaring.c b/src/roaring.c index 7479b4720..f503f3e80 100644 --- a/src/roaring.c +++ b/src/roaring.c @@ -1397,6 +1397,10 @@ bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) { return answer; } +size_t roaring_bitmap_memory_size_in_bytes(const roaring_bitmap_t *r) { + return ra_memory_size_in_bytes(&r->high_low_container); +} + size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf) { size_t portablesize = roaring_bitmap_portable_size_in_bytes(r); uint64_t cardinality = roaring_bitmap_get_cardinality(r); diff --git a/src/roaring_array.c b/src/roaring_array.c index 5151e7062..342d58b8a 100644 --- a/src/roaring_array.c +++ b/src/roaring_array.c @@ -520,6 +520,18 @@ bool ra_has_run_container(const roaring_array_t *ra) { return false; } +size_t ra_memory_size_in_bytes(const roaring_array_t *ra) { + size_t count = sizeof(*ra) + ( + ra->allocation_size * (sizeof(void*) + sizeof(uint16_t) + sizeof(uint8_t))); + + for (int32_t k = 0; k < ra->size; ++k) { + count += container_memory_size_in_bytes(ra->containers[k], + ra->typecodes[k]); + } + + return count; +} + uint32_t ra_portable_header_size(const roaring_array_t *ra) { if (ra_has_run_container(ra)) { if (ra->size <