Skip to content

Commit 1d5eb99

Browse files
authored
Deserialization/validation unit tests (#665)
* test: additional testing for deserialization Add tests to validate that deserialization and validation will catch invalid bitmaps. * fix: don't allow negative container counts getting deseriaize_size If we allow negative container counts, we can end up walking backwards before the beginning of the buffer. * test: run container deserialization unit tests
1 parent 22e7d57 commit 1d5eb99

File tree

2 files changed

+278
-35
lines changed

2 files changed

+278
-35
lines changed

src/roaring_array.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -637,7 +637,7 @@ size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) {
637637
memcpy(&size, buf, sizeof(int32_t));
638638
buf += sizeof(uint32_t);
639639
}
640-
if (size > (1 << 16)) {
640+
if (size > (1 << 16) || size < 0) {
641641
return 0;
642642
}
643643
char *bitmapOfRunContainers = NULL;

tests/robust_deserialization_unit.c

Lines changed: 277 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#include "config.h"
1515
#include "test.h"
1616

17+
#define MAX_CONTAINERS (1 << 16)
18+
1719
long filesize(FILE* fp) {
1820
fseek(fp, 0L, SEEK_END);
1921
return ftell(fp);
@@ -94,66 +96,294 @@ int test_deserialize(const char* filename) {
9496
}
9597

9698
DEFINE_TEST(test_robust_deserialize1) {
97-
char filename[1024];
99+
test_deserialize(TEST_DATA_DIR "crashproneinput1.bin");
100+
}
98101

99-
strcpy(filename, TEST_DATA_DIR);
100-
strcat(filename, "crashproneinput1.bin");
102+
DEFINE_TEST(test_robust_deserialize2) {
103+
test_deserialize(TEST_DATA_DIR "crashproneinput2.bin");
104+
}
101105

102-
test_deserialize(filename);
106+
DEFINE_TEST(test_robust_deserialize3) {
107+
test_deserialize(TEST_DATA_DIR "crashproneinput3.bin");
103108
}
104109

105-
DEFINE_TEST(test_robust_deserialize2) {
106-
char filename[1024];
110+
DEFINE_TEST(test_robust_deserialize4) {
111+
test_deserialize(TEST_DATA_DIR "crashproneinput4.bin");
112+
}
107113

108-
strcpy(filename, TEST_DATA_DIR);
109-
strcat(filename, "crashproneinput2.bin");
114+
DEFINE_TEST(test_robust_deserialize5) {
115+
test_deserialize(TEST_DATA_DIR "crashproneinput5.bin");
116+
}
110117

111-
test_deserialize(filename);
118+
DEFINE_TEST(test_robust_deserialize6) {
119+
test_deserialize(TEST_DATA_DIR "crashproneinput6.bin");
112120
}
113121

114-
DEFINE_TEST(test_robust_deserialize3) {
115-
char filename[1024];
122+
DEFINE_TEST(test_robust_deserialize7) {
123+
test_deserialize(TEST_DATA_DIR "crashproneinput7.bin");
124+
}
116125

117-
strcpy(filename, TEST_DATA_DIR);
118-
strcat(filename, "crashproneinput3.bin");
126+
static void invalid_deserialize_test(const void* data, size_t size,
127+
const char* description) {
128+
// Ensure that the data _looks_ like a valid bitmap, but is not.
129+
size_t serialized_size =
130+
roaring_bitmap_portable_deserialize_size(data, size);
131+
if (serialized_size != size) {
132+
fail_msg("expected size %zu, got %zu", size, serialized_size);
133+
}
134+
// If we truncate the data by one byte, we should get a size of 0
135+
assert_int_equal(roaring_bitmap_portable_deserialize_size(data, size - 1),
136+
0);
137+
roaring_bitmap_t* bitmap =
138+
roaring_bitmap_portable_deserialize_safe(data, size);
139+
if (bitmap != NULL) {
140+
if (roaring_bitmap_internal_validate(bitmap, NULL)) {
141+
fail_msg("Validation must fail if a bitmap was returned, %s",
142+
description);
143+
}
144+
roaring_bitmap_free(bitmap);
145+
}
146+
// Truncated data will never return a bitmap
147+
bitmap = roaring_bitmap_portable_deserialize_safe(data, size - 1);
148+
assert_null(bitmap);
149+
}
119150

120-
test_deserialize(filename);
151+
static void valid_deserialize_test(const void* data, size_t size) {
152+
// Ensure that the data _looks_ like a valid bitmap, but is not.
153+
size_t serialized_size =
154+
roaring_bitmap_portable_deserialize_size(data, size);
155+
if (serialized_size != size) {
156+
fail_msg("expected size %zu, got %zu", size, serialized_size);
157+
}
158+
// If we truncate the data by one byte, we should get a size of 0
159+
assert_int_equal(roaring_bitmap_portable_deserialize_size(data, size - 1),
160+
0);
161+
roaring_bitmap_t* bitmap =
162+
roaring_bitmap_portable_deserialize_safe(data, size);
163+
assert_non_null(bitmap);
164+
assert_true(roaring_bitmap_internal_validate(bitmap, NULL));
165+
roaring_bitmap_free(bitmap);
121166
}
122167

123-
DEFINE_TEST(test_robust_deserialize4) {
124-
char filename[1024];
168+
DEFINE_TEST(deserialize_negative_container_count) {
169+
// clang-format off
170+
const uint8_t data[] = {
171+
0x3A, 0x30, 0, 0, // Serial Cookie No Run Container
172+
0x00, 0x00, 0x00, 0x80, // Container count (NEGATIVE)
173+
};
174+
// clang-format on
175+
assert_int_equal(roaring_bitmap_portable_deserialize_size((const char*)data,
176+
sizeof(data)),
177+
0);
178+
roaring_bitmap_t* bitmap = roaring_bitmap_portable_deserialize_safe(
179+
(const char*)data, sizeof(data));
180+
assert_null(bitmap);
181+
}
125182

126-
strcpy(filename, TEST_DATA_DIR);
127-
strcat(filename, "crashproneinput4.bin");
183+
DEFINE_TEST(deserialize_huge_container_count) {
184+
// clang-format off
185+
const uint8_t data_begin[] = {
186+
0x3A, 0x30, 0, 0, // Serial Cookie No Run Container
187+
0x00, 0x00, 0x01, 0x00, // Container count (MAX_CONTAINERS + 1)
188+
};
189+
// clang-format on
190+
#define EXTRA_DATA(num_containers) \
191+
((num_containers) * (3 * sizeof(uint16_t) + sizeof(uint32_t)))
192+
193+
// For each container, 32 bits for container offset,
194+
// 16 bits each for a key, cardinality - 1, and a value
195+
uint8_t data[sizeof(data_begin) + EXTRA_DATA(MAX_CONTAINERS + 1)];
196+
memcpy(data, data_begin, sizeof(data_begin));
197+
memset(data + sizeof(data_begin), 0, EXTRA_DATA(MAX_CONTAINERS + 1));
198+
199+
size_t valid_size = sizeof(data_begin) + EXTRA_DATA(MAX_CONTAINERS);
200+
assert_int_equal(
201+
roaring_bitmap_portable_deserialize_size((const char*)data, valid_size),
202+
valid_size);
203+
204+
// Add an extra container
205+
data[4] += 1;
206+
assert_int_equal(roaring_bitmap_portable_deserialize_size((const char*)data,
207+
sizeof(data)),
208+
0);
209+
roaring_bitmap_t* bitmap = roaring_bitmap_portable_deserialize_safe(
210+
(const char*)data, sizeof(data));
211+
assert_null(bitmap);
212+
#undef EXTRA_DATA
213+
}
128214

129-
test_deserialize(filename);
215+
DEFINE_TEST(deserialize_run_container_empty) {
216+
// clang-format off
217+
const uint8_t data[] = {
218+
0x3B, 0x30, // Serial Cookie
219+
0, 0, // Container count - 1
220+
0x01, // Run Flag Bitset (single container is a run)
221+
0, 0, // Upper 16 bits of the first container
222+
0, 0, // Cardinality - 1 of the first container
223+
0, 0, // First Container - Number of runs
224+
};
225+
// clang-format on
226+
invalid_deserialize_test(data, sizeof(data), "empty run container");
130227
}
131228

132-
DEFINE_TEST(test_robust_deserialize5) {
133-
char filename[1024];
229+
DEFINE_TEST(deserialize_run_container_should_combine) {
230+
// clang-format off
231+
const uint8_t data[] = {
232+
0x3B, 0x30, // Serial Cookie
233+
0, 0, // Container count - 1
234+
0x01, // Run Flag Bitset (single container is a run)
235+
0, 0, // Upper 16 bits of the first container
236+
1, 0, // Cardinality - 1 of the first container
237+
2, 0, // First Container - Number of runs
238+
0, 0, // First run start
239+
0, 0, // First run length - 1
240+
1, 0, // Second run start (STARTS AT THE END OF THE FIRST)
241+
0, 0, // Second run length - 1
242+
};
243+
// clang-format on
244+
invalid_deserialize_test(data, sizeof(data),
245+
"ranges shouldn't be contiguous");
246+
}
134247

135-
strcpy(filename, TEST_DATA_DIR);
136-
strcat(filename, "crashproneinput5.bin");
248+
DEFINE_TEST(deserialize_run_container_overlap) {
249+
// clang-format off
250+
const uint8_t data[] = {
251+
0x3B, 0x30, // Serial Cookie
252+
0, 0, // Container count - 1
253+
0x01, // Run Flag Bitset (single container is a run)
254+
0, 0, // Upper 16 bits of the first container
255+
4, 0, // Cardinality - 1 of the first container
256+
2, 0, // First Container - Number of runs
257+
0, 0, // First run start
258+
4, 0, // First run length - 1
259+
1, 0, // Second run start (STARTS INSIDE THE FIRST)
260+
0, 0, // Second run length - 1
261+
};
262+
// clang-format on
263+
invalid_deserialize_test(data, sizeof(data), "overlapping ranges");
264+
}
137265

138-
test_deserialize(filename);
266+
DEFINE_TEST(deserialize_run_container_overflow) {
267+
// clang-format off
268+
const uint8_t data[] = {
269+
0x3B, 0x30, // Serial Cookie
270+
0, 0, // Container count - 1
271+
0x01, // Run Flag Bitset (single container is a run)
272+
0, 0, // Upper 16 bits of the first container
273+
4, 0, // Cardinality - 1 of the first container
274+
1, 0, // First Container - Number of runs
275+
0xFE, 0xFF, // First run start
276+
4, 0, // First run length - 1 (OVERFLOW)
277+
};
278+
// clang-format on
279+
invalid_deserialize_test(data, sizeof(data), "run length overflow");
139280
}
140281

141-
DEFINE_TEST(test_robust_deserialize6) {
142-
char filename[1024];
282+
DEFINE_TEST(deserialize_run_container_incorrect_cardinality_still_allowed) {
283+
// clang-format off
284+
const uint8_t data[] = {
285+
0x3B, 0x30, // Serial Cookie
286+
0, 0, // Container count - 1
287+
0x01, // Run Flag Bitset (single container is a run)
288+
0, 0, // Upper 16 bits of the first container
289+
0, 0, // Cardinality - 1 of the first container
290+
1, 0, // First Container - Number of runs
291+
0, 0, // First run start
292+
8, 0, // First run length - 1 (9 items, but cardinality is 1)
293+
};
294+
// clang-format on
143295

144-
strcpy(filename, TEST_DATA_DIR);
145-
strcat(filename, "crashproneinput6.bin");
296+
// The cardinality doesn't match the actual number of items in the run,
297+
// but the implementation ignores the cardinality field.
298+
valid_deserialize_test(data, sizeof(data));
299+
}
146300

147-
test_deserialize(filename);
301+
DEFINE_TEST(deserialize_duplicate_keys) {
302+
// clang-format off
303+
const char data[] = {
304+
0x3B, 0x30, // Serial Cookie
305+
1, 0, // Container count - 1
306+
0, // Run Flag Bitset (no runs)
307+
0, 0, // Upper 16 bits of the first container
308+
0, 0, // Cardinality - 1 of the first container
309+
0, 0, // Upper 16 bits of the second container - DUPLICATE
310+
0, 0, // Cardinality - 1 of the second container
311+
0, 0, // Only value of first container
312+
0, 0, // Only value of second container
313+
};
314+
// clang-format on
315+
invalid_deserialize_test(data, sizeof(data), "overlapping keys");
148316
}
149317

150-
DEFINE_TEST(test_robust_deserialize7) {
151-
char filename[1024];
318+
DEFINE_TEST(deserialize_unsorted_keys) {
319+
// clang-format off
320+
const char data[] = {
321+
0x3B, 0x30, // Serial Cookie
322+
1, 0, // Container count - 1
323+
0, // Run Flag Bitset (no runs)
324+
1, 0, // Upper 16 bits of the first container
325+
0, 0, // Cardinality - 1 of the first container
326+
0, 0, // Upper 16 bits of the second container (LESS THAN FIRST)
327+
0, 0, // Cardinality - 1 of the second container
328+
0, 0, // Only value of first container
329+
0, 0, // Only value of second container
330+
};
331+
// clang-format on
332+
invalid_deserialize_test(data, sizeof(data), "unsorted keys");
333+
}
334+
335+
DEFINE_TEST(deserialize_duplicate_array) {
336+
// clang-format off
337+
const char data[] = {
338+
0x3B, 0x30, // Serial Cookie
339+
0, 0, // Container count - 1
340+
0, // Run Flag Bitset (no runs)
341+
0, 0, // Upper 16 bits of the first container
342+
1, 0, // Cardinality - 1 of the first container
343+
1, 0, // first value of first container
344+
0, 0, // second value of first container (LESS THAN FIRST)
345+
};
346+
// clang-format on
347+
invalid_deserialize_test(data, sizeof(data), "duplicate array values");
348+
}
349+
350+
DEFINE_TEST(deserialize_unsorted_array) {
351+
// clang-format off
352+
const char data[] = {
353+
0x3B, 0x30, // Serial Cookie
354+
0, 0, // Container count - 1
355+
0, // Run Flag Bitset (no runs)
356+
0, 0, // Upper 16 bits of the first container
357+
1, 0, // Cardinality - 1 of the first container
358+
0, 0, // first value of first container
359+
0, 0, // second value of first container (DUPLICATE)
360+
};
361+
// clang-format on
362+
invalid_deserialize_test(data, sizeof(data), "duplicate array values");
363+
}
364+
365+
DEFINE_TEST(deserialize_bitset_incorrect_cardinality) {
366+
// clang-format off
367+
const uint8_t data_begin[] = {
368+
0x3B, 0x30, // Serial Cookie
369+
0, 0, // Container count - 1
370+
0, // Run Flag Bitset (no runs)
371+
0, 0, // Upper 16 bits of the first container
372+
0xFF, 0xFF, // Cardinality - 1 of the first container.
373+
374+
// First container is a bitset, should be followed by 1 << 16 bits
375+
};
376+
// clang-format on
377+
uint8_t data[sizeof(data_begin) + (1 << 16) / 8];
378+
379+
memcpy(data, data_begin, sizeof(data_begin));
380+
memset(data + sizeof(data_begin), 0xFF, (1 << 16) / 8);
152381

153-
strcpy(filename, TEST_DATA_DIR);
154-
strcat(filename, "crashproneinput7.bin");
382+
valid_deserialize_test(data, sizeof(data));
155383

156-
test_deserialize(filename);
384+
data[sizeof(data) - 1] = 0xFE;
385+
invalid_deserialize_test(data, sizeof(data),
386+
"Incorrect bitset cardinality");
157387
}
158388

159389
int main() {
@@ -170,6 +400,19 @@ int main() {
170400
cmocka_unit_test(test_robust_deserialize5),
171401
cmocka_unit_test(test_robust_deserialize6),
172402
cmocka_unit_test(test_robust_deserialize7),
403+
cmocka_unit_test(deserialize_negative_container_count),
404+
cmocka_unit_test(deserialize_huge_container_count),
405+
cmocka_unit_test(deserialize_duplicate_keys),
406+
cmocka_unit_test(deserialize_unsorted_keys),
407+
cmocka_unit_test(deserialize_duplicate_array),
408+
cmocka_unit_test(deserialize_unsorted_array),
409+
cmocka_unit_test(deserialize_bitset_incorrect_cardinality),
410+
cmocka_unit_test(deserialize_run_container_empty),
411+
cmocka_unit_test(deserialize_run_container_should_combine),
412+
cmocka_unit_test(deserialize_run_container_overlap),
413+
cmocka_unit_test(deserialize_run_container_overflow),
414+
cmocka_unit_test(
415+
deserialize_run_container_incorrect_cardinality_still_allowed),
173416
};
174417

175418
return cmocka_run_group_tests(tests, NULL, NULL);

0 commit comments

Comments
 (0)