From 5c9c4d818b8dedf5a87bf1d87e4592a258e7f6ae Mon Sep 17 00:00:00 2001
From: Angus Stewart
Date: Wed, 16 Jul 2025 00:22:03 +0200
Subject: [PATCH] Add CRC checks and tests

Validate chunk and attachment checksums while reading.

MCAP stores CRC-32 (IEEE) checksums, so the helper wraps zlib.crc32
instead of pulling in a CRC-32C implementation. A stored CRC of zero
means the writer did not compute one and is accepted without
validation. The attachment CRC covers every field that precedes it in
the record, not just the data payload.
---
 src/pybag/mcap/crc_utils.py     |  14 ++++
 src/pybag/mcap/record_reader.py |  21 ++++++
 src/pybag/mcap_reader.py        |  13 +++-
 tests/test_mcap_records.py      | 105 ++++++++++++++++++++++++++++++-
 4 files changed, 148 insertions(+), 5 deletions(-)
 create mode 100644 src/pybag/mcap/crc_utils.py

diff --git a/src/pybag/mcap/crc_utils.py b/src/pybag/mcap/crc_utils.py
new file mode 100644
index 0000000..0402e30
--- /dev/null
+++ b/src/pybag/mcap/crc_utils.py
@@ -0,0 +1,14 @@
+"""CRC helpers for validating MCAP records."""
+
+import zlib
+
+__all__ = ["crc32", "CrcMismatchError"]
+
+
+class CrcMismatchError(Exception):
+    """Raised when a CRC check fails."""
+
+
+def crc32(data: bytes) -> int:
+    """Compute the CRC-32 (IEEE) checksum that MCAP stores in its records."""
+    return zlib.crc32(data)
diff --git a/src/pybag/mcap/record_reader.py b/src/pybag/mcap/record_reader.py
index 57effe6..b5cba1b 100644
--- a/src/pybag/mcap/record_reader.py
+++ b/src/pybag/mcap/record_reader.py
@@ -4,6 +4,7 @@
 from typing import Any, Callable, Iterator
 
 from pybag.io.raw_reader import BaseReader
+from pybag.mcap.crc_utils import CrcMismatchError, crc32
 from pybag.mcap.records import (
     AttachmentIndexRecord,
     AttachmentRecord,
@@ -346,6 +347,26 @@ def parse_attachment(cls, file: BaseReader) -> AttachmentRecord:
         _, data_bytes = cls._parse_bytes(file, data_bytes_length)
         _, crc = cls._parse_uint32(file)
 
+        # MCAP reserves a stored CRC of zero for "not computed".
+        if crc != 0:
+            # The CRC covers every field that precedes it in the record, so
+            # rebuild those bytes in wire order: little-endian integers,
+            # uint32 length-prefixed strings, uint64 length-prefixed data.
+            name_bytes = name.encode("utf-8")
+            media_type_bytes = media_type.encode("utf-8")
+            preceding = b"".join((
+                log_time.to_bytes(8, "little"),
+                create_time.to_bytes(8, "little"),
+                len(name_bytes).to_bytes(4, "little"),
+                name_bytes,
+                len(media_type_bytes).to_bytes(4, "little"),
+                media_type_bytes,
+                data_bytes_length.to_bytes(8, "little"),
+                data_bytes,
+            ))
+            if crc32(preceding) != crc:
+                raise CrcMismatchError("Attachment CRC does not match")
+
         return AttachmentRecord(log_time, create_time, name, media_type, data_bytes, crc)
 
 
diff --git a/src/pybag/mcap_reader.py b/src/pybag/mcap_reader.py
index 09f602d..bee6528 100644
--- a/src/pybag/mcap_reader.py
+++ b/src/pybag/mcap_reader.py
@@ -7,6 +7,7 @@
 
 from pybag.encoding.cdr import CdrDecoder
 from pybag.io.raw_reader import BaseReader, BytesReader, FileReader
+from pybag.mcap.crc_utils import CrcMismatchError, crc32
 from pybag.mcap.record_reader import (
     FOOTER_SIZE,
     MAGIC_BYTES_SIZE,
@@ -70,16 +71,22 @@ def decompress_chunk(chunk: ChunkRecord) -> bytes:
     """Decompress the records field of a chunk."""
     if chunk.compression == 'zstd':
         import zstandard as zstd
-        return zstd.ZstdDecompressor().decompress(chunk.records)
+        data = zstd.ZstdDecompressor().decompress(chunk.records)
     elif chunk.compression == 'lz4':
         import lz4.frame
-        return lz4.frame.decompress(chunk.records)
+        data = lz4.frame.decompress(chunk.records)
     elif chunk.compression == '':
-        return chunk.records
+        data = chunk.records
     else:
         error_msg = f'Unknown compression type: {chunk.compression}'
         raise McapUnknownCompressionError(error_msg)
 
+    # MCAP reserves an uncompressed_crc of zero for "do not validate".
+    if chunk.uncompressed_crc != 0 and crc32(data) != chunk.uncompressed_crc:
+        raise CrcMismatchError('Chunk CRC does not match')
+
+    return data
+
 
 def decode_message(message: MessageRecord, schema: SchemaRecord) -> dict:
     """Decode a message using a schema."""
diff --git a/tests/test_mcap_records.py b/tests/test_mcap_records.py
index 690d217..9d3147f 100644
--- a/tests/test_mcap_records.py
+++ b/tests/test_mcap_records.py
@@ -1,5 +1,8 @@
+import pytest
+
 from pybag.io.raw_reader import BytesReader
 from pybag.io.raw_writer import BytesWriter
+from pybag.mcap.crc_utils import CrcMismatchError, crc32
 from pybag.mcap.record_reader import McapRecordReader
 from pybag.mcap.record_writer import McapRecordWriter
 from pybag.mcap.records import (
@@ -19,6 +22,7 @@
     StatisticsRecord,
     SummaryOffsetRecord
 )
+from pybag.mcap_reader import decompress_chunk
 
 
 def test_header_encode_decode():
@@ -71,7 +75,7 @@ def test_chunk_encode_decode():
         message_start_time=1,
         message_end_time=2,
         uncompressed_size=3,
-        uncompressed_crc=4,
+        uncompressed_crc=crc32(b"records"),
         compression="",
         records=b"records",
     )
@@ -82,6 +86,48 @@ def test_chunk_encode_decode():
     assert parsed == record
 
 
+def test_crc32_matches_standard_check_value() -> None:
+    # 0xCBF43926 is the published CRC-32 (IEEE) check value for "123456789".
+    assert crc32(b"123456789") == 0xCBF43926
+
+
+def test_decompress_chunk_crc() -> None:
+    good_crc = crc32(b"records")
+    record = ChunkRecord(
+        message_start_time=1,
+        message_end_time=2,
+        uncompressed_size=3,
+        uncompressed_crc=good_crc,
+        compression="",
+        records=b"records",
+    )
+    assert decompress_chunk(record) == b"records"
+
+    bad = ChunkRecord(
+        message_start_time=1,
+        message_end_time=2,
+        uncompressed_size=3,
+        # Zero means "no CRC", so use a non-zero value that cannot match.
+        uncompressed_crc=1 if good_crc != 1 else 2,
+        compression="",
+        records=b"records",
+    )
+    with pytest.raises(CrcMismatchError):
+        decompress_chunk(bad)
+
+
+def test_decompress_chunk_skips_zero_crc() -> None:
+    record = ChunkRecord(
+        message_start_time=1,
+        message_end_time=2,
+        uncompressed_size=3,
+        uncompressed_crc=0,
+        compression="",
+        records=b"records",
+    )
+    assert decompress_chunk(record) == b"records"
+
+
 def test_message_index_encode_decode():
     record = MessageIndexRecord(channel_id=1, records=[(1, 2), (3, 4)])
     writer = BytesWriter()
@@ -117,7 +163,7 @@ def test_attachment_encode_decode():
         name="file",
         media_type="text/plain",
         data=b"payload",
-        crc=3,
+        crc=_attachment_crc(1, 2, "file", "text/plain", b"payload"),
     )
     writer = BytesWriter()
     McapRecordWriter.write_attachment(writer, record)
@@ -126,6 +172,61 @@ def test_attachment_encode_decode():
     assert parsed == record
 
 
+def _attachment_crc(
+    log_time: int,
+    create_time: int,
+    name: str,
+    media_type: str,
+    data: bytes,
+) -> int:
+    """Return the CRC-32 of the attachment fields that precede ``crc``."""
+    name_bytes = name.encode("utf-8")
+    media_type_bytes = media_type.encode("utf-8")
+    return crc32(b"".join((
+        log_time.to_bytes(8, "little"),
+        create_time.to_bytes(8, "little"),
+        len(name_bytes).to_bytes(4, "little"),
+        name_bytes,
+        len(media_type_bytes).to_bytes(4, "little"),
+        media_type_bytes,
+        len(data).to_bytes(8, "little"),
+        data,
+    )))
+
+
+def test_attachment_crc_mismatch() -> None:
+    good_crc = _attachment_crc(1, 2, "file", "text/plain", b"bad")
+    record = AttachmentRecord(
+        log_time=1,
+        create_time=2,
+        name="file",
+        media_type="text/plain",
+        data=b"bad",
+        # Zero means "no CRC", so use a non-zero value that cannot match.
+        crc=1 if good_crc != 1 else 2,
+    )
+    writer = BytesWriter()
+    McapRecordWriter.write_attachment(writer, record)
+    reader = BytesReader(writer.as_bytes())
+    with pytest.raises(CrcMismatchError):
+        McapRecordReader.parse_attachment(reader)
+
+
+def test_attachment_zero_crc_is_not_validated() -> None:
+    record = AttachmentRecord(
+        log_time=1,
+        create_time=2,
+        name="file",
+        media_type="text/plain",
+        data=b"payload",
+        crc=0,
+    )
+    writer = BytesWriter()
+    McapRecordWriter.write_attachment(writer, record)
+    reader = BytesReader(writer.as_bytes())
+    assert McapRecordReader.parse_attachment(reader) == record
+
+
 def test_metadata_encode_decode():
     record = MetadataRecord(name="meta", metadata={"k": "v"})
     writer = BytesWriter()