Skip to content

Commit b6e5ccf

Browse files
committed
feat: Add support for BrotliDecode filter (PDF 2.0)
Implements the BrotliDecode filter as specified in ISO 32000-2:2020, Section 7.4.11. Adds necessary constants, integrates the filter into the decoding logic, includes brotli as an optional dependency, adds unit tests, and updates documentation. Closes py-pdf#3223
1 parent 96ba79c commit b6e5ccf

File tree

7 files changed

+103
-2
lines changed

7 files changed

+103
-2
lines changed

CHANGELOG.md

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# CHANGELOG
22

3+
## Unreleased
4+
5+
### New Features (ENH)
6+
- Add support for BrotliDecode filter (PDF 2.0) (#3223)
7+
38
## Version 5.4.0, 2025-03-16
49

510
### New Features (ENH)
@@ -1680,7 +1685,7 @@ e.g. Russian / Chinese / Japanese / Korean / Arabic.
16801685
### Documentation (DOC)
16811686
- Notes on annotations (#982)
16821687
- Who uses PyPDF2
1683-
- intendet ➔ in robustness page (#958)
1688+
- inteted ➔ in robustness page (#958)
16841689

16851690
### Maintenance (MAINT)
16861691
- pre-commit / requirements.txt updates (#977)

docs/modules/constants.rst

+15
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
Constants
22
---------
33

4+
.. autoclass:: pypdf.constants.StrEnum
5+
:members:
6+
:undoc-members:
7+
:show-inheritance:
8+
49
.. autoclass:: pypdf.constants.AnnotationFlag
510
:members:
611
:undoc-members:
@@ -26,3 +31,13 @@ Constants
2631
:undoc-members:
2732
:exclude-members: FT, Parent, Kids, T, TU, TM, V, DV, AA, Opt, attributes, attributes_dict
2833
:show-inheritance:
34+
35+
.. autoclass:: pypdf.constants.FilterTypes
36+
:members:
37+
:undoc-members:
38+
:show-inheritance:
39+
40+
.. autoclass:: pypdf.constants.FilterTypeAbbreviations
41+
:members:
42+
:undoc-members:
43+
:show-inheritance:

pypdf/constants.py

+2
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ class FilterTypes(StrEnum):
245245
CCITT_FAX_DECODE = "/CCITTFaxDecode" # abbreviation: CCF
246246
DCT_DECODE = "/DCTDecode" # abbreviation: DCT
247247
JPX_DECODE = "/JPXDecode"
248+
BROTLI_DECODE = "/BrotliDecode" # abbreviation: Br, PDF 2.0
248249

249250

250251
class FilterTypeAbbreviations:
@@ -257,6 +258,7 @@ class FilterTypeAbbreviations:
257258
RL = "/RL"
258259
CCF = "/CCF"
259260
DCT = "/DCT"
261+
BR = "/Br" # BrotliDecode
260262

261263

262264
class LzwFilterParameters:

pypdf/filters.py

+51
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@
6565
NullObject,
6666
)
6767

68+
try:
69+
import brotli
70+
except ImportError:
71+
brotli = None
72+
6873

6974
def decompress(data: bytes) -> bytes:
7075
"""
@@ -481,6 +486,50 @@ def decode(
481486
return data
482487

483488

489+
class BrotliDecode:
490+
"""Decodes Brotli-compressed data."""
491+
@staticmethod
492+
def decode(
493+
data: bytes,
494+
decode_parms: Optional[DictionaryObject] = None,
495+
**kwargs: Any,
496+
) -> bytes:
497+
"""
498+
Decode Brotli-compressed data.
499+
500+
Args:
501+
data: Brotli-compressed data.
502+
decode_parms: Optional parameters (unused).
503+
504+
Returns:
505+
Decompressed data.
506+
507+
Raises:
508+
PdfStreamError: If brotli library is not installed.
509+
"""
510+
if brotli is None:
511+
raise PdfStreamError("Brotli library not installed. Required for BrotliDecode filter.")
512+
return brotli.decompress(data)
513+
514+
@staticmethod
515+
def encode(data: bytes, **kwargs: Any) -> bytes:
516+
"""
517+
Encode data using Brotli compression.
518+
519+
Args:
520+
data: Data to compress.
521+
522+
Returns:
523+
Compressed data.
524+
525+
Raises:
526+
PdfStreamError: If brotli library is not installed.
527+
"""
528+
if brotli is None:
529+
raise PdfStreamError("Brotli library not installed. Required for BrotliDecode filter.")
530+
return brotli.compress(data)
531+
532+
484533
@dataclass
485534
class CCITTParameters:
486535
"""§7.4.6, optional parameters for the CCITTFaxDecode filter."""
@@ -666,6 +715,8 @@ def decode_stream_data(stream: Any) -> bytes:
666715
data = DCTDecode.decode(data)
667716
elif filter_name == FT.JPX_DECODE:
668717
data = JPXDecode.decode(data)
718+
elif filter_name == FT.BROTLI_DECODE: # Add BrotliDecode
719+
data = BrotliDecode.decode(data)
669720
elif filter_name == "/Crypt":
670721
if "/Name" in params or "/Type" in params:
671722
raise NotImplementedError(

pyproject.toml

+3-1
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,11 @@ Source = "https://github.com/py-pdf/pypdf"
4242
crypto = ["cryptography"]
4343
cryptodome = ["PyCryptodome"]
4444
image = ["Pillow>=8.0.0"]
45+
brotli = ["Brotli"] # Add brotli dependency
4546
full = [
4647
"cryptography",
47-
"Pillow>=8.0.0"
48+
"Pillow>=8.0.0",
49+
"Brotli", # Add brotli to full dependencies
4850
]
4951
dev = [
5052
"black",

requirements/dev.in

+1
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ pre-commit
44
pytest-cov
55
flit
66
wheel
7+
brotli

tests/test_filters.py

+25
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from itertools import product as cartesian_product
88
from pathlib import Path
99

10+
import brotli # noqa: F401
1011
import pytest
1112
from PIL import Image, ImageOps
1213

@@ -15,6 +16,7 @@
1516
from pypdf.filters import (
1617
ASCII85Decode,
1718
ASCIIHexDecode,
19+
BrotliDecode, # Add BrotliDecode
1820
CCITParameters,
1921
CCITTFaxDecode,
2022
CCITTParameters,
@@ -52,6 +54,29 @@ def test_flate_decode_encode(predictor, s):
5254
assert codec.decode(encoded, DictionaryObject({"/Predictor": predictor})) == s
5355

5456

57+
@pytest.mark.parametrize("s", filter_inputs)
58+
def test_brotli_decode_encode(s):
59+
"""BrotliDecode encode() and decode() methods work as expected."""
60+
codec = BrotliDecode()
61+
s_bytes = s.encode()
62+
encoded = codec.encode(s_bytes)
63+
assert encoded != s_bytes # Ensure encoding actually happened
64+
decoded = codec.decode(encoded)
65+
assert decoded == s_bytes
66+
67+
68+
def test_brotli_decode_without_brotli_installed(monkeypatch):
69+
"""Verify BrotliDecode raises PdfReadError if brotli is not installed."""
70+
# Simulate brotli not being installed within the filters module
71+
monkeypatch.setattr("pypdf.filters.brotli", None)
72+
73+
codec = BrotliDecode()
74+
with pytest.raises(PdfReadError) as exc_info:
75+
codec.decode(b"test data")
76+
77+
assert "Brotli library not installed. Required for BrotliDecode filter." in str(exc_info.value)
78+
79+
5580
def test_flatedecode_unsupported_predictor():
5681
"""
5782
FlateDecode raises PdfReadError for unsupported predictors.

0 commit comments

Comments
 (0)