Skip to content

Commit 7cff6e0

Browse files
ROB: Attempt to deal with non-rectangular FlateDecode streams (py-pdf#3245)
Closes py-pdf#3241.
1 parent 3841eae commit 7cff6e0

File tree

2 files changed

+25
-5
lines changed

2 files changed

+25
-5
lines changed

pypdf/filters.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -179,8 +179,10 @@ def decode(
179 179
@staticmethod
180 180
def _decode_png_prediction(data: bytes, columns: int, rowlength: int) -> bytes:
181 181
# PNG prediction can vary from row to row
182-
if len(data) % rowlength != 0:
183-
raise PdfReadError("Image data is not rectangular")
182+
if (remainder := len(data) % rowlength) != 0:
183+
logger_warning("Image data is not rectangular. Adding padding.", __name__)
184+
data += b"\x00" * (rowlength - remainder)
185+
assert len(data) % rowlength == 0
184 186
output = []
185 187
prev_rowdata = (0,) * rowlength
186 188
bpp = (rowlength - 1) // columns # recomputed locally to not change params

tests/test_filters.py

Lines changed: 21 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -374,9 +374,8 @@ def test_iss1787():
374 374
obj = data.indirect_reference.get_object()
375 375
obj["/DecodeParms"][NameObject("/Columns")] = NumberObject(1000)
376 376
obj.decoded_self = None
377-
with pytest.raises(PdfReadError) as exc:
378-
reader.pages[0].images[0]
379-
assert exc.value.args[0] == "Image data is not rectangular"
377+
with pytest.raises(expected_exception=PdfReadError, match="^Unsupported PNG filter 244$"):
378+
_ = reader.pages[0].images[0]
380 379

381 380

382 381
@pytest.mark.enable_socket
@@ -672,3 +671,22 @@ def test_flate_decode__image_is_none_due_to_size_limit(caplog):
672 671
"Failed loading image: Image size (180000000 pixels) exceeds limit of "
673 672
"178956970 pixels, could be decompression bomb DOS attack."
674 673
) in caplog.messages
674+
675+
676+
@pytest.mark.enable_socket
677+
def test_flate_decode__not_rectangular(caplog):
678+
url = "https://github.com/user-attachments/files/19663603/issue3241_compressed.txt"
679+
name = "issue3241.txt"
680+
data = get_data_from_url(url, name=name)
681+
decode_parms = DictionaryObject()
682+
decode_parms[NameObject("/Predictor")] = NumberObject(15)
683+
decode_parms[NameObject("/Columns")] = NumberObject(4881)
684+
actual = FlateDecode.decode(data=data, decode_parms=decode_parms)
685+
actual_image = BytesIO()
686+
Image.frombytes(mode="1", size=(4881, 81), data=actual).save(actual_image, format="png")
687+
688+
url = "https://github.com/user-attachments/assets/c5695850-c076-4255-ab72-7c86851a4a04"
689+
name = "issue3241.png"
690+
expected = get_data_from_url(url, name=name)
691+
assert actual_image.getvalue() == expected
692+
assert caplog.messages == ["Image data is not rectangular. Adding padding."]

0 commit comments

Comments
 (0)