From 7c5cff3696d1b0f76f5860dd50fa41b733a5e26e Mon Sep 17 00:00:00 2001 From: PJBrs Date: Sat, 13 Sep 2025 16:29:18 +0200 Subject: [PATCH 01/14] ROB: _font: Always returns a FontDescriptor; fix typing mypy complained that the .from_font_resource method's return type is Optional[FontDescriptor]. Change the code to not confuse mypy. --- pypdf/_font.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pypdf/_font.py b/pypdf/_font.py index 06f78ea77..79fc0f96b 100644 --- a/pypdf/_font.py +++ b/pypdf/_font.py @@ -1,5 +1,4 @@ from dataclasses import dataclass, field -from typing import Optional from pypdf.generic import DictionaryObject @@ -29,10 +28,10 @@ class FontDescriptor: character_widths: dict[str, int] = field(default_factory=dict) @classmethod - def from_font_resource(cls, pdf_font_dict: DictionaryObject) -> "Optional[FontDescriptor]": + def from_font_resource(cls, pdf_font_dict: DictionaryObject) -> "FontDescriptor": from pypdf._codecs.core_fontmetrics import CORE_FONT_METRICS # noqa: PLC0415 # Prioritize information from the PDF font dictionary - font_name = pdf_font_dict.get("/BaseFont", "Unknown") - if font_name[1:] in CORE_FONT_METRICS: - return CORE_FONT_METRICS.get(font_name[1:]) + font_name = pdf_font_dict.get("/BaseFont", "Unknown").removeprefix("/") + if font_name in CORE_FONT_METRICS: + return CORE_FONT_METRICS[font_name] return cls(name=font_name) From 90f0ed8ca54c9c183b79539266bd207ef3e7c91a Mon Sep 17 00:00:00 2001 From: PJBrs Date: Sun, 14 Sep 2025 16:15:54 +0200 Subject: [PATCH 02/14] ENH: _font: Add method to calculate text width This adds a method to calculate the width of a text string. This method can later be used to wrap text at a certain length. Code blatantly copied from the _font.py file in the text extractor code. 
--- pypdf/_font.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pypdf/_font.py b/pypdf/_font.py index 79fc0f96b..5f8d98d2f 100644 --- a/pypdf/_font.py +++ b/pypdf/_font.py @@ -35,3 +35,9 @@ def from_font_resource(cls, pdf_font_dict: DictionaryObject) -> "FontDescriptor" if font_name in CORE_FONT_METRICS: return CORE_FONT_METRICS[font_name] return cls(name=font_name) + + def text_width(self, text: str) -> float: + """Sum of character widths specified in PDF font for the supplied text.""" + return sum( + [self.character_widths.get(char, self.character_widths.get("default", 0)) for char in text], 0.0 + ) From 733852b34af313c843f3af5608eca99d245c24f0 Mon Sep 17 00:00:00 2001 From: PJBrs Date: Wed, 24 Sep 2025 18:12:57 +0200 Subject: [PATCH 03/14] ENH: TextAppearanceStream: Add method to scale and wrap text This patch adds a method to scale and wrap text, depending on whether or not text is allowed to be wrapped. It takes a couple of arguments, including the text string itself, field width and height, font size, a FontDescriptor with character widths, and a bool specifying whether or not text is allowed to wrap. Returns the text in the form of a list of tuples, each tuple containing the length of a line and its contents, and the font size for these lines and lengths. 
--- pypdf/generic/_appearance_stream.py | 99 +++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/pypdf/generic/_appearance_stream.py b/pypdf/generic/_appearance_stream.py index 43e9c1657..f6ce3166e 100644 --- a/pypdf/generic/_appearance_stream.py +++ b/pypdf/generic/_appearance_stream.py @@ -2,6 +2,7 @@ from typing import Any, Optional, Union, cast from .._cmap import _default_fonts_space_width, build_char_map_from_dict +from .._font import FontDescriptor from .._utils import logger_warning from ..constants import AnnotationDictionaryAttributes, FieldDictionaryAttributes from ..generic import ( @@ -25,6 +26,104 @@ class TextStreamAppearance(DecodedStreamObject): like font, font size, color, multiline text, and text selection highlighting. """ + def _scale_text( + self, + font_descriptor: FontDescriptor, + font_size: float, + field_width: float, + field_height: float, + text: str, + is_multiline: bool, + min_font_size: float = 4.0, # Minimum font size to attempt + font_size_step: float = 0.2 # How much to decrease font size by each step + ) -> tuple[list[tuple[float, str]], float]: + """ + Takes a piece of text and scales it to field_width or field_height, given font_name + and font_size. For multiline fields, adds newlines to wrap the text. + + Args: + font_descriptor: A FontDescriptor for the font to be used. + font_size: The font size in points. + field_width: The width of the field in which to fit the text. + field_height: The height of the field in which to fit the text. + text: The text to fit with the field. + is_multiline: Whether to scale and wrap the text, or only to scale. + min_font_size: The minimum font size at which to scale the text. + font_size_step: The amount by which to decrement font size per step while scaling. + + Returns: + The text in the form of list of tuples, each tuple containing the length of a line + and its contents, and the font_size for these lines and lengths. 
+ """ + # Single line: + if not is_multiline: + test_width = font_descriptor.text_width(text) * font_size / 1000 + if test_width > field_width or font_size > field_height: + new_font_size = font_size - font_size_step + if new_font_size >= min_font_size: + # Text overflows height; Retry with smaller font size. + return self._scale_text( + font_descriptor, + round(new_font_size, 1), + field_width, + field_height, + text, + is_multiline, + min_font_size, + font_size_step + ) + return [(test_width, text)], font_size + # Multiline: + orig_text = text + paragraphs = text.replace("\n", "\r").split("\r") + wrapped_lines = [] + current_line_words: list[str] = [] + current_line_width: float = 0 + space_width = font_descriptor.text_width(" ") * font_size / 1000 + for paragraph in paragraphs: + if not paragraph.strip(): + wrapped_lines.append((0.0, "")) + continue + words = paragraph.split(" ") + for i, word in enumerate(words): + word_width = font_descriptor.text_width(word) * font_size / 1000 + test_width = current_line_width + word_width + (space_width if i else 0) + if test_width > field_width and current_line_words: + wrapped_lines.append((current_line_width, " ".join(current_line_words))) + current_line_words = [word] + current_line_width = word_width + elif not current_line_words and word_width > field_width: + wrapped_lines.append((word_width, word)) + current_line_words = [] + current_line_width = 0 + else: + if current_line_words: + current_line_width += space_width + current_line_words.append(word) + current_line_width += word_width + if current_line_words: + wrapped_lines.append((current_line_width, " ".join(current_line_words))) + current_line_words = [] + current_line_width = 0 + # Estimate total height. + # Assumes line spacing of 1.4 + estimated_total_height = font_size + (len(wrapped_lines) - 1) * 1.4 * font_size + if estimated_total_height > field_height: + # Text overflows height; Retry with smaller font size. 
+ new_font_size = font_size - font_size_step + if new_font_size >= min_font_size: + return self._scale_text( + font_descriptor, + round(new_font_size, 1), + field_width, + field_height, + orig_text, + is_multiline, + min_font_size, + font_size_step + ) + return wrapped_lines, font_size + def _generate_appearance_stream_data( self, text: str = "", From 148418f31a8151d3a029cb4182ce346a1fbeff36 Mon Sep 17 00:00:00 2001 From: PJBrs Date: Wed, 24 Sep 2025 20:13:53 +0200 Subject: [PATCH 04/14] ENH: TextAppearanceStream: Scale and wrap text This patch scales and/or wraps text that does not fit into a text field unaltered, under the condition that font size was set to 0 in the default appearance stream. We only wrap text if the multiline bit was set in the corresponding annotation's field flags, otherwise we just scale the font until it fits. We move the escaping of parentheses below, so that it does not interfere with calculating the width of a text string. --- pypdf/generic/_appearance_stream.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/pypdf/generic/_appearance_stream.py b/pypdf/generic/_appearance_stream.py index f6ce3166e..f7b6c6544 100644 --- a/pypdf/generic/_appearance_stream.py +++ b/pypdf/generic/_appearance_stream.py @@ -2,6 +2,7 @@ from typing import Any, Optional, Union, cast from .._cmap import _default_fonts_space_width, build_char_map_from_dict +from .._codecs.core_fontmetrics import CORE_FONT_METRICS from .._font import FontDescriptor from .._utils import logger_warning from ..constants import AnnotationDictionaryAttributes, FieldDictionaryAttributes @@ -129,6 +130,7 @@ def _generate_appearance_stream_data( text: str = "", selection: Optional[list[str]] = None, rectangle: Union[RectangleObject, tuple[float, float, float, float]] = (0.0, 0.0, 0.0, 0.0), + font_descriptor: Optional[FontDescriptor] = None, font_glyph_byte_map: Optional[dict[str, bytes]] = None, font_name: str = "/Helv", font_size: float = 0.0, 
@@ -163,13 +165,31 @@ def _generate_appearance_stream_data( font_glyph_byte_map = font_glyph_byte_map or {} if isinstance(rectangle, tuple): rectangle = RectangleObject(rectangle) + font_descriptor = cast(FontDescriptor, font_descriptor) # If font_size is 0, apply the logic for multiline or large-as-possible font if font_size == 0: + if selection: # Don't wrap text when dealing with a /Ch field, in order to prevent problems + is_multiline = False # with matching "selection" with "line" later on. if is_multiline: font_size = DEFAULT_FONT_SIZE_IN_MULTILINE else: font_size = rectangle.height - 2 + lines, font_size = self._scale_text( + font_descriptor, + font_size, + rectangle.width - 3, # One point margin left and right, and an additional point because the first + # offset takes one extra point (see below, under "line_number == 0:") + rectangle.height - 3, # One point margin for top and bottom, one point extra for the first line + # (see y_offset) + text, + is_multiline, + ) + else: + lines = [( + font_descriptor.text_width(line) * font_size / 1000, + line + ) for line in text.replace("\n", "\r").split("\r")] # Set the vertical offset y_offset = rectangle.height - 1 - font_size @@ -180,7 +200,7 @@ def _generate_appearance_stream_data( f"re\nW\nBT\n{default_appearance}\n" ).encode() - for line_number, line in enumerate(text.replace("\n", "\r").split("\r")): + for line_number, (line_width, line) in enumerate(lines): if selection and line in selection: # Might be improved, but cannot find how to get fill working => replaced with lined box ap_stream += ( @@ -237,6 +257,7 @@ def __init__( # If a font resource was added, get the font character map if font_resource: font_resource = cast(DictionaryObject, font_resource.get_object()) + font_descriptor = FontDescriptor.from_font_resource(font_resource) _font_subtype, _, font_encoding, font_map = build_char_map_from_dict( 200, font_resource ) @@ -257,11 +278,13 @@ def __init__( else: logger_warning(f"Font dictionary for 
{font_name} not found.", __name__) font_glyph_byte_map = {} + font_descriptor = FontDescriptor() ap_stream_data = self._generate_appearance_stream_data( text, selection, rectangle, + font_descriptor, font_glyph_byte_map, font_name, font_size, From d928c4be4108f3a7afd214b585c6e6223c14b318 Mon Sep 17 00:00:00 2001 From: PJBrs Date: Thu, 25 Sep 2025 11:11:08 +0200 Subject: [PATCH 05/14] ROB: TextAppearanceStream: Add default font resource Make sure that we always have Helvetica as a viable font resource, for which we surely have all necessary font metrics needed for text wrapping. --- pypdf/generic/_appearance_stream.py | 60 ++++++++++++++++------------- tests/test_writer.py | 4 +- 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/pypdf/generic/_appearance_stream.py b/pypdf/generic/_appearance_stream.py index f7b6c6544..e4981ad4a 100644 --- a/pypdf/generic/_appearance_stream.py +++ b/pypdf/generic/_appearance_stream.py @@ -258,27 +258,36 @@ def __init__( if font_resource: font_resource = cast(DictionaryObject, font_resource.get_object()) font_descriptor = FontDescriptor.from_font_resource(font_resource) - _font_subtype, _, font_encoding, font_map = build_char_map_from_dict( - 200, font_resource - ) - try: # remove width stored in -1 key - del font_map[-1] - except KeyError: - pass - font_glyph_byte_map: dict[str, bytes] - if isinstance(font_encoding, str): - font_glyph_byte_map = { - v: k.encode(font_encoding) for k, v in font_map.items() - } - else: - font_glyph_byte_map = {v: bytes((k,)) for k, v in font_encoding.items()} - font_encoding_rev = {v: bytes((k,)) for k, v in font_encoding.items()} - for key, value in font_map.items(): - font_glyph_byte_map[value] = font_encoding_rev.get(key, key) else: - logger_warning(f"Font dictionary for {font_name} not found.", __name__) - font_glyph_byte_map = {} - font_descriptor = FontDescriptor() + logger_warning(f"Font dictionary for {font_name} not found; defaulting to Helvetica.", __name__) + font_name = "/Helv" 
+ font_resource = DictionaryObject({ + NameObject("/Subtype"): NameObject("/Type1"), + NameObject("/Name"): NameObject("/Helv"), + NameObject("/Type"): NameObject("/Font"), + NameObject("/BaseFont"): NameObject("/Helvetica"), + NameObject("/Encoding"): NameObject("/WinAnsiEncoding") + }) + font_descriptor = CORE_FONT_METRICS["Helvetica"] + + # Get the font glyph data + _font_subtype, _, font_encoding, font_map = build_char_map_from_dict( + 200, font_resource + ) + try: # remove width stored in -1 key + del font_map[-1] + except KeyError: + pass + font_glyph_byte_map: dict[str, bytes] + if isinstance(font_encoding, str): + font_glyph_byte_map = { + v: k.encode(font_encoding) for k, v in font_map.items() + } + else: + font_glyph_byte_map = {v: bytes((k,)) for k, v in font_encoding.items()} + font_encoding_rev = {v: bytes((k,)) for k, v in font_encoding.items()} + for key, value in font_map.items(): + font_glyph_byte_map[value] = font_encoding_rev.get(key, key) ap_stream_data = self._generate_appearance_stream_data( text, @@ -297,13 +306,12 @@ def __init__( self[NameObject("/BBox")] = RectangleObject(rectangle) self.set_data(ByteStringObject(ap_stream_data)) self[NameObject("/Length")] = NumberObject(len(ap_stream_data)) - # Update Resources with font information if necessary - if font_resource is not None: - self[NameObject("/Resources")] = DictionaryObject({ - NameObject("/Font"): DictionaryObject({ - NameObject(font_name): getattr(font_resource, "indirect_reference", font_resource) - }) + # Update Resources with font information + self[NameObject("/Resources")] = DictionaryObject({ + NameObject("/Font"): DictionaryObject({ + NameObject(font_name): getattr(font_resource, "indirect_reference", font_resource) }) + }) @classmethod def from_text_annotation( diff --git a/tests/test_writer.py b/tests/test_writer.py index c76f76a44..be4c7d9c5 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -2427,7 +2427,7 @@ def test_no_resource_for_14_std_fonts(caplog): 
writer.update_page_form_field_values( p, {a["/T"]: "Brooks"}, auto_regenerate=False ) - assert "Font dictionary for /Helvetica not found." in caplog.text + assert "Font dictionary for /Helvetica not found; defaulting to Helvetica." in caplog.text @pytest.mark.enable_socket @@ -2439,7 +2439,7 @@ def test_field_box_upside_down(): writer.update_page_form_field_values(None, {"FreightTrainMiles": "0"}) assert writer.pages[0]["/Annots"][13].get_object()["/AP"]["/N"].get_data() == ( b"q\n/Tx BMC \nq\n1 1 105.29520000000001 10.835000000000036 re\n" - b"W\nBT\n/Arial 8.0 Tf 0 g\n2 2.8350000000000364 Td\n(0) Tj\nET\n" + b"W\nBT\n/Helv 8.0 Tf 0 g\n2 2.8350000000000364 Td\n(0) Tj\nET\n" b"Q\nEMC\nQ\n" ) box = writer.pages[0]["/Annots"][13].get_object()["/AP"]["/N"]["/BBox"] From 03c2e65b59673b578574865aecb1e26f8611be9f Mon Sep 17 00:00:00 2001 From: PJBrs Date: Sat, 8 Nov 2025 09:22:32 +0100 Subject: [PATCH 06/14] ENH: _appearance_stream: Add IntEnum for alignment options This adds a class TextAlignment that defines alignment options. --- pypdf/generic/_appearance_stream.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pypdf/generic/_appearance_stream.py b/pypdf/generic/_appearance_stream.py index e4981ad4a..d759483b7 100644 --- a/pypdf/generic/_appearance_stream.py +++ b/pypdf/generic/_appearance_stream.py @@ -1,4 +1,5 @@ import re +from enum import IntEnum from typing import Any, Optional, Union, cast from .._cmap import _default_fonts_space_width, build_char_map_from_dict @@ -18,6 +19,14 @@ DEFAULT_FONT_SIZE_IN_MULTILINE = 12 +class TextAlignment(IntEnum): + """Defines the alignment options for text within a form field's appearance stream.""" + + LEFT = 0 + CENTER = 1 + RIGHT = 2 + + class TextStreamAppearance(DecodedStreamObject): """ A class representing the appearance stream for a text-based form field. 
From 8f87395e3eacbd4da995ba9122ce71bc4c1e52f9 Mon Sep 17 00:00:00 2001 From: PJBrs Date: Wed, 24 Sep 2025 20:26:55 +0200 Subject: [PATCH 07/14] ENH: TextAppearanceStream: Add right alignment and centering This patch changes the TextAppearanceStream code so that it can deal with right alignment and centered text. Note that both require correct font metrics in order to work. --- pypdf/generic/_appearance_stream.py | 46 ++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/pypdf/generic/_appearance_stream.py b/pypdf/generic/_appearance_stream.py index d759483b7..3174904e4 100644 --- a/pypdf/generic/_appearance_stream.py +++ b/pypdf/generic/_appearance_stream.py @@ -144,7 +144,8 @@ def _generate_appearance_stream_data( font_name: str = "/Helv", font_size: float = 0.0, font_color: str = "0 g", - is_multiline: bool = False + is_multiline: bool = False, + alignment: TextAlignment = TextAlignment.LEFT ) -> bytes: """ Generates the raw bytes of the PDF appearance stream for a text field. @@ -166,6 +167,7 @@ def _generate_appearance_stream_data( font_color: The color to apply to the font, represented as a PDF graphics state string (e.g., "0 g" for black). is_multiline: A boolean indicating if the text field is multiline. + alignment: Text alignment, can be TextAlignment.LEFT, .RIGHT, or .CENTER Returns: A byte string containing the PDF content stream data. 
@@ -188,7 +190,7 @@ def _generate_appearance_stream_data( font_descriptor, font_size, rectangle.width - 3, # One point margin left and right, and an additional point because the first - # offset takes one extra point (see below, under "line_number == 0:") + # offset takes one extra point (see below, "desired_abs_x_start") rectangle.height - 3, # One point margin for top and bottom, one point extra for the first line # (see y_offset) text, @@ -208,6 +210,7 @@ def _generate_appearance_stream_data( f"q\n/Tx BMC \nq\n1 1 {rectangle.width - 1} {rectangle.height - 1} " f"re\nW\nBT\n{default_appearance}\n" ).encode() + current_x_pos: float = 0 # Initial virtual position within the text object. for line_number, (line_width, line) in enumerate(lines): if selection and line in selection: @@ -216,11 +219,33 @@ def _generate_appearance_stream_data( f"1 {y_offset - (line_number * font_size * 1.4) - 1} {rectangle.width - 2} {font_size + 2} re\n" f"0.5 0.5 0.5 rg s\n{default_appearance}\n" ).encode() + + # Calculate the desired absolute starting X for the current line + desired_abs_x_start: float = 0 + if alignment == TextAlignment.RIGHT: + desired_abs_x_start = rectangle.width - 2 - line_width + elif alignment == TextAlignment.CENTER: + desired_abs_x_start = (rectangle.width - line_width) / 2 + else: # Left aligned; default + desired_abs_x_start = 2 + # Calculate x_rel_offset: how much to move from the current_x_pos + # to reach the desired_abs_x_start. + x_rel_offset = desired_abs_x_start - current_x_pos + + # Y-offset: + y_rel_offset: float = 0 if line_number == 0: - ap_stream += f"2 {y_offset} Td\n".encode() + y_rel_offset = y_offset # Initial vertical position else: - # Td is a relative translation - ap_stream += f"0 {-font_size * 1.4} Td\n".encode() + y_rel_offset = - font_size * 1.4 # Move down by line height + + # Td is a relative translation (Tx and Ty). + # It updates the current text position. 
+ ap_stream += f"{x_rel_offset} {y_rel_offset} Td\n".encode() + # Update current_x_pos based on the Td operation for the next iteration. + # This is the X position where the *current line* will start. + current_x_pos = desired_abs_x_start + encoded_line: list[bytes] = [ font_glyph_byte_map.get(c, c.encode("utf-16-be")) for c in line ] @@ -240,7 +265,8 @@ def __init__( font_name: str = "/Helv", font_size: float = 0.0, font_color: str = "0 g", - is_multiline: bool = False + is_multiline: bool = False, + alignment: TextAlignment = TextAlignment.LEFT ) -> None: """ Initializes a TextStreamAppearance object. @@ -259,6 +285,7 @@ def __init__( font_size: The font size. If 0, it's auto-calculated. font_color: The font color string. is_multiline: A boolean indicating if the text field is multiline. + alignment: Left-aligned (0), centered (1) or right-aligned (2) text. """ super().__init__() @@ -307,7 +334,8 @@ def __init__( font_name, font_size, font_color, - is_multiline + is_multiline, + alignment ) self[NameObject("/Type")] = NameObject("/XObject") @@ -414,6 +442,7 @@ def from_text_annotation( # Retrieve field text, selected values and formatting information is_multiline = False field_flags = field.get(FieldDictionaryAttributes.Ff, 0) + alignment = field.get("/Q", 0) if field_flags & FieldDictionaryAttributes.FfBits.Multiline: is_multiline = True if ( @@ -440,7 +469,8 @@ def from_text_annotation( font_name, font_size, font_color, - is_multiline + is_multiline, + alignment ) if AnnotationDictionaryAttributes.AP in annotation: for key, value in ( From 13d34401ce63611c318c0db37d34462c80f6446d Mon Sep 17 00:00:00 2001 From: PJBrs Date: Thu, 25 Sep 2025 09:11:52 +0200 Subject: [PATCH 08/14] MAINT: TextAppearanceStream: Don't use _default_fonts_space_width We need the info that is in CORE_FONT_METRICS, and that is the same information as in _default_fonts_space_width anyway. So this patch removes a bit of redundancy. 
--- pypdf/generic/_appearance_stream.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pypdf/generic/_appearance_stream.py b/pypdf/generic/_appearance_stream.py index 3174904e4..ce6bf81dc 100644 --- a/pypdf/generic/_appearance_stream.py +++ b/pypdf/generic/_appearance_stream.py @@ -2,7 +2,7 @@ from enum import IntEnum from typing import Any, Optional, Union, cast -from .._cmap import _default_fonts_space_width, build_char_map_from_dict +from .._cmap import build_char_map_from_dict from .._codecs.core_fontmetrics import CORE_FONT_METRICS from .._font import FontDescriptor from .._utils import logger_warning @@ -427,8 +427,8 @@ def from_text_annotation( ).get_object(), ) document_font_resources = document_resources.get("/Font", DictionaryObject()).get_object() - # _default_fonts_space_width keys is the list of Standard fonts - if font_name not in document_font_resources and font_name not in _default_fonts_space_width: + # CORE_FONT_METRICS is the dict with Standard font metrics + if font_name not in document_font_resources and font_name.removeprefix("/") not in CORE_FONT_METRICS: # ...or AcroForm dictionary document_resources = cast( dict[Any, Any], From b67b92153e3e2a8573129ee16596782c45b48ede Mon Sep 17 00:00:00 2001 From: PJBrs Date: Thu, 25 Sep 2025 22:00:04 +0200 Subject: [PATCH 09/14] ENH: tests: _appearance_stream Add tests for the TextStreamAppearance. 
--- tests/test_appearance_stream.py | 62 +++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 tests/test_appearance_stream.py diff --git a/tests/test_appearance_stream.py b/tests/test_appearance_stream.py new file mode 100644 index 000000000..2aba3cdf9 --- /dev/null +++ b/tests/test_appearance_stream.py @@ -0,0 +1,62 @@ +"""Test the pypdf.generic._appearance_stream module.""" + +from pypdf.generic._appearance_stream import TextStreamAppearance + + +def test_scale_text(): + rectangle = (0, 0, 9.1, 55.4) + font_size = 10.1 + text = "Hello World" + is_multiline = False + appearance_stream = TextStreamAppearance( + text, rectangle=rectangle, font_size=font_size, is_multiline=is_multiline + ) + assert b"10.1 Tf" in appearance_stream.get_data() + text = "This is a very very long sentence that probably will scale below the minimum font size" + font_size = 0.0 + appearance_stream = TextStreamAppearance( + text, rectangle=rectangle, font_size=font_size, is_multiline=is_multiline + ) + assert b"4.0 Tf" in appearance_stream.get_data() + rectangle = (0, 0, 160, 360) + font_size = 0.0 + text = """Welcome to pypdf +pypdf is a free and open source pure-python PDF library capable of splitting, merging, cropping, and +transforming the pages of PDF files. It can also add custom data, viewing options, and passwords to PDF +files. pypdf can retrieve text and metadata from PDFs as well. + +See pdfly for a CLI application that uses pypdf to interact with PDFs. 
+ """ + is_multiline = True + appearance_stream = TextStreamAppearance( + text, rectangle=rectangle, font_size=font_size, is_multiline=is_multiline + ) + assert b"12 Tf" in appearance_stream.get_data() + assert b"pypdf is a free and open" in appearance_stream.get_data() + rectangle = (0, 0, 160, 160) + appearance_stream = TextStreamAppearance( + text, rectangle=rectangle, font_size=font_size, is_multiline=is_multiline + ) + assert b"8.8 Tf" in appearance_stream.get_data() + rectangle = (0, 0, 160, 12) + appearance_stream = TextStreamAppearance( + text, rectangle=rectangle, font_size=font_size, is_multiline=is_multiline + ) + text = """Option A +Option B +Option C +Option D +""" + selection = "Option A" + assert b"4.0 Tf" in appearance_stream.get_data() + text = "pneumonoultramicroscopicsilicovolcanoconiosis" + appearance_stream = TextStreamAppearance( + text, selection, rectangle=rectangle, font_size=font_size, is_multiline=is_multiline + ) + assert b"7.2 Tf" in appearance_stream.get_data() + rectangle = (0, 0, 10, 100) + text = "OneWord" + appearance_stream = TextStreamAppearance( + text, rectangle=rectangle, font_size=font_size, is_multiline=is_multiline + ) + assert b"OneWord" in appearance_stream.get_data() From 13c3839b3208a508d8e428ece7c86ea8a303865f Mon Sep 17 00:00:00 2001 From: PJBrs Date: Fri, 26 Sep 2025 10:11:33 +0200 Subject: [PATCH 10/14] ENH: docs: Add documentation about flattening a PDF form --- docs/user/forms.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/user/forms.md b/docs/user/forms.md index 568bf0210..b8549eb93 100644 --- a/docs/user/forms.md +++ b/docs/user/forms.md @@ -46,6 +46,12 @@ parameter is `True` by default for legacy compatibility, but this flags the PDF processor to recompute the field's rendering, and may trigger a "save changes" dialog for users who open the generated PDF. 
+If you want to flatten your form, that is, keeping all form field contents while +removing the form fields themselves, you can set the `flatten` parameter in +`update_page_form_field_values()` to `True`, to convert form field contents to +regular PDF content, and then use `writer.remove_annotations(subtypes="/Widget")` +to remove all form fields. This will result in a flattened PDF. + ## Some notes about form fields and annotations PDF forms have a dual-nature approach to the fields: From 60553136be74afd03e3a191d9679e104036651d2 Mon Sep 17 00:00:00 2001 From: PJBrs Date: Wed, 5 Nov 2025 17:57:57 +0100 Subject: [PATCH 11/14] MAINT: Add myself as contributor --- CONTRIBUTORS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 87fe2261e..96fa9b17d 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -13,6 +13,7 @@ history and [GitHub's 'Contributors' feature](https://github.com/py-pdf/pypdf/gr * [abyesilyurt](https://github.com/abyesilyurt) * [ArkieCoder](https://github.com/ArkieCoder) +* [Beers, PJ](https://github.com/PJBrs) * [Clauss, Christian](https://github.com/cclauss) * [DL6ER](https://github.com/DL6ER) * [Duy, Phan Thanh](https://github.com/zuypt) From ef1101932c31d142e9bea0466f2fbb4be743aac8 Mon Sep 17 00:00:00 2001 From: PJBrs Date: Mon, 10 Nov 2025 18:10:07 +0100 Subject: [PATCH 12/14] MAINT: tests/test_appearance_stream.py: Newline after assert statements --- tests/test_appearance_stream.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_appearance_stream.py b/tests/test_appearance_stream.py index 2aba3cdf9..da59807c2 100644 --- a/tests/test_appearance_stream.py +++ b/tests/test_appearance_stream.py @@ -12,12 +12,14 @@ def test_scale_text(): text, rectangle=rectangle, font_size=font_size, is_multiline=is_multiline ) assert b"10.1 Tf" in appearance_stream.get_data() + text = "This is a very very long sentence that probably will scale below the minimum font size" font_size = 0.0 appearance_stream 
= TextStreamAppearance( text, rectangle=rectangle, font_size=font_size, is_multiline=is_multiline ) assert b"4.0 Tf" in appearance_stream.get_data() + rectangle = (0, 0, 160, 360) font_size = 0.0 text = """Welcome to pypdf @@ -33,11 +35,13 @@ def test_scale_text(): ) assert b"12 Tf" in appearance_stream.get_data() assert b"pypdf is a free and open" in appearance_stream.get_data() + rectangle = (0, 0, 160, 160) appearance_stream = TextStreamAppearance( text, rectangle=rectangle, font_size=font_size, is_multiline=is_multiline ) assert b"8.8 Tf" in appearance_stream.get_data() + rectangle = (0, 0, 160, 12) appearance_stream = TextStreamAppearance( text, rectangle=rectangle, font_size=font_size, is_multiline=is_multiline @@ -49,11 +53,13 @@ def test_scale_text(): """ selection = "Option A" assert b"4.0 Tf" in appearance_stream.get_data() + text = "pneumonoultramicroscopicsilicovolcanoconiosis" appearance_stream = TextStreamAppearance( text, selection, rectangle=rectangle, font_size=font_size, is_multiline=is_multiline ) assert b"7.2 Tf" in appearance_stream.get_data() + rectangle = (0, 0, 10, 100) text = "OneWord" appearance_stream = TextStreamAppearance( From 84b656cd9af82d9b2acd580e900b18df6665bb6e Mon Sep 17 00:00:00 2001 From: PJBrs Date: Wed, 12 Nov 2025 08:24:35 +0100 Subject: [PATCH 13/14] MAINT: Make more use of TextAlignment IntEnum --- pypdf/generic/_appearance_stream.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pypdf/generic/_appearance_stream.py b/pypdf/generic/_appearance_stream.py index ce6bf81dc..3a05a94d6 100644 --- a/pypdf/generic/_appearance_stream.py +++ b/pypdf/generic/_appearance_stream.py @@ -167,7 +167,7 @@ def _generate_appearance_stream_data( font_color: The color to apply to the font, represented as a PDF graphics state string (e.g., "0 g" for black). is_multiline: A boolean indicating if the text field is multiline. 
- alignment: Text alignment, can be TextAlignment.LEFT, .RIGHT, or .CENTER + alignment: Text alignment, can be TextAlignment.LEFT, .RIGHT, or .CENTER. Returns: A byte string containing the PDF content stream data. @@ -285,7 +285,7 @@ def __init__( font_size: The font size. If 0, it's auto-calculated. font_color: The font color string. is_multiline: A boolean indicating if the text field is multiline. - alignment: Left-aligned (0), centered (1) or right-aligned (2) text. + alignment: Text alignment, can be TextAlignment.LEFT, .RIGHT, or .CENTER. """ super().__init__() @@ -442,7 +442,7 @@ def from_text_annotation( # Retrieve field text, selected values and formatting information is_multiline = False field_flags = field.get(FieldDictionaryAttributes.Ff, 0) - alignment = field.get("/Q", 0) + alignment = field.get("/Q", TextAlignment.LEFT) if field_flags & FieldDictionaryAttributes.FfBits.Multiline: is_multiline = True if ( From e4231bcd8b35623a5a20235a097ef624c09f64ca Mon Sep 17 00:00:00 2001 From: PJBrs Date: Wed, 12 Nov 2025 17:35:46 +0100 Subject: [PATCH 14/14] MAINT: docs: Simplify flattening documentation --- docs/user/forms.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/user/forms.md b/docs/user/forms.md index b8549eb93..3e1f8c08d 100644 --- a/docs/user/forms.md +++ b/docs/user/forms.md @@ -48,9 +48,10 @@ dialog for users who open the generated PDF. If you want to flatten your form, that is, keeping all form field contents while removing the form fields themselves, you can set the `flatten` parameter in -`update_page_form_field_values()` to `True`, to convert form field contents to -regular PDF content, and then use `writer.remove_annotations(subtypes="/Widget")` -to remove all form fields. This will result in a flattened PDF. +{func}`~pypdf.PdfWriter.update_page_form_field_values` to `True`. This +will convert form field contents to regular PDF content. 
Afterwards, use +{func}`~pypdf.PdfWriter.remove_annotations` with `subtypes="/Widget"` +to remove all form fields to get an actual flattened PDF. ## Some notes about form fields and annotations