diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/AltoXmlTextExporter.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/AltoXmlTextExporter.cs index c259f8338..7ec83ec8a 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/AltoXmlTextExporter.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/AltoXmlTextExporter.cs @@ -219,7 +219,7 @@ private AltoDocument.AltoGraphicalElement ToAltoGraphicalElement(PdfPath pdfPath private AltoDocument.AltoIllustration ToAltoIllustration(IPdfImage pdfImage, double height) { illustrationCount++; - var rectangle = pdfImage.Bounds; + var rectangle = pdfImage.BoundingBox; return new AltoDocument.AltoIllustration { diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/HOcrTextExporter.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/HOcrTextExporter.cs index 7b0fd33f7..22a434646 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/HOcrTextExporter.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/HOcrTextExporter.cs @@ -273,7 +273,7 @@ private string GetCode(PdfPath path, double pageHeight, bool subPaths, int level private string GetCode(IPdfImage pdfImage, double pageHeight, int level) { imageCount++; - var bbox = pdfImage.Bounds; + var bbox = pdfImage.BoundingBox; return GetIndent(level) + ""; } diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/PageXmlTextExporter.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/PageXmlTextExporter.cs index 47337bba3..c1d6f44f8 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/PageXmlTextExporter.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/PageXmlTextExporter.cs @@ -273,7 +273,7 @@ private PageXmlDocument.PageXmlLineDrawingRegion ToPageXmlLineDrawingRegion(PdfP private PageXmlDocument.PageXmlImageRegion ToPageXmlImageRegion(IPdfImage pdfImage, PageXmlData data, double pageWidth, double pageHeight) { data.RegionsCount++; - var bbox = pdfImage.Bounds; + var bbox = pdfImage.BoundingBox; return new PageXmlDocument.PageXmlImageRegion() { Coords = ToCoords(bbox, pageWidth, pageHeight), diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextBlock.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextBlock.cs index eb7bf75be..80b4dfa6a 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextBlock.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextBlock.cs @@ -9,8 +9,13 @@ /// /// A block of text. /// - public class TextBlock - { + public class TextBlock: ILettersBlock + { + /// + /// The letters contained in this TextBlock + /// + public IReadOnlyList Letters { get; } + /// /// The separator used between lines in the block. /// @@ -63,6 +68,7 @@ public TextBlock(IReadOnlyList lines, string separator = "\n") ReadingOrder = -1; TextLines = lines; + Letters = lines.SelectMany(tl => tl.Words).SelectMany(w => w.Letters).ToList().AsReadOnly(); if (lines.Count == 1) { diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextLine.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextLine.cs index 322780f3d..873c062c0 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextLine.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextLine.cs @@ -9,8 +9,13 @@ /// /// A line of text. /// - public class TextLine + public class TextLine : ILettersBlock { + /// + /// The letters contained in this TextLine + /// + public IReadOnlyList Letters { get; } + /// /// The separator used between words in the line. /// @@ -56,6 +61,7 @@ public TextLine(IReadOnlyList words, string separator = " ") Separator = separator; Words = words; + Letters = words.SelectMany(w => w.Letters).ToList().AsReadOnly(); if (Words.Count == 1) { diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WhitespaceCoverExtractor.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WhitespaceCoverExtractor.cs index 395780ede..68be72a8e 100644 --- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WhitespaceCoverExtractor.cs +++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WhitespaceCoverExtractor.cs @@ -51,7 +51,7 @@ public static IReadOnlyList GetWhitespaces(IEnumerable words if (images?.Any() == true) { - bboxes.AddRange(images.Where(w => w.Bounds.Width > 0 && w.Bounds.Height > 0).Select(o => o.Bounds)); + bboxes.AddRange(images.Where(w => w.BoundingBox.Width > 0 && w.BoundingBox.Height > 0).Select(o => o.BoundingBox)); } return GetWhitespaces(bboxes, diff --git a/src/UglyToad.PdfPig.Tests/Integration/SinglePageLibreOfficeImages.cs b/src/UglyToad.PdfPig.Tests/Integration/SinglePageLibreOfficeImages.cs index 22e6e5eb0..de53fe2d2 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/SinglePageLibreOfficeImages.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/SinglePageLibreOfficeImages.cs @@ -26,29 +26,29 @@ public void ImagesHaveCorrectDimensionsAndLocations() { var page = document.GetPage(1); - var images = page.GetImages().OrderBy(x => x.Bounds.Width).ToList(); + var images = page.GetImages().OrderBy(x => x.BoundingBox.Width).ToList(); var pdfPigSquare = images[0]; - Assert.Equal(148.3d, pdfPigSquare.Bounds.Width, doubleComparer); - Assert.Equal(148.3d, pdfPigSquare.Bounds.Height, doubleComparer); - Assert.Equal(60.1d, pdfPigSquare.Bounds.Left, doubleComparer); - Assert.Equal(765.8d, pdfPigSquare.Bounds.Top, doubleComparer); + Assert.Equal(148.3d, pdfPigSquare.BoundingBox.Width, doubleComparer); + Assert.Equal(148.3d, pdfPigSquare.BoundingBox.Height, doubleComparer); + Assert.Equal(60.1d, pdfPigSquare.BoundingBox.Left, doubleComparer); + Assert.Equal(765.8d, pdfPigSquare.BoundingBox.Top, doubleComparer); var pdfPigSquished = images[1]; - Assert.Equal(206.8d, pdfPigSquished.Bounds.Width, doubleComparer); - Assert.Equal(83.2d, pdfPigSquished.Bounds.Height, doubleComparer); - Assert.Equal(309.8d, pdfPigSquished.Bounds.Left, doubleComparer); - Assert.Equal(552.1d, pdfPigSquished.Bounds.Top, doubleComparer); + Assert.Equal(206.8d, pdfPigSquished.BoundingBox.Width, doubleComparer); + Assert.Equal(83.2d, pdfPigSquished.BoundingBox.Height, doubleComparer); + Assert.Equal(309.8d, pdfPigSquished.BoundingBox.Left, doubleComparer); + Assert.Equal(552.1d, pdfPigSquished.BoundingBox.Top, doubleComparer); var birthdayPigs = images[2]; - Assert.Equal(391d, birthdayPigs.Bounds.Width, doubleComparer); - Assert.Equal(267.1d, birthdayPigs.Bounds.Height, doubleComparer); - Assert.Equal(102.2d, birthdayPigs.Bounds.Left, doubleComparer); - Assert.Equal(426.3d, birthdayPigs.Bounds.Top, doubleComparer); + Assert.Equal(391d, birthdayPigs.BoundingBox.Width, doubleComparer); + Assert.Equal(267.1d, birthdayPigs.BoundingBox.Height, doubleComparer); + Assert.Equal(102.2d, birthdayPigs.BoundingBox.Left, doubleComparer); + Assert.Equal(426.3d, birthdayPigs.BoundingBox.Top, doubleComparer); } } diff --git a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs index dc387e6e8..affe13ee1 100644 --- a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs +++ b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs @@ -77,6 +77,8 @@ public void OnlyExposedApiIsPublic() "UglyToad.PdfPig.Content.DocumentInformation", "UglyToad.PdfPig.Content.EmbeddedFile", "UglyToad.PdfPig.Content.Hyperlink", + "UglyToad.PdfPig.Content.IBoundingBox", + "UglyToad.PdfPig.Content.ILettersBlock", "UglyToad.PdfPig.Content.InlineImage", "UglyToad.PdfPig.Content.IPageFactory`1", "UglyToad.PdfPig.Content.IPdfImage", diff --git a/src/UglyToad.PdfPig.Tests/TestPdfImage.cs b/src/UglyToad.PdfPig.Tests/TestPdfImage.cs index aba9c7268..5878310be 100644 --- a/src/UglyToad.PdfPig.Tests/TestPdfImage.cs +++ b/src/UglyToad.PdfPig.Tests/TestPdfImage.cs @@ -9,7 +9,7 @@ public class TestPdfImage : IPdfImage { - public PdfRectangle Bounds { get; set; } + public PdfRectangle BoundingBox { get; set; } public int WidthInSamples { get; set; } diff --git a/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs b/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs index 9ddd9683e..6186a44b8 100644 --- a/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs +++ b/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs @@ -563,8 +563,8 @@ public void CanWriteSinglePageWithJpeg() Assert.NotNull(image); - Assert.Equal(expectedBounds.BottomLeft, image.Bounds.BottomLeft); - Assert.Equal(expectedBounds.TopRight, image.Bounds.TopRight); + Assert.Equal(expectedBounds.BottomLeft, image.BoundingBox.BottomLeft); + Assert.Equal(expectedBounds.TopRight, image.BoundingBox.TopRight); Assert.Equal(imageBytes, image.RawMemory.ToArray()); } @@ -609,10 +609,10 @@ public void CanWrite2PagesSharingJpeg() Assert.Equal(2, page1Images.Count); var image1 = page1Images[0]; - Assert.Equal(expectedBounds1, image1.Bounds); + Assert.Equal(expectedBounds1, image1.BoundingBox); var image2 = page1Images[1]; - Assert.Equal(expectedBounds2, image2.Bounds); + Assert.Equal(expectedBounds2, image2.BoundingBox); var page2Doc = document.GetPage(2); @@ -620,7 +620,7 @@ public void CanWrite2PagesSharingJpeg() Assert.NotNull(image3); - Assert.Equal(expectedBounds3, image3.Bounds); + Assert.Equal(expectedBounds3, image3.BoundingBox); Assert.Equal(imageBytes, image1.RawMemory.ToArray()); Assert.Equal(imageBytes, image2.RawMemory.ToArray()); @@ -696,8 +696,8 @@ public void CanWriteSinglePageWithPng() Assert.NotNull(image); - Assert.Equal(expectedBounds.BottomLeft, image.Bounds.BottomLeft); - Assert.Equal(expectedBounds.TopRight, image.Bounds.TopRight); + Assert.Equal(expectedBounds.BottomLeft, image.BoundingBox.BottomLeft); + Assert.Equal(expectedBounds.TopRight, image.BoundingBox.TopRight); Assert.True(image.TryGetPng(out var png)); Assert.NotNull(png); diff --git a/src/UglyToad.PdfPig/Content/IBoundingBox.cs b/src/UglyToad.PdfPig/Content/IBoundingBox.cs new file mode 100644 index 000000000..2abc3da4a --- /dev/null +++ b/src/UglyToad.PdfPig/Content/IBoundingBox.cs @@ -0,0 +1,36 @@ +namespace UglyToad.PdfPig.Content +{ + using UglyToad.PdfPig.Core; + + /// + /// Interface for classes with a bounding box + /// + public interface IBoundingBox + { + /// + /// Gets the Bounding Box: The rectangle completely containing this object + /// + PdfRectangle BoundingBox { get; } + } + + /// + /// Interface for classes with a bounding box and text + /// + public interface ILettersBlock : IBoundingBox + { + /// + /// The text of the block + /// + string Text { get; } + + /// + /// Text orientation of the block. + /// + TextOrientation TextOrientation { get; } + + /// + /// The letters contained in the Block + /// + IReadOnlyList Letters { get; } + } +} diff --git a/src/UglyToad.PdfPig/Content/IPdfImage.cs b/src/UglyToad.PdfPig/Content/IPdfImage.cs index 544ea3e76..6116f45bc 100644 --- a/src/UglyToad.PdfPig/Content/IPdfImage.cs +++ b/src/UglyToad.PdfPig/Content/IPdfImage.cs @@ -12,13 +12,8 @@ /// /// An image in a PDF document, may be an or a PostScript image XObject (). /// - public interface IPdfImage + public interface IPdfImage : IBoundingBox { - /// - /// The placement rectangle of the image in PDF coordinates. - /// - PdfRectangle Bounds { get; } - /// /// The width of the image in samples. /// diff --git a/src/UglyToad.PdfPig/Content/InlineImage.cs b/src/UglyToad.PdfPig/Content/InlineImage.cs index 7c8c0f5ec..54d467722 100644 --- a/src/UglyToad.PdfPig/Content/InlineImage.cs +++ b/src/UglyToad.PdfPig/Content/InlineImage.cs @@ -19,7 +19,7 @@ public class InlineImage : IPdfImage private readonly Lazy>? memoryFactory; /// - public PdfRectangle Bounds { get; } + public PdfRectangle BoundingBox { get; } /// public int WidthInSamples { get; } @@ -69,7 +69,7 @@ internal InlineImage(PdfRectangle bounds, int widthInSamples, int heightInSample DictionaryToken streamDictionary, ColorSpaceDetails colorSpaceDetails) { - Bounds = bounds; + BoundingBox = bounds; WidthInSamples = widthInSamples; HeightInSamples = heightInSamples; Decode = decode; @@ -124,7 +124,7 @@ public bool TryGetBytesAsMemory(out ReadOnlyMemory bytes) /// public override string ToString() { - return $"Inline Image (w {Bounds.Width}, h {Bounds.Height})"; + return $"Inline Image (w {BoundingBox.Width}, h {BoundingBox.Height})"; } } } diff --git a/src/UglyToad.PdfPig/Content/Letter.cs b/src/UglyToad.PdfPig/Content/Letter.cs index 3ee0222f7..9cb32730b 100644 --- a/src/UglyToad.PdfPig/Content/Letter.cs +++ b/src/UglyToad.PdfPig/Content/Letter.cs @@ -7,12 +7,12 @@ /// /// A glyph or combination of glyphs (characters) drawn by a PDF content stream. /// - public class Letter - { + public class Letter : IBoundingBox + { /// /// The text for this letter or unicode character. /// - public string Value { get; } + public string Value { get; } /// /// Text orientation of the letter. @@ -44,7 +44,12 @@ public class Letter /// For example letters with descenders, p, j, etc., will have a box extending below the they are placed at. /// The width of the glyph may also be more or less than the allocated for the character in the PDF content. /// - public PdfRectangle GlyphRectangle { get; } + public PdfRectangle GlyphRectangle { get; } + + /// + /// Gets the Bounding Box: The rectangle completely containing this object. Same as + /// + public PdfRectangle BoundingBox => GlyphRectangle; /// /// Size as defined in the PDF file. This is not equivalent to font size in points but is relative to other font sizes on the page. diff --git a/src/UglyToad.PdfPig/Content/Word.cs b/src/UglyToad.PdfPig/Content/Word.cs index 7d157fba8..7d96c8aa9 100644 --- a/src/UglyToad.PdfPig/Content/Word.cs +++ b/src/UglyToad.PdfPig/Content/Word.cs @@ -9,7 +9,7 @@ /// /// A word. /// - public class Word + public class Word : ILettersBlock { /// /// The text of the word. diff --git a/src/UglyToad.PdfPig/XObjects/XObjectImage.cs b/src/UglyToad.PdfPig/XObjects/XObjectImage.cs index 09ebf4e79..dda187e74 100644 --- a/src/UglyToad.PdfPig/XObjects/XObjectImage.cs +++ b/src/UglyToad.PdfPig/XObjects/XObjectImage.cs @@ -19,7 +19,7 @@ public class XObjectImage : IPdfImage private readonly Lazy>? memoryFactory; /// - public PdfRectangle Bounds { get; } + public PdfRectangle BoundingBox { get; } /// public int WidthInSamples { get; } @@ -81,7 +81,7 @@ internal XObjectImage(PdfRectangle bounds, Lazy>? bytes, ColorSpaceDetails? colorSpaceDetails) { - Bounds = bounds; + BoundingBox = bounds; WidthInSamples = widthInSamples; HeightInSamples = heightInSamples; BitsPerComponent = bitsPerComponent; @@ -116,7 +116,7 @@ public bool TryGetBytesAsMemory(out ReadOnlyMemory bytes) /// public override string ToString() { - return $"XObject Image (w {Bounds.Width}, h {Bounds.Height}): {ImageDictionary}"; + return $"XObject Image (w {BoundingBox.Width}, h {BoundingBox.Height}): {ImageDictionary}"; } } }