diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/AltoXmlTextExporter.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/AltoXmlTextExporter.cs
index c259f8338..7ec83ec8a 100644
--- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/AltoXmlTextExporter.cs
+++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/AltoXmlTextExporter.cs
@@ -219,7 +219,7 @@ private AltoDocument.AltoGraphicalElement ToAltoGraphicalElement(PdfPath pdfPath
private AltoDocument.AltoIllustration ToAltoIllustration(IPdfImage pdfImage, double height)
{
illustrationCount++;
- var rectangle = pdfImage.Bounds;
+ var rectangle = pdfImage.BoundingBox;
return new AltoDocument.AltoIllustration
{
diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/HOcrTextExporter.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/HOcrTextExporter.cs
index 7b0fd33f7..22a434646 100644
--- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/HOcrTextExporter.cs
+++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/HOcrTextExporter.cs
@@ -273,7 +273,7 @@ private string GetCode(PdfPath path, double pageHeight, bool subPaths, int level
private string GetCode(IPdfImage pdfImage, double pageHeight, int level)
{
imageCount++;
- var bbox = pdfImage.Bounds;
+ var bbox = pdfImage.BoundingBox;
return GetIndent(level) + "";
}
diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/PageXmlTextExporter.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/PageXmlTextExporter.cs
index 47337bba3..c1d6f44f8 100644
--- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/PageXmlTextExporter.cs
+++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/Export/PageXmlTextExporter.cs
@@ -273,7 +273,7 @@ private PageXmlDocument.PageXmlLineDrawingRegion ToPageXmlLineDrawingRegion(PdfP
private PageXmlDocument.PageXmlImageRegion ToPageXmlImageRegion(IPdfImage pdfImage, PageXmlData data, double pageWidth, double pageHeight)
{
data.RegionsCount++;
- var bbox = pdfImage.Bounds;
+ var bbox = pdfImage.BoundingBox;
return new PageXmlDocument.PageXmlImageRegion()
{
Coords = ToCoords(bbox, pageWidth, pageHeight),
diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextBlock.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextBlock.cs
index eb7bf75be..80b4dfa6a 100644
--- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextBlock.cs
+++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextBlock.cs
@@ -9,8 +9,13 @@
///
/// A block of text.
///
- public class TextBlock
- {
+ public class TextBlock: ILettersBlock
+ {
+ /// <summary>
+ /// The letters contained in this TextBlock.
+ /// </summary>
+ public IReadOnlyList Letters { get; }
+
///
/// The separator used between lines in the block.
///
@@ -63,6 +68,7 @@ public TextBlock(IReadOnlyList lines, string separator = "\n")
ReadingOrder = -1;
TextLines = lines;
+ Letters = lines.SelectMany(tl => tl.Words).SelectMany(w => w.Letters).ToList().AsReadOnly();
if (lines.Count == 1)
{
diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextLine.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextLine.cs
index 322780f3d..873c062c0 100644
--- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextLine.cs
+++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextLine.cs
@@ -9,8 +9,13 @@
///
/// A line of text.
///
- public class TextLine
+ public class TextLine : ILettersBlock
{
+ /// <summary>
+ /// The letters contained in this TextLine.
+ /// </summary>
+ public IReadOnlyList Letters { get; }
+
///
/// The separator used between words in the line.
///
@@ -56,6 +61,7 @@ public TextLine(IReadOnlyList words, string separator = " ")
Separator = separator;
Words = words;
+ Letters = words.SelectMany(w => w.Letters).ToList().AsReadOnly();
if (Words.Count == 1)
{
diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WhitespaceCoverExtractor.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WhitespaceCoverExtractor.cs
index 395780ede..68be72a8e 100644
--- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WhitespaceCoverExtractor.cs
+++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/WhitespaceCoverExtractor.cs
@@ -51,7 +51,7 @@ public static IReadOnlyList GetWhitespaces(IEnumerable words
if (images?.Any() == true)
{
- bboxes.AddRange(images.Where(w => w.Bounds.Width > 0 && w.Bounds.Height > 0).Select(o => o.Bounds));
+ bboxes.AddRange(images.Where(w => w.BoundingBox.Width > 0 && w.BoundingBox.Height > 0).Select(o => o.BoundingBox));
}
return GetWhitespaces(bboxes,
diff --git a/src/UglyToad.PdfPig.Tests/Integration/SinglePageLibreOfficeImages.cs b/src/UglyToad.PdfPig.Tests/Integration/SinglePageLibreOfficeImages.cs
index 22e6e5eb0..de53fe2d2 100644
--- a/src/UglyToad.PdfPig.Tests/Integration/SinglePageLibreOfficeImages.cs
+++ b/src/UglyToad.PdfPig.Tests/Integration/SinglePageLibreOfficeImages.cs
@@ -26,29 +26,29 @@ public void ImagesHaveCorrectDimensionsAndLocations()
{
var page = document.GetPage(1);
- var images = page.GetImages().OrderBy(x => x.Bounds.Width).ToList();
+ var images = page.GetImages().OrderBy(x => x.BoundingBox.Width).ToList();
var pdfPigSquare = images[0];
- Assert.Equal(148.3d, pdfPigSquare.Bounds.Width, doubleComparer);
- Assert.Equal(148.3d, pdfPigSquare.Bounds.Height, doubleComparer);
- Assert.Equal(60.1d, pdfPigSquare.Bounds.Left, doubleComparer);
- Assert.Equal(765.8d, pdfPigSquare.Bounds.Top, doubleComparer);
+ Assert.Equal(148.3d, pdfPigSquare.BoundingBox.Width, doubleComparer);
+ Assert.Equal(148.3d, pdfPigSquare.BoundingBox.Height, doubleComparer);
+ Assert.Equal(60.1d, pdfPigSquare.BoundingBox.Left, doubleComparer);
+ Assert.Equal(765.8d, pdfPigSquare.BoundingBox.Top, doubleComparer);
var pdfPigSquished = images[1];
- Assert.Equal(206.8d, pdfPigSquished.Bounds.Width, doubleComparer);
- Assert.Equal(83.2d, pdfPigSquished.Bounds.Height, doubleComparer);
- Assert.Equal(309.8d, pdfPigSquished.Bounds.Left, doubleComparer);
- Assert.Equal(552.1d, pdfPigSquished.Bounds.Top, doubleComparer);
+ Assert.Equal(206.8d, pdfPigSquished.BoundingBox.Width, doubleComparer);
+ Assert.Equal(83.2d, pdfPigSquished.BoundingBox.Height, doubleComparer);
+ Assert.Equal(309.8d, pdfPigSquished.BoundingBox.Left, doubleComparer);
+ Assert.Equal(552.1d, pdfPigSquished.BoundingBox.Top, doubleComparer);
var birthdayPigs = images[2];
- Assert.Equal(391d, birthdayPigs.Bounds.Width, doubleComparer);
- Assert.Equal(267.1d, birthdayPigs.Bounds.Height, doubleComparer);
- Assert.Equal(102.2d, birthdayPigs.Bounds.Left, doubleComparer);
- Assert.Equal(426.3d, birthdayPigs.Bounds.Top, doubleComparer);
+ Assert.Equal(391d, birthdayPigs.BoundingBox.Width, doubleComparer);
+ Assert.Equal(267.1d, birthdayPigs.BoundingBox.Height, doubleComparer);
+ Assert.Equal(102.2d, birthdayPigs.BoundingBox.Left, doubleComparer);
+ Assert.Equal(426.3d, birthdayPigs.BoundingBox.Top, doubleComparer);
}
}
diff --git a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs
index dc387e6e8..affe13ee1 100644
--- a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs
+++ b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs
@@ -77,6 +77,8 @@ public void OnlyExposedApiIsPublic()
"UglyToad.PdfPig.Content.DocumentInformation",
"UglyToad.PdfPig.Content.EmbeddedFile",
"UglyToad.PdfPig.Content.Hyperlink",
+ "UglyToad.PdfPig.Content.IBoundingBox",
+ "UglyToad.PdfPig.Content.ILettersBlock",
"UglyToad.PdfPig.Content.InlineImage",
"UglyToad.PdfPig.Content.IPageFactory`1",
"UglyToad.PdfPig.Content.IPdfImage",
diff --git a/src/UglyToad.PdfPig.Tests/TestPdfImage.cs b/src/UglyToad.PdfPig.Tests/TestPdfImage.cs
index aba9c7268..5878310be 100644
--- a/src/UglyToad.PdfPig.Tests/TestPdfImage.cs
+++ b/src/UglyToad.PdfPig.Tests/TestPdfImage.cs
@@ -9,7 +9,7 @@
public class TestPdfImage : IPdfImage
{
- public PdfRectangle Bounds { get; set; }
+ public PdfRectangle BoundingBox { get; set; }
public int WidthInSamples { get; set; }
diff --git a/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs b/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs
index 9ddd9683e..6186a44b8 100644
--- a/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs
@@ -563,8 +563,8 @@ public void CanWriteSinglePageWithJpeg()
Assert.NotNull(image);
- Assert.Equal(expectedBounds.BottomLeft, image.Bounds.BottomLeft);
- Assert.Equal(expectedBounds.TopRight, image.Bounds.TopRight);
+ Assert.Equal(expectedBounds.BottomLeft, image.BoundingBox.BottomLeft);
+ Assert.Equal(expectedBounds.TopRight, image.BoundingBox.TopRight);
Assert.Equal(imageBytes, image.RawMemory.ToArray());
}
@@ -609,10 +609,10 @@ public void CanWrite2PagesSharingJpeg()
Assert.Equal(2, page1Images.Count);
var image1 = page1Images[0];
- Assert.Equal(expectedBounds1, image1.Bounds);
+ Assert.Equal(expectedBounds1, image1.BoundingBox);
var image2 = page1Images[1];
- Assert.Equal(expectedBounds2, image2.Bounds);
+ Assert.Equal(expectedBounds2, image2.BoundingBox);
var page2Doc = document.GetPage(2);
@@ -620,7 +620,7 @@ public void CanWrite2PagesSharingJpeg()
Assert.NotNull(image3);
- Assert.Equal(expectedBounds3, image3.Bounds);
+ Assert.Equal(expectedBounds3, image3.BoundingBox);
Assert.Equal(imageBytes, image1.RawMemory.ToArray());
Assert.Equal(imageBytes, image2.RawMemory.ToArray());
@@ -696,8 +696,8 @@ public void CanWriteSinglePageWithPng()
Assert.NotNull(image);
- Assert.Equal(expectedBounds.BottomLeft, image.Bounds.BottomLeft);
- Assert.Equal(expectedBounds.TopRight, image.Bounds.TopRight);
+ Assert.Equal(expectedBounds.BottomLeft, image.BoundingBox.BottomLeft);
+ Assert.Equal(expectedBounds.TopRight, image.BoundingBox.TopRight);
Assert.True(image.TryGetPng(out var png));
Assert.NotNull(png);
diff --git a/src/UglyToad.PdfPig/Content/IBoundingBox.cs b/src/UglyToad.PdfPig/Content/IBoundingBox.cs
new file mode 100644
index 000000000..2abc3da4a
--- /dev/null
+++ b/src/UglyToad.PdfPig/Content/IBoundingBox.cs
@@ -0,0 +1,36 @@
+namespace UglyToad.PdfPig.Content
+{
+ using UglyToad.PdfPig.Core;
+
+ /// <summary>
+ /// Interface for classes with a bounding box.
+ /// </summary>
+ public interface IBoundingBox
+ {
+ /// <summary>
+ /// Gets the bounding box: the rectangle completely containing this object.
+ /// </summary>
+ PdfRectangle BoundingBox { get; }
+ }
+
+ /// <summary>
+ /// Interface for classes with a bounding box and text.
+ /// </summary>
+ public interface ILettersBlock : IBoundingBox
+ {
+ /// <summary>
+ /// The text of the block.
+ /// </summary>
+ string Text { get; }
+
+ /// <summary>
+ /// Text orientation of the block.
+ /// </summary>
+ TextOrientation TextOrientation { get; }
+
+ /// <summary>
+ /// The letters contained in the block.
+ /// </summary>
+ IReadOnlyList Letters { get; }
+ }
+}
diff --git a/src/UglyToad.PdfPig/Content/IPdfImage.cs b/src/UglyToad.PdfPig/Content/IPdfImage.cs
index 544ea3e76..6116f45bc 100644
--- a/src/UglyToad.PdfPig/Content/IPdfImage.cs
+++ b/src/UglyToad.PdfPig/Content/IPdfImage.cs
@@ -12,13 +12,8 @@
/// <summary>
/// An image in a PDF document, may be an <see cref="InlineImage"/> or a PostScript image XObject (<see cref="XObjectImage"/>).
/// </summary>
- public interface IPdfImage
+ public interface IPdfImage : IBoundingBox
{
- ///
- /// The placement rectangle of the image in PDF coordinates.
- ///
- PdfRectangle Bounds { get; }
-
///
/// The width of the image in samples.
///
diff --git a/src/UglyToad.PdfPig/Content/InlineImage.cs b/src/UglyToad.PdfPig/Content/InlineImage.cs
index 7c8c0f5ec..54d467722 100644
--- a/src/UglyToad.PdfPig/Content/InlineImage.cs
+++ b/src/UglyToad.PdfPig/Content/InlineImage.cs
@@ -19,7 +19,7 @@ public class InlineImage : IPdfImage
private readonly Lazy>? memoryFactory;
///
- public PdfRectangle Bounds { get; }
+ public PdfRectangle BoundingBox { get; }
///
public int WidthInSamples { get; }
@@ -69,7 +69,7 @@ internal InlineImage(PdfRectangle bounds, int widthInSamples, int heightInSample
DictionaryToken streamDictionary,
ColorSpaceDetails colorSpaceDetails)
{
- Bounds = bounds;
+ BoundingBox = bounds;
WidthInSamples = widthInSamples;
HeightInSamples = heightInSamples;
Decode = decode;
@@ -124,7 +124,7 @@ public bool TryGetBytesAsMemory(out ReadOnlyMemory bytes)
///
public override string ToString()
{
- return $"Inline Image (w {Bounds.Width}, h {Bounds.Height})";
+ return $"Inline Image (w {BoundingBox.Width}, h {BoundingBox.Height})";
}
}
}
diff --git a/src/UglyToad.PdfPig/Content/Letter.cs b/src/UglyToad.PdfPig/Content/Letter.cs
index 3ee0222f7..9cb32730b 100644
--- a/src/UglyToad.PdfPig/Content/Letter.cs
+++ b/src/UglyToad.PdfPig/Content/Letter.cs
@@ -7,12 +7,12 @@
///
/// A glyph or combination of glyphs (characters) drawn by a PDF content stream.
///
- public class Letter
- {
+ public class Letter : IBoundingBox
+ {
///
/// The text for this letter or unicode character.
///
- public string Value { get; }
+ public string Value { get; }
///
/// Text orientation of the letter.
@@ -44,7 +44,12 @@ public class Letter
/// For example letters with descenders, p, j, etc., will have a box extending below the they are placed at.
/// The width of the glyph may also be more or less than the allocated for the character in the PDF content.
///
- public PdfRectangle GlyphRectangle { get; }
+ public PdfRectangle GlyphRectangle { get; }
+
+ /// <summary>
+ /// Gets the Bounding Box: The rectangle completely containing this object. Same as <see cref="GlyphRectangle"/>.
+ /// </summary>
+ public PdfRectangle BoundingBox => GlyphRectangle;
///
/// Size as defined in the PDF file. This is not equivalent to font size in points but is relative to other font sizes on the page.
diff --git a/src/UglyToad.PdfPig/Content/Word.cs b/src/UglyToad.PdfPig/Content/Word.cs
index 7d157fba8..7d96c8aa9 100644
--- a/src/UglyToad.PdfPig/Content/Word.cs
+++ b/src/UglyToad.PdfPig/Content/Word.cs
@@ -9,7 +9,7 @@
///
/// A word.
///
- public class Word
+ public class Word : ILettersBlock
{
///
/// The text of the word.
diff --git a/src/UglyToad.PdfPig/XObjects/XObjectImage.cs b/src/UglyToad.PdfPig/XObjects/XObjectImage.cs
index 09ebf4e79..dda187e74 100644
--- a/src/UglyToad.PdfPig/XObjects/XObjectImage.cs
+++ b/src/UglyToad.PdfPig/XObjects/XObjectImage.cs
@@ -19,7 +19,7 @@ public class XObjectImage : IPdfImage
private readonly Lazy>? memoryFactory;
///
- public PdfRectangle Bounds { get; }
+ public PdfRectangle BoundingBox { get; }
///
public int WidthInSamples { get; }
@@ -81,7 +81,7 @@ internal XObjectImage(PdfRectangle bounds,
Lazy>? bytes,
ColorSpaceDetails? colorSpaceDetails)
{
- Bounds = bounds;
+ BoundingBox = bounds;
WidthInSamples = widthInSamples;
HeightInSamples = heightInSamples;
BitsPerComponent = bitsPerComponent;
@@ -116,7 +116,7 @@ public bool TryGetBytesAsMemory(out ReadOnlyMemory bytes)
///
public override string ToString()
{
- return $"XObject Image (w {Bounds.Width}, h {Bounds.Height}): {ImageDictionary}";
+ return $"XObject Image (w {BoundingBox.Width}, h {BoundingBox.Height}): {ImageDictionary}";
}
}
}