From 3671e813d49822676198026fdffe157fb86a0812 Mon Sep 17 00:00:00 2001 From: Adrian-George Bostan Date: Sat, 5 Oct 2019 17:11:13 +0300 Subject: [PATCH 1/3] Add more functionality to text chunks --- creator/paragraph.go | 80 ++++---------------------- creator/styled_paragraph.go | 11 ++-- creator/text_chunk.go | 110 ++++++++++++++++++++++++++++++++++-- 3 files changed, 123 insertions(+), 78 deletions(-) diff --git a/creator/paragraph.go b/creator/paragraph.go index 338f5701e..1d308d5a0 100644 --- a/creator/paragraph.go +++ b/creator/paragraph.go @@ -260,81 +260,20 @@ func (p *Paragraph) wrapText() error { return nil } - var line []rune - lineWidth := 0.0 - p.textLines = nil + chunk := NewTextChunk(p.text, TextStyle{ + Font: p.textFont, + FontSize: p.fontSize, + }) - runes := []rune(p.text) - var widths []float64 - - for _, r := range runes { - // Newline wrapping. - if r == '\u000A' { // LF - // Moves to next line. - p.textLines = append(p.textLines, string(line)) - line = nil - lineWidth = 0 - widths = nil - continue - } - - metrics, found := p.textFont.GetRuneMetrics(r) - if !found { - common.Log.Debug("ERROR: Rune char metrics not found! rune=0x%04x=%c font=%s %#q", - r, r, p.textFont.BaseFont(), p.textFont.Subtype()) - common.Log.Trace("Font: %#v", p.textFont) - common.Log.Trace("Encoder: %#v", p.textFont.Encoder()) - return errors.New("glyph char metrics missing") - } - - w := p.fontSize * metrics.Wx - if lineWidth+w > p.wrapWidth*1000.0 { - // Goes out of bounds: Wrap. - // Breaks on the character. - idx := -1 - for i := len(line) - 1; i >= 0; i-- { - if line[i] == ' ' { // TODO: What about other space glyphs like controlHT? - idx = i - break - } - } - if idx > 0 { - // Back up to last space. - p.textLines = append(p.textLines, string(line[0:idx+1])) - - // Remainder of line. 
- line = append(line[idx+1:], r) - widths = append(widths[idx+1:], w) - lineWidth = sum(widths) - - } else { - p.textLines = append(p.textLines, string(line)) - line = []rune{r} - widths = []float64{w} - lineWidth = w - } - } else { - line = append(line, r) - lineWidth += w - widths = append(widths, w) - } - } - if len(line) > 0 { - p.textLines = append(p.textLines, string(line)) + lines, err := chunk.Wrap(p.wrapWidth) + if err != nil { + return err } + p.textLines = lines return nil } -// sum returns the sums of the elements in `widths`. -func sum(widths []float64) float64 { - total := 0.0 - for _, w := range widths { - total += w - } - return total -} - // GeneratePageBlocks generates the page blocks. Multiple blocks are generated if the contents wrap // over multiple pages. Implements the Drawable interface. func (p *Paragraph) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawContext, error) { @@ -489,6 +428,9 @@ func drawParagraphOnBlock(blk *Block, p *Paragraph, ctx DrawContext) (DrawContex var encoded []byte for _, r := range runes { + if r == '\u000A' { // LF + continue + } if r == ' ' { // TODO: What about \t and other spaces. if len(encoded) > 0 { objs = append(objs, core.MakeStringFromBytes(encoded)) diff --git a/creator/styled_paragraph.go b/creator/styled_paragraph.go index 1ba7ce4d7..02ed8195e 100644 --- a/creator/styled_paragraph.go +++ b/creator/styled_paragraph.go @@ -96,7 +96,7 @@ func (p *StyledParagraph) appendChunk(chunk *TextChunk) *TextChunk { // Append adds a new text chunk to the paragraph. 
func (p *StyledParagraph) Append(text string) *TextChunk { - chunk := newTextChunk(text, p.defaultStyle) + chunk := NewTextChunk(text, p.defaultStyle) return p.appendChunk(chunk) } @@ -107,7 +107,7 @@ func (p *StyledParagraph) Insert(index uint, text string) *TextChunk { index = l } - chunk := newTextChunk(text, p.defaultStyle) + chunk := NewTextChunk(text, p.defaultStyle) p.chunks = append(p.chunks[:index], append([]*TextChunk{chunk}, p.chunks[index:]...)...) p.wrapText() @@ -118,7 +118,7 @@ func (p *StyledParagraph) Insert(index uint, text string) *TextChunk { // The text parameter represents the text that is displayed and the url // parameter sets the destionation of the link. func (p *StyledParagraph) AddExternalLink(text, url string) *TextChunk { - chunk := newTextChunk(text, p.defaultLinkStyle) + chunk := NewTextChunk(text, p.defaultLinkStyle) chunk.annotation = newExternalLinkAnnotation(url) return p.appendChunk(chunk) } @@ -130,7 +130,7 @@ func (p *StyledParagraph) AddExternalLink(text, url string) *TextChunk { // The zoom of the destination page is controlled with the zoom // parameter. Pass in 0 to keep the current zoom value. func (p *StyledParagraph) AddInternalLink(text string, page int64, x, y, zoom float64) *TextChunk { - chunk := newTextChunk(text, p.defaultLinkStyle) + chunk := NewTextChunk(text, p.defaultLinkStyle) chunk.annotation = newInternalLinkAnnotation(page-1, x, y, zoom) return p.appendChunk(chunk) } @@ -745,6 +745,9 @@ func drawStyledParagraphOnBlock(blk *Block, p *StyledParagraph, ctx DrawContext) var encStr []byte for _, rn := range chunk.Text { + if rn == '\u000A' { // LF: skip the rune, it is not encoded + continue + } if rn == ' ' { if len(encStr) > 0 { cc.Add_rg(r, g, b). 
diff --git a/creator/text_chunk.go b/creator/text_chunk.go index 5a664bc8d..f70d9f82c 100644 --- a/creator/text_chunk.go +++ b/creator/text_chunk.go @@ -6,6 +6,11 @@ package creator import ( + "errors" + "strings" + "unicode" + + "github.com/unidoc/unipdf/v3/common" "github.com/unidoc/unipdf/v3/core" "github.com/unidoc/unipdf/v3/model" ) @@ -26,17 +31,120 @@ type TextChunk struct { annotationProcessed bool } +// NewTextChunk returns a new text chunk instance. +func NewTextChunk(text string, style TextStyle) *TextChunk { + return &TextChunk{ + Text: text, + Style: style, + } +} + // SetAnnotation sets a annotation on a TextChunk. func (tc *TextChunk) SetAnnotation(annotation *model.PdfAnnotation) { tc.annotation = annotation } -// newTextChunk returns a new text chunk instance. -func newTextChunk(text string, style TextStyle) *TextChunk { - return &TextChunk{ - Text: text, - Style: style, +// Wrap wraps the text of the chunk into lines based on its style and the +// specified width. If the integer part of width is not positive, the text is +// returned unwrapped as a single line. A line ended by a line feed keeps the +// LF character; lines broken by wrapping have trailing whitespace removed and +// the final line is returned as is. An error is returned when the style font +// has no metrics for a rune of the text. +func (tc *TextChunk) Wrap(width float64) ([]string, error) { + if int(width) <= 0 { + return []string{tc.Text}, nil } + + var lines []string + var line []rune + var lineWidth float64 + var widths []float64 + + style := tc.Style + runes := []rune(tc.Text) + + for _, r := range runes { + // Move to the next line due to newline wrapping (LF). + if r == '\u000A' { + lines = append(lines, strings.TrimRightFunc(string(line), unicode.IsSpace)+string(r)) + line = nil + lineWidth = 0 + widths = nil + continue + } + + metrics, found := style.Font.GetRuneMetrics(r) + if !found { + common.Log.Debug("ERROR: Rune char metrics not found! 
rune=0x%04x=%c font=%s %#q", + r, r, style.Font.BaseFont(), style.Font.Subtype()) + common.Log.Trace("Font: %#v", style.Font) + common.Log.Trace("Encoder: %#v", style.Font.Encoder()) + return nil, errors.New("glyph char metrics missing") + } + + w := style.FontSize * metrics.Wx + charWidth := w + style.CharSpacing*1000.0 + if lineWidth+w > width*1000.0 { + // Goes out of bounds. Break on the character. + idx := -1 + for i := len(line) - 1; i >= 0; i-- { + if line[i] == ' ' { + idx = i + break + } + } + if idx > 0 { + // Back up to last space. + lines = append(lines, strings.TrimRightFunc(string(line[0:idx+1]), unicode.IsSpace)) + + // Remainder of line. + line = append(line[idx+1:], r) + widths = append(widths[idx+1:], charWidth) + + lineWidth = 0 + for _, cw := range widths { + lineWidth += cw + } + } else { + lines = append(lines, strings.TrimRightFunc(string(line), unicode.IsSpace)) + line = []rune{r} + widths = []float64{charWidth} + lineWidth = charWidth + } + } else { + line = append(line, r) + lineWidth += charWidth + widths = append(widths, charWidth) + } + } + if len(line) > 0 { + lines = append(lines, string(line)) + } + + return lines, nil +} + +// Fit fits the chunk into the specified bounding box, cropping off the +// remainder in a new chunk, if it exceeds the specified dimensions. +func (tc *TextChunk) Fit(width, height float64) (*TextChunk, error) { + lines, err := tc.Wrap(width) + if err != nil { + return nil, err + } + + fit := int(height / tc.Style.FontSize) + if fit < 0 { + // A negative count (e.g. negative height) would panic when slicing below. + fit = 0 + } + if fit >= len(lines) { + return nil, nil + } + lf := "\u000A" + tc.Text = strings.Replace(strings.Join(lines[:fit], " "), lf+" ", lf, -1) + + remainder := strings.Replace(strings.Join(lines[fit:], " "), lf+" ", lf, -1) + return NewTextChunk(remainder, tc.Style), nil } // newExternalLinkAnnotation returns a new external link annotation. 
From 826eb2a669ace6603f3c12ee69743998354683ac Mon Sep 17 00:00:00 2001 From: Adrian-George Bostan Date: Sat, 5 Oct 2019 18:02:07 +0300 Subject: [PATCH 2/3] Add creator text chunk test cases --- creator/text_chunk_test.go | 130 +++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 creator/text_chunk_test.go diff --git a/creator/text_chunk_test.go b/creator/text_chunk_test.go new file mode 100644 index 000000000..3a1952bef --- /dev/null +++ b/creator/text_chunk_test.go @@ -0,0 +1,130 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package creator + +import ( + "testing" + + "github.com/stretchr/testify/require" + "github.com/unidoc/unipdf/v3/model" +) + +func TestTextChunkWrap(t *testing.T) { + text := "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.\nUt enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." + tc := NewTextChunk(text, TextStyle{ + Font: model.DefaultFont(), + FontSize: 10, + }) + + // Check wrap when width <= 0. + expectedLines := []string{text} + + lines, err := tc.Wrap(0) + require.NoError(t, err) + require.Equal(t, len(expectedLines), len(lines)) + require.Equal(t, expectedLines, lines) + + // Check wrap for width = 500. 
+ expectedLines = []string{ + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore", + "magna aliqua.\n", + "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.", + "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint", + "occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", + } + + lines, err = tc.Wrap(500) + require.NoError(t, err) + require.Equal(t, len(expectedLines), len(lines)) + require.Equal(t, expectedLines, lines) + + // Check wrap for width = 100. + expectedLines = []string{ + "Lorem ipsum dolor sit", + "amet, consectetur", + "adipiscing elit, sed do", + "eiusmod tempor", + "incididunt ut labore et", + "dolore magna aliqua.\n", + "Ut enim ad minim", + "veniam, quis nostrud", + "exercitation ullamco", + "laboris nisi ut aliquip", + "ex ea commodo", + "consequat. Duis aute", + "irure dolor in", + "reprehenderit in", + "voluptate velit esse", + "cillum dolore eu", + "fugiat nulla pariatur.", + "Excepteur sint", + "occaecat cupidatat", + "non proident, sunt in", + "culpa qui officia", + "deserunt mollit anim", + "id est laborum.", + } + + lines, err = tc.Wrap(100) + require.NoError(t, err) + require.Equal(t, len(expectedLines), len(lines)) + require.Equal(t, expectedLines, lines) + + // Check wrap for width = 2000. + expectedLines = []string{ + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.\n", + "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", + } + + lines, err = tc.Wrap(2000) + require.NoError(t, err) + require.Equal(t, len(expectedLines), len(lines)) + require.Equal(t, expectedLines, lines) +} + +func TestTextChunkFit(t *testing.T) { + text := "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.\nUt enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." + tc := NewTextChunk(text, TextStyle{ + Font: model.DefaultFont(), + FontSize: 10, + }) + + expected := [][2]string{ + [2]string{ + "Lorem ipsum dolor sit", + "amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.\nUt enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", + }, + [2]string{ + "amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et", + "dolore magna aliqua.\nUt enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", + }, + [2]string{ + "dolore magna aliqua.\nUt enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 
Duis aute irure dolor in", + "reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", + }, + [2]string{ + "reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", + "", + }, + } + + for i := 1; i < 10; i++ { + tc2, err := tc.Fit(float64(i*100), float64(i*10)) + require.NoError(t, err) + + remainder := "" + if tc2 != nil { + remainder = tc2.Text + } + require.Equal(t, expected[i-1][0], tc.Text) + require.Equal(t, expected[i-1][1], remainder) + + if tc2 == nil { + break + } + tc = tc2 + } +} From 07f759807a056c9791a93792020b1c2469fb2adb Mon Sep 17 00:00:00 2001 From: Adrian-George Bostan Date: Tue, 8 Oct 2019 23:20:51 +0300 Subject: [PATCH 3/3] Improve documentation of the text chunk Fit method --- creator/text_chunk.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/creator/text_chunk.go b/creator/text_chunk.go index f70d9f82c..32e3cf2fb 100644 --- a/creator/text_chunk.go +++ b/creator/text_chunk.go @@ -122,6 +122,9 @@ func (tc *TextChunk) Wrap(width float64) ([]string, error) { // Fit fits the chunk into the specified bounding box, cropping off the // remainder in a new chunk, if it exceeds the specified dimensions. +// NOTE: The method assumes a line height of 1.0. In order to account for other +// line height values, the passed in height must be divided by the line height: +// height = height / lineHeight func (tc *TextChunk) Fit(width, height float64) (*TextChunk, error) { lines, err := tc.Wrap(width) if err != nil {