Skip to content

Commit c852c40

Browse files
committed
Use glyph indices for font tracking in vector formats
With libraqm, string layout produces glyph indices, not character codes, and font features may even produce different glyphs for the same character code (e.g., by picking a different Stylistic Set). Thus we cannot rely on character codes as unique items within a font, and must move toward glyph indices everywhere.
1 parent 6a03850 commit c852c40

File tree

9 files changed

+129
-133
lines changed

9 files changed

+129
-133
lines changed

lib/matplotlib/_mathtext.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@
3737

3838
if T.TYPE_CHECKING:
3939
from collections.abc import Iterable
40-
from .ft2font import CharacterCodeType, Glyph
40+
from .ft2font import CharacterCodeType, Glyph, GlyphIndexType
4141

4242

4343
ParserElement.enable_packrat()
@@ -86,7 +86,7 @@ class VectorParse(NamedTuple):
8686
width: float
8787
height: float
8888
depth: float
89-
glyphs: list[tuple[FT2Font, float, CharacterCodeType, float, float]]
89+
glyphs: list[tuple[FT2Font, float, GlyphIndexType, float, float]]
9090
rects: list[tuple[float, float, float, float]]
9191

9292
VectorParse.__module__ = "matplotlib.mathtext"
@@ -131,7 +131,7 @@ def __init__(self, box: Box):
131131
def to_vector(self) -> VectorParse:
132132
w, h, d = map(
133133
np.ceil, [self.box.width, self.box.height, self.box.depth])
134-
gs = [(info.font, info.fontsize, info.num, ox, h - oy + info.offset)
134+
gs = [(info.font, info.fontsize, info.glyph_id, ox, h - oy + info.offset)
135135
for ox, oy, info in self.glyphs]
136136
rs = [(x1, h - y2, x2 - x1, y2 - y1)
137137
for x1, y1, x2, y2 in self.rects]
@@ -213,7 +213,7 @@ class FontInfo(NamedTuple):
213213
fontsize: float
214214
postscript_name: str
215215
metrics: FontMetrics
216-
num: CharacterCodeType
216+
glyph_id: GlyphIndexType
217217
glyph: Glyph
218218
offset: float
219219

@@ -374,7 +374,8 @@ def _get_info(self, fontname: str, font_class: str, sym: str, fontsize: float,
374374
dpi: float) -> FontInfo:
375375
font, num, slanted = self._get_glyph(fontname, font_class, sym)
376376
font.set_size(fontsize, dpi)
377-
glyph = font.load_char(num, flags=self.load_glyph_flags)
377+
glyph_id = font.get_char_index(num)
378+
glyph = font.load_glyph(glyph_id, flags=self.load_glyph_flags)
378379

379380
xmin, ymin, xmax, ymax = (val / 64 for val in glyph.bbox)
380381
offset = self._get_offset(font, glyph, fontsize, dpi)
@@ -396,7 +397,7 @@ def _get_info(self, fontname: str, font_class: str, sym: str, fontsize: float,
396397
fontsize=fontsize,
397398
postscript_name=font.postscript_name,
398399
metrics=metrics,
399-
num=num,
400+
glyph_id=glyph_id,
400401
glyph=glyph,
401402
offset=offset
402403
)
@@ -426,8 +427,7 @@ def get_kern(self, font1: str, fontclass1: str, sym1: str, fontsize1: float,
426427
info1 = self._get_info(font1, fontclass1, sym1, fontsize1, dpi)
427428
info2 = self._get_info(font2, fontclass2, sym2, fontsize2, dpi)
428429
font = info1.font
429-
return font.get_kerning(font.get_char_index(info1.num),
430-
font.get_char_index(info2.num),
430+
return font.get_kerning(info1.glyph_id, info2.glyph_id,
431431
Kerning.DEFAULT) / 64
432432
return super().get_kern(font1, fontclass1, sym1, fontsize1,
433433
font2, fontclass2, sym2, fontsize2, dpi)

lib/matplotlib/_text_helpers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
class LayoutItem:
1515
ft_object: FT2Font
1616
char: str
17-
glyph_idx: GlyphIndexType
17+
glyph_index: GlyphIndexType
1818
x: float
1919
prev_kern: float
2020

lib/matplotlib/backends/_backend_pdf_ps.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,27 +20,27 @@ def _cached_get_afm_from_fname(fname):
2020
return AFM(fh)
2121

2222

23-
def get_glyphs_subset(fontfile, characters):
23+
def get_glyphs_subset(fontfile, glyphs):
2424
"""
25-
Subset a TTF font
25+
Subset a TTF font.
2626
27-
Reads the named fontfile and restricts the font to the characters.
27+
Reads the named fontfile and restricts the font to the glyphs.
2828
2929
Parameters
3030
----------
3131
fontfile : str
3232
Path to the font file
33-
characters : str
34-
Continuous set of characters to include in subset
33+
glyphs : set[int]
34+
Set of glyph IDs to include in subset.
3535
3636
Returns
3737
-------
3838
fontTools.ttLib.ttFont.TTFont
3939
An open font object representing the subset, which needs to
4040
be closed by the caller.
4141
"""
42-
43-
options = subset.Options(glyph_names=True, recommended_glyphs=True)
42+
options = subset.Options(glyph_names=True, recommended_glyphs=True,
43+
retain_gids=True)
4444

4545
# Prevent subsetting extra tables.
4646
options.drop_tables += [
@@ -71,7 +71,7 @@ def get_glyphs_subset(fontfile, characters):
7171

7272
font = subset.load_font(fontfile, options)
7373
subsetter = subset.Subsetter(options=options)
74-
subsetter.populate(text=characters)
74+
subsetter.populate(gids=glyphs)
7575
subsetter.subset(font)
7676
return font
7777

@@ -97,10 +97,10 @@ def font_as_file(font):
9797

9898
class CharacterTracker:
9999
"""
100-
Helper for font subsetting by the pdf and ps backends.
100+
Helper for font subsetting by the PDF and PS backends.
101101
102-
Maintains a mapping of font paths to the set of character codepoints that
103-
are being used from that font.
102+
Maintains a mapping of font paths to the set of glyphs that are being used from that
103+
font.
104104
"""
105105

106106
def __init__(self):
@@ -110,10 +110,11 @@ def track(self, font, s):
110110
"""Record that string *s* is being typeset using font *font*."""
111111
char_to_font = font._get_fontmap(s)
112112
for _c, _f in char_to_font.items():
113-
self.used.setdefault(_f.fname, set()).add(ord(_c))
113+
glyph_index = _f.get_char_index(ord(_c))
114+
self.used.setdefault(_f.fname, set()).add(glyph_index)
114115

115116
def track_glyph(self, font, glyph):
116-
"""Record that codepoint *glyph* is being typeset using font *font*."""
117+
"""Record that glyph index *glyph* is being typeset using font *font*."""
117118
self.used.setdefault(font.fname, set()).add(glyph)
118119

119120

lib/matplotlib/backends/backend_cairo.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import functools
1010
import gzip
11+
import itertools
1112
import math
1213

1314
import numpy as np
@@ -248,13 +249,12 @@ def _draw_mathtext(self, gc, x, y, s, prop, angle):
248249
if angle:
249250
ctx.rotate(np.deg2rad(-angle))
250251

251-
for font, fontsize, idx, ox, oy in glyphs:
252+
for (font, fontsize), font_glyphs in itertools.groupby(
253+
glyphs, key=lambda x: (x[0], x[1])):
252254
ctx.new_path()
253-
ctx.move_to(ox, -oy)
254-
ctx.select_font_face(
255-
*_cairo_font_args_from_font_prop(ttfFontProperty(font)))
255+
ctx.select_font_face(*_cairo_font_args_from_font_prop(ttfFontProperty(font)))
256256
ctx.set_font_size(self.points_to_pixels(fontsize))
257-
ctx.show_text(chr(idx))
257+
ctx.show_glyphs([(idx, ox, -oy) for _, _, idx, ox, oy in font_glyphs])
258258

259259
for ox, oy, w, h in rects:
260260
ctx.new_path()

lib/matplotlib/backends/backend_pdf.py

Lines changed: 37 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -960,9 +960,9 @@ def writeFonts(self):
960960
else:
961961
# a normal TrueType font
962962
_log.debug('Writing TrueType font.')
963-
chars = self._character_tracker.used.get(filename)
964-
if chars:
965-
fonts[Fx] = self.embedTTF(filename, chars)
963+
glyphs = self._character_tracker.used.get(filename)
964+
if glyphs:
965+
fonts[Fx] = self.embedTTF(filename, glyphs)
966966
self.writeObject(self.fontObject, fonts)
967967

968968
def _write_afm_font(self, filename):
@@ -1136,9 +1136,8 @@ def _get_xobject_glyph_name(self, filename, glyph_name):
11361136
end
11371137
end"""
11381138

1139-
def embedTTF(self, filename, characters):
1139+
def embedTTF(self, filename, glyphs):
11401140
"""Embed the TTF font from the named file into the document."""
1141-
11421141
font = get_font(filename)
11431142
fonttype = mpl.rcParams['pdf.fonttype']
11441143

@@ -1153,7 +1152,7 @@ def cvt(length, upe=font.units_per_EM, nearest=True):
11531152
else:
11541153
return math.ceil(value)
11551154

1156-
def embedTTFType3(font, characters, descriptor):
1155+
def embedTTFType3(font, glyphs, descriptor):
11571156
"""The Type 3-specific part of embedding a Truetype font"""
11581157
widthsObject = self.reserveObject('font widths')
11591158
fontdescObject = self.reserveObject('font descriptor')
@@ -1200,15 +1199,13 @@ def get_char_width(charcode):
12001199
# Make the "Differences" array, sort the ccodes < 255 from
12011200
# the multi-byte ccodes, and build the whole set of glyph ids
12021201
# that we need from this font.
1203-
glyph_ids = []
12041202
differences = []
12051203
multi_byte_chars = set()
1206-
for c in characters:
1207-
ccode = c
1208-
gind = font.get_char_index(ccode)
1209-
glyph_ids.append(gind)
1204+
charmap = {gind: ccode for ccode, gind in font.get_charmap().items()}
1205+
for gind in glyphs:
12101206
glyph_name = font.get_glyph_name(gind)
1211-
if ccode <= 255:
1207+
ccode = charmap.get(gind)
1208+
if ccode is not None and ccode <= 255:
12121209
differences.append((ccode, glyph_name))
12131210
else:
12141211
multi_byte_chars.add(glyph_name)
@@ -1222,7 +1219,7 @@ def get_char_width(charcode):
12221219
last_c = c
12231220

12241221
# Make the charprocs array.
1225-
rawcharprocs = _get_pdf_charprocs(filename, glyph_ids)
1222+
rawcharprocs = _get_pdf_charprocs(filename, glyphs)
12261223
charprocs = {}
12271224
for charname in sorted(rawcharprocs):
12281225
stream = rawcharprocs[charname]
@@ -1259,7 +1256,7 @@ def get_char_width(charcode):
12591256

12601257
return fontdictObject
12611258

1262-
def embedTTFType42(font, characters, descriptor):
1259+
def embedTTFType42(font, glyphs, descriptor):
12631260
"""The Type 42-specific part of embedding a Truetype font"""
12641261
fontdescObject = self.reserveObject('font descriptor')
12651262
cidFontDictObject = self.reserveObject('CID font dictionary')
@@ -1269,9 +1266,8 @@ def embedTTFType42(font, characters, descriptor):
12691266
wObject = self.reserveObject('Type 0 widths')
12701267
toUnicodeMapObject = self.reserveObject('ToUnicode map')
12711268

1272-
subset_str = "".join(chr(c) for c in characters)
1273-
_log.debug("SUBSET %s characters: %s", filename, subset_str)
1274-
with _backend_pdf_ps.get_glyphs_subset(filename, subset_str) as subset:
1269+
_log.debug("SUBSET %s characters: %s", filename, glyphs)
1270+
with _backend_pdf_ps.get_glyphs_subset(filename, glyphs) as subset:
12751271
fontdata = _backend_pdf_ps.font_as_file(subset)
12761272
_log.debug(
12771273
"SUBSET %s %d -> %d", filename,
@@ -1319,11 +1315,11 @@ def embedTTFType42(font, characters, descriptor):
13191315
cid_to_gid_map = ['\0'] * 65536
13201316
widths = []
13211317
max_ccode = 0
1322-
for c in characters:
1323-
ccode = c
1324-
gind = font.get_char_index(ccode)
1325-
glyph = font.load_char(ccode,
1326-
flags=LoadFlags.NO_SCALE | LoadFlags.NO_HINTING)
1318+
charmap = {gind: ccode for ccode, gind in font.get_charmap().items()}
1319+
for gind in glyphs:
1320+
glyph = font.load_glyph(gind,
1321+
flags=LoadFlags.NO_SCALE | LoadFlags.NO_HINTING)
1322+
ccode = charmap[gind]
13271323
widths.append((ccode, cvt(glyph.horiAdvance)))
13281324
if ccode < 65536:
13291325
cid_to_gid_map[ccode] = chr(gind)
@@ -1361,11 +1357,10 @@ def embedTTFType42(font, characters, descriptor):
13611357
(len(unicode_groups), b"\n".join(unicode_bfrange)))
13621358

13631359
# Add XObjects for unsupported chars
1364-
glyph_ids = []
1365-
for ccode in characters:
1366-
if not _font_supports_glyph(fonttype, ccode):
1367-
gind = full_font.get_char_index(ccode)
1368-
glyph_ids.append(gind)
1360+
glyph_ids = [
1361+
gind for gind in glyphs
1362+
if not _font_supports_glyph(fonttype, charmap[gind])
1363+
]
13691364

13701365
bbox = [cvt(x, nearest=False) for x in full_font.bbox]
13711366
rawcharprocs = _get_pdf_charprocs(filename, glyph_ids)
@@ -1450,9 +1445,9 @@ def embedTTFType42(font, characters, descriptor):
14501445
}
14511446

14521447
if fonttype == 3:
1453-
return embedTTFType3(font, characters, descriptor)
1448+
return embedTTFType3(font, glyphs, descriptor)
14541449
elif fonttype == 42:
1455-
return embedTTFType42(font, characters, descriptor)
1450+
return embedTTFType42(font, glyphs, descriptor)
14561451

14571452
def alphaState(self, alpha):
14581453
"""Return name of an ExtGState that sets alpha to the given value."""
@@ -2215,28 +2210,32 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
22152210
oldx, oldy = 0, 0
22162211
unsupported_chars = []
22172212

2213+
font_charmaps = {}
22182214
self.file.output(Op.begin_text)
2219-
for font, fontsize, num, ox, oy in glyphs:
2220-
self.file._character_tracker.track_glyph(font, num)
2215+
for font, fontsize, glyph_index, ox, oy in glyphs:
2216+
self.file._character_tracker.track_glyph(font, glyph_index)
22212217
fontname = font.fname
2222-
if not _font_supports_glyph(fonttype, num):
2218+
if font not in font_charmaps:
2219+
font_charmaps[font] = {gind: ccode
2220+
for ccode, gind in font.get_charmap().items()}
2221+
ccode = font_charmaps[font].get(glyph_index)
2222+
if ccode is None or not _font_supports_glyph(fonttype, ccode):
22232223
# Unsupported chars (i.e. multibyte in Type 3 or beyond BMP in
22242224
# Type 42) must be emitted separately (below).
2225-
unsupported_chars.append((font, fontsize, ox, oy, num))
2225+
unsupported_chars.append((font, fontsize, ox, oy, glyph_index))
22262226
else:
22272227
self._setup_textpos(ox, oy, 0, oldx, oldy)
22282228
oldx, oldy = ox, oy
22292229
if (fontname, fontsize) != prev_font:
22302230
self.file.output(self.file.fontName(fontname), fontsize,
22312231
Op.selectfont)
22322232
prev_font = fontname, fontsize
2233-
self.file.output(self.encode_string(chr(num), fonttype),
2233+
self.file.output(self.encode_string(chr(ccode), fonttype),
22342234
Op.show)
22352235
self.file.output(Op.end_text)
22362236

2237-
for font, fontsize, ox, oy, num in unsupported_chars:
2238-
self._draw_xobject_glyph(
2239-
font, fontsize, font.get_char_index(num), ox, oy)
2237+
for font, fontsize, ox, oy, glyph_index in unsupported_chars:
2238+
self._draw_xobject_glyph(font, fontsize, glyph_index, ox, oy)
22402239

22412240
# Draw any horizontal lines in the math layout
22422241
for ox, oy, width, height in rects:
@@ -2399,7 +2398,7 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
23992398
singlebyte_chunks[-1][2].append(item.char)
24002399
prev_was_multibyte = False
24012400
else:
2402-
multibyte_glyphs.append((item.ft_object, item.x, item.glyph_idx))
2401+
multibyte_glyphs.append((item.ft_object, item.x, item.glyph_index))
24032402
prev_was_multibyte = True
24042403
# Do the rotation and global translation as a single matrix
24052404
# concatenation up front
@@ -2409,7 +2408,6 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
24092408
-math.sin(a), math.cos(a),
24102409
x, y, Op.concat_matrix)
24112410
# Emit all the 1-byte characters in a BT/ET group.
2412-
24132411
self.file.output(Op.begin_text)
24142412
prev_start_x = 0
24152413
for ft_object, start_x, kerns_or_chars in singlebyte_chunks:

0 commit comments

Comments
 (0)