diff --git a/Makefile b/Makefile
index 7f6835769..f5811672a 100644
--- a/Makefile
+++ b/Makefile
@@ -28,7 +28,8 @@ ZOPFLIPNG = zopflipng
OPTIPNG = optipng
EMOJI_BUILDER = third_party/color_emoji/emoji_builder.py
-ADD_GLYPHS = third_party/color_emoji/add_glyphs.py
+ADD_GLYPHS = add_glyphs.py
+ADD_GLYPHS_FLAGS = -a emoji_aliases.txt
PUA_ADDER = map_pua_emoji.py
VS_ADDER = add_vs_cmap.py # from nototools
@@ -193,7 +194,7 @@ endif
# Run make without -j if this happens.
%.ttx: %.ttx.tmpl $(ADD_GLYPHS) $(ALL_COMPRESSED_FILES)
- @python $(ADD_GLYPHS) "$<" "$@" "$(COMPRESSED_DIR)/emoji_u"
+ @python $(ADD_GLYPHS) -f "$<" -o "$@" -d "$(COMPRESSED_DIR)" $(ADD_GLYPHS_FLAGS)
%.ttf: %.ttx
@rm -f "$@"
diff --git a/NotoColorEmoji.tmpl.ttx.tmpl b/NotoColorEmoji.tmpl.ttx.tmpl
index a2ae62b7e..fffdc03e9 100644
--- a/NotoColorEmoji.tmpl.ttx.tmpl
+++ b/NotoColorEmoji.tmpl.ttx.tmpl
@@ -7,7 +7,7 @@
-
+
@@ -191,72 +191,6 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
@@ -265,72 +199,6 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/add_aliases.py b/add_aliases.py
index d4af6a05f..fd523b278 100755
--- a/add_aliases.py
+++ b/add_aliases.py
@@ -36,9 +36,14 @@ def seq_to_str(seq):
-def read_emoji_aliases():
+def read_emoji_aliases(filename=None):
+  """Read the alias file at filename; when filename is omitted, fall back to
+  emoji_aliases.txt under DATA_ROOT."""
+  # A single definition with a default is used on purpose: defining a
+  # zero-argument wrapper and a one-argument function under the same name
+  # makes the second def shadow the first, breaking read_emoji_aliases().
+  if filename is None:
+    filename = path.join(DATA_ROOT, 'emoji_aliases.txt')
result = {}
- with open(path.join(DATA_ROOT, 'emoji_aliases.txt'), 'r') as f:
+ with open(filename, 'r') as f:
for line in f:
ix = line.find('#')
if (ix > -1):
@@ -47,8 +52,8 @@ def read_emoji_aliases():
if not line:
continue
als, trg = (s.strip() for s in line.split(';'))
- als_seq = tuple([int(x, 16) for x in als.split('_')])
try:
+ als_seq = tuple([int(x, 16) for x in als.split('_')])
trg_seq = tuple([int(x, 16) for x in trg.split('_')])
except:
print 'cannot process alias %s -> %s' % (als, trg)
diff --git a/add_glyphs.py b/add_glyphs.py
new file mode 100644
index 000000000..f4d8d0f83
--- /dev/null
+++ b/add_glyphs.py
@@ -0,0 +1,403 @@
+#!/usr/bin/env python
+
+"""Extend a ttx file with additional data.
+
+Takes a ttx file and one or more directories containing image files named
+after sequences of codepoints, extends the cmap, hmtx, GSUB, and GlyphOrder
+tables in the source ttx file based on these sequences, and writes out a new
+ttx file.
+
+This can also apply aliases from an alias file."""
+
+import argparse
+import collections
+import os
+from os import path
+import re
+import sys
+
+from fontTools import ttx
+from fontTools.ttLib.tables import otTables
+
+import add_emoji_gsub
+import add_aliases
+
+sys.path.append(
+ path.join(os.path.dirname(__file__), 'third_party', 'color_emoji'))
+from png import PNG
+
+
+def get_seq_to_file(image_dir, prefix, suffix):
+  """Return a mapping from codepoint sequences to files in image_dir.
+
+  Only files whose names start with prefix and end with suffix are used. The
+  part of the name between them must consist of hex codepoints separated by
+  underscore. 'fe0f' (the emoji presentation variation selector) is stripped
+  from the sequence used as the key.
+
+  Returns a dict from tuples of ints to file paths (image_dir joined with the
+  file name). Raises Exception if a name cannot be parsed, contains a value
+  outside 0..0x10ffff, or maps to a sequence already seen in this directory.
+  """
+  start = len(prefix)
+  seq_to_file = {}
+  for name in os.listdir(image_dir):
+    if not (name.startswith(prefix) and name.endswith(suffix)):
+      continue
+    # Compute the end index explicitly: a slice bound of -len(suffix) becomes
+    # 0 for an empty suffix and would discard the whole name.
+    limit = len(name) - len(suffix)
+    try:
+      cps = [int(s, 16) for s in name[start:limit].split('_')]
+    except ValueError:
+      # int() raises ValueError on non-hex or empty pieces. Anything else
+      # (e.g. KeyboardInterrupt) propagates untouched.
+      raise Exception('could not parse "%s"' % name)
+    for cp in cps:
+      if not (0 <= cp <= 0x10ffff):
+        raise Exception('bad codepoint(s) in "%s"' % name)
+    seq = tuple(cp for cp in cps if cp != 0xfe0f)
+    if seq in seq_to_file:
+      raise Exception('duplicate sequence for "%s" in %s' % (name, image_dir))
+    seq_to_file[seq] = path.join(image_dir, name)
+  return seq_to_file
+
+
+def collect_seq_to_file(image_dirs, prefix, suffix):
+  """Merge the get_seq_to_file results for each directory in image_dirs.
+
+  Directories are processed in order, so when the same sequence occurs in
+  more than one directory the file from the last such directory wins.
+  """
+  merged = {}
+  for directory in image_dirs:
+    dir_map = get_seq_to_file(directory, prefix, suffix)
+    for seq in dir_map:
+      merged[seq] = dir_map[seq]
+  return merged
+
+
+def remap_values(seq_to_file, map_fn):
+  """Return a new dict with the keys of seq_to_file and each value replaced by
+  map_fn(value). The input dict is not modified."""
+  # items() rather than iteritems() so this runs on both Python 2 and 3.
+  return {k: map_fn(v) for k, v in seq_to_file.items()}
+
+
+def get_png_file_to_advance_mapper(lineheight):
+  """Return a function mapping a png file name to an advance width.
+
+  The advance is lineheight scaled by the image's width/height ratio (as
+  reported by PNG(filename).get_size()), rounded to the nearest int.
+  """
+  def advance_for(png_path):
+    width, height = PNG(png_path).get_size()
+    scaled = float(lineheight) * width / height
+    return int(round(scaled))
+  return advance_for
+
+
+def cp_name(cp):
+  """Return the glyph name for a single codepoint: 'uniXXXX' for codepoints
+  up to 0xffff, 'uXXXXX(X)' for codepoints above that."""
+  if cp > 0xffff:
+    prefix = 'u'
+  else:
+    prefix = 'uni'
+  return prefix + '%04X' % cp
+
+
+def seq_name(seq):
+  """Return the glyph name for a codepoint sequence.
+
+  A single-codepoint sequence gets its cp_name. Any other sequence is named
+  'u' followed by each codepoint as upper-case hex (at least four digits),
+  joined by underscore.
+  """
+  if len(seq) != 1:
+    hex_parts = ['%04X' % cp for cp in seq]
+    return 'u' + '_'.join(hex_parts)
+  return cp_name(seq[0])
+
+
+def collect_cps(seqs):
+  """Return the set of all codepoints that occur in any sequence in seqs."""
+  return {cp for seq in seqs for cp in seq}
+
+
+def get_glyphorder_cps_and_truncate(glyphOrder):
+  """Split glyphOrder into codepoint-named glyphs and everything else.
+
+  Names of the form 'uXXXX' or 'uniXXXX' (four to six hex digits) are taken
+  to name a single codepoint; those codepoints are returned as a set. The
+  glyphOrder list is modified in place so that it holds only the remaining
+  names, in their original relative order.
+  """
+  single_cp_re = re.compile(r'^u(?:ni)?([0-9a-fA-F]{4,6})$')
+  cps = set()
+  kept = []
+  for name in glyphOrder:
+    match = single_cp_re.match(name)
+    if match is None:
+      kept.append(name)
+    else:
+      cps.add(int(match.group(1), 16))
+  # Slice assignment updates the caller's list object rather than rebinding
+  # a local name.
+  glyphOrder[:] = kept
+  return cps
+
+
+def get_all_seqs(font, seq_to_advance):
+  """Return the set of all sequences that need glyphs.
+
+  This is the keys of seq_to_advance, plus a single-codepoint sequence for
+  every codepoint used inside those keys and for every codepoint-named glyph
+  in the font's glyph order. As a side effect the codepoint-named glyphs are
+  removed from the list returned by font.getGlyphOrder().
+  """
+  seqs = set(seq_to_advance)
+  # codepoints internal to the sequences
+  all_cps = collect_cps(seqs)
+  # codepoints named in the glyph order; this call also strips those names
+  # from the glyph order list
+  all_cps |= get_glyphorder_cps_and_truncate(font.getGlyphOrder())
+  for cp in all_cps:
+    seqs.add((cp,))
+  return seqs
+
+
+def get_font_cmap(font):
+  """Return the mapping of the font's first cmap subtable. The subtable is
+  assumed to exist and to be a unicode cmap; neither is checked."""
+  first_subtable = font['cmap'].tables[0]
+  return first_subtable.cmap
+
+
+def add_glyph_data(font, seqs, seq_to_advance):
+  """Add hmtx and GlyphOrder entries for every sequence in seqs, and a cmap
+  entry for every single-codepoint sequence. Sequences that are not keys of
+  seq_to_advance get an advance of zero."""
+
+  # The template's cmap may leave out single-codepoint glyphs listed in its
+  # GlyphOrder table, and its hmtx table may leave out their advances. Glyphs
+  # named 'uniXXXX' or 'uXXXXX(X)' in GlyphOrder are assumed to correspond to
+  # the codepoint in the name. Other kinds of names are not handled here and
+  # must already be present in the template's cmap and hmtx tables.
+  #
+  # seq_to_advance maps sequences (including single codepoints) to advances.
+  # Every codepoint in those sequences ends up in the cmap, including
+  # codepoints that only occur inside longer sequences. The latter have no
+  # advance information and so get a zero advance.
+
+  cmap = get_font_cmap(font)
+  metrics = font['hmtx'].metrics
+
+  # Names for sequences are not expected to be in the glyph order already:
+  # single-codepoint names were removed from it earlier and multi-codepoint
+  # names should not occur in a template. The reverse glyph map is consulted
+  # anyway so a name is not duplicated by accident.
+  appended = False
+  known_names = font.getReverseGlyphMap()
+
+  def single_cps_first(seq):
+    # Single-codepoint sequences sort before all others, then by sequence so
+    # related sequences stay together. Keeping the single-codepoint glyphs
+    # contiguous lets the coverage tables of some substitutions be more
+    # compact.
+    return (len(seq) != 1, seq)
+
+  for seq in sorted(seqs, key=single_cps_first):
+    name = seq_name(seq)
+    if len(seq) == 1:
+      cmap[seq[0]] = name
+    metrics[name] = [seq_to_advance.get(seq, 0), 0]
+    if name not in known_names:
+      font.glyphOrder.append(name)
+      appended = True
+
+  if appended:
+    # NOTE(review): presumably this is the font's cached reverse glyph map,
+    # removed so it gets rebuilt from the extended glyph order -- confirm
+    # against the fontTools version in use.
+    delattr(font, '_reverseGlyphOrderDict')
+
+
+def add_aliases_to_cmap(font, aliases):
+  """Map single-codepoint aliases directly in the cmap.
+
+  An alias whose source is a single codepoint gets a cmap entry from that
+  codepoint to the glyph named for the alias target. Aliases with longer
+  sources are left alone here (those map via GSUB). aliases may be None or
+  empty, in which case nothing happens.
+  """
+  single_cp_sources = [src for src in (aliases or ()) if len(src) == 1]
+  if not single_cp_sources:
+    return
+
+  cmap = get_font_cmap(font)
+  for src in single_cp_sources:
+    cmap[src[0]] = seq_name(aliases[src])
+
+
+def get_rtl_seq(seq):
+  """Return the rtl variant of the sequence, if it has one, else the empty
+  sequence.
+
+  Only sequences containing ZWJ (0x200d) or TAG_END (0xe007f) have an rtl
+  variant. The variant is the reversed sequence, except that each Fitzpatrick
+  modifier (0x1f3fb..0x1f3ff) is kept after the codepoint it followed in seq.
+  """
+  ZWJ = 0x200d
+  TAG_END = 0xe007f
+
+  def is_fitzpatrick(cp):
+    return 0x1f3fb <= cp <= 0x1f3ff
+
+  if not (ZWJ in seq or TAG_END in seq):
+    return ()
+
+  rev_seq = list(reversed(seq))
+  i = 1
+  while i < len(rev_seq):
+    if is_fitzpatrick(rev_seq[i - 1]):
+      # After reversal the modifier precedes its base; swap them back into
+      # logical order.
+      rev_seq[i - 1], rev_seq[i] = rev_seq[i], rev_seq[i - 1]
+      # Step past the modifier that now sits at index i. Examining it again
+      # would swap it once more and carry it to the end of the sequence.
+      i += 2
+    else:
+      i += 1
+  return tuple(rev_seq)
+
+
+def get_gsub_ligature_lookup(font):
+  """Return the ligature substitution lookup that ligatures are added to.
+
+  If the font has no GSUB table, one is created holding a single ligature
+  substitution lookup (type 4, flag 0) with an empty ligatures dict. Otherwise
+  the first lookup of the existing GSUB table is used, and its LookupType and
+  its first subtable's ligatures dict are filled in if missing. Any lookups
+  after the first are left alone.
+
+  Raises AssertionError if an existing first lookup has a non-zero LookupFlag,
+  or lacks a LookupType while its first subtable is not type 4.
+  """
+  if 'GSUB' not in font:
+    ligature_subst = otTables.LigatureSubst()
+    ligature_subst.ligatures = {}
+
+    lookup = otTables.Lookup()
+    lookup.LookupType = 4
+    lookup.LookupFlag = 0
+    lookup.SubTableCount = 1
+    lookup.SubTable = [ligature_subst]
+
+    font['GSUB'] = add_emoji_gsub.create_simple_gsub([lookup])
+    return lookup
+
+  lookup = font['GSUB'].table.LookupList.Lookup[0]
+  assert lookup.LookupFlag == 0
+
+  # importXML doesn't fully init GSUB structures, so help it out. The subtable
+  # is fetched up front because both checks below need it, whether or not the
+  # lookup already has a LookupType.
+  st = lookup.SubTable[0]
+  if not hasattr(lookup, 'LookupType'):
+    assert st.LookupType == 4
+    lookup.LookupType = 4
+
+  if not hasattr(st, 'ligatures'):
+    st.ligatures = {}
+
+  return lookup
+
+
+def add_ligature_sequences(font, seqs, aliases):
+  """Add a GSUB ligature for every multi-codepoint sequence.
+
+  Sequences in seqs map to the glyph named for the sequence itself. Sequences
+  that are keys of aliases map to the glyph named for the alias target, and
+  replace an entry for the same sequence from seqs. Each sequence that has an
+  rtl variant (see get_rtl_seq) gets a second ligature for the variant,
+  mapping to the same glyph. Does nothing when there are no multi-codepoint
+  sequences. aliases may be None or empty.
+  """
+  seq_to_target_name = {
+      seq: seq_name(seq) for seq in seqs if len(seq) > 1}
+  if aliases:
+    seq_to_target_name.update({
+        seq: seq_name(aliases[seq]) for seq in aliases if len(seq) > 1})
+  if not seq_to_target_name:
+    return
+
+  # items() rather than iteritems() keeps this usable on Python 2 and 3.
+  rtl_seq_to_target_name = {
+      get_rtl_seq(seq): name for seq, name in seq_to_target_name.items()}
+  seq_to_target_name.update(rtl_seq_to_target_name)
+  # Sequences without an rtl variant all map to the empty sequence; drop it.
+  seq_to_target_name.pop((), None)
+
+  # Group the (sequence, name) pairs by the first codepoint of the sequence.
+  keyed_ligatures = collections.defaultdict(list)
+  for pair in seq_to_target_name.items():
+    keyed_ligatures[pair[0][0]].append(pair)
+
+  lookup = get_gsub_ligature_lookup(font)
+  cmap = get_font_cmap(font)
+  ligatures = lookup.SubTable[0].ligatures
+
+  def add_ligature(seq, name):
+    # The sequences consist of codepoints, but the entries in the ligature
+    # table are glyph names. Aliasing can give single codepoints names based
+    # on sequences (e.g. 'guardsman' with 'male guardsman') so the codepoints
+    # are mapped through the cmap to get the glyph names.
+    glyph_names = [cmap[cp] for cp in seq]
+
+    lig = otTables.Ligature()
+    lig.CompCount = len(seq)
+    lig.Component = glyph_names[1:]
+    lig.LigGlyph = name
+    ligatures.setdefault(glyph_names[0], []).append(lig)
+
+  for first_cp in sorted(keyed_ligatures):
+    pairs = keyed_ligatures[first_cp]
+    # Longest first, so longer sequences with a common prefix are handled
+    # before shorter ones; ties are ordered by the codepoints in the sequence.
+    pairs.sort(key=lambda pair: (-len(pair[0]), pair[0]))
+    for seq, name in pairs:
+      add_ligature(seq, name)
+
+
+def update_font_data(font, seq_to_advance, aliases):
+  """Apply the image sequences and aliases to the font, updating its cmap,
+  hmtx, GSUB, and GlyphOrder tables in place."""
+  all_seqs = get_all_seqs(font, seq_to_advance)
+  # Glyph data goes in first: the ligature step looks glyph names up in the
+  # cmap that this and the alias step fill in.
+  add_glyph_data(font, all_seqs, seq_to_advance)
+  add_aliases_to_cmap(font, aliases)
+  add_ligature_sequences(font, all_seqs, aliases)
+
+
+def apply_aliases(seq_dict, aliases):
+  """Return the aliases that can be used with seq_dict, and prune seq_dict.
+
+  aliases maps a source sequence to a replacement sequence. An alias is usable
+  when its replacement is a key of seq_dict. For each usable alias, the source
+  is removed from seq_dict if present. seq_dict is modified in place; aliases
+  is not modified.
+  """
+  usable_aliases = {}
+  # items() rather than iteritems() so this runs on both Python 2 and 3.
+  for src, target in aliases.items():
+    if target not in seq_dict:
+      continue
+    usable_aliases[src] = target
+    seq_dict.pop(src, None)
+  return usable_aliases
+
+
+def update_ttx(in_file, out_file, image_dirs, prefix, ext, aliases_file):
+  """Extend the ttx file in_file and write the result to out_file.
+
+  Image files are collected from image_dirs by prefix and ext. If aliases_file
+  is given, the aliases in it that have a matching target image are applied.
+  Advances are computed from each image's aspect ratio and the font's line
+  height (hhea ascent minus descent).
+
+  Raises Exception if ext is not '.png', and ValueError if no image files
+  match.
+  """
+  if ext != '.png':
+    raise Exception('extension "%s" not supported' % ext)
+
+  seq_to_file = collect_seq_to_file(image_dirs, prefix, ext)
+  if not seq_to_file:
+    dir_list = ', '.join(image_dirs)
+    raise ValueError(
+        'no sequences with prefix "%s" and extension "%s" in %s' % (
+            prefix, ext, dir_list))
+
+  if aliases_file:
+    all_aliases = add_aliases.read_emoji_aliases(aliases_file)
+    # keeps only usable aliases, and drops aliased sources from seq_to_file
+    aliases = apply_aliases(seq_to_file, all_aliases)
+  else:
+    aliases = None
+
+  font = ttx.TTFont()
+  font.importXML(in_file)
+
+  hhea = font['hhea']
+  to_advance = get_png_file_to_advance_mapper(hhea.ascent - hhea.descent)
+  seq_to_advance = remap_values(seq_to_file, to_advance)
+
+  update_font_data(font, seq_to_advance, aliases)
+
+  font.saveXML(out_file)
+
+
+def main():
+  """Parse the command line and run update_ttx with the given options.
+
+  -f, -o and -d are required. -a takes an optional file name: given without
+  a value it uses 'emoji_aliases.txt', and when omitted no aliases are
+  applied.
+  """
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '-f', '--in_file', help='ttx input file', metavar='file', required=True)
+  parser.add_argument(
+      '-o', '--out_file', help='ttx output file', metavar='file', required=True)
+  parser.add_argument(
+      '-d', '--image_dirs', help='directories containing image files',
+      nargs='+', metavar='dir', required=True)
+  parser.add_argument(
+      '-p', '--prefix', help='file prefix (default "emoji_u")',
+      metavar='pfx', default='emoji_u')
+  # The help text previously lacked its closing parenthesis.
+  parser.add_argument(
+      '-e', '--ext', help='file extension (default ".png", currently only '
+      '".png" is supported)', metavar='ext', default='.png')
+  parser.add_argument(
+      '-a', '--aliases', help='process alias table', const='emoji_aliases.txt',
+      nargs='?', metavar='file')
+  args = parser.parse_args()
+
+  update_ttx(
+      args.in_file, args.out_file, args.image_dirs, args.prefix, args.ext,
+      args.aliases)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/emoji_aliases.txt b/emoji_aliases.txt
index 40e555324..56a59b299 100644
--- a/emoji_aliases.txt
+++ b/emoji_aliases.txt
@@ -1,4 +1,6 @@
# alias table
+# from;to
+# the 'from' sequence should be represented by the image for the 'to' sequence
# 'fe0f' is not in these sequences
1f3c3;1f3c3_200d_2642 # RUNNER -> man running
1f3c3_1f3fb;1f3c3_1f3fb_200d_2642 # light skin tone
@@ -192,3 +194,10 @@
26f9_1f3fe;26f9_1f3fe_200d_2642 # medium-dark skin tone
26f9_1f3ff;26f9_1f3ff_200d_2642 # dark skin tone
fe82b;unknown_flag # no name -> no name
+
+# flag aliases
+1f1e7_1f1fb;1f1f3_1f1f4 # BV -> NO
+1f1e8_1f1f5;1f1eb_1f1f7 # CP -> FR
+1f1ed_1f1f2;1f1e6_1f1fa # HM -> AU
+1f1f8_1f1ef;1f1f3_1f1f4 # SJ -> NO
+1f1fa_1f1f2;1f1fa_1f1f8 # UM -> US
diff --git a/third_party/color_emoji/add_glyphs.py b/third_party/color_emoji/add_glyphs.py
deleted file mode 100644
index 2bbf41f5a..000000000
--- a/third_party/color_emoji/add_glyphs.py
+++ /dev/null
@@ -1,282 +0,0 @@
-#!/usr/bin/env python
-
-import collections, glob, os, re, sys
-from fontTools import ttx
-from fontTools.ttLib.tables import otTables
-from png import PNG
-
-# PUA character for unknown flag. This avoids the legacy emoji pua values, but
-# is in the same area.
-UNKNOWN_FLAG_GLYPH_NAME = "uFE82B"
-
-sys.path.append(
- os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))
-import add_emoji_gsub
-
-
-def is_vs(cp):
- return cp >= 0xfe00 and cp <= 0xfe0f
-
-def is_fitzpatrick(gname):
- cp = int(gname[1:], 16)
- return 0x1f3fb <= cp <= 0x1f3ff
-
-def codes_to_string(codes):
- if "_" in codes:
- pieces = codes.split ("_")
- string = "".join ([unichr (int (code, 16)) for code in pieces])
- else:
- try:
- string = unichr (int (codes, 16))
- except:
- raise ValueError("uh-oh, no unichr for '%s'" % codes)
- return string
-
-
-def glyph_sequence(string):
- # sequence of names of glyphs that form a ligature
- # variation selectors are stripped
- return ["u%04X" % ord(char) for char in string if not is_vs(ord(char))]
-
-
-def glyph_name(string):
- # name of a ligature
- # includes variation selectors when present
- return "_".join (["u%04X" % ord (char) for char in string])
-
-
-def add_ligature (font, seq, name):
- if 'GSUB' not in font:
- ligature_subst = otTables.LigatureSubst()
- ligature_subst.ligatures = {}
-
- lookup = otTables.Lookup()
- lookup.LookupType = 4
- lookup.LookupFlag = 0
- lookup.SubTableCount = 1
- lookup.SubTable = [ligature_subst]
-
- font['GSUB'] = add_emoji_gsub.create_simple_gsub([lookup])
- else:
- lookup = font['GSUB'].table.LookupList.Lookup[0]
- # assert lookup.LookupType == 4
- assert lookup.LookupFlag == 0
-
- # importXML doesn't fully init GSUB structures, so help it out
- if not hasattr(lookup, 'LookupType'):
- st = lookup.SubTable[0]
- assert st.LookupType == 4
- setattr(lookup, 'LookupType', 4)
-
- if not hasattr(st, 'ligatures'):
- setattr(st, 'ligatures', {})
-
- ligatures = lookup.SubTable[0].ligatures
-
- lig = otTables.Ligature()
- lig.CompCount = len(seq)
- lig.Component = seq[1:]
- lig.LigGlyph = name
-
- first = seq[0]
- try:
- ligatures[first].append(lig)
- except KeyError:
- ligatures[first] = [lig]
-
-
-# Ligating sequences for emoji that already have a defined codepoint,
-# to match the sequences for the related emoji with no codepoint.
-# The key is the name of the glyph with the codepoint, the value is the
-# name of the sequence in filename form.
-EXTRA_SEQUENCES = {
- 'u1F46A': '1F468_200D_1F469_200D_1F466', # MWB
- 'u1F491': '1F469_200D_2764_FE0F_200D_1F468', # WHM
- 'u1F48F': '1F469_200D_2764_FE0F_200D_1F48B_200D_1F468', # WHKM
-}
-
-# Flag aliases - from: to
-FLAG_ALIASES = {
- 'BV': 'NO',
- 'CP': 'FR',
- 'HM': 'AU',
- 'SJ': 'NO',
- 'UM': 'US',
-}
-
-if len (sys.argv) < 4:
- print >>sys.stderr, """
-Usage:
-
-add_glyphs.py font.ttx out-font.ttx strike-prefix...
-
-This will search for files that have strike-prefix followed by one or more
-hex numbers (separated by underscore if more than one), and end in ".png".
-For example, if strike-prefix is "icons/u", then files with names like
-"icons/u1F4A9.png" or "icons/u1F1EF_1F1F5.png" will be loaded. The script
-then adds cmap, htmx, and potentially GSUB entries for the Unicode
-characters found. The advance width will be chosen based on image aspect
-ratio. If Unicode values outside the BMP are desired, the existing cmap
-table should be of the appropriate (format 12) type. Only the first cmap
-table and the first GSUB lookup (if existing) are modified.
-"""
- sys.exit (1)
-
-in_file = sys.argv[1]
-out_file = sys.argv[2]
-img_prefixen = sys.argv[3:]
-del sys.argv
-
-font = ttx.TTFont()
-font.importXML (in_file)
-
-img_files = {}
-for img_prefix in img_prefixen:
- glb = "%s*.png" % img_prefix
- print "Looking for images matching '%s'." % glb
- for img_file in glob.glob (glb):
- codes = img_file[len (img_prefix):-4]
- u = codes_to_string(codes)
- if u in img_files:
- print 'overwriting %s with %s' % (img_files[u], img_file)
- img_files[u] = img_file
-if not img_files:
- raise Exception ("No image files found in '%s'." % glb)
-
-ascent = font['hhea'].ascent
-descent = -font['hhea'].descent
-
-g = font['GlyphOrder'].glyphOrder
-c = font['cmap'].tables[0].cmap
-h = font['hmtx'].metrics
-
-# Sort the characters by length, then codepoint, to keep the order stable
-# and avoid adding empty glyphs for multi-character glyphs if any piece is
-# also included.
-img_pairs = img_files.items ()
-img_pairs.sort (key=lambda pair: (len (pair[0]), pair[0]))
-
-glyph_names = set()
-ligatures = {}
-
-def add_lig_sequence(ligatures, seq, n):
- # We have emoji sequences using regional indicator symbols, tags,
- # ZWJ, fitzpatrick modifiers, and combinations of ZWJ and fitzpatrick
- # modifiers. Currently, Harfbuzz special-cases the fitzpatrick
- # modifiers to treat them as combining marks instead of as Other
- # Neutral, which unicode says they are, and processes them
- # in visual order (at least in some circumstances). So to handle
- # emoji sequences in an RTL context we need GSUB sequences that match
- # this order.
- # Regional indicator symbols are LTR, and emoji+fitzpatrick are
- # effectively LTR, so we only reorder sequences with ZWJ or tags. If
- # however the ZWJ sequence has fitzpatrick modifiers, those need to
- # still follow the emoji they logically follow, so simply reversing the
- # sequence doesn't work. This code assumes the lig sequence is valid.
- tseq = tuple(seq)
- if tseq in ligatures:
- print 'lig sequence %s, replace %s with %s' % (
- tseq, ligatures[tseq], n)
- ligatures[tseq] = n
- if 'u200D' in seq or 'uE007F' in seq:
- rev_seq = seq[:]
- rev_seq.reverse()
- for i in xrange(1, len(rev_seq)):
- if is_fitzpatrick(rev_seq[i - 1]):
- tmp = rev_seq[i]
- rev_seq[i] = rev_seq[i-1]
- rev_seq[i-1] = tmp
-
- trseq = tuple(rev_seq)
- # if trseq in ligatures:
- # print 'rev lig sequence %s, replace %s with %s' % (
- # trseq, ligatures[trseq], n)
- ligatures[trseq] = n
-
-
-for (u, filename) in img_pairs:
- n = glyph_name (u)
- glyph_names.add(n)
- # print "Adding glyph for %s" % n
-
- g.append (n)
- for char in u:
- cp = ord(char)
- if cp not in c and not is_vs(cp):
- name = glyph_name (char)
- if name not in glyph_names:
- g.append(name)
- c[cp] = name
- if len (u) > 1:
- h[name] = [0, 0]
- (img_width, img_height) = PNG (filename).get_size ()
- advance = int (round ((float (ascent+descent) * img_width / img_height)))
- h[n] = [advance, 0]
- if len (u) > 1:
- seq = glyph_sequence(u)
- add_lig_sequence(ligatures, seq, n)
-
-for n in EXTRA_SEQUENCES:
- if n in glyph_names:
- seq = glyph_sequence(codes_to_string(EXTRA_SEQUENCES[n]))
- add_lig_sequence(ligatures, seq, n)
- else:
- print 'extras: no glyph for %s' % n
-
-# Add missing regional indicator sequences and flag aliases
-# if we support any.
-regional_names = frozenset('u%X' % cp for cp in range(0x1F1E6, 0x1F200))
-
-def _is_flag_sequence(t):
- return len(t) == 2 and t[0] in regional_names and t[1] in regional_names
-
-have_flags = False
-for k in ligatures:
- if _is_flag_sequence(k):
- have_flags = True
- break
-
-if have_flags and UNKNOWN_FLAG_GLYPH_NAME not in glyph_names:
- raise ValueError(
- 'Have flags but no unknown flag glyph "%s"' % UNKNOWN_FLAG_GLYPH_NAME)
-
-# sigh, too many separate files with the same code.
-# copied from add_emoji_gsub.
-def _reg_indicator(letter):
- assert 'A' <= letter <= 'Z'
- return 0x1F1E6 + ord(letter) - ord('A')
-
-def _reg_lig_sequence(flag_name):
- """Returns a tuple of strings naming the codepoints that form the ligature."""
- assert len(flag_name) == 2
- return tuple('u%X' % _reg_indicator(cp) for cp in flag_name)
-
-def _reg_lig_name(flag_name):
- """Returns a glyph name for the flag name."""
- return '_'.join(_reg_lig_sequence(flag_name))
-
-if have_flags:
- print 'Adding flag aliases.'
- for flag_from, flag_to in FLAG_ALIASES.iteritems():
- seq = _reg_lig_sequence(flag_from)
- name = _reg_lig_name(flag_to)
- add_lig_sequence(ligatures, seq, name)
-
-keyed_ligatures = collections.defaultdict(list)
-for k, v in ligatures.iteritems():
- first = k[0]
- keyed_ligatures[first].append((k, v))
-
-for base in sorted(keyed_ligatures):
- pairs = keyed_ligatures[base]
- # print 'base %s has %d sequences' % (base, len(pairs))
-
- # Sort longest first, this ensures longer sequences with common prefixes
- # are handled before shorter ones. It would be better to have multiple
- # lookups, most likely.
- pairs.sort(key = lambda pair: (len(pair[0]), pair[0]), reverse=True)
- for seq, name in pairs:
- # print seq, name
- add_ligature(font, seq, name)
-
-font.saveXML (out_file)