diff --git a/Makefile b/Makefile index 7f6835769..f5811672a 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,8 @@ ZOPFLIPNG = zopflipng OPTIPNG = optipng EMOJI_BUILDER = third_party/color_emoji/emoji_builder.py -ADD_GLYPHS = third_party/color_emoji/add_glyphs.py +ADD_GLYPHS = add_glyphs.py +ADD_GLYPHS_FLAGS = -a emoji_aliases.txt PUA_ADDER = map_pua_emoji.py VS_ADDER = add_vs_cmap.py # from nototools @@ -193,7 +194,7 @@ endif # Run make without -j if this happens. %.ttx: %.ttx.tmpl $(ADD_GLYPHS) $(ALL_COMPRESSED_FILES) - @python $(ADD_GLYPHS) "$<" "$@" "$(COMPRESSED_DIR)/emoji_u" + @python $(ADD_GLYPHS) -f "$<" -o "$@" -d "$(COMPRESSED_DIR)" $(ADD_GLYPHS_FLAGS) %.ttf: %.ttx @rm -f "$@" diff --git a/NotoColorEmoji.tmpl.ttx.tmpl b/NotoColorEmoji.tmpl.ttx.tmpl index a2ae62b7e..fffdc03e9 100644 --- a/NotoColorEmoji.tmpl.ttx.tmpl +++ b/NotoColorEmoji.tmpl.ttx.tmpl @@ -7,7 +7,7 @@ - + @@ -191,72 +191,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -265,72 +199,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/add_aliases.py b/add_aliases.py index d4af6a05f..fd523b278 100755 --- a/add_aliases.py +++ b/add_aliases.py @@ -36,9 +36,14 @@ def seq_to_str(seq): def read_emoji_aliases(): + alias_path = path.join(DATA_ROOT, 'emoji_aliases.txt') + return read_emoji_aliases(alias_path) + + +def read_emoji_aliases(filename): result = {} - with open(path.join(DATA_ROOT, 'emoji_aliases.txt'), 'r') as f: + with open(filename, 'r') as f: for line in f: ix = line.find('#') if (ix > -1): @@ -47,8 +52,8 @@ def read_emoji_aliases(): if not line: continue als, trg = (s.strip() for s in line.split(';')) - als_seq = tuple([int(x, 16) for x in als.split('_')]) try: + als_seq = tuple([int(x, 16) for x in als.split('_')]) trg_seq = tuple([int(x, 16) for x in trg.split('_')]) except: print 'cannot process 
alias %s -> %s' % (als, trg) diff --git a/add_glyphs.py b/add_glyphs.py new file mode 100644 index 000000000..f4d8d0f83 --- /dev/null +++ b/add_glyphs.py @@ -0,0 +1,403 @@ +#!/usr/bin/env python + +"""Extend a ttx file with additional data. + +Takes a ttx file and one or more directories containing image files named +after sequences of codepoints, extends the cmap, hmtx, GSUB, and GlyphOrder +tables in the source ttx file based on these sequences, and writes out a new +ttx file. + +This can also apply aliases from an alias file.""" + +import argparse +import collections +import os +from os import path +import re +import sys + +from fontTools import ttx +from fontTools.ttLib.tables import otTables + +import add_emoji_gsub +import add_aliases + +sys.path.append( + path.join(os.path.dirname(__file__), 'third_party', 'color_emoji')) +from png import PNG + + +def get_seq_to_file(image_dir, prefix, suffix): + """Return a mapping from codepoint sequences to files in the given directory, + for files that match the prefix and suffix. File names with this prefix and + suffix should consist of codepoints in hex separated by underscore. 'fe0f' + (the codepoint of the emoji presentation variation selector) is stripped from + the sequence. 
+ """ + start = len(prefix) + limit = -len(suffix) + seq_to_file = {} + for name in os.listdir(image_dir): + if not (name.startswith(prefix) and name.endswith(suffix)): + continue + try: + cps = [int(s, 16) for s in name[start:limit].split('_')] + seq = tuple(cp for cp in cps if cp != 0xfe0f) + except: + raise Exception('could not parse "%s"' % name) + for cp in cps: + if not (0 <= cp <= 0x10ffff): + raise Exception('bad codepoint(s) in "%s"' % name) + if seq in seq_to_file: + raise Exception('duplicate sequence for "%s" in %s' % (name, image_dir)) + seq_to_file[seq] = path.join(image_dir, name) + return seq_to_file + + +def collect_seq_to_file(image_dirs, prefix, suffix): + """Return a sequence to file mapping by calling get_seq_to_file on a list + of directories. When sequences for files in later directories match those + from earlier directories, the later file replaces the earlier one. + """ + seq_to_file = {} + for image_dir in image_dirs: + seq_to_file.update(get_seq_to_file(image_dir, prefix, suffix)) + return seq_to_file + + +def remap_values(seq_to_file, map_fn): + return {k: map_fn(v) for k, v in seq_to_file.iteritems()} + + +def get_png_file_to_advance_mapper(lineheight): + def map_fn(filename): + wid, ht = PNG(filename).get_size() + return int(round(float(lineheight) * wid / ht)) + return map_fn + + +def cp_name(cp): + """return uniXXXX or uXXXXX(X) as a name for the glyph mapped to this cp.""" + return '%s%04X' % ('u' if cp > 0xffff else 'uni', cp) + + +def seq_name(seq): + """Sequences of length one get the cp_name. 
Others start with 'u' followed by + two or more 4-to-6-digit hex strings separated by underscore.""" + if len(seq) == 1: + return cp_name(seq[0]) + return 'u' + '_'.join('%04X' % cp for cp in seq) + + +def collect_cps(seqs): + cps = set() + for seq in seqs: + cps.update(seq) + return cps + + +def get_glyphorder_cps_and_truncate(glyphOrder): + """This scans glyphOrder for names that correspond to a single codepoint + using the 'u(ni)XXXXXX' syntax. All names that don't match are moved + to the front of the glyphOrder list in their original order, and the + list is truncated. The ones that do match are returned as a set of + codepoints.""" + glyph_name_re = re.compile(r'^u(?:ni)?([0-9a-fA-F]{4,6})$') + cps = set() + write_ix = 0 + for ix, name in enumerate(glyphOrder): + m = glyph_name_re.match(name) + if m: + cps.add(int(m.group(1), 16)) + else: + glyphOrder[write_ix] = name + write_ix += 1 + del glyphOrder[write_ix:] + return cps + + +def get_all_seqs(font, seq_to_advance): + """Copies the sequences from seq_to_advance and extends it with single- + codepoint sequences from the GlyphOrder table as well as those internal + to sequences in seq_to_advance. Reduces the GlyphOrder table. """ + + all_seqs = set(seq_to_advance.keys()) + # using collect_cps includes cps internal to a seq + cps = collect_cps(all_seqs) + glyphOrder = font.getGlyphOrder() + # extract cps in glyphOrder and reduce glyphOrder to only those that remain + glyphOrder_cps = get_glyphorder_cps_and_truncate(glyphOrder) + cps.update(glyphOrder_cps) + # add new single codepoint sequences from glyphOrder and sequences + all_seqs.update((cp,) for cp in cps) + return all_seqs + + +def get_font_cmap(font): + """Return the first cmap in the font, we assume it exists and is a unicode + cmap.""" + return font['cmap'].tables[0].cmap + + +def add_glyph_data(font, seqs, seq_to_advance): + """Add hmtx and GlyphOrder data for all sequences in seqs, and ensures there's + a cmap entry for each single-codepoint sequence.
Seqs not in seq_to_advance + will get a zero advance.""" + + # We allow the template cmap to omit mappings for single-codepoint glyphs + # defined in the template's GlyphOrder table. Similarly, the hmtx table can + # omit advances. We assume glyphs named 'uniXXXX' or 'uXXXXX(X)' in the + # GlyphOrder table correspond to codepoints based on the name; we don't + # attempt to handle other types of names and these must occur in the cmap and + # hmtx tables in the template. + # + # seq_to_advance maps sequences (including single codepoints) to advances. + # All codepoints in these sequences will be added to the cmap. Some cps + # in these sequences have no corresponding single-codepoint sequence, they + # will also get added. + # + # The added codepoints have no advance information, so will get a zero + # advance. + + cmap = get_font_cmap(font) + hmtx = font['hmtx'].metrics + + # We don't expect sequences to be in the glyphOrder, since we removed all the + # single-cp sequences from it and don't expect it to already contain names + # corresponding to multiple-cp sequences. But just in case, we use + # reverseGlyphMap to avoid duplicating names accidentally. + + updatedGlyphOrder = False + reverseGlyphMap = font.getReverseGlyphMap() + + # Order the glyphs by grouping all the single-codepoint sequences first, + # then order by sequence so that related sequences are together. We group + # by single-codepoint sequence first in order to keep these glyphs together-- + # they're used in the coverage tables for some of the substitutions, and + # those tables can be more compact this way.
+ for seq in sorted(seqs, key=lambda s: (0 if len(s) == 1 else 1, s)): + name = seq_name(seq) + if len(seq) == 1: + cmap[seq[0]] = name + advance = seq_to_advance.get(seq, 0) + hmtx[name] = [advance, 0] + if name not in reverseGlyphMap: + font.glyphOrder.append(name) + updatedGlyphOrder=True + + if updatedGlyphOrder: + delattr(font, '_reverseGlyphOrderDict') + + +def add_aliases_to_cmap(font, aliases): + """Some aliases might map a single codepoint to some other sequence. These + should map directly to the glyph for that sequence in the cmap. (Others will + map via GSUB). + """ + if not aliases: + return + + cp_aliases = [seq for seq in aliases if len(seq) == 1] + if not cp_aliases: + return + + cmap = get_font_cmap(font) + for src_seq in cp_aliases: + cp = src_seq[0] + name = seq_name(aliases[src_seq]) + cmap[cp] = name + + +def get_rtl_seq(seq): + """Return the rtl variant of the sequence, if it has one, else the empty + sequence. + """ + # Sequences with ZWJ or TAG_END in them will reflect. Fitzpatrick modifiers + # however do not, so if we reflect we make a pass to swap them back into their + # logical order. + + ZWJ = 0x200d + TAG_END = 0xe007f + def is_fitzpatrick(cp): + return 0x1f3fb <= cp <= 0x1f3ff + + if not (ZWJ in seq or TAG_END in seq): + return () + + rev_seq = list(seq) + rev_seq.reverse() + for i in xrange(1, len(rev_seq)): + if is_fitzpatrick(rev_seq[i-1]): + tmp = rev_seq[i] + rev_seq[i] = rev_seq[i-1] + rev_seq[i-1] = tmp + return tuple(rev_seq) + + +def get_gsub_ligature_lookup(font): + """If the font does not have a GSUB table, create one with a ligature + substitution lookup. If it does, ensure the first lookup is a properly + initialized ligature substitution lookup. Return the lookup.""" + + # The template might include more lookups after lookup 0, if it has a + # GSUB table. 
+ if 'GSUB' not in font: + ligature_subst = otTables.LigatureSubst() + ligature_subst.ligatures = {} + + lookup = otTables.Lookup() + lookup.LookupType = 4 + lookup.LookupFlag = 0 + lookup.SubTableCount = 1 + lookup.SubTable = [ligature_subst] + + font['GSUB'] = add_emoji_gsub.create_simple_gsub([lookup]) + else: + lookup = font['GSUB'].table.LookupList.Lookup[0] + assert lookup.LookupFlag == 0 + + # importXML doesn't fully init GSUB structures, so help it out + if not hasattr(lookup, 'LookupType'): + st = lookup.SubTable[0] + assert st.LookupType == 4 + setattr(lookup, 'LookupType', 4) + + if not hasattr(st, 'ligatures'): + setattr(st, 'ligatures', {}) + + return lookup + + +def add_ligature_sequences(font, seqs, aliases): + """Add ligature sequences.""" + + seq_to_target_name = { + seq: seq_name(seq) for seq in seqs if len(seq) > 1} + if aliases: + seq_to_target_name.update({ + seq: seq_name(aliases[seq]) for seq in aliases if len(seq) > 1}) + if not seq_to_target_name: + return + + rtl_seq_to_target_name = { + get_rtl_seq(seq): name for seq, name in seq_to_target_name.iteritems()} + seq_to_target_name.update(rtl_seq_to_target_name) + # sequences that don't have rtl variants get mapped to the empty sequence, + # delete it. + if () in seq_to_target_name: + del seq_to_target_name[()] + + # organize by first codepoint in sequence + keyed_ligatures = collections.defaultdict(list) + for t in seq_to_target_name.iteritems(): + first_cp = t[0][0] + keyed_ligatures[first_cp].append(t) + + def add_ligature(lookup, cmap, seq, name): + # The sequences consist of codepoints, but the entries in the ligature table + # are glyph names. Aliasing can give single codepoints names based on + # sequences (e.g. 'guardsman' with 'male guardsman') so we map the + # codepoints through the cmap to get the glyph names. 
+ glyph_names = [cmap[cp] for cp in seq] + + lig = otTables.Ligature() + lig.CompCount = len(seq) + lig.Component = glyph_names[1:] + lig.LigGlyph = name + + ligatures = lookup.SubTable[0].ligatures + first_name = glyph_names[0] + try: + ligatures[first_name].append(lig) + except KeyError: + ligatures[first_name] = [lig] + + lookup = get_gsub_ligature_lookup(font) + cmap = get_font_cmap(font) + for first_cp in sorted(keyed_ligatures): + pairs = keyed_ligatures[first_cp] + + # Sort longest first, this ensures longer sequences with common prefixes + # are handled before shorter ones. The secondary sort is a standard + # sort on the codepoints in the sequence. + pairs.sort(key = lambda pair: (-len(pair[0]), pair[0])) + for seq, name in pairs: + add_ligature(lookup, cmap, seq, name) + + +def update_font_data(font, seq_to_advance, aliases): + """Update the font's cmap, hmtx, GSUB, and GlyphOrder tables.""" + seqs = get_all_seqs(font, seq_to_advance) + add_glyph_data(font, seqs, seq_to_advance) + add_aliases_to_cmap(font, aliases) + add_ligature_sequences(font, seqs, aliases) + + +def apply_aliases(seq_dict, aliases): + """Aliases is a mapping from sequence to replacement sequence. We can use + an alias if the target is a key in the dictionary. Furthermore, if the + source is a key in the dictionary, we can delete it. 
This updates the + dictionary and returns the usable aliases.""" + usable_aliases = {} + for k, v in aliases.iteritems(): + if v in seq_dict: + usable_aliases[k] = v + if k in seq_dict: + del seq_dict[k] + return usable_aliases + + +def update_ttx(in_file, out_file, image_dirs, prefix, ext, aliases_file): + if ext != '.png': + raise Exception('extension "%s" not supported' % ext) + + seq_to_file = collect_seq_to_file(image_dirs, prefix, ext) + if not seq_to_file: + raise ValueError( + 'no sequences with prefix "%s" and extension "%s" in %s' % ( + prefix, ext, ', '.join(image_dirs))) + + aliases = None + if aliases_file: + aliases = add_aliases.read_emoji_aliases(aliases_file) + aliases = apply_aliases(seq_to_file, aliases) + + font = ttx.TTFont() + font.importXML(in_file) + + lineheight = font['hhea'].ascent - font['hhea'].descent + map_fn = get_png_file_to_advance_mapper(lineheight) + seq_to_advance = remap_values(seq_to_file, map_fn) + + update_font_data(font, seq_to_advance, aliases) + + font.saveXML(out_file) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + '-f', '--in_file', help='ttx input file', metavar='file', required=True) + parser.add_argument( + '-o', '--out_file', help='ttx output file', metavar='file', required=True) + parser.add_argument( + '-d', '--image_dirs', help='directories containing image files', + nargs='+', metavar='dir', required=True) + parser.add_argument( + '-p', '--prefix', help='file prefix (default "emoji_u")', + metavar='pfx', default='emoji_u') + parser.add_argument( + '-e', '--ext', help='file extension (default ".png", currently only ' + '".png" is supported', metavar='ext', default='.png') + parser.add_argument( + '-a', '--aliases', help='process alias table', const='emoji_aliases.txt', + nargs='?', metavar='file') + args = parser.parse_args() + + update_ttx( + args.in_file, args.out_file, args.image_dirs, args.prefix, args.ext, + args.aliases) + + +if __name__ == '__main__': + main() diff --git 
a/emoji_aliases.txt b/emoji_aliases.txt index 40e555324..56a59b299 100644 --- a/emoji_aliases.txt +++ b/emoji_aliases.txt @@ -1,4 +1,6 @@ # alias table +# from;to +# the 'from' sequence should be represented by the image for the 'to' sequence # 'fe0f' is not in these sequences 1f3c3;1f3c3_200d_2642 # RUNNER -> man running 1f3c3_1f3fb;1f3c3_1f3fb_200d_2642 # light skin tone @@ -192,3 +194,10 @@ 26f9_1f3fe;26f9_1f3fe_200d_2642 # medium-dark skin tone 26f9_1f3ff;26f9_1f3ff_200d_2642 # dark skin tone fe82b;unknown_flag # no name -> no name + +# flag aliases +1f1e7_1f1fb;1f1f3_1f1f4 # BV -> NO +1f1e8_1f1f5;1f1eb_1f1f7 # CP -> FR +1f1ed_1f1f2;1f1e6_1f1fa # HM -> AU +1f1f8_1f1ef;1f1f3_1f1f4 # SJ -> NO +1f1fa_1f1f2;1f1fa_1f1f8 # UM -> US diff --git a/third_party/color_emoji/add_glyphs.py b/third_party/color_emoji/add_glyphs.py deleted file mode 100644 index 2bbf41f5a..000000000 --- a/third_party/color_emoji/add_glyphs.py +++ /dev/null @@ -1,282 +0,0 @@ -#!/usr/bin/env python - -import collections, glob, os, re, sys -from fontTools import ttx -from fontTools.ttLib.tables import otTables -from png import PNG - -# PUA character for unknown flag. This avoids the legacy emoji pua values, but -# is in the same area. 
-UNKNOWN_FLAG_GLYPH_NAME = "uFE82B" - -sys.path.append( - os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)) -import add_emoji_gsub - - -def is_vs(cp): - return cp >= 0xfe00 and cp <= 0xfe0f - -def is_fitzpatrick(gname): - cp = int(gname[1:], 16) - return 0x1f3fb <= cp <= 0x1f3ff - -def codes_to_string(codes): - if "_" in codes: - pieces = codes.split ("_") - string = "".join ([unichr (int (code, 16)) for code in pieces]) - else: - try: - string = unichr (int (codes, 16)) - except: - raise ValueError("uh-oh, no unichr for '%s'" % codes) - return string - - -def glyph_sequence(string): - # sequence of names of glyphs that form a ligature - # variation selectors are stripped - return ["u%04X" % ord(char) for char in string if not is_vs(ord(char))] - - -def glyph_name(string): - # name of a ligature - # includes variation selectors when present - return "_".join (["u%04X" % ord (char) for char in string]) - - -def add_ligature (font, seq, name): - if 'GSUB' not in font: - ligature_subst = otTables.LigatureSubst() - ligature_subst.ligatures = {} - - lookup = otTables.Lookup() - lookup.LookupType = 4 - lookup.LookupFlag = 0 - lookup.SubTableCount = 1 - lookup.SubTable = [ligature_subst] - - font['GSUB'] = add_emoji_gsub.create_simple_gsub([lookup]) - else: - lookup = font['GSUB'].table.LookupList.Lookup[0] - # assert lookup.LookupType == 4 - assert lookup.LookupFlag == 0 - - # importXML doesn't fully init GSUB structures, so help it out - if not hasattr(lookup, 'LookupType'): - st = lookup.SubTable[0] - assert st.LookupType == 4 - setattr(lookup, 'LookupType', 4) - - if not hasattr(st, 'ligatures'): - setattr(st, 'ligatures', {}) - - ligatures = lookup.SubTable[0].ligatures - - lig = otTables.Ligature() - lig.CompCount = len(seq) - lig.Component = seq[1:] - lig.LigGlyph = name - - first = seq[0] - try: - ligatures[first].append(lig) - except KeyError: - ligatures[first] = [lig] - - -# Ligating sequences for emoji that already have a defined codepoint, -# to 
match the sequences for the related emoji with no codepoint. -# The key is the name of the glyph with the codepoint, the value is the -# name of the sequence in filename form. -EXTRA_SEQUENCES = { - 'u1F46A': '1F468_200D_1F469_200D_1F466', # MWB - 'u1F491': '1F469_200D_2764_FE0F_200D_1F468', # WHM - 'u1F48F': '1F469_200D_2764_FE0F_200D_1F48B_200D_1F468', # WHKM -} - -# Flag aliases - from: to -FLAG_ALIASES = { - 'BV': 'NO', - 'CP': 'FR', - 'HM': 'AU', - 'SJ': 'NO', - 'UM': 'US', -} - -if len (sys.argv) < 4: - print >>sys.stderr, """ -Usage: - -add_glyphs.py font.ttx out-font.ttx strike-prefix... - -This will search for files that have strike-prefix followed by one or more -hex numbers (separated by underscore if more than one), and end in ".png". -For example, if strike-prefix is "icons/u", then files with names like -"icons/u1F4A9.png" or "icons/u1F1EF_1F1F5.png" will be loaded. The script -then adds cmap, htmx, and potentially GSUB entries for the Unicode -characters found. The advance width will be chosen based on image aspect -ratio. If Unicode values outside the BMP are desired, the existing cmap -table should be of the appropriate (format 12) type. Only the first cmap -table and the first GSUB lookup (if existing) are modified. -""" - sys.exit (1) - -in_file = sys.argv[1] -out_file = sys.argv[2] -img_prefixen = sys.argv[3:] -del sys.argv - -font = ttx.TTFont() -font.importXML (in_file) - -img_files = {} -for img_prefix in img_prefixen: - glb = "%s*.png" % img_prefix - print "Looking for images matching '%s'." % glb - for img_file in glob.glob (glb): - codes = img_file[len (img_prefix):-4] - u = codes_to_string(codes) - if u in img_files: - print 'overwriting %s with %s' % (img_files[u], img_file) - img_files[u] = img_file -if not img_files: - raise Exception ("No image files found in '%s'." 
% glb) - -ascent = font['hhea'].ascent -descent = -font['hhea'].descent - -g = font['GlyphOrder'].glyphOrder -c = font['cmap'].tables[0].cmap -h = font['hmtx'].metrics - -# Sort the characters by length, then codepoint, to keep the order stable -# and avoid adding empty glyphs for multi-character glyphs if any piece is -# also included. -img_pairs = img_files.items () -img_pairs.sort (key=lambda pair: (len (pair[0]), pair[0])) - -glyph_names = set() -ligatures = {} - -def add_lig_sequence(ligatures, seq, n): - # We have emoji sequences using regional indicator symbols, tags, - # ZWJ, fitzpatrick modifiers, and combinations of ZWJ and fitzpatrick - # modifiers. Currently, Harfbuzz special-cases the fitzpatrick - # modifiers to treat them as combining marks instead of as Other - # Neutral, which unicode says they are, and processes them - # in visual order (at least in some circumstances). So to handle - # emoji sequences in an RTL context we need GSUB sequences that match - # this order. - # Regional indicator symbols are LTR, and emoji+fitzpatrick are - # effectively LTR, so we only reorder sequences with ZWJ or tags. If - # however the ZWJ sequence has fitzpatrick modifiers, those need to - # still follow the emoji they logically follow, so simply reversing the - # sequence doesn't work. This code assumes the lig sequence is valid. 
- tseq = tuple(seq) - if tseq in ligatures: - print 'lig sequence %s, replace %s with %s' % ( - tseq, ligatures[tseq], n) - ligatures[tseq] = n - if 'u200D' in seq or 'uE007F' in seq: - rev_seq = seq[:] - rev_seq.reverse() - for i in xrange(1, len(rev_seq)): - if is_fitzpatrick(rev_seq[i - 1]): - tmp = rev_seq[i] - rev_seq[i] = rev_seq[i-1] - rev_seq[i-1] = tmp - - trseq = tuple(rev_seq) - # if trseq in ligatures: - # print 'rev lig sequence %s, replace %s with %s' % ( - # trseq, ligatures[trseq], n) - ligatures[trseq] = n - - -for (u, filename) in img_pairs: - n = glyph_name (u) - glyph_names.add(n) - # print "Adding glyph for %s" % n - - g.append (n) - for char in u: - cp = ord(char) - if cp not in c and not is_vs(cp): - name = glyph_name (char) - if name not in glyph_names: - g.append(name) - c[cp] = name - if len (u) > 1: - h[name] = [0, 0] - (img_width, img_height) = PNG (filename).get_size () - advance = int (round ((float (ascent+descent) * img_width / img_height))) - h[n] = [advance, 0] - if len (u) > 1: - seq = glyph_sequence(u) - add_lig_sequence(ligatures, seq, n) - -for n in EXTRA_SEQUENCES: - if n in glyph_names: - seq = glyph_sequence(codes_to_string(EXTRA_SEQUENCES[n])) - add_lig_sequence(ligatures, seq, n) - else: - print 'extras: no glyph for %s' % n - -# Add missing regional indicator sequences and flag aliases -# if we support any. -regional_names = frozenset('u%X' % cp for cp in range(0x1F1E6, 0x1F200)) - -def _is_flag_sequence(t): - return len(t) == 2 and t[0] in regional_names and t[1] in regional_names - -have_flags = False -for k in ligatures: - if _is_flag_sequence(k): - have_flags = True - break - -if have_flags and UNKNOWN_FLAG_GLYPH_NAME not in glyph_names: - raise ValueError( - 'Have flags but no unknown flag glyph "%s"' % UNKNOWN_FLAG_GLYPH_NAME) - -# sigh, too many separate files with the same code. -# copied from add_emoji_gsub. 
-def _reg_indicator(letter): - assert 'A' <= letter <= 'Z' - return 0x1F1E6 + ord(letter) - ord('A') - -def _reg_lig_sequence(flag_name): - """Returns a tuple of strings naming the codepoints that form the ligature.""" - assert len(flag_name) == 2 - return tuple('u%X' % _reg_indicator(cp) for cp in flag_name) - -def _reg_lig_name(flag_name): - """Returns a glyph name for the flag name.""" - return '_'.join(_reg_lig_sequence(flag_name)) - -if have_flags: - print 'Adding flag aliases.' - for flag_from, flag_to in FLAG_ALIASES.iteritems(): - seq = _reg_lig_sequence(flag_from) - name = _reg_lig_name(flag_to) - add_lig_sequence(ligatures, seq, name) - -keyed_ligatures = collections.defaultdict(list) -for k, v in ligatures.iteritems(): - first = k[0] - keyed_ligatures[first].append((k, v)) - -for base in sorted(keyed_ligatures): - pairs = keyed_ligatures[base] - # print 'base %s has %d sequences' % (base, len(pairs)) - - # Sort longest first, this ensures longer sequences with common prefixes - # are handled before shorter ones. It would be better to have multiple - # lookups, most likely. - pairs.sort(key = lambda pair: (len(pair[0]), pair[0]), reverse=True) - for seq, name in pairs: - # print seq, name - add_ligature(font, seq, name) - -font.saveXML (out_file)