From 0f227e78349a5cdd7122e4d1aeb15912edf9ba6f Mon Sep 17 00:00:00 2001 From: Doug Felt Date: Tue, 3 Nov 2015 18:40:19 -0800 Subject: [PATCH] wip - update emoji tooling to support tr51 sequences --- Makefile | 3 +- NotoColorEmoji.tmpl.ttx.tmpl | 11 +++ build_emoji_set.py | 73 ++++++++++++++ generate_emoji_placeholders.py | 95 ++++++++++++++++++ third_party/color_emoji/add_glyphs.py | 120 +++++++++++++++++++---- third_party/color_emoji/emoji_builder.py | 32 ++++-- 6 files changed, 306 insertions(+), 28 deletions(-) create mode 100644 build_emoji_set.py create mode 100644 generate_emoji_placeholders.py diff --git a/Makefile b/Makefile index 9217cc4e2..74cd544c2 100644 --- a/Makefile +++ b/Makefile @@ -78,7 +78,8 @@ flag-symlinks: $(WAVED_FLAGS) $(PNG128_FLAGS): flag-symlinks -EMOJI_PNG128 = ./png/128/emoji_u +#EMOJI_PNG128 = ./png/128/emoji_u +EMOJI_PNG128 = /tmp/placeholder_emoji_plus/emoji_u EMOJI_BUILDER = third_party/color_emoji/emoji_builder.py ADD_GLYPHS = third_party/color_emoji/add_glyphs.py diff --git a/NotoColorEmoji.tmpl.ttx.tmpl b/NotoColorEmoji.tmpl.ttx.tmpl index 64461de00..f982d063c 100644 --- a/NotoColorEmoji.tmpl.ttx.tmpl +++ b/NotoColorEmoji.tmpl.ttx.tmpl @@ -4,6 +4,10 @@ + + + + @@ -119,12 +123,19 @@ + + + + + + + diff --git a/build_emoji_set.py b/build_emoji_set.py new file mode 100644 index 000000000..3f0708bfe --- /dev/null +++ b/build_emoji_set.py @@ -0,0 +1,73 @@ +# delete dst, then: +# copy the placeholders to dst +# then copy the noto images to dst +# then copy the draft images to dst, skipping names with parens and +# after fixing the case of the names + +import glob +import os +from os import path +import re +import shutil + +DST = "/tmp/placeholder_emoji_plus" + +SRC_PLACEHOLDER = "/tmp/placeholder_emoji" +SRC_NOTO = "/usr/local/google/users/dougfelt/newnoto/noto-emoji/png/128" +SRC_DRAFT = "/usr/local/google/home/dougfelt/Downloads/PNG_latest_working_draft" + +# First, scan the draft images and select which ones to use. 
This does +# two things: +# - The download package returns all the images, including previous versions. +# Ensure we use the one with the highest version. +# - The names often mix case. Make sure we have all lower case names. +# +# If something seems amiss, we fail. + +UPDATED_NAMES = {} +FIXED_NAMES = {} +VAR_PAT = re.compile(r'(.*?)\((\d+)\)\.png') +for fname in glob.glob(path.join(SRC_DRAFT, '*.png')): + name = path.basename(fname) + m = VAR_PAT.match(name) + if m: + name = '%s.png' % m.group(1).lower() + version = int(m.group(2)) + if version > UPDATED_NAMES.get(name, (0, None))[0]: + print 'update %s to version %d' % (name, version) + UPDATED_NAMES[name] = (version, fname) + else: + name = name.lower() + FIXED_NAMES[name] = fname + +for name in UPDATED_NAMES: + if name not in FIXED_NAMES: + raise Exception('updated name %s not in names' % name) + fname = UPDATED_NAMES[name][1] + print 'using updated image %s for %s' % (fname, name) + FIXED_NAMES[name] = fname + +# Now, recreate the destination directory and copy the data into it. 
+ +if path.isdir(DST): + shutil.rmtree(DST) +os.makedirs(DST) + +SKIP_PLACEHOLDERS = frozenset([ + 'emoji_u1f468_200d_1f469_200d_1f466.png', + 'emoji_u1f469_200d_2764_fe0f_200d_1f468.png', + 'emoji_u1f469_200d_2764_fe0f_200d_1f48b_200d_1f468.png', +]) + +for fname in glob.glob(path.join(SRC_PLACEHOLDER, '*.png')): + basename = path.basename(fname) + if basename in SKIP_PLACEHOLDERS: + print 'skip %s' % basename + continue + shutil.copy(fname, DST) + +for fname in glob.glob(path.join(SRC_NOTO, '*.png')): + shutil.copy(fname, DST) + +for name, fname in FIXED_NAMES.iteritems(): + shutil.copy(fname, path.join(DST, name)) diff --git a/generate_emoji_placeholders.py b/generate_emoji_placeholders.py new file mode 100644 index 000000000..48b508231 --- /dev/null +++ b/generate_emoji_placeholders.py @@ -0,0 +1,95 @@ +import os +from os import path +import subprocess + +OUTPUT_DIR = '/tmp/placeholder_emoji' + +def generate_image(name, text): + print name, text.replace('\n', '_') + subprocess.check_call( + ['convert', '-size', '100x100', 'label:%s' % text, + '%s/%s' % (OUTPUT_DIR, name)]) + +def is_color_patch(cp): + return cp >= 0x1f3fb and cp <= 0x1f3ff + +def has_color_patch(values): + for v in values: + if is_color_patch(v): + return True + return False + +def regional_to_ascii(cp): + return unichr(ord('A') + cp - 0x1f1e6) + +def is_flag_sequence(values): + if len(values) != 2: + return False + for v in values: + v -= 0x1f1e6 + if v < 0 or v > 25: + return False + return True + +def is_keycap_sequence(values): + return len(values) == 2 and values[1] == 0x20e3 + +def get_keycap_text(values): + return '-%c-' % unichr(values[0]) # convert gags on '[' + +char_map = { + 0x1f468: 'M', + 0x1f469: 'W', + 0x1f466: 'B', + 0x1f467: 'G', + 0x2764: 'H', # heavy black heart, no var sel + 0x1f48b: 'K', # kiss mark + 0x200D: '-', # zwj placeholder + 0xfe0f: '-', # variation selector placeholder + 0x1f441: 'I', # Eye + 0x1f5e8: 'W', # 'witness' (left speech bubble) +} + +def 
get_combining_text(values): + chars = [] + for v in values: + char = char_map.get(v, None) + if not char: + return None + if char != '-': + chars.append(char) + return ''.join(chars) + + +if not path.isdir(OUTPUT_DIR): + os.makedirs(OUTPUT_DIR) + +with open('sequences.txt', 'r') as f: + for seq in f: + seq = seq.strip() + text = None + values = [int(code, 16) for code in seq.split('_')] + if len(values) == 1: + val = values[0] + text = '%04X' % val # ensure upper case format + elif is_flag_sequence(values): + text = ''.join(regional_to_ascii(cp) for cp in values) + elif has_color_patch(values): + print 'skipping color patch sequence %s' % seq + elif is_keycap_sequence(values): + text = get_keycap_text(values) + else: + text = get_combining_text(values) + if not text: + print 'missing %s' % seq + + if text: + if len(text) > 3: + if len(text) == 4: + hi = text[:2] + lo = text[2:] + else: + hi = text[:-3] + lo = text[-3:] + text = '%s\n%s' % (hi, lo) + generate_image('emoji_u%s.png' % seq, text) diff --git a/third_party/color_emoji/add_glyphs.py b/third_party/color_emoji/add_glyphs.py index a8d986547..bea880064 100644 --- a/third_party/color_emoji/add_glyphs.py +++ b/third_party/color_emoji/add_glyphs.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -import glob, os, sys +import collections, glob, os, sys from fontTools import ttx from fontTools.ttLib.tables import otTables from png import PNG @@ -10,11 +10,31 @@ sys.path.append( import add_emoji_gsub +def is_vs(cp): + return cp >= 0xfe00 and cp <= 0xfe0f + +def codes_to_string(codes): + if "_" in codes: + pieces = codes.split ("_") + string = "".join ([unichr (int (code, 16)) for code in pieces]) + else: + string = unichr (int (codes, 16)) + return string + + +def glyph_sequence(string): + # sequence of names of glyphs that form a ligature + # variation selectors are stripped + return ["u%04X" % ord(char) for char in string if not is_vs(ord(char))] + + def glyph_name(string): + # name of a ligature + # includes variation 
selectors when present return "_".join (["u%04X" % ord (char) for char in string]) -def add_ligature (font, string): +def add_ligature (font, seq, name): if 'GSUB' not in font: ligature_subst = otTables.LigatureSubst() ligature_subst.ligatures = {} @@ -34,17 +54,27 @@ def add_ligature (font, string): ligatures = lookup.SubTable[0].ligatures lig = otTables.Ligature() - lig.CompCount = len(string) - lig.Component = [glyph_name(ch) for ch in string[1:]] - lig.LigGlyph = glyph_name(string) + lig.CompCount = len(seq) + lig.Component = seq[1:] + lig.LigGlyph = name - first = glyph_name(string[0]) + first = seq[0] try: ligatures[first].append(lig) except KeyError: ligatures[first] = [lig] +# Ligating sequences for emoji that already have a defined codepoint, +# to match the sequences for the related emoji with no codepoint. +# The key is the name of the glyph with the codepoint, the value is the +# name of the sequence in filename form. +EXTRA_SEQUENCES = { + 'u1F46A': '1F468_200D_1F469_200D_1F466', # MWB + 'u1F491': '1F469_200D_2764_FE0F_200D_1F468', # WHM + 'u1F48F': '1F469_200D_2764_FE0F_200D_1F48B_200D_1F468', # WHKM +} + if len (sys.argv) < 4: print >>sys.stderr, """ Usage: @@ -65,23 +95,22 @@ table and the first GSUB lookup (if existing) are modified. in_file = sys.argv[1] out_file = sys.argv[2] -img_prefix = sys.argv[3] +img_prefixen = sys.argv[3:] del sys.argv font = ttx.TTFont() font.importXML (in_file) img_files = {} -glb = "%s*.png" % img_prefix -print "Looking for images matching '%s'." % glb -for img_file in glob.glob (glb): - codes = img_file[len (img_prefix):-4] - if "_" in codes: - pieces = codes.split ("_") - u = "".join ([unichr (int (code, 16)) for code in pieces]) - else: - u = unichr (int (codes, 16)) - img_files[u] = img_file +for img_prefix in img_prefixen: + glb = "%s*.png" % img_prefix + print "Looking for images matching '%s'." 
% glb + for img_file in glob.glob (glb): + codes = img_file[len (img_prefix):-4] + u = codes_to_string(codes) + if u in img_files: + print 'overwriting %s with %s' % (img_files[u], img_file) + img_files[u] = img_file if not img_files: raise Exception ("No image files found in '%s'." % glb) @@ -98,20 +127,71 @@ h = font['hmtx'].metrics img_pairs = img_files.items () img_pairs.sort (key=lambda pair: (len (pair[0]), pair[0])) +glyph_names = set() +ligatures = {} + +def add_lig_sequence(ligatures, seq, n): + # Assume sequences with ZWJ are emoji 'ligatures' and rtl order + # is also valid. Internal permutations, though, no. + # We associate a sequence with a filename. We can overwrite the + # sequence with a different filename later. + tseq = tuple(seq) + if tseq in ligatures: + print 'lig sequence %s, replace %s with %s' % ( + tseq, ligatures[tseq], n) + ligatures[tseq] = n + if 'u200D' in seq: + rev_seq = seq[:] + rev_seq.reverse() + trseq = tuple(rev_seq) + if trseq in ligatures: + print 'rev lig sequence %s, replace %s with %s' % ( + trseq, ligatures[trseq], n) + ligatures[trseq] = n + + for (u, filename) in img_pairs: print "Adding glyph for U+%s" % ",".join (["%04X" % ord (char) for char in u]) n = glyph_name (u) + glyph_names.add(n) + g.append (n) for char in u: - if char not in c: + cp = ord(char) + if cp not in c and not is_vs(cp): name = glyph_name (char) - c[ord (char)] = name + c[cp] = name if len (u) > 1: h[name] = [0, 0] (img_width, img_height) = PNG (filename).get_size () advance = int (round ((float (ascent+descent) * img_width / img_height))) h[n] = [advance, 0] if len (u) > 1: - add_ligature (font, u) + seq = glyph_sequence(u) + add_lig_sequence(ligatures, seq, n) + +for n in EXTRA_SEQUENCES: + if n in glyph_names: + seq = glyph_sequence(codes_to_string(EXTRA_SEQUENCES[n])) + add_lig_sequence(ligatures, seq, n) + else: + print 'extras: no glyph for %s' % n + + +keyed_ligatures = collections.defaultdict(list) +for k, v in ligatures.iteritems(): + 
first = k[0] + keyed_ligatures[first].append((k, v)) + +for base in sorted(keyed_ligatures): + pairs = keyed_ligatures[base] + print 'base %s has %d sequences' % (base, len(pairs)) + # Sort longest first, this ensures longer sequences with common prefixes + # are handled before shorter ones. It would be better to have multiple + # lookups, most likely. + pairs.sort(key = lambda pair: (len(pair[0]), pair[0]), reverse=True) + for seq, name in pairs: + print seq, name + add_ligature(font, seq, name) font.saveXML (out_file) diff --git a/third_party/color_emoji/emoji_builder.py b/third_party/color_emoji/emoji_builder.py index 5a4e646fe..844102ab5 100644 --- a/third_party/color_emoji/emoji_builder.py +++ b/third_party/color_emoji/emoji_builder.py @@ -20,7 +20,8 @@ import sys, struct, StringIO from png import PNG - +import os +from os import path def get_glyph_name_from_gsub (string, font, cmap_dict): ligatures = font['GSUB'].table.LookupList.Lookup[0].SubTable[0].ligatures @@ -83,6 +84,7 @@ class CBDT: write_func = self.image_write_func (image_format) for glyph in glyphs: img_file = glyph_filenames[glyph] + print 'writing data for glyph %s' % path.basename(img_file) offset = self.tell () write_func (PNG (img_file)) self.glyph_maps.append (GlyphMap (glyph, offset, image_format)) @@ -108,6 +110,7 @@ class CBDT: line_ascent = ascent * y_ppem / float (upem) y_bearing = int (round (line_ascent - .5 * (line_height - height))) advance = width + print "small glyph metrics h: %d w: %d a: %d" % (height, width, advance) # smallGlyphMetrics # Type Name # BYTE height @@ -115,10 +118,14 @@ class CBDT: # CHAR BearingX # CHAR BearingY # BYTE Advance - self.write (struct.pack ("BBbbB", + try: + self.write (struct.pack ("BBbbB", height, width, x_bearing, y_bearing, advance)) + except struct.error: + raise ValueError("h: %d w: %d a: %d x: %d y: %d" % ( + height, width, advance, x_bearing, y_bearing)) def write_format1 (self, png): @@ -437,8 +444,10 @@ By default they are dropped. 
eblc.write_header () eblc.start_strikes (len (img_prefixes)) - for img_prefix in img_prefixes: + def is_vs(cp): + return cp >= 0xfe00 and cp <= 0xfe0f + for img_prefix in img_prefixes: print img_files = {} @@ -448,9 +457,14 @@ By default they are dropped. codes = img_file[len (img_prefix):-4] if "_" in codes: pieces = codes.split ("_") - uchars = "".join ([unichr (int (code, 16)) for code in pieces]) + cps = [int(code, 16) for code in pieces] + uchars = "".join ([unichr(cp) for cp in cps if not is_vs(cp)]) else: - uchars = unichr (int (codes, 16)) + cp = int(codes, 16) + if is_vs(cp): + print "ignoring unexpected vs input %04x" % cp + continue + uchars = unichr(cp) img_files[uchars] = img_file if not img_files: raise Exception ("No image files found in '%s'." % glb) @@ -460,7 +474,11 @@ By default they are dropped. advance = width = height = 0 for uchars, img_file in img_files.items (): if len (uchars) == 1: - glyph_name = unicode_cmap.cmap[ord (uchars)] + try: + glyph_name = unicode_cmap.cmap[ord (uchars)] + except KeyError: + print "no cmap entry for %x" % ord(uchars) + raise ValueError("%x" % ord(uchars)) else: glyph_name = get_glyph_name_from_gsub (uchars, font, unicode_cmap.cmap) glyph_id = font.getGlyphID (glyph_name) @@ -476,7 +494,7 @@ By default they are dropped. glyphs = sorted (glyph_imgs.keys ()) if not glyphs: - raise Exception ("No common characteres found between font and '%s'." % glb) + raise Exception ("No common characters found between font and '%s'." % glb) print "Embedding images for %d glyphs for this strike." % len (glyphs) advance, width, height = (div (x, len (glyphs)) for x in (advance, width, height))