wip - update emoji tooling to support tr51 sequences

This commit is contained in:
Doug Felt 2015-11-03 18:40:19 -08:00
parent deff1a6545
commit 0f227e7834
6 changed files with 306 additions and 28 deletions

View file

@ -78,7 +78,8 @@ flag-symlinks: $(WAVED_FLAGS)
$(PNG128_FLAGS): flag-symlinks $(PNG128_FLAGS): flag-symlinks
EMOJI_PNG128 = ./png/128/emoji_u #EMOJI_PNG128 = ./png/128/emoji_u
EMOJI_PNG128 = /tmp/placeholder_emoji_plus/emoji_u
EMOJI_BUILDER = third_party/color_emoji/emoji_builder.py EMOJI_BUILDER = third_party/color_emoji/emoji_builder.py
ADD_GLYPHS = third_party/color_emoji/add_glyphs.py ADD_GLYPHS = third_party/color_emoji/add_glyphs.py

View file

@ -4,6 +4,10 @@
<GlyphOrder> <GlyphOrder>
<!-- The 'id' attribute is only for humans; it is ignored when parsed. --> <!-- The 'id' attribute is only for humans; it is ignored when parsed. -->
<GlyphID id="0" name=".notdef"/> <GlyphID id="0" name=".notdef"/>
<GlyphID id="1" name="null"/>
<GlyphID id="2" name="nonmarkingreturn"/>
<GlyphID id="3" name="space"/>
<GlyphID id="4" name="u200D"/>
</GlyphOrder> </GlyphOrder>
<head> <head>
@ -119,12 +123,19 @@
<hmtx> <hmtx>
<mtx name=".notdef" width="2550" lsb="0"/> <mtx name=".notdef" width="2550" lsb="0"/>
<mtx name="null" width="0" lsb="0"/>
<mtx name="nonmarkingreturn" width="2550" lsb="0"/>
<mtx name="space" width="2550" lsb="0"/>
<mtx name="u200D" width="0" lsb="0"/>
</hmtx> </hmtx>
<cmap> <cmap>
<tableVersion version="0"/> <tableVersion version="0"/>
<cmap_format_12 platformID="3" platEncID="10" language="0" format="12" reserved="0" length="1" nGroups="1"> <cmap_format_12 platformID="3" platEncID="10" language="0" format="12" reserved="0" length="1" nGroups="1">
<map code="0x0" name=".notdef"/><!-- &lt;control> --> <map code="0x0" name=".notdef"/><!-- &lt;control> -->
<map code="0xd" name="nonmarkingreturn"/>
<map code="0x20" name="space"/>
<map code="0x200d" name="u200D"/>
</cmap_format_12> </cmap_format_12>
</cmap> </cmap>

73
build_emoji_set.py Normal file
View file

@ -0,0 +1,73 @@
# Build the combined emoji image directory DST:
# - delete DST if it exists and recreate it empty
# - copy the placeholder images into DST (except SKIP_PLACEHOLDERS)
# - then copy the noto images into DST
# - then copy the draft images into DST under lower-cased names; when a
#   draft image also exists in numbered "name(N).png" variants, the
#   highest-numbered variant is the one copied
# shutil.copy overwrites an existing destination file, so each later source
# replaces same-named images from the earlier ones.
import glob
import os
from os import path
import re
import shutil
# Destination directory; it is removed and rebuilt on every run.
DST = "/tmp/placeholder_emoji_plus"
# Source directories, listed in the order they are copied.
# NOTE(review): SRC_NOTO and SRC_DRAFT are machine-specific absolute paths --
# presumably meant to be edited per user; confirm before running elsewhere.
SRC_PLACEHOLDER = "/tmp/placeholder_emoji"
SRC_NOTO = "/usr/local/google/users/dougfelt/newnoto/noto-emoji/png/128"
SRC_DRAFT = "/usr/local/google/home/dougfelt/Downloads/PNG_latest_working_draft"
# First, scan the draft images and select which ones to use. This does
# two things:
# - The download package returns all the images, including previous versions.
# Ensure we use the one with the highest version.
# - The names often mix case. Make sure we have all lower case names.
#
# If something seems amiss, we fail.
# UPDATED_NAMES: lower-cased 'name.png' -> (highest version seen, source path)
UPDATED_NAMES = {}
# FIXED_NAMES: lower-cased 'name.png' -> source path that will be copied
FIXED_NAMES = {}
# Matches 'name(N).png'; group 1 is the text before '(' and group 2 is N.
VAR_PAT = re.compile(r'(.*?)\((\d+)\)\.png')
# NOTE(review): indentation is not visible in this copy of the script. The
# 'else:' below presumably pairs with 'if m:' (un-numbered files go straight
# into FIXED_NAMES) -- confirm against the original file.
for fname in glob.glob(path.join(SRC_DRAFT, '*.png')):
name = path.basename(fname)
m = VAR_PAT.match(name)
if m:
name = '%s.png' % m.group(1).lower()
version = int(m.group(2))
# The default (0, None) means a variant numbered 0 is never selected.
if version > UPDATED_NAMES.get(name, (0, None))[0]:
print 'update %s to version %d' % (name, version)
UPDATED_NAMES[name] = (version, fname)
else:
name = name.lower()
FIXED_NAMES[name] = fname
# Replace each base image with its highest-numbered variant. A numbered
# variant whose base name was not recorded in FIXED_NAMES is treated as an
# error.
for name in UPDATED_NAMES:
if name not in FIXED_NAMES:
raise Exception('updated name %s not in names' % name)
fname = UPDATED_NAMES[name][1]
print 'using updated image %s for %s' % (fname, name)
FIXED_NAMES[name] = fname
# Now, recreate the destination directory and copy the data into it.
if path.isdir(DST):
shutil.rmtree(DST)
os.makedirs(DST)
# Placeholder file names that are never copied into DST.
SKIP_PLACEHOLDERS = frozenset([
'emoji_u1f468_200d_1f469_200d_1f466.png',
'emoji_u1f469_200d_2764_fe0f_200d_1f468.png',
'emoji_u1f469_200d_2764_fe0f_200d_1f48b_200d_1f468.png',
])
for fname in glob.glob(path.join(SRC_PLACEHOLDER, '*.png')):
basename = path.basename(fname)
if basename in SKIP_PLACEHOLDERS:
print 'skip %s' % basename
continue
shutil.copy(fname, DST)
# Noto images keep their own file names.
for fname in glob.glob(path.join(SRC_NOTO, '*.png')):
shutil.copy(fname, DST)
# Draft images are written under the lower-cased name chosen above.
for name, fname in FIXED_NAMES.iteritems():
shutil.copy(fname, path.join(DST, name))

View file

@ -0,0 +1,95 @@
import os
from os import path
import subprocess
OUTPUT_DIR = '/tmp/placeholder_emoji'
def generate_image(name, text):
    """Render `text` as a 100x100 label image written to OUTPUT_DIR/name.

    Logs the file name and the label (newlines shown as '_'), then runs the
    external `convert` command (presumably ImageMagick -- confirm) with a
    `label:` source.

    Args:
        name: output file name, relative to OUTPUT_DIR.
        text: label text; may contain a newline for a two-line label.

    Raises:
        subprocess.CalledProcessError: if `convert` exits with non-zero status.
    """
    # A single %-formatted argument prints identically under the Python 2
    # print statement and the print() function.
    print('%s %s' % (name, text.replace('\n', '_')))
    subprocess.check_call(
        ['convert', '-size', '100x100', 'label:%s' % text,
         '%s/%s' % (OUTPUT_DIR, name)])
def is_color_patch(cp):
    """Return True if code point `cp` is in U+1F3FB..U+1F3FF (inclusive)."""
    return 0x1f3fb <= cp <= 0x1f3ff
def has_color_patch(values):
    """Return True if any code point in `values` satisfies is_color_patch.

    An empty sequence yields False.
    """
    return any(is_color_patch(v) for v in values)
def regional_to_ascii(cp):
    """Map a regional indicator code point to its ASCII capital letter.

    U+1F1E6 maps to 'A' and U+1F1FF maps to 'Z'.

    Args:
        cp: integer code point.

    Returns:
        A one-character string in 'A'..'Z'.

    Raises:
        ValueError: if `cp` is outside U+1F1E6..U+1F1FF. Without this check
            an out-of-range value would silently produce a non-letter.
    """
    offset = cp - 0x1f1e6
    if not 0 <= offset <= 25:
        raise ValueError('not a regional indicator: U+%04X' % cp)
    return 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'[offset]
def is_flag_sequence(values):
    """Return True if `values` is exactly two regional indicator code points.

    A regional indicator here is any code point in U+1F1E6..U+1F1FF.
    """
    if len(values) != 2:
        return False
    return all(0x1f1e6 <= v <= 0x1f1ff for v in values)
def is_keycap_sequence(values):
    """Return True if `values` is two code points, the second being U+20E3."""
    return len(values) == 2 and values[1] == 0x20e3
def get_keycap_text(values):
    """Return the label for a keycap sequence: its first character in dashes.

    Args:
        values: code points of the sequence; only values[0] is used.

    Returns:
        A three-character string such as '-#-'.
    """
    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # no unichr builtin: chr handles all code points
    return '-%c-' % to_char(values[0])  # convert gags on '['
# Single-letter labels for the code points that can appear in the combining
# sequences handled by get_combining_text. '-' marks a code point that is
# accepted but contributes nothing to the label.
char_map = {
    0x1f468: 'M',
    0x1f469: 'W',
    0x1f466: 'B',
    0x1f467: 'G',
    0x2764: 'H',  # heavy black heart, no var sel
    0x1f48b: 'K',  # kiss mark
    0x200D: '-',  # zwj placeholder
    0xfe0f: '-',  # variation selector placeholder
    0x1f441: 'I',  # Eye
    0x1f5e8: 'W',  # 'witness' (left speech bubble)
}


def get_combining_text(values):
    """Build a letter label for a sequence from char_map.

    Args:
        values: code points of the sequence.

    Returns:
        The concatenated letters of every code point, omitting '-'
        placeholders, or None as soon as a code point is not in char_map.
        A sequence made only of placeholders yields ''.
    """
    chars = []
    for v in values:
        char = char_map.get(v)
        if not char:
            return None
        if char != '-':
            chars.append(char)
    return ''.join(chars)
# Create the output directory if it does not exist yet.
if not path.isdir(OUTPUT_DIR):
os.makedirs(OUTPUT_DIR)
# Read one sequence per line from sequences.txt (resolved against the
# current directory). Each line is a '_'-separated list of hex code points.
# For every sequence we can label, generate 'emoji_u<seq>.png'.
# NOTE(review): a blank line reaches int('', 16) and raises ValueError.
with open('sequences.txt', 'r') as f:
for seq in f:
seq = seq.strip()
# text stays None when no label can be produced for this sequence.
text = None
values = [int(code, 16) for code in seq.split('_')]
if len(values) == 1:
# Single code point: label is its hex value.
val = values[0]
text = '%04X' % val # ensure upper case format
elif is_flag_sequence(values):
# Flag: label is the two ASCII letters of the regional indicators.
text = ''.join(regional_to_ascii(cp) for cp in values)
elif has_color_patch(values):
# No image is generated for these; text remains None.
print 'skipping color patch sequence %s' % seq
elif is_keycap_sequence(values):
text = get_keycap_text(values)
else:
text = get_combining_text(values)
# NOTE(review): indentation is not visible in this copy; this check is
# presumably nested under the 'else:' above -- confirm against the original.
if not text:
print 'missing %s' % seq
if text:
# Labels longer than 3 characters are split over two lines: a 4-character
# label as 2+2, anything longer with the last 3 characters on line two.
if len(text) > 3:
if len(text) == 4:
hi = text[:2]
lo = text[2:]
else:
hi = text[:-3]
lo = text[-3:]
text = '%s\n%s' % (hi, lo)
generate_image('emoji_u%s.png' % seq, text)

View file

@ -1,6 +1,6 @@
#!/usr/bin/env python #!/usr/bin/env python
import glob, os, sys import collections, glob, os, sys
from fontTools import ttx from fontTools import ttx
from fontTools.ttLib.tables import otTables from fontTools.ttLib.tables import otTables
from png import PNG from png import PNG
@ -10,11 +10,31 @@ sys.path.append(
import add_emoji_gsub import add_emoji_gsub
def is_vs(cp):
    """Return True if code point `cp` is in U+FE00..U+FE0F (inclusive)."""
    return 0xfe00 <= cp <= 0xfe0f
def codes_to_string(codes):
    """Convert a '_'-separated list of hex code points to a string.

    Args:
        codes: e.g. '1F468_200D_1F469', or a single code such as '1F600'.

    Returns:
        The string made of those code points, in order.

    Raises:
        ValueError: if any piece is not a valid hexadecimal number.
    """
    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # no unichr builtin: chr handles all code points
    # split() returns the whole string as one piece when there is no '_',
    # so the single-code case needs no separate branch.
    return "".join(to_char(int(code, 16)) for code in codes.split("_"))
def glyph_sequence(string):
    """Return the list of glyph names for the characters of `string`.

    This is the sequence of names of glyphs that form a ligature. Each name
    is 'u' plus the character's code point as at least four upper-case hex
    digits. Variation selectors (characters for which is_vs is true) are
    stripped.
    """
    return ["u%04X" % cp for cp in map(ord, string) if not is_vs(cp)]
def glyph_name(string):
    """Return the glyph name of the ligature for `string`.

    The name joins 'u' + upper-case hex code point (at least four digits)
    for every character with '_'. Variation selectors are included when
    present.
    """
    return "_".join(["u%04X" % ord(char) for char in string])
def add_ligature (font, string): def add_ligature (font, seq, name):
if 'GSUB' not in font: if 'GSUB' not in font:
ligature_subst = otTables.LigatureSubst() ligature_subst = otTables.LigatureSubst()
ligature_subst.ligatures = {} ligature_subst.ligatures = {}
@ -34,17 +54,27 @@ def add_ligature (font, string):
ligatures = lookup.SubTable[0].ligatures ligatures = lookup.SubTable[0].ligatures
lig = otTables.Ligature() lig = otTables.Ligature()
lig.CompCount = len(string) lig.CompCount = len(seq)
lig.Component = [glyph_name(ch) for ch in string[1:]] lig.Component = seq[1:]
lig.LigGlyph = glyph_name(string) lig.LigGlyph = name
first = glyph_name(string[0]) first = seq[0]
try: try:
ligatures[first].append(lig) ligatures[first].append(lig)
except KeyError: except KeyError:
ligatures[first] = [lig] ligatures[first] = [lig]
# Extra ligating sequences for emoji that already have their own codepoint,
# so that they are also reachable through a sequence, matching the related
# emoji that have no codepoint of their own.
# Key: glyph name of the emoji that has the codepoint.
# Value: the sequence in filename form ('_'-separated hex code points).
# The trailing letters abbreviate the sequence members other than 200D/FE0F
# (presumably M=man, W=woman, B=boy, H=heart, K=kiss -- confirm).
EXTRA_SEQUENCES = {
'u1F46A': '1F468_200D_1F469_200D_1F466', # MWB
'u1F491': '1F469_200D_2764_FE0F_200D_1F468', # WHM
'u1F48F': '1F469_200D_2764_FE0F_200D_1F48B_200D_1F468', # WHKM
}
if len (sys.argv) < 4: if len (sys.argv) < 4:
print >>sys.stderr, """ print >>sys.stderr, """
Usage: Usage:
@ -65,23 +95,22 @@ table and the first GSUB lookup (if existing) are modified.
in_file = sys.argv[1] in_file = sys.argv[1]
out_file = sys.argv[2] out_file = sys.argv[2]
img_prefix = sys.argv[3] img_prefixen = sys.argv[3:]
del sys.argv del sys.argv
font = ttx.TTFont() font = ttx.TTFont()
font.importXML (in_file) font.importXML (in_file)
img_files = {} img_files = {}
glb = "%s*.png" % img_prefix for img_prefix in img_prefixen:
print "Looking for images matching '%s'." % glb glb = "%s*.png" % img_prefix
for img_file in glob.glob (glb): print "Looking for images matching '%s'." % glb
codes = img_file[len (img_prefix):-4] for img_file in glob.glob (glb):
if "_" in codes: codes = img_file[len (img_prefix):-4]
pieces = codes.split ("_") u = codes_to_string(codes)
u = "".join ([unichr (int (code, 16)) for code in pieces]) if u in img_files:
else: print 'overwriting %s with %s' % (img_files[u], imag_file)
u = unichr (int (codes, 16)) img_files[u] = img_file
img_files[u] = img_file
if not img_files: if not img_files:
raise Exception ("No image files found in '%s'." % glb) raise Exception ("No image files found in '%s'." % glb)
@ -98,20 +127,71 @@ h = font['hmtx'].metrics
img_pairs = img_files.items () img_pairs = img_files.items ()
img_pairs.sort (key=lambda pair: (len (pair[0]), pair[0])) img_pairs.sort (key=lambda pair: (len (pair[0]), pair[0]))
glyph_names = set()
ligatures = {}
def add_lig_sequence(ligatures, seq, n):
    """Record that glyph-name sequence `seq` ligates to glyph `n`.

    Assume sequences with ZWJ are emoji 'ligatures' and rtl order is also
    valid, so a sequence containing 'u200D' is recorded in reversed order as
    well. Internal permutations, though, are not added.

    A sequence is associated with one glyph name; recording it again with a
    different name overwrites the earlier one and logs the replacement.

    Args:
        ligatures: dict mapping tuple-of-glyph-names -> ligature glyph name;
            updated in place.
        seq: list of component glyph names (not modified).
        n: name of the ligature glyph.
    """
    tseq = tuple(seq)
    if tseq in ligatures:
        print('lig sequence %s, replace %s with %s' % (
            tseq, ligatures[tseq], n))
    ligatures[tseq] = n

    if 'u200D' not in seq:
        return
    trseq = tuple(reversed(seq))
    # A sequence that reads the same in both directions was stored just
    # above; handling it again would log a bogus "replace n with n".
    if trseq == tseq:
        return
    if trseq in ligatures:
        print('rev lig sequence %s, replace %s with %s' % (
            trseq, ligatures[trseq], n))
    ligatures[trseq] = n
for (u, filename) in img_pairs: for (u, filename) in img_pairs:
print "Adding glyph for U+%s" % ",".join (["%04X" % ord (char) for char in u]) print "Adding glyph for U+%s" % ",".join (["%04X" % ord (char) for char in u])
n = glyph_name (u) n = glyph_name (u)
glyph_names.add(n)
g.append (n) g.append (n)
for char in u: for char in u:
if char not in c: cp = ord(char)
if cp not in c and not is_vs(cp):
name = glyph_name (char) name = glyph_name (char)
c[ord (char)] = name c[cp] = name
if len (u) > 1: if len (u) > 1:
h[name] = [0, 0] h[name] = [0, 0]
(img_width, img_height) = PNG (filename).get_size () (img_width, img_height) = PNG (filename).get_size ()
advance = int (round ((float (ascent+descent) * img_width / img_height))) advance = int (round ((float (ascent+descent) * img_width / img_height)))
h[n] = [advance, 0] h[n] = [advance, 0]
if len (u) > 1: if len (u) > 1:
add_ligature (font, u) seq = glyph_sequence(u)
add_lig_sequence(ligatures, seq, n)
for n in EXTRA_SEQUENCES:
if n in glyph_names:
seq = glyph_sequence(codes_to_string(EXTRA_SEQUENCES[n]))
add_lig_sequence(ligatures, seq, n)
else:
print 'extras: no glyph for %s' % n
keyed_ligatures = collections.defaultdict(list)
for k, v in ligatures.iteritems():
first = k[0]
keyed_ligatures[first].append((k, v))
for base in sorted(keyed_ligatures):
pairs = keyed_ligatures[base]
print 'base %s has %d sequences' % (base, len(pairs))
# Sort longest first, this ensures longer sequences with common prefixes
# are handled before shorter ones. It would be better to have multiple
# lookups, most likely.
pairs.sort(key = lambda pair: (len(pair[0]), pair[0]), reverse=True)
for seq, name in pairs:
print seq, name
add_ligature(font, seq, name)
font.saveXML (out_file) font.saveXML (out_file)

View file

@ -20,7 +20,8 @@
import sys, struct, StringIO import sys, struct, StringIO
from png import PNG from png import PNG
import os
from os import path
def get_glyph_name_from_gsub (string, font, cmap_dict): def get_glyph_name_from_gsub (string, font, cmap_dict):
ligatures = font['GSUB'].table.LookupList.Lookup[0].SubTable[0].ligatures ligatures = font['GSUB'].table.LookupList.Lookup[0].SubTable[0].ligatures
@ -83,6 +84,7 @@ class CBDT:
write_func = self.image_write_func (image_format) write_func = self.image_write_func (image_format)
for glyph in glyphs: for glyph in glyphs:
img_file = glyph_filenames[glyph] img_file = glyph_filenames[glyph]
print 'writing data for glyph %s' % path.basename(img_file)
offset = self.tell () offset = self.tell ()
write_func (PNG (img_file)) write_func (PNG (img_file))
self.glyph_maps.append (GlyphMap (glyph, offset, image_format)) self.glyph_maps.append (GlyphMap (glyph, offset, image_format))
@ -108,6 +110,7 @@ class CBDT:
line_ascent = ascent * y_ppem / float (upem) line_ascent = ascent * y_ppem / float (upem)
y_bearing = int (round (line_ascent - .5 * (line_height - height))) y_bearing = int (round (line_ascent - .5 * (line_height - height)))
advance = width advance = width
print "small glyph metrics h: %d w: %d a: %d" % (height, width, advance)
# smallGlyphMetrics # smallGlyphMetrics
# Type Name # Type Name
# BYTE height # BYTE height
@ -115,10 +118,14 @@ class CBDT:
# CHAR BearingX # CHAR BearingX
# CHAR BearingY # CHAR BearingY
# BYTE Advance # BYTE Advance
self.write (struct.pack ("BBbbB", try:
self.write (struct.pack ("BBbbB",
height, width, height, width,
x_bearing, y_bearing, x_bearing, y_bearing,
advance)) advance))
except:
raise ValueError("h: %d w: %d a: %d x: %d y: 5d" % (
height, width, advance, x_braring, y_bearing))
def write_format1 (self, png): def write_format1 (self, png):
@ -437,8 +444,10 @@ By default they are dropped.
eblc.write_header () eblc.write_header ()
eblc.start_strikes (len (img_prefixes)) eblc.start_strikes (len (img_prefixes))
for img_prefix in img_prefixes: def is_vs(cp):
return cp >= 0xfe00 and cp <= 0xfe0f
for img_prefix in img_prefixes:
print print
img_files = {} img_files = {}
@ -448,9 +457,14 @@ By default they are dropped.
codes = img_file[len (img_prefix):-4] codes = img_file[len (img_prefix):-4]
if "_" in codes: if "_" in codes:
pieces = codes.split ("_") pieces = codes.split ("_")
uchars = "".join ([unichr (int (code, 16)) for code in pieces]) cps = [int(code, 16) for code in pieces]
uchars = "".join ([unichr(cp) for cp in cps if not is_vs(cp)])
else: else:
uchars = unichr (int (codes, 16)) cp = int(codes, 16)
if is_vs(cp):
print "ignoring unexpected vs input %04x" % cp
continue
uchars = unichr(cp)
img_files[uchars] = img_file img_files[uchars] = img_file
if not img_files: if not img_files:
raise Exception ("No image files found in '%s'." % glb) raise Exception ("No image files found in '%s'." % glb)
@ -460,7 +474,11 @@ By default they are dropped.
advance = width = height = 0 advance = width = height = 0
for uchars, img_file in img_files.items (): for uchars, img_file in img_files.items ():
if len (uchars) == 1: if len (uchars) == 1:
glyph_name = unicode_cmap.cmap[ord (uchars)] try:
glyph_name = unicode_cmap.cmap[ord (uchars)]
except:
print "no cmap entry for %x" % ord(uchars)
raise ValueError("%x" % ord(uchars))
else: else:
glyph_name = get_glyph_name_from_gsub (uchars, font, unicode_cmap.cmap) glyph_name = get_glyph_name_from_gsub (uchars, font, unicode_cmap.cmap)
glyph_id = font.getGlyphID (glyph_name) glyph_id = font.getGlyphID (glyph_name)
@ -476,7 +494,7 @@ By default they are dropped.
glyphs = sorted (glyph_imgs.keys ()) glyphs = sorted (glyph_imgs.keys ())
if not glyphs: if not glyphs:
raise Exception ("No common characteres found between font and '%s'." % glb) raise Exception ("No common characters found between font and '%s'." % glb)
print "Embedding images for %d glyphs for this strike." % len (glyphs) print "Embedding images for %d glyphs for this strike." % len (glyphs)
advance, width, height = (div (x, len (glyphs)) for x in (advance, width, height)) advance, width, height = (div (x, len (glyphs)) for x in (advance, width, height))