Merge pull request #83 from C1710/emoji13-1

Emoji 13.1
This commit is contained in:
Constantin A 2021-07-17 10:49:12 +02:00 committed by GitHub
commit 658550b0f6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9366 changed files with 51556 additions and 16548 deletions

197
Blobmoji.gpl Normal file
View file

@ -0,0 +1,197 @@
GIMP Palette
Name: Blobmoji
Columns: 10
#
255 235 238 red-50
255 205 210 red-100
239 154 154 red-200
229 115 115 red-300
227 95 92 red-400
219 68 55 red-500
211 48 44 red-600
198 42 42 red-700
178 36 36 red-800
155 24 24 red-900
252 228 236 pink-50
249 195 213 pink-100
244 164 191 pink-200
241 147 179 pink-300
235 122 164 pink-400
215 89 139 pink-500
209 71 122 pink-600
202 47 109 pink-700
179 35 98 pink-800
149 24 91 pink-900
243 229 245 purple-50
225 190 231 purple-100
202 152 211 purple-200
181 111 193 purple-300
165 79 180 purple-400
149 49 166 purple-500
136 46 160 purple-600
119 41 152 purple-700
104 37 144 purple-800
75 29 131 purple-900
237 231 246 indigo-50
197 202 233 indigo-100
160 168 217 indigo-200
120 133 204 indigo-300
92 106 192 indigo-400
72 87 172 indigo-500
66 79 162 indigo-600
56 69 151 indigo-700
48 59 139 indigo-800
33 41 119 indigo-900
232 240 246 blue-50
173 206 230 blue-100
145 188 219 blue-200
82 156 210 blue-300
46 130 189 blue-400
25 108 162 blue-500
28 97 146 blue-600
24 89 135 blue-700
19 81 124 blue-800
12 66 104 blue-900
225 245 254 light-blue-50
186 227 245 light-blue-100
148 212 241 light-blue-200
129 205 238 light-blue-300
98 192 234 light-blue-400
66 173 231 light-blue-500
30 157 219 light-blue-600
29 140 200 light-blue-700
25 124 183 light-blue-800
12 94 159 light-blue-900
225 250 254 sky-blue-50
186 235 245 sky-blue-100
148 224 241 sky-blue-200
129 215 238 sky-blue-300
98 205 234 sky-blue-400
64 192 231 sky-blue-500
30 175 219 sky-blue-600
29 163 200 sky-blue-700
25 149 183 sky-blue-800
12 127 159 sky-blue-900
220 246 245 teal-50
177 238 232 teal-100
93 221 209 teal-200
29 206 189 teal-300
27 187 172 teal-400
19 171 150 teal-500
11 138 125 teal-600
4 126 112 teal-700
4 107 94 teal-800
0 77 64 teal-900
241 248 233 light-green-50
216 235 194 light-green-100
188 221 152 light-green-200
164 208 113 light-green-300
142 197 79 light-green-400
124 179 66 light-green-500
111 160 59 light-green-600
91 140 49 light-green-700
72 118 40 light-green-800
40 83 24 light-green-900
248 249 233 lime-50
237 240 199 lime-100
225 232 162 lime-200
213 222 126 lime-300
204 215 97 lime-400
189 207 70 lime-500
182 191 62 lime-600
165 169 54 lime-700
149 148 45 lime-800
122 113 31 lime-900
255 253 231 yellow-50
255 250 209 yellow-100
255 247 179 yellow-200
255 244 143 yellow-300
255 240 112 yellow-400
255 238 88 yellow-500
253 221 78 yellow-600
251 199 70 yellow-700
250 178 61 yellow-800
246 142 50 yellow-900
255 248 225 amber-50
255 236 179 amber-100
255 224 130 amber-200
255 213 79 amber-300
255 202 40 amber-400
252 194 27 amber-500
255 179 0 amber-600
255 160 0 amber-700
255 143 0 amber-800
255 111 0 amber-900
255 243 224 dark-amber-50
255 224 178 dark-amber-100
255 204 128 dark-amber-200
255 183 77 dark-amber-300
255 167 38 dark-amber-400
247 147 41 dark-amber-500
247 140 4 dark-amber-600
241 124 4 dark-amber-700
239 108 0 dark-amber-800
230 81 0 dark-amber-900
251 236 231 orange-50
255 206 188 orange-100
255 176 145 orange-200
255 147 101 orange-300
242 141 94 orange-400
237 108 48 orange-500
231 102 43 orange-600
230 86 25 orange-700
216 76 21 orange-800
191 63 12 orange-900
250 250 250 grey-50
245 245 245 grey-100
238 238 238 grey-200
224 224 224 grey-300
189 189 189 grey-400
158 158 158 grey-500
117 117 117 grey-600
97 97 97 grey-700
66 66 66 grey-800
33 33 33 grey-900
236 240 241 blue-grey-50
203 219 224 blue-grey-100
175 199 207 blue-grey-200
147 177 187 blue-grey-300
134 168 180 blue-grey-400
101 147 162 blue-grey-500
90 129 142 blue-grey-600
75 107 117 blue-grey-700
62 82 89 blue-grey-800
43 59 64 blue-grey-900
239 235 233 brown-50
215 203 200 brown-100
188 169 164 brown-200
161 134 127 brown-300
141 107 99 brown-400
133 92 82 brown-500
109 74 65 brown-600
93 64 55 brown-700
78 52 46 brown-800
62 39 35 brown-900
# Skin tones
250 220 188 light-skin
224 187 149 medium-light-skin
191 143 104 medium-skin
155 100 60 medium-dark-skin
99 67 52 dark-skin
# Skin tones (hair)
49 45 45 light-hair
191 160 85 medium-light-hair
109 76 65 medium-hair
71 53 45 medium-dark-hair
35 32 32 dark-hair
249 224 115 blond-hair
# Skin tones (shadow/nose)
219 166 137 light-shadow
196 142 106 medium-light-shadow
153 103 79 medium-shadow
122 76 50 medium-dark-shadow
86 62 55 dark-shadow

3495
CHANGED.md Normal file

File diff suppressed because it is too large Load diff

View file

@ -1,3 +1,5 @@
_This file will not be updated anymore. Please look at `CHANGED.md` instead_
Changes from Google Noto Color Emoji v2017-05-18 (approx.) Changes from Google Noto Color Emoji v2017-05-18 (approx.)
======= =======

View file

@ -1,22 +1,13 @@
FROM python:slim FROM ghcr.io/c1710/emoji_builder
RUN apt-get update \ COPY svg ./svg
&& apt-get install --no-install-recommends -y \ COPY third_party/region-flags/svg ./flags
make \ COPY emoji_aliases.txt NotoColorEmoji.tmpl.ttx.tmpl Blobmoji.gpl ./
gcc \ COPY AUTHORS CONTRIBUTORS CHANGES.md LICENSE ./
zopfli \
libc-dev \
libpng-dev \
libcairo2-dev \
imagemagick \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache notofonttools VOLUME /build
VOLUME /output
ADD . /blobmoji CMD /github_workflow_setup.sh && \
WORKDIR /blobmoji /bin/emoji_builder -vv -b /build -o Blobmoji.ttf -O /output --flags ./flags blobmoji -w -a ./emoji_aliases.txt --ttx-tmpl ./NotoColorEmoji.tmpl.ttx.tmpl --palette ./Blobmoji.gpl && \
mv /output/Blobmoji_win.ttf /output/BlobmojiWindows.ttf
RUN mkdir /output
CMD make -j $(nproc) && cp NotoColorEmoji.ttf /output/

View file

@ -1,4 +1,3 @@
#### DISCLAIMER: #### DISCLAIMER:
I am **neither** affiliated nor in _any_ relationship to the original creators or to Emojipedia or anything or anyone else. I am **neither** affiliated nor in _any_ relationship to the original creators or to Emojipedia or anything or anyone else.
@ -24,24 +23,39 @@ But now to the original content of this Readme:
Color and Black-and-White Noto emoji fonts, and tools for working with them. Color and Black-and-White Noto emoji fonts, and tools for working with them.
## Building NotoColorEmoji ## ~~Building NotoColorEmoji~~
Building NotoColorEmoji currently requires a Python 2.x wide build. To build ~~Building NotoColorEmoji currently requires a Python 2.x wide build. To build
the emoji font you will require a few files from nototools. Clone a copy from the emoji font you will require a few files from nototools. Clone a copy from
https://github.com/googlei18n/nototools and either put it in your PYTHONPATH or https://github.com/googlei18n/nototools and either put it in your PYTHONPATH or
use 'python setup.py develop' ('install' currently won't fully install all the use 'python setup.py develop' ('install' currently won't fully install all the
data used by nototools). You will also need fontTools, get it from data used by nototools). You will also need fontTools, get it from
https://github.com/behdad/fonttools.git. https://github.com/behdad/fonttools.git.~~
Then run `make`. NotoColorEmoji is the default target. It's suggested to use `-j`, ~~Then run `make`. NotoColorEmoji is the default target. It's suggested to use `-j`,
especially if you are using zopflipng for compression. Intermediate products especially if you are using zopflipng for compression. Intermediate products
(compressed image files, for example) will be put into a build subdirectory; the (compressed image files, for example) will be put into a build subdirectory; the
font will be at the top level. font will be at the top level.~~
## Docker build ## Building Blobmoji 13+
Alternatively, you can also build the font within Docker through the provided Dockerfile. _Building is now done using [emoji_builder](https://github.com/C1710/emoji_builder/) (name WIP). Once you have it running, you can build it using the following command (you'll need to replace `emoji_builder` by the executable you use, e.g. `emoji_builder.exe` and maybe including the path. I recommend copying it into the `blobmoji`-directory):_
Just run `docker build . -t blobmoji && docker run --rm -it -v "$PWD/output:/output" blobmoji`. The resulting font will reside in the 'output' folder in your current working directory. ```
emoji_builder --flags ./third_party/region-flags/svg blobmoji -w -a ./emoji_aliases.txt --ttx-tmpl ./NotoColorEmoji.tmpl.ttx.tmpl --palette ./Blobmoji.gpl --default_font "Comic Neue"
```
- `--flags`: Use the directory containing the flags
- `-w` add a wave-effect to the flags
- `-a` use an alias file
- `--ttx-tmpl` Use the template for the font metadata
- `--palette` normalize the colors to a specific color palette in the GIMP format (which is a derivation of the color palette present in the [2014 _Material Design_](https://material.io/archive/guidelines/style/color.html#color-color-palette))
- `--default_font` Because the graphics program I currently use (Affinity Designer) outputs font specifications in a format that `resvg`/`emoji_builder` has issues with, a fallback font is explicitly specified here. Note that this font is only used when an emoji's own font is not recognized correctly; as of now it does _not_ force a single font for _all_ emojis.
## ~~Docker build~~
~~Alternatively, you can also build the font within Docker through the provided Dockerfile.
Just run `docker build . -t blobmoji && docker run --rm -it -v "$PWD/output:/output" blobmoji`. The resulting font will reside in the 'output' folder in your current working directory.~~
The Docker build method isn't available for `emoji_builder` yet
## Using NotoColorEmoji ## Using NotoColorEmoji
@ -95,3 +109,6 @@ _Please try to use the discussion feature for artistic topics, like the style of
* 2017-09-13: Emoji redesign released. * 2017-09-13: Emoji redesign released.
* 2015-12-09: Unicode 7 and 8 emoji image data (.png format) added. * 2015-12-09: Unicode 7 and 8 emoji image data (.png format) added.
* 2015-09-29: All Noto fonts now licensed under the SIL Open Font License. * 2015-09-29: All Noto fonts now licensed under the SIL Open Font License.
_Microsoft, Windows are trademarks of the Microsoft group of companies._

View file

@ -1,224 +0,0 @@
#!/usr/bin/env python3
#
# Copyright 2017 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import argparse
import glob
import os
from os import path
import shutil
import sys
from nototools import unicode_data
"""Create aliases in target directory.
In addition to links/copies named with aliased sequences, this can also
create canonically named aliases/copies, if requested."""
DATA_ROOT = path.dirname(path.abspath(__file__))
def str_to_seq(seq_str):
    """Parse an underscore-separated hex string into a tuple of codepoints.

    If the variation selector U+FE0F occurs in the string, a notice is printed
    and the selector is left out of the returned tuple.
    """
    codepoints = []
    for part in seq_str.split('_'):
        codepoints.append(int(part, 16))
    if 0xfe0f in codepoints:
        print('0xfe0f in file name: %s' % seq_str)
        codepoints = [cp for cp in codepoints if cp != 0xfe0f]
    return tuple(codepoints)
def seq_to_str(seq):
    """Format codepoints as lowercase hex (at least 4 digits), joined by '_'."""
    hex_parts = ['%04x' % cp for cp in seq]
    return '_'.join(hex_parts)
def read_default_unknown_flag_aliases():
    """Load the alias table stored as 'unknown_flag_aliases.txt' in DATA_ROOT."""
    return read_emoji_aliases(path.join(DATA_ROOT, 'unknown_flag_aliases.txt'))
def read_default_emoji_aliases():
    """Load the alias table stored as 'emoji_aliases.txt' in DATA_ROOT."""
    return read_emoji_aliases(path.join(DATA_ROOT, 'emoji_aliases.txt'))
def read_emoji_aliases(filename):
    """Read an alias file and return a mapping from alias to target sequence.

    Every meaningful line has the form ``alias;target`` where both sides are
    underscore-separated hexadecimal codepoints. Anything after a '#' is a
    comment, and blank lines are ignored. Lines that cannot be parsed are
    reported on stdout and skipped instead of aborting the whole read.

    Args:
      filename: path of the alias file to read.

    Returns:
      A dict mapping tuple-of-int alias sequences to tuple-of-int targets.
    """
    result = {}
    with open(filename, 'r') as f:
        for line in f:
            # Strip a trailing comment, then surrounding whitespace.
            line = line.split('#', 1)[0].strip()
            if not line:
                continue
            parts = [s.strip() for s in line.split(';')]
            if len(parts) != 2:
                # A line without exactly one ';' used to raise ValueError from
                # tuple unpacking; treat it like any other unparseable line.
                print('cannot process alias line %s' % line)
                continue
            als, trg = parts
            try:
                als_seq = tuple([int(x, 16) for x in als.split('_')])
                trg_seq = tuple([int(x, 16) for x in trg.split('_')])
            except ValueError:
                # Only hex-parsing failures are expected here; a bare except
                # would also hide KeyboardInterrupt and SystemExit.
                print('cannot process alias %s -> %s' % (als, trg))
                continue
            result[als_seq] = trg_seq
    return result
def add_aliases(
        srcdir, dstdir, aliasfile, prefix, ext, replace=False, copy=False,
        canonical_names=False, dry_run=False):
    """Create aliases in dstdir for files in srcdir, as listed in aliasfile.

    Only files in srcdir named ``<prefix><hex sequence>.<ext>`` are considered.

    Args:
      srcdir: directory containing the files to alias; must exist.
      dstdir: directory to write aliases to. If empty/None, srcdir is used. It
        is created if necessary, even when dry_run is true.
      aliasfile: path of the alias definition file.
      prefix: file name prefix preceding the codepoint sequence.
      ext: file name extension, without the leading dot.
      replace: if true, existing alias files are removed and recreated. If
        false and any alias already exists, this is reported and nothing is
        created.
      copy: if true create copies, otherwise create symlinks (symlinks are only
        supported when srcdir and dstdir are the same).
      canonical_names: if true, also create aliases/copies under the canonical
        sequence name wherever that differs from the existing name.
      dry_run: if true, only report what would be done.

    Raises:
      Exception: if creating a copy or symlink fails.
    """
    if not path.isdir(srcdir):
        print('%s is not a directory' % srcdir, file=sys.stderr)
        return

    if not dstdir:
        dstdir = srcdir
    elif not path.isdir(dstdir):
        os.makedirs(dstdir)

    prefix_len = len(prefix)
    suffix_len = len(ext) + 1  # extension plus the dot
    filenames = [path.basename(f)
                 for f in glob.glob(path.join(srcdir, '%s*.%s' % (prefix, ext)))]
    seq_to_file = {
        str_to_seq(name[prefix_len:-suffix_len]): name
        for name in filenames}

    aliases = read_emoji_aliases(aliasfile)
    aliases_to_create = {}
    aliases_to_replace = []
    # Aliases that already exist and must not be replaced. The nested helper
    # appends to this list; assigning a plain flag inside the helper would only
    # bind a local there and the abort check below would never trigger.
    blocking_aliases = []

    def check_alias_seq(seq):
        """Return the alias file name for seq, or None if it must not be written."""
        alias_str = seq_to_str(seq)
        alias_name = '%s%s.%s' % (prefix, alias_str, ext)
        alias_path = path.join(dstdir, alias_name)
        if path.exists(alias_path):
            if replace:
                aliases_to_replace.append(alias_name)
            else:
                print('alias %s exists' % alias_str, file=sys.stderr)
                blocking_aliases.append(alias_name)
                return None
        return alias_name

    canonical_to_file = {}
    for als, trg in sorted(aliases.items()):
        if trg not in seq_to_file:
            print('target %s for %s does not exist' % (
                seq_to_str(trg), seq_to_str(als)), file=sys.stderr)
            continue
        alias_name = check_alias_seq(als)
        if alias_name:
            target_file = seq_to_file[trg]
            aliases_to_create[alias_name] = target_file
            if canonical_names:
                canonical_seq = unicode_data.get_canonical_emoji_sequence(als)
                if canonical_seq and canonical_seq != als:
                    canonical_alias_name = check_alias_seq(canonical_seq)
                    if canonical_alias_name:
                        canonical_to_file[canonical_alias_name] = target_file

    if canonical_names:
        print('adding %d canonical aliases' % len(canonical_to_file))
        # items() instead of the Python 2-only iteritems().
        for seq, f in seq_to_file.items():
            canonical_seq = unicode_data.get_canonical_emoji_sequence(seq)
            if canonical_seq and canonical_seq != seq:
                alias_name = check_alias_seq(canonical_seq)
                if alias_name:
                    canonical_to_file[alias_name] = f

        print('adding %d total canonical sequences' % len(canonical_to_file))

    aliases_to_create.update(canonical_to_file)

    if replace:
        if not dry_run:
            for k in sorted(aliases_to_replace):
                os.remove(path.join(dstdir, k))
        print('replacing %d files' % len(aliases_to_replace))
    elif blocking_aliases:
        print('aborting, aliases exist.', file=sys.stderr)
        return

    for k, v in sorted(aliases_to_create.items()):
        if dry_run:
            msg = 'replace ' if k in aliases_to_replace else ''
            print('%s%s -> %s' % (msg, k, v))
        else:
            try:
                if copy:
                    shutil.copy2(path.join(srcdir, v), path.join(dstdir, k))
                else:
                    # fix this to create relative symlinks
                    if srcdir == dstdir:
                        os.symlink(v, path.join(dstdir, k))
                    else:
                        raise Exception('can\'t create cross-directory symlinks yet')
            except Exception as e:
                print('failed to create %s -> %s' % (k, v), file=sys.stderr)
                raise Exception('oops, ' + str(e))
    print('created %d %s' % (
        len(aliases_to_create), 'copies' if copy else 'symlinks'))
def main():
    """Parse the command-line options and run add_aliases with them."""
    parser = argparse.ArgumentParser()
    add_option = parser.add_argument
    add_option(
        '-s', '--srcdir', metavar='dir', required=True,
        help='directory containing files to alias')
    add_option(
        '-d', '--dstdir', metavar='dir',
        help='directory to write aliases, default srcdir')
    add_option(
        '-a', '--aliasfile', metavar='file', default='emoji_aliases.txt',
        help='alias file (default emoji_aliases.txt)')
    add_option(
        '-p', '--prefix', metavar='pfx', default='emoji_u',
        help='file name prefix (default emoji_u)')
    add_option(
        '-e', '--ext', choices=['ai', 'png', 'svg'], default='png',
        help='file name extension (default png)')
    add_option(
        '-r', '--replace', action='store_true',
        help='replace existing files/aliases')
    add_option(
        '-c', '--copy', action='store_true',
        help='create a copy of the file, not a symlink')
    add_option(
        '--canonical_names', action='store_true',
        help='include extra copies with canonical names '
        '(including fe0f emoji presentation character)')
    add_option(
        '-n', '--dry_run', action='store_true',
        help='print out aliases to create only')
    opts = parser.parse_args()

    add_aliases(
        opts.srcdir, opts.dstdir, opts.aliasfile, opts.prefix, opts.ext,
        replace=opts.replace, copy=opts.copy,
        canonical_names=opts.canonical_names, dry_run=opts.dry_run)
if __name__ == '__main__':
main()

View file

@ -1,195 +0,0 @@
#!/usr/bin/env python3
#
# Copyright 2014 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Modify the Noto Color Emoji font to use GSUB rules for flags and keycaps."""
__author__ = "roozbeh@google.com (Roozbeh Pournader)"
import sys
from fontTools import agl
from fontTools import ttLib
from fontTools.ttLib.tables import otTables
from nototools import font_data
def create_script_list(script_tag='DFLT'):
    """Build a GSUB ScriptList containing one script record for script_tag.

    The script has no extra language systems; its default language system
    lists a single feature (index 0).
    """
    lang_sys = otTables.DefaultLangSys()
    lang_sys.ReqFeatureIndex = 0xFFFF  # OpenType value for "no required feature"
    lang_sys.FeatureCount = 1
    lang_sys.FeatureIndex = [0]
    lang_sys.LookupOrder = None

    record = otTables.ScriptRecord()
    record.ScriptTag = script_tag
    script = otTables.Script()
    script.DefaultLangSys = lang_sys
    script.LangSysCount = 0
    script.LangSysRecord = []
    record.Script = script

    scripts = otTables.ScriptList()
    scripts.ScriptCount = 1
    scripts.ScriptRecord = [record]
    return scripts
def create_feature_list(feature_tag, lookup_count):
    """Create a FeatureList for the GSUB table.

    Args:
      feature_tag: tag of the single feature to create (e.g. 'ccmp').
      lookup_count: number of lookups; the feature references lookup indices
        0 .. lookup_count - 1.

    Returns:
      An otTables.FeatureList holding exactly one feature record.
    """
    feature_record = otTables.FeatureRecord()
    feature_record.FeatureTag = feature_tag
    feature_record.Feature = otTables.Feature()
    feature_record.Feature.LookupCount = lookup_count
    # Materialize the indices: under Python 3 range() is a lazy range object,
    # whereas this code was written for the Python 2 behaviour of a real list.
    feature_record.Feature.LookupListIndex = list(range(lookup_count))
    feature_record.Feature.FeatureParams = None

    feature_list = otTables.FeatureList()
    feature_list.FeatureCount = 1
    feature_list.FeatureRecord = [feature_record]
    return feature_list
def create_lookup_list(lookups):
    """Wrap the given lookups in a LookupList for the GSUB table."""
    wrapped = otTables.LookupList()
    wrapped.Lookup = lookups
    wrapped.LookupCount = len(lookups)
    return wrapped
def get_glyph_name_or_create(char, font):
    """Return the glyph name mapped to char, adding an empty glyph if missing.

    When char is not yet in the font's cmap, a glyph named after the
    agl.UV2AGL entry for char is registered with zero horizontal metrics,
    mapped in the cmap, given an empty outline if the font has a 'glyf'
    table, and appended to the glyph order.
    """
    cmap = font_data.get_cmap(font)
    if char in cmap:
        return cmap[char]

    new_name = agl.UV2AGL[char]
    assert new_name not in font.glyphOrder

    font['hmtx'].metrics[new_name] = [0, 0]
    cmap[char] = new_name

    if 'glyf' in font:
        from fontTools.ttLib.tables import _g_l_y_f
        font['glyf'].glyphs[new_name] = _g_l_y_f.Glyph()

    font.glyphOrder.append(new_name)
    return new_name
def create_lookup(table, font, flag=0):
    """Create a ligature substitution Lookup based on a mapping table.

    Args:
      table: dict mapping an output codepoint (which must already be in the
        font's cmap) to the pair of input codepoints that should form it.
      font: the font being modified; glyphs for missing input codepoints are
        created through get_glyph_name_or_create.
      flag: value stored as the lookup's LookupFlag.

    Returns:
      An otTables.Lookup of type 4 with a single LigatureSubst subtable.
    """
    cmap = font_data.get_cmap(font)

    ligatures = {}
    # items() instead of the Python 2-only iteritems().
    for output, (ch1, ch2) in table.items():
        output = cmap[output]
        ch1 = get_glyph_name_or_create(ch1, font)
        ch2 = get_glyph_name_or_create(ch2, font)

        ligature = otTables.Ligature()
        ligature.CompCount = 2
        ligature.Component = [ch2]
        ligature.LigGlyph = output

        # Ligatures are grouped by the name of their first component glyph.
        ligatures.setdefault(ch1, []).append(ligature)

    ligature_subst = otTables.LigatureSubst()
    ligature_subst.ligatures = ligatures

    lookup = otTables.Lookup()
    lookup.LookupType = 4
    lookup.LookupFlag = flag
    lookup.SubTableCount = 1
    lookup.SubTable = [ligature_subst]
    return lookup
def create_simple_gsub(lookups, script='DFLT', feature='ccmp'):
    """Build a GSUB table with one script and one feature covering all lookups."""
    gsub = ttLib.getTableClass('GSUB')('GSUB')

    table = otTables.GSUB()
    table.Version = 1.0
    table.ScriptList = create_script_list(script)
    table.FeatureList = create_feature_list(feature, len(lookups))
    table.LookupList = create_lookup_list(lookups)

    gsub.table = table
    return gsub
def reg_indicator(letter):
    """Return the regional indicator codepoint for the given capital letter.

    'A' maps to 0x1F1E6 and later letters follow consecutively.
    """
    offset = ord(letter) - ord('A')
    return 0x1F1E6 + offset
# Maps the codepoint of each flag glyph to the pair of regional indicator
# codepoints that should be substituted by it.
EMOJI_FLAGS = {
    flag_cp: (reg_indicator(region[0]), reg_indicator(region[1]))
    for flag_cp, region in (
        (0xFE4E5, 'JP'),  # Japan
        (0xFE4E6, 'US'),  # United States
        (0xFE4E7, 'FR'),  # France
        (0xFE4E8, 'DE'),  # Germany
        (0xFE4E9, 'IT'),  # Italy
        (0xFE4EA, 'GB'),  # United Kingdom
        (0xFE4EB, 'ES'),  # Spain
        (0xFE4EC, 'RU'),  # Russia
        (0xFE4ED, 'CN'),  # China
        (0xFE4EE, 'KR'),  # Korea
    )
}
# Codepoint that follows a character to form a keycap sequence.
KEYCAP = 0x20E3

# Maps the codepoint of each keycap glyph to the (character, KEYCAP) pair that
# should be substituted by it. '#' sits at 0xFE82C; the digits 1-9 and then 0
# occupy the consecutive codepoints starting at 0xFE82E.
EMOJI_KEYCAPS = {0xFE82C: (ord('#'), KEYCAP)}
EMOJI_KEYCAPS.update(
    (0xFE82E + position, (ord(digit), KEYCAP))
    for position, digit in enumerate('1234567890'))
def main(argv):
    """Modify all the fonts given in the command line.

    For each font path in argv[1:], a GSUB table with keycap and flag ligature
    lookups is added, the codepoints of the ligature glyphs are removed from
    the cmap, and the result is saved next to the input as '<name>-fixed'.
    The font must not already contain a GSUB table.

    Args:
      argv: full argument vector; argv[0] (the program name) is ignored.
    """
    for font_name in argv[1:]:
        font = ttLib.TTFont(font_name)

        assert 'GSUB' not in font
        font['GSUB'] = create_simple_gsub([
            create_lookup(EMOJI_KEYCAPS, font),
            create_lookup(EMOJI_FLAGS, font)])

        # dict.keys() returns a view in Python 3 and views cannot be
        # concatenated with '+', so build explicit lists first.
        font_data.delete_from_cmap(
            font, list(EMOJI_FLAGS) + list(EMOJI_KEYCAPS))

        font.save(font_name+'-fixed')
if __name__ == '__main__':
main(sys.argv)

View file

@ -1,405 +0,0 @@
#!/usr/bin/env python3
"""Extend a ttx file with additional data.
Takes a ttx file and one or more directories containing image files named
after sequences of codepoints, extends the cmap, hmtx, GSUB, and GlyphOrder
tables in the source ttx file based on these sequences, and writes out a new
ttx file.
This can also apply aliases from an alias file."""
import argparse
import collections
import os
from os import path
import re
import sys
from fontTools import ttx
from fontTools.ttLib.tables import otTables
import add_emoji_gsub
import add_aliases
sys.path.append(
path.join(os.path.dirname(__file__), 'third_party', 'color_emoji'))
from png import PNG
def get_seq_to_file(image_dir, prefix, suffix):
    """Return a mapping from codepoint sequences to files in image_dir.

    Only files whose names start with prefix and end with suffix are used. The
    part in between must consist of codepoints in hex separated by underscore.
    'fe0f' (the codepoint of the emoji presentation variation selector) is
    stripped from the sequence.

    Args:
      image_dir: directory to scan.
      prefix: required file name prefix.
      suffix: required file name suffix (may be empty).

    Returns:
      A dict mapping tuple-of-int sequences to the file path (image_dir joined
      with the file name).

    Raises:
      Exception: if a matching name cannot be parsed, contains a codepoint
        outside 0..0x10ffff, or yields a sequence that was already seen.
    """
    start = len(prefix)
    seq_to_file = {}
    for name in os.listdir(image_dir):
        if not (name.startswith(prefix) and name.endswith(suffix)):
            continue
        # Use an explicit end index: slicing with -len(suffix) would become
        # [start:0] for an empty suffix and always give an empty string.
        stem = name[start:len(name) - len(suffix)]
        try:
            cps = [int(s, 16) for s in stem.split('_')]
        except ValueError:
            # Only hex-parsing failures are expected; a bare except would also
            # convert KeyboardInterrupt/SystemExit into this error.
            raise Exception('could not parse "%s"' % name)
        seq = tuple(cp for cp in cps if cp != 0xfe0f)
        for cp in cps:
            if not (0 <= cp <= 0x10ffff):
                raise Exception('bad codepoint(s) in "%s"' % name)
        if seq in seq_to_file:
            raise Exception('duplicate sequence for "%s" in %s' % (name, image_dir))
        seq_to_file[seq] = path.join(image_dir, name)
    return seq_to_file
def collect_seq_to_file(image_dirs, prefix, suffix):
    """Merge the get_seq_to_file mappings of several directories.

    Directories are processed in order, so when the same sequence occurs in
    more than one directory the file from the later directory wins.
    """
    merged = {}
    for directory in image_dirs:
        per_dir = get_seq_to_file(directory, prefix, suffix)
        for seq, filename in per_dir.items():
            merged[seq] = filename
    return merged
def remap_values(seq_to_file, map_fn):
    """Return a new dict with the same keys and every value passed through map_fn."""
    return dict((key, map_fn(value)) for key, value in seq_to_file.items())
def get_png_file_to_advance_mapper(lineheight):
    """Return a function mapping a PNG file name to an advance width.

    The advance is the image width scaled so that the image height corresponds
    to lineheight, rounded to an integer.
    """
    def advance_for(filename):
        width, height = PNG(filename).get_size()
        scaled = float(lineheight) * width / height
        return int(round(scaled))
    return advance_for
def cp_name(cp):
    """Return the glyph name for a codepoint: uniXXXX, or uXXXXX(X) above 0xffff."""
    if cp > 0xffff:
        lead = 'u'
    else:
        lead = 'uni'
    return '%s%04X' % (lead, cp)
def seq_name(seq):
    """Return the glyph name for a codepoint sequence.

    A single-codepoint sequence gets its cp_name. Longer sequences are named
    'u' followed by the uppercase hex codepoints (at least four digits each)
    separated by underscore.
    """
    if len(seq) != 1:
        hex_parts = ['%04X' % cp for cp in seq]
        return 'u' + '_'.join(hex_parts)
    return cp_name(seq[0])
def collect_cps(seqs):
    """Return the set of all codepoints occurring in any of the sequences."""
    return set().union(*seqs)
def get_glyphorder_cps_and_truncate(glyphOrder):
    """Split glyphOrder into codepoint-named glyphs and everything else.

    Names of the form 'u(ni)XXXXXX' (four to six hex digits) are removed from
    glyphOrder and their codepoints are returned as a set. All other names
    stay in glyphOrder, in their original order; the list is modified in place.
    """
    single_cp_name = re.compile(r'^u(?:ni)?([0-9a-fA-F]{4,6})$')
    codepoints = set()
    kept_names = []
    for glyph_name in glyphOrder:
        match = single_cp_name.match(glyph_name)
        if match is None:
            kept_names.append(glyph_name)
        else:
            codepoints.add(int(match.group(1), 16))
    # Slice assignment keeps the caller's list object and only changes its content.
    glyphOrder[:] = kept_names
    return codepoints
def get_all_seqs(font, seq_to_advance):
    """Return every sequence that needs a glyph.

    The result contains the keys of seq_to_advance plus a single-codepoint
    sequence for each codepoint that occurs inside one of those keys or is
    named in the font's GlyphOrder. As a side effect the codepoint-named
    entries are removed from the font's GlyphOrder list.
    """
    seqs = set(seq_to_advance)
    # collect_cps also yields the codepoints internal to longer sequences.
    codepoints = collect_cps(seqs)
    # Pull the codepoint-named glyphs out of the glyph order, keeping the rest.
    codepoints |= get_glyphorder_cps_and_truncate(font.getGlyphOrder())
    seqs |= {(cp,) for cp in codepoints}
    return seqs
def get_font_cmap(font):
    """Return the mapping of the font's first cmap subtable.

    The subtable is assumed to exist and to be a unicode cmap.
    """
    cmap_table = font['cmap']
    first_subtable = cmap_table.tables[0]
    return first_subtable.cmap
def add_glyph_data(font, seqs, seq_to_advance, vadvance):
    """Add hmtx, vmtx and GlyphOrder entries for every sequence in seqs.

    Each single-codepoint sequence additionally gets a cmap entry. Sequences
    missing from seq_to_advance receive a horizontal advance of zero; every
    glyph receives vadvance as its vertical advance.
    """
    # The template font may leave out cmap mappings and hmtx advances for
    # single-codepoint glyphs listed in its GlyphOrder. Glyphs named 'uniXXXX'
    # or 'uXXXXX(X)' are taken to correspond to the codepoint in their name;
    # other kinds of names are not handled here and have to be present in the
    # template's cmap and hmtx tables.
    #
    # seq_to_advance maps sequences (single codepoints included) to advances.
    # Codepoints that only occur inside longer sequences are added as well,
    # but since no advance is known for them they get a zero advance.
    cmap = get_font_cmap(font)
    h_metrics = font['hmtx'].metrics
    v_metrics = font['vmtx'].metrics

    # Sequence names are not expected in the glyph order already, but consult
    # the reverse glyph map anyway so that no name is appended twice.
    known_glyphs = font.getReverseGlyphMap()
    appended_any = False

    def singles_first(seq):
        # Single-codepoint glyphs are grouped first, everything is then ordered
        # by sequence so related sequences end up together. Keeping the single
        # glyphs together allows more compact coverage tables for some of the
        # substitutions.
        return (0 if len(seq) == 1 else 1, seq)

    for seq in sorted(seqs, key=singles_first):
        glyph = seq_name(seq)
        if len(seq) == 1:
            cmap[seq[0]] = glyph
        h_metrics[glyph] = [seq_to_advance.get(seq, 0), 0]
        v_metrics[glyph] = [vadvance, 0]
        if glyph not in known_glyphs:
            font.glyphOrder.append(glyph)
            appended_any = True

    if appended_any:
        # Presumably the font's cached reverse glyph map, which is stale once
        # the glyph order has grown.
        del font._reverseGlyphOrderDict
def add_aliases_to_cmap(font, aliases):
    """Map single-codepoint aliases straight to their target glyph in the cmap.

    Aliases whose source is longer than one codepoint are not touched here
    (they are mapped via GSUB).
    """
    single_cp_sources = []
    if aliases:
        single_cp_sources = [seq for seq in aliases if len(seq) == 1]
    if not single_cp_sources:
        return

    cmap = get_font_cmap(font)
    for source in single_cp_sources:
        cmap[source[0]] = seq_name(aliases[source])
def get_rtl_seq(seq):
    """Return the right-to-left variant of seq, or () if it has none.

    Only sequences containing ZWJ (0x200d) or TAG_END (0xe007f) reflect. The
    reflected sequence is the reverse of seq, except that Fitzpatrick
    modifiers (0x1f3fb..0x1f3ff) are swapped back behind the codepoint that
    follows them after the reversal, restoring their logical order.
    """
    zwj, tag_end = 0x200d, 0xe007f
    if zwj not in seq and tag_end not in seq:
        return ()

    reflected = list(reversed(seq))
    # Walk from the end towards the front and move each modifier one step back.
    for pos in reversed(range(1, len(reflected))):
        if 0x1f3fb <= reflected[pos - 1] <= 0x1f3ff:
            reflected[pos - 1], reflected[pos] = reflected[pos], reflected[pos - 1]
    return tuple(reflected)
def get_gsub_ligature_lookup(font):
    """Return the font's first GSUB lookup as a usable ligature lookup.

    If the font has a GSUB table, its first lookup is returned after making
    sure it carries a LookupType and its first subtable a ligatures dict (the
    table may contain more lookups after lookup 0). Otherwise a new GSUB table
    with one empty ligature substitution lookup is created and installed.
    """
    if 'GSUB' in font:
        lookup = font['GSUB'].table.LookupList.Lookup[0]
        assert lookup.LookupFlag == 0

        # importXML doesn't fully init GSUB structures, so fill in the gaps.
        subtable = lookup.SubTable[0]
        if not hasattr(lookup, 'LookupType'):
            assert subtable.LookupType == 4
            lookup.LookupType = 4
        if not hasattr(subtable, 'ligatures'):
            subtable.ligatures = {}
        return lookup

    empty_subst = otTables.LigatureSubst()
    empty_subst.ligatures = {}

    lookup = otTables.Lookup()
    lookup.LookupType = 4
    lookup.LookupFlag = 0
    lookup.SubTableCount = 1
    lookup.SubTable = [empty_subst]

    font['GSUB'] = add_emoji_gsub.create_simple_gsub([lookup])
    return lookup
def add_ligature_sequences(font, seqs, aliases):
    """Add a GSUB ligature for every multi-codepoint sequence and alias.

    Each sequence in seqs maps to its own glyph; each multi-codepoint alias
    maps to the glyph of its target. Right-to-left variants of all of these
    are added as well. Nothing happens if there are no such sequences.
    """
    target_names = {}
    for seq in seqs:
        if len(seq) > 1:
            target_names[seq] = seq_name(seq)
    if aliases:
        for seq in aliases:
            if len(seq) > 1:
                target_names[seq] = seq_name(aliases[seq])
    if not target_names:
        return

    # Collect the rtl variants separately first, then merge them in.
    rtl_target_names = {}
    for seq, name in target_names.items():
        rtl_target_names[get_rtl_seq(seq)] = name
    target_names.update(rtl_target_names)
    # Sequences without an rtl variant all ended up under the empty sequence.
    target_names.pop((), None)

    # Group the (sequence, name) pairs by the first codepoint of the sequence.
    by_first_cp = {}
    for seq, name in target_names.items():
        by_first_cp.setdefault(seq[0], []).append((seq, name))

    lookup = get_gsub_ligature_lookup(font)
    cmap = get_font_cmap(font)
    for first_cp in sorted(by_first_cp):
        # Longest first, so longer sequences sharing a prefix are handled
        # before shorter ones; ties are ordered by the codepoints themselves.
        entries = sorted(
            by_first_cp[first_cp], key=lambda entry: (-len(entry[0]), entry[0]))
        for seq, name in entries:
            # Ligature tables hold glyph names, not codepoints. Aliasing can
            # give a single codepoint a sequence-based name (e.g. 'guardsman'
            # with 'male guardsman'), so go through the cmap for the names.
            glyph_names = [cmap[cp] for cp in seq]

            lig = otTables.Ligature()
            lig.CompCount = len(seq)
            lig.Component = glyph_names[1:]
            lig.LigGlyph = name

            ligatures = lookup.SubTable[0].ligatures
            ligatures.setdefault(glyph_names[0], []).append(lig)
def update_font_data(font, seq_to_advance, vadvance, aliases):
  """Apply all table updates for the given sequences to font.

  Collects the full sequence list with get_all_seqs, then calls, in order,
  add_glyph_data, add_aliases_to_cmap and add_ligature_sequences.  The
  original description of this step is: update the font's cmap, hmtx, GSUB,
  and GlyphOrder tables.
  """
  all_seqs = get_all_seqs(font, seq_to_advance)
  add_glyph_data(font, all_seqs, seq_to_advance, vadvance)
  add_aliases_to_cmap(font, aliases)
  add_ligature_sequences(font, all_seqs, aliases)
def apply_aliases(seq_dict, aliases):
  """Pick the aliases that can be honoured and prune seq_dict accordingly.

  aliases maps a source sequence to its replacement sequence.  An alias is
  usable when its replacement is currently a key of seq_dict; for a usable
  alias the source key, if present, is removed from seq_dict.

  seq_dict is modified in place.  Returns a new dict holding only the
  usable aliases.
  """
  usable = {}
  for source, target in aliases.items():
    if target not in seq_dict:
      continue
    usable[source] = target
    seq_dict.pop(source, None)
  return usable
def update_ttx(in_file, out_file, image_dirs, prefix, ext, aliases_file):
  """Read a ttx file, add glyph data for the images found, and write it out.

  Args:
    in_file: path of the ttx file to import.
    out_file: path the updated ttx is saved to.
    image_dirs: directories searched for image files.
    prefix: file name prefix of the image files.
    ext: file extension; only '.png' is accepted.
    aliases_file: optional alias table file; falsy to skip aliasing.

  Raises:
    Exception: if ext is not '.png'.
    ValueError: if no matching image files are found.
  """
  if ext != '.png':
    raise Exception('extension "%s" not supported' % ext)

  seq_to_file = collect_seq_to_file(image_dirs, prefix, ext)
  if not seq_to_file:
    raise ValueError(
        'no sequences with prefix "%s" and extension "%s" in %s' % (
            prefix, ext, ', '.join(image_dirs)))

  if aliases_file:
    # Only aliases whose target image exists are kept; aliased sources are
    # dropped from seq_to_file by apply_aliases.
    aliases = apply_aliases(
        seq_to_file, add_aliases.read_emoji_aliases(aliases_file))
  else:
    aliases = None

  font = ttx.TTFont()
  font.importXML(in_file)

  hhea = font['hhea']
  lineheight = hhea.ascent - hhea.descent
  seq_to_advance = remap_values(
      seq_to_file, get_png_file_to_advance_mapper(lineheight))

  # Fall back to the line height when the font carries no vhea table.
  if 'vhea' in font:
    vadvance = font['vhea'].advanceHeightMax
  else:
    vadvance = lineheight

  update_font_data(font, seq_to_advance, vadvance, aliases)
  font.saveXML(out_file)
def main():
  """Parse the command line and run update_ttx with the chosen options.

  Required options are the input ttx file (-f), the output ttx file (-o) and
  one or more image directories (-d).  The prefix defaults to 'emoji_u' and
  the extension to '.png'.  Passing -a without a value selects
  'emoji_aliases.txt' as the alias table.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '-f', '--in_file', help='ttx input file', metavar='file', required=True)
  parser.add_argument(
      '-o', '--out_file', help='ttx output file', metavar='file', required=True)
  parser.add_argument(
      '-d', '--image_dirs', help='directories containing image files',
      nargs='+', metavar='dir', required=True)
  parser.add_argument(
      '-p', '--prefix', help='file prefix (default "emoji_u")',
      metavar='pfx', default='emoji_u')
  # The help text previously lacked its closing parenthesis.
  parser.add_argument(
      '-e', '--ext', help='file extension (default ".png", currently only '
      '".png" is supported)', metavar='ext', default='.png')
  parser.add_argument(
      '-a', '--aliases', help='process alias table', const='emoji_aliases.txt',
      nargs='?', metavar='file')
  args = parser.parse_args()

  update_ttx(
      args.in_file, args.out_file, args.image_dirs, args.prefix, args.ext,
      args.aliases)


if __name__ == '__main__':
  main()

View file

@ -1,295 +0,0 @@
#!/usr/bin/env python3
# Copyright 2015 Google, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Google Author(s): Doug Felt
"""Tool to update GSUB, hmtx, cmap, glyf tables with svg image glyphs."""
from __future__ import print_function
import argparse
import glob
import logging
import os
import re
import sys
from fontTools.ttLib.tables import otTables
from fontTools.ttLib.tables import _g_l_y_f
from fontTools.ttLib.tables import S_V_G_ as SVG
from fontTools import ttx
from nototools import tool_utils
import add_emoji_gsub
import svg_builder
class FontBuilder(object):
  """A utility for mutating a ttx font.  This maintains glyph_order, cmap, and
  hmtx tables, and optionally GSUB, glyf, and SVG tables as well."""

  def __init__(self, font):
    """Keep references to the font's glyph order, the mapping of its first
    cmap subtable, and its hmtx metrics."""
    self.font = font
    self.glyph_order = font.getGlyphOrder()
    self.cmap = font['cmap'].tables[0].cmap
    self.hmtx = font['hmtx'].metrics

  def init_gsub(self):
    """Call this if you are going to add ligatures to the font.  Creates a GSUB
    table if there isn't one already.  An existing GSUB table must have a
    type 4 lookup with LookupFlag 0 as its first lookup."""
    if hasattr(self, 'ligatures'):
      return  # already initialized
    font = self.font
    if 'GSUB' not in font:
      ligature_subst = otTables.LigatureSubst()
      ligature_subst.ligatures = {}

      lookup = otTables.Lookup()
      lookup.LookupType = 4
      lookup.LookupFlag = 0
      lookup.SubTableCount = 1
      lookup.SubTable = [ligature_subst]

      font['GSUB'] = add_emoji_gsub.create_simple_gsub([lookup])
    else:
      lookup = font['GSUB'].table.LookupList.Lookup[0]
      assert lookup.LookupType == 4
      assert lookup.LookupFlag == 0
    self.ligatures = lookup.SubTable[0].ligatures

  def init_glyf(self):
    """Call this if you need to create empty glyf entries in the font when you
    add a new glyph."""
    if hasattr(self, 'glyphs'):
      return  # already initialized
    font = self.font
    if 'glyf' not in font:
      glyf_table = _g_l_y_f.table__g_l_y_f()
      glyf_table.glyphs = {}
      glyf_table.glyphOrder = self.glyph_order
      font['glyf'] = glyf_table
    self.glyphs = font['glyf'].glyphs

  def init_svg(self):
    """Call this if you expect to add SVG images in the font.  This calls
    init_glyf since SVG support currently requires fallback glyf records for
    each SVG image."""
    if hasattr(self, 'svgs'):
      return  # already initialized

    # svg requires glyf
    self.init_glyf()

    font = self.font
    if 'SVG ' not in font:
      svg_table = SVG.table_S_V_G_()
      svg_table.docList = []
      svg_table.colorPalettes = None
      font['SVG '] = svg_table
    self.svgs = font['SVG '].docList

  def glyph_name(self, string):
    """Return the glyph name for string: 'uXXXX' (upper-case hex, at least
    four digits) per character, joined with underscores."""
    return "_".join(["u%04X" % ord(char) for char in string])

  def glyph_name_to_index(self, name):
    """Return the position of name in the glyph order, or -1 if absent."""
    return self.glyph_order.index(name) if name in self.glyph_order else -1

  def glyph_index_to_name(self, glyph_index):
    """Return the glyph name at glyph_index, or '' when the index is outside
    the glyph order (negative indexes included)."""
    if 0 <= glyph_index < len(self.glyph_order):
      return self.glyph_order[glyph_index]
    return ''

  def have_glyph(self, name):
    """Return True if a glyph called name is in the glyph order."""
    # The original read a nonexistent attribute (self.name_to_glyph_index)
    # and therefore always raised AttributeError.
    return self.glyph_name_to_index(name) >= 0

  def _add_ligature(self, glyphstr):
    """Append a ligature record mapping the components of glyphstr to the
    glyph named after the whole string.  Requires init_gsub to have run."""
    lig = otTables.Ligature()
    lig.CompCount = len(glyphstr)
    lig.Component = [self.glyph_name(ch) for ch in glyphstr[1:]]
    lig.LigGlyph = self.glyph_name(glyphstr)

    first = self.glyph_name(glyphstr[0])
    try:
      self.ligatures[first].append(lig)
    except KeyError:
      self.ligatures[first] = [lig]

  def _add_empty_glyph(self, glyphstr, name):
    """Create an empty glyph. If glyphstr is not a ligature, add a cmap entry
    for it."""
    if len(glyphstr) == 1:
      self.cmap[ord(glyphstr)] = name
    self.hmtx[name] = [0, 0]
    self.glyph_order.append(name)
    # glyf records are only maintained once init_glyf has been called.
    if hasattr(self, 'glyphs'):
      self.glyphs[name] = _g_l_y_f.Glyph()

  def add_components_and_ligature(self, glyphstr):
    """Convert glyphstr to a name and check if it already exists.  If not, check
    if it is a ligature (longer than one codepoint), and if it is, generate
    empty glyphs with cmap entries for any missing ligature components and add a
    ligature record.  Then generate an empty glyph for the name.  Return a tuple
    with the name, index, and a bool indicating whether the glyph already
    existed."""
    name = self.glyph_name(glyphstr)
    index = self.glyph_name_to_index(name)
    exists = index >= 0
    if not exists:
      if len(glyphstr) > 1:
        for char in glyphstr:
          if ord(char) not in self.cmap:
            char_name = self.glyph_name(char)
            self._add_empty_glyph(char, char_name)
        self._add_ligature(glyphstr)
      # The new glyph is appended, so its index is the current length.
      index = len(self.glyph_order)
      self._add_empty_glyph(glyphstr, name)
    return name, index, exists

  def add_svg(self, doc, hmetrics, name, index):
    """Add an svg table entry. If hmetrics is not None, update the hmtx table.
    This expects the glyph has already been added."""
    # sanity check to make sure name and index correspond.
    assert name == self.glyph_index_to_name(index)
    if hmetrics:
      self.hmtx[name] = hmetrics
    svg_record = (doc, index, index)  # startGlyphId, endGlyphId are the same
    self.svgs.append(svg_record)
def collect_glyphstr_file_pairs(prefix, ext, include=None, exclude=None, verbosity=1):
  """Scan files with the given prefix and extension, and return a list of
  (glyphstr, filename) where glyphstr is the character or ligature, and filename
  is the image file associated with it.  The glyphstr is formed by decoding the
  filename (exclusive of the prefix) as a sequence of hex codepoints separated
  by underscore.  Include, if defined, is a regex string to include only matched
  filenames.  Exclude, if defined, is a regex string to exclude matched
  filenames, and is applied after include.

  Excluded file names are printed when verbosity is greater than 1.

  Raises Exception if no image file is matched, and ValueError (from int())
  if a file name does not decode as hex codepoints."""
  # unichr only exists on Python 2; chr is its Python 3 equivalent.
  try:
    to_char = unichr
  except NameError:
    to_char = chr

  image_files = {}
  glob_pat = "%s*.%s" % (prefix, ext)
  leading = len(prefix)
  trailing = len(ext) + 1  # include dot
  logging.info("Looking for images matching '%s'.", glob_pat)
  ex_count = 0
  ex = re.compile(exclude) if exclude else None
  inc = re.compile(include) if include else None
  if inc:
    logging.info("Including images matching '%s'.", include)
  if ex:
    logging.info("Excluding images matching '%s'.", exclude)

  for image_file in glob.glob(glob_pat):
    if inc and not inc.search(image_file):
      continue

    if ex and ex.search(image_file):
      if verbosity > 1:
        print("Exclude %s" % image_file)
      ex_count += 1
      continue

    # split() also covers the single-codepoint case (no underscore).
    codes = image_file[leading:-trailing]
    u = "".join([to_char(int(code, 16)) for code in codes.split("_")])
    image_files[u] = image_file

  if ex_count:
    logging.info("Excluded %d files.", ex_count)
  if not image_files:
    # Format the message here; Exception() does not apply '%' itself.
    raise Exception("No image files matching '%s'." % glob_pat)
  logging.info("Matched %s files.", len(image_files))
  # Return a real list: callers sort it in place and index into it.
  return list(image_files.items())
def sort_glyphstr_tuples(glyphstr_tuples):
  """Sort the list in place by the string in each tuple's first slot.

  Shorter strings come first; strings of equal length are ordered by normal
  string comparison.  Putting single characters ahead of ligatures means
  ligature components get added to the font before the ligatures that use
  them.
  """
  def length_then_text(entry):
    glyphstr = entry[0]
    return len(glyphstr), glyphstr

  glyphstr_tuples.sort(key=length_then_text)
def add_image_glyphs(in_file, out_file, pairs):
  """Add images from pairs (glyphstr, filename) to .ttx file in_file and write
  to .ttx file out_file.

  pairs may be any iterable of 2-tuples.  A list is sorted in place; any
  other iterable (for example a dict items view) is copied into a list
  first.  An empty pairs yields an unchanged copy of the font."""
  font = ttx.TTFont()
  font.importXML(in_file)

  # Sorting and indexing below need a real list.
  if not isinstance(pairs, list):
    pairs = list(pairs)
  sort_glyphstr_tuples(pairs)

  font_builder = FontBuilder(font)
  # we've already sorted by length, so the longest glyphstrs are at the end.  To
  # see if we have ligatures, we just need to check the last one.
  if pairs and len(pairs[-1][0]) > 1:
    font_builder.init_gsub()

  img_builder = svg_builder.SvgBuilder(font_builder)
  for glyphstr, filename in pairs:
    logging.debug("Adding glyph for U+%s", ",".join(
        ["%04X" % ord(char) for char in glyphstr]))
    img_builder.add_from_filename(glyphstr, filename)

  font.saveXML(out_file)
  logging.info("Added %s images to %s", len(pairs), out_file)
def main(argv):
  """Command-line entry point.

  argv is the argument list without the program name.  Collects the '.svg'
  files that match image_prefix (filtered by --include / --exclude) and
  adds them to the ttx file in_file, writing the result to out_file.
  """
  # Fixed help-text typos: 'htmx' -> 'hmtx' and 'whoses' -> 'whose'.
  usage = """This will search for files that have image_prefix followed by one
  or more hex numbers (separated by underscore if more than one), and end in
  ".svg". For example, if image_prefix is "icons/u", then files with names like
  "icons/u1F4A9.svg" or "icons/u1F1EF_1F1F5.svg" will be loaded. The script
  then adds cmap, hmtx, and potentially GSUB entries for the Unicode characters
  found. The advance width will be chosen based on image aspect ratio. If
  Unicode values outside the BMP are desired, the existing cmap table should be
  of the appropriate (format 12) type. Only the first cmap table and the first
  GSUB lookup (if existing) are modified."""

  parser = argparse.ArgumentParser(
      description='Update cmap, glyf, GSUB, and hmtx tables from image glyphs.',
      epilog=usage)
  parser.add_argument(
      'in_file', help='Input ttx file name.', metavar='fname')
  parser.add_argument(
      'out_file', help='Output ttx file name.', metavar='fname')
  parser.add_argument(
      'image_prefix', help='Location and prefix of image files.',
      metavar='path')
  parser.add_argument(
      '-i', '--include', help='include files whose name matches this regex',
      metavar='regex')
  parser.add_argument(
      '-e', '--exclude', help='exclude files whose name matches this regex',
      metavar='regex')
  parser.add_argument(
      '-l', '--loglevel', help='log level name', default='warning')
  args = parser.parse_args(argv)

  tool_utils.setup_logging(args.loglevel)

  pairs = collect_glyphstr_file_pairs(
      args.image_prefix, 'svg', include=args.include, exclude=args.exclude)
  add_image_glyphs(args.in_file, args.out_file, pairs)


if __name__ == '__main__':
  main(sys.argv[1:])

View file

@ -1,463 +0,0 @@
#!/usr/bin/env python3
#
# Copyright 2016 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Compare emoji image file namings against unicode property data."""
from __future__ import print_function
import argparse
import collections
import glob
import os
from os import path
import re
import sys
from nototools import unicode_data
import add_aliases
ZWJ = 0x200d
EMOJI_VS = 0xfe0f
END_TAG = 0xe007f
def _make_tag_set():
tag_set = set()
tag_set |= set(range(0xe0030, 0xe003a)) # 0-9
tag_set |= set(range(0xe0061, 0xe007b)) # a-z
tag_set.add(END_TAG)
return tag_set
TAG_SET = _make_tag_set()
# Lazily-built list of sequence -> name maps used by seq_name.
_namedata = None


def seq_name(seq):
  """Return the name for a codepoint sequence, or None if it has none.

  A single codepoint is looked up with unicode_data.name (default None).
  Longer sequences are looked up in the combining, flag, modifier and zwj
  sequence maps from unicode_data, whose keys are first passed through
  unicode_data.strip_emoji_vs.  If seq contains EMOJI_VS and is not found
  as given, the lookup is repeated with the stripped form of seq.
  """
  global _namedata

  if not _namedata:
    def strip_vs_map(seq_map):
      # items() works on Python 2 and 3; iteritems() is Python 2 only.
      return {
          unicode_data.strip_emoji_vs(k): v
          for k, v in seq_map.items()}
    _namedata = [
        strip_vs_map(unicode_data.get_emoji_combining_sequences()),
        strip_vs_map(unicode_data.get_emoji_flag_sequences()),
        strip_vs_map(unicode_data.get_emoji_modifier_sequences()),
        strip_vs_map(unicode_data.get_emoji_zwj_sequences()),
        ]

  if len(seq) == 1:
    return unicode_data.name(seq[0], None)

  for data in _namedata:
    if seq in data:
      return data[seq]
  if EMOJI_VS in seq:
    non_vs_seq = unicode_data.strip_emoji_vs(seq)
    for data in _namedata:
      if non_vs_seq in data:
        return data[non_vs_seq]

  return None
def _check_no_vs(sorted_seq_to_filepath):
"""Our image data does not use emoji presentation variation selectors."""
for seq, fp in sorted_seq_to_filepath.iteritems():
if EMOJI_VS in seq:
print('check no VS: FE0F in path: %s' % fp)
def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
  """Ensure all cps in these sequences are valid emoji cps or specific cps
  used in forming emoji sequences.  This is a 'pre-check' that reports
  this specific problem.

  When unicode_version is None or at least unicode_data.PROPOSED_EMOJI_AGE
  the proposed emoji are accepted too; otherwise only emoji whose age is at
  most unicode_version count as valid.  Findings go to stderr."""
  valid_cps = set(unicode_data.get_emoji())
  if unicode_version is None or unicode_version >= unicode_data.PROPOSED_EMOJI_AGE:
    valid_cps |= unicode_data.proposed_emoji_cps()
  else:
    valid_cps = set(
        cp for cp in valid_cps if unicode_data.age(cp) <= unicode_version)
  valid_cps.add(0x200d)  # ZWJ
  valid_cps.add(0x20e3)  # combining enclosing keycap
  valid_cps.add(0xfe0f)  # variation selector (emoji presentation)
  valid_cps.add(0xfe82b)  # PUA value for unknown flag
  valid_cps |= TAG_SET  # used in subregion tag sequences

  # Maps each offending codepoint to the files it occurs in.
  not_emoji = {}
  # items() works on Python 2 and 3; iteritems() is Python 2 only.
  for seq, fp in sorted_seq_to_filepath.items():
    for cp in seq:
      if cp not in valid_cps:
        not_emoji.setdefault(cp, []).append(fp)

  if not_emoji:
    print(
        'check valid emoji cps: %d non-emoji cp found' % len(not_emoji),
        file=sys.stderr)
    for cp in sorted(not_emoji):
      fps = not_emoji[cp]
      print(
          'check valid emoji cps: %04x (in %d sequences)' % (cp, len(fps)),
          file=sys.stderr)
def _check_zwj(sorted_seq_to_filepath):
"""Ensure zwj is only between two appropriate emoji. This is a 'pre-check'
that reports this specific problem."""
for seq, fp in sorted_seq_to_filepath.iteritems():
if ZWJ not in seq:
continue
if seq[0] == ZWJ:
print('check zwj: zwj at head of sequence in %s' % fp, file=sys.stderr)
if len(seq) == 1:
continue
if seq[-1] == ZWJ:
print('check zwj: zwj at end of sequence in %s' % fp, file=sys.stderr)
for i, cp in enumerate(seq):
if cp == ZWJ:
if i > 0:
pcp = seq[i-1]
if pcp != EMOJI_VS and not unicode_data.is_emoji(pcp):
print(
'check zwj: non-emoji %04x preceeds ZWJ in %s' % (pcp, fp),
file=sys.stderr)
if i < len(seq) - 1:
fcp = seq[i+1]
if not unicode_data.is_emoji(fcp):
print(
'check zwj: non-emoji %04x follows ZWJ in %s' % (fcp, fp),
file=sys.stderr)
def _check_flags(sorted_seq_to_filepath):
"""Ensure regional indicators are only in sequences of one or two, and
never mixed."""
for seq, fp in sorted_seq_to_filepath.iteritems():
have_reg = None
for cp in seq:
is_reg = unicode_data.is_regional_indicator(cp)
if have_reg == None:
have_reg = is_reg
elif have_reg != is_reg:
print(
'check flags: mix of regional and non-regional in %s' % fp,
file=sys.stderr)
if have_reg and len(seq) > 2:
# We provide dummy glyphs for regional indicators, so there are sequences
# with single regional indicator symbols, the len check handles this.
print(
'check flags: regional indicator sequence length != 2 in %s' % fp,
file=sys.stderr)
def _check_tags(sorted_seq_to_filepath):
"""Ensure tag sequences (for subregion flags) conform to the spec. We don't
validate against CLDR, just that there's a sequence of 2 or more tags starting
and ending with the appropriate codepoints."""
BLACK_FLAG = 0x1f3f4
BLACK_FLAG_SET = set([BLACK_FLAG])
for seq, fp in sorted_seq_to_filepath.iteritems():
seq_set = set(cp for cp in seq)
overlap_set = seq_set & TAG_SET
if not overlap_set:
continue
if seq[0] != BLACK_FLAG:
print('check tags: bad start tag in %s' % fp)
elif seq[-1] != END_TAG:
print('check tags: bad end tag in %s' % fp)
elif len(seq) < 4:
print('check tags: sequence too short in %s' % fp)
elif seq_set - TAG_SET != BLACK_FLAG_SET:
print('check tags: non-tag items in %s' % fp)
def _check_skintone(sorted_seq_to_filepath):
"""Ensure skin tone modifiers are not applied to emoji that are not defined
to take them. May appear standalone, though. Also check that emoji that take
skin tone modifiers have a complete set."""
base_to_modifiers = collections.defaultdict(set)
for seq, fp in sorted_seq_to_filepath.iteritems():
for i, cp in enumerate(seq):
if unicode_data.is_skintone_modifier(cp):
if i == 0:
if len(seq) > 1:
print(
'check skintone: skin color selector first in sequence %s' % fp,
file=sys.stderr)
# standalone are ok
continue
pcp = seq[i-1]
if not unicode_data.is_emoji_modifier_base(pcp):
print(
'check skintone: emoji skintone modifier applied to non-base ' +
'at %d: %s' % (i, fp), file=sys.stderr)
else:
if pcp not in base_to_modifiers:
base_to_modifiers[pcp] = set()
base_to_modifiers[pcp].add(cp)
for cp, modifiers in sorted(base_to_modifiers.iteritems()):
if len(modifiers) != 5:
print(
'check skintone: base %04x has %d modifiers defined (%s) in %s' % (
cp, len(modifiers),
', '.join('%04x' % cp for cp in sorted(modifiers)), fp),
file=sys.stderr)
def _check_zwj_sequences(sorted_seq_to_filepath, unicode_version):
"""Verify that zwj sequences are valid for the given unicode version."""
for seq, fp in sorted_seq_to_filepath.iteritems():
if ZWJ not in seq:
continue
age = unicode_data.get_emoji_sequence_age(seq)
if age is None or unicode_version is not None and age > unicode_version:
print('check zwj sequences: undefined sequence %s' % fp)
def _check_no_alias_sources(sorted_seq_to_filepath):
  """Check that we don't have sequences that we expect to be aliased to
  some other sequence.

  The aliases come from add_aliases.read_default_emoji_aliases(); every
  sequence that is a key in that mapping is reported on stdout."""
  aliases = add_aliases.read_default_emoji_aliases()
  # items() works on Python 2 and 3; iteritems() is Python 2 only.
  for seq, fp in sorted_seq_to_filepath.items():
    if seq in aliases:
      print('check no alias sources: aliased sequence %s' % fp)
def _check_coverage(seq_to_filepath, unicode_version):
  """Ensure we have all and only the cps and sequences that we need for the
  font as of this version.

  seq_to_filepath is modified: every default alias whose target is covered
  and whose source is not gets an 'alias:<filename>' entry.  All findings
  are printed to stdout."""
  age = unicode_version

  # Maps the VS-stripped form of a key to the key actually present.
  non_vs_to_canonical = {}
  for k in seq_to_filepath:
    if EMOJI_VS in k:
      non_vs = unicode_data.strip_emoji_vs(k)
      non_vs_to_canonical[non_vs] = k

  aliases = add_aliases.read_default_emoji_aliases()
  for k, v in sorted(aliases.items()):
    if v not in seq_to_filepath and v not in non_vs_to_canonical:
      alias_str = unicode_data.seq_to_string(k)
      target_str = unicode_data.seq_to_string(v)
      print('coverage: alias %s missing target %s' % (alias_str, target_str))
      continue
    if k in seq_to_filepath or k in non_vs_to_canonical:
      alias_str = unicode_data.seq_to_string(k)
      target_str = unicode_data.seq_to_string(v)
      print('coverage: alias %s already exists as %s (%s)' % (
          alias_str, target_str, seq_name(v)))
      continue
    filename = seq_to_filepath.get(v) or seq_to_filepath[non_vs_to_canonical[v]]
    seq_to_filepath[k] = 'alias:' + filename

  # check single emoji, this includes most of the special chars
  emoji = sorted(unicode_data.get_emoji(age=age))
  for cp in emoji:
    if tuple([cp]) not in seq_to_filepath:
      print(
          'coverage: missing single %04x (%s)' % (
              cp, unicode_data.name(cp, '<no name>')))

  # special characters
  # all but combining enclosing keycap are currently marked as emoji
  # list(range(...)): a range cannot be added to a list on Python 3.
  emoji_set = set(emoji)
  for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)):
    if cp not in emoji_set and tuple([cp]) not in seq_to_filepath:
      print('coverage: missing special %04x (%s)' % (cp, unicode_data.name(cp)))

  # combining sequences
  # items() works on Python 2 and 3; iteritems() is Python 2 only.
  comb_seq_to_name = sorted(
      unicode_data.get_emoji_combining_sequences(age=age).items())
  for seq, name in comb_seq_to_name:
    if seq not in seq_to_filepath:
      # strip vs and try again
      non_vs_seq = unicode_data.strip_emoji_vs(seq)
      if non_vs_seq not in seq_to_filepath:
        print('coverage: missing combining sequence %s (%s)' %
              (unicode_data.seq_to_string(seq), name))

  # flag sequences
  flag_seq_to_name = sorted(
      unicode_data.get_emoji_flag_sequences(age=age).items())
  for seq, name in flag_seq_to_name:
    if seq not in seq_to_filepath:
      print('coverage: missing flag sequence %s (%s)' %
            (unicode_data.seq_to_string(seq), name))

  # skin tone modifier sequences
  mod_seq_to_name = sorted(
      unicode_data.get_emoji_modifier_sequences(age=age).items())
  for seq, name in mod_seq_to_name:
    if seq not in seq_to_filepath:
      print('coverage: missing modifier sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))

  # zwj sequences
  # some of ours include the emoji presentation variation selector and some
  # don't, and the same is true for the canonical sequences.  normalize all
  # of them to omit it to test coverage, but report the canonical sequence.
  zwj_seq_without_vs = set()
  for seq in seq_to_filepath:
    if ZWJ not in seq:
      continue
    if EMOJI_VS in seq:
      seq = tuple(cp for cp in seq if cp != EMOJI_VS)
    zwj_seq_without_vs.add(seq)

  for seq, name in sorted(
      unicode_data.get_emoji_zwj_sequences(age=age).items()):
    if EMOJI_VS in seq:
      test_seq = tuple(s for s in seq if s != EMOJI_VS)
    else:
      test_seq = seq
    if test_seq not in zwj_seq_without_vs:
      print('coverage: missing (canonical) zwj sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))

  # check for 'unknown flag'
  # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
  # don't start with our prefix so 'unknown_flag' would be excluded by default.
  if tuple([0xfe82b]) not in seq_to_filepath:
    print('coverage: missing unknown flag PUA fe82b')
def check_sequence_to_filepath(seq_to_filepath, unicode_version, coverage):
  """Run every sequence check over seq_to_filepath in a fixed order.

  The mapping is first copied into an OrderedDict sorted by sequence, and
  each check receives that copy.  The coverage check runs last, and only
  when coverage is true.
  """
  ordered = collections.OrderedDict(sorted(seq_to_filepath.items()))

  # (check function, extra positional arguments) in execution order.
  checks = [
      (_check_no_vs, ()),
      (_check_valid_emoji_cps, (unicode_version,)),
      (_check_zwj, ()),
      (_check_flags, ()),
      (_check_tags, ()),
      (_check_skintone, ()),
      (_check_zwj_sequences, (unicode_version,)),
      (_check_no_alias_sources, ()),
  ]
  if coverage:
    checks.append((_check_coverage, (unicode_version,)))

  for check, extra_args in checks:
    check(ordered, *extra_args)
def create_sequence_to_filepath(name_to_dirpath, prefix, suffix):
  """Check names, and convert name to sequences for names that are ok,
  returning a sequence to file path mapping.  Reports bad segments
  of a name to stdout.

  Args:
    name_to_dirpath: mapping from file name to the directory containing it.
    prefix: leading part of each file name, before the first codepoint.
    suffix: trailing part of each file name, after the last codepoint; may
      be empty.

  Returns:
    dict from codepoint tuple to path.join(dirname, name).  Names without
    the prefix, or with a segment that is not 4-6 lower-case hex digits or
    is above 0x10ffff, are reported and left out."""
  segment_re = re.compile(r'^[0-9a-f]{4,6}$')
  result = {}
  # items() works on Python 2 and 3; iteritems() is Python 2 only.
  for name, dirname in name_to_dirpath.items():
    if not name.startswith(prefix):
      print('expected prefix "%s" for "%s"' % (prefix, name))
      continue
    # Use an explicit end index: name[a:-0] would be empty for suffix ''.
    segments = name[len(prefix):len(name) - len(suffix)].split('_')
    segfail = False
    seq = []
    for s in segments:
      if not segment_re.match(s):
        print('bad codepoint name "%s" in %s/%s' % (s, dirname, name))
        segfail = True
        continue
      n = int(s, 16)
      if n > 0x10ffff:
        print('codepoint "%s" out of range in %s/%s' % (s, dirname, name))
        segfail = True
        continue
      seq.append(n)
    if not segfail:
      result[tuple(seq)] = path.join(dirname, name)
  return result
def collect_name_to_dirpath(directory, prefix, suffix, exclude=None):
  """Return a mapping from filename to path rooted at directory, ignoring files
  that don't match suffix, and subtrees with names in exclude.  Report when a
  filename appears in more than one subdir; the first path found is kept.

  Args:
    directory: root of the tree to walk.
    prefix: unused here; kept so the signature matches the callers.
    suffix: only file names ending in this are collected.
    exclude: optional collection of directory names whose subtrees are
      skipped.

  Returns:
    dict from file name to the directory path (as yielded by os.walk) that
    contains it.  Duplicates are reported on stderr."""
  result = {}
  for dirname, dirs, files in os.walk(directory, topdown=True):
    if exclude:
      # Prune in place so os.walk does not descend into excluded subtrees.
      dirs[:] = [d for d in dirs if d not in exclude]

    # os.walk already yields paths rooted at directory.  The original joined
    # directory onto dirname again, which doubled the root for relative
    # directories ('a/b' became 'a/b/a/b').
    for f in files:
      if not f.endswith(suffix):
        continue
      if f in result:
        print('duplicate file "%s" in %s and %s ' % (
            f, dirname, result[f]), file=sys.stderr)
        continue
      result[f] = dirname
  return result
def collect_name_to_dirpath_with_override(dirs, prefix, suffix, exclude=None):
  """Merge the results of collect_name_to_dirpath over several root dirs.

  The roots are processed in the order given and each result overwrites
  earlier entries, so when a file name occurs under more than one root the
  entry from the last such root is kept, without an error being reported.
  'exclude' names subdirectories (of any root) whose subtree is skipped.
  """
  merged = {}
  for root in dirs:
    found = collect_name_to_dirpath(root, prefix, suffix, exclude)
    merged.update(found)
  return merged
def run_check(dirs, prefix, suffix, exclude, unicode_version, coverage):
  """Collect the image names under dirs, convert them to sequences and run
  the checks, printing progress lines to stdout along the way.

  unicode_version, when truthy, is shown in the banner formatted with
  '%3.1f'.  It is passed on to the checks as given, together with coverage.
  """
  version_note = ' (%3.1f)' % unicode_version if unicode_version else ''
  dir_list = '\n '.join(dirs)
  print('Checking files with prefix "%s" and suffix "%s"%s in:\n %s' % (
      prefix, suffix, version_note, dir_list))

  name_to_dirpath = collect_name_to_dirpath_with_override(
      dirs, prefix=prefix, suffix=suffix, exclude=exclude)
  print('checking %d names' % len(name_to_dirpath))

  seq_to_filepath = create_sequence_to_filepath(name_to_dirpath, prefix, suffix)
  print('checking %d sequences' % len(seq_to_filepath))

  check_sequence_to_filepath(seq_to_filepath, unicode_version, coverage)
  print('done.')
def main():
  """Parse the command line and hand the options to run_check.

  --dirs is required.  The prefix defaults to 'emoji_u' and the suffix to
  '.png'.  --unicode_version is parsed as a float, and --coverage is a flag.
  """
  parser = argparse.ArgumentParser()
  add = parser.add_argument
  add('-d', '--dirs', help='directory roots containing emoji images',
      metavar='dir', nargs='+', required=True)
  add('-e', '--exclude', help='names of source subdirs to exclude',
      metavar='dir', nargs='+')
  add('-c', '--coverage', help='test for complete coverage',
      action='store_true')
  add('-p', '--prefix', help='prefix to match, default "emoji_u"',
      metavar='pfx', default='emoji_u')
  add('-s', '--suffix', help='suffix to match, default ".png"', metavar='sfx',
      default='.png')
  add('-u', '--unicode_version', help='limit to this unicode version or before',
      metavar='version', type=float)
  options = parser.parse_args()

  run_check(
      options.dirs, options.prefix, options.suffix, options.exclude,
      options.unicode_version, options.coverage)


if __name__ == '__main__':
  main()

View file

@ -1,150 +0,0 @@
#!/usr/bin/env python3
# Copyright 2015 Google, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Google Author(s): Doug Felt
"""Tool to collect emoji svg glyphs into one directory for processing
by add_svg_glyphs. There are two sources, noto/color_emoji/svg and
noto/third_party/region-flags/svg. The add_svg_glyphs tool expects
each file name to encode the character string the image represents
as a sequence of hex-encoded codepoints separated by
underscores. The files in noto/color_emoji/svg do this, and have the
prefix 'emoji_u', but the files in region-flags/svg just have the
two-letter code.
We create a directory and copy the files into it with the required
naming convention. First we do this for color_emoji/svg, and then we
do this for region-flags/svg, converting the names, so any duplicates
will be overwritten by the region-flags svg. We use
copies instead of symlinks so we can continue to optimize or modify
the files without messing with the originals."""
import argparse
import glob
import logging
import os
import os.path
import re
import shutil
import sys
from nototools import tool_utils
def _is_svg(f):
return f.endswith('.svg')
def _is_svg_and_startswith_emoji(f):
return f.endswith('.svg') and f.startswith('emoji_u')
def _flag_rename(f):
"""Converts a file name from two-letter upper-case ASCII to our expected
'emoji_uXXXXX_XXXXX form, mapping each character to the corresponding
regional indicator symbol."""
cp_strs = []
name, ext = os.path.splitext(f)
if len(name) != 2:
raise ValueError('illegal flag name "%s"' % f)
for cp in name:
if not ('A' <= cp <= 'Z'):
raise ValueError('illegal flag name "%s"' % f)
ncp = 0x1f1e6 - 0x41 + ord(cp)
cp_strs.append("%04x" % ncp)
return 'emoji_u%s%s' % ('_'.join(cp_strs), ext)
def copy_with_rename(src_dir, dst_dir, accept_pred=None, rename=None):
  """Copy the entries of src_dir into dst_dir, optionally filtered and renamed.

  Args:
    src_dir: directory whose entries are listed and copied.
    dst_dir: directory receiving the copies.
    accept_pred: optional callable taking a file name; the entry is skipped
      when it returns a false value.  All entries are copied when omitted.
    rename: optional callable taking a file name and returning the name to
      use in dst_dir.  Names are kept when omitted.

  An existing destination file is removed before the copy.  A summary is
  logged when the root logger's effective level is INFO or lower.
  """
  copied = 0
  replaced = 0
  for entry in os.listdir(src_dir):
    if accept_pred and not accept_pred(entry):
      continue

    if rename:
      target_name = rename(entry)
    else:
      target_name = entry
    source_path = os.path.join(src_dir, entry)
    target_path = os.path.join(dst_dir, target_name)

    if os.path.exists(target_path):
      logging.debug('Replacing existing file %s', target_path)
      os.unlink(target_path)
      replaced += 1

    shutil.copy2(source_path, target_path)
    logging.debug('cp -p %s %s', source_path, target_path)
    copied += 1

  # Only shorten the paths when the summary will actually be emitted.
  if logging.getLogger().getEffectiveLevel() <= logging.INFO:
    logging.info('Copied %d files (replacing %d) from %s to %s',
                 copied, replaced,
                 tool_utils.short_path(src_dir),
                 tool_utils.short_path(dst_dir))
def build_svg_dir(dst_dir, clean=False, emoji_dir='', flags_dir=''):
  """Fill dst_dir with svg files named in the standard 'emoji_u' format.

  dst_dir is first passed through tool_utils.ensure_dir_exists together
  with clean.  Files from emoji_dir that start with 'emoji_u' and end in
  '.svg' are then copied as they are, followed by the '.svg' files from
  flags_dir, which are renamed with _flag_rename.  A source that is empty
  is skipped; when both are empty a warning is logged and nothing is
  copied.
  """
  dst_dir = tool_utils.ensure_dir_exists(dst_dir, clean=clean)

  if not (emoji_dir or flags_dir):
    logging.warning('Nothing to do.')
    return

  # Flags are copied second, so they replace same-named emoji files.
  sources = (
      (emoji_dir, _is_svg_and_startswith_emoji, None),
      (flags_dir, _is_svg, _flag_rename),
  )
  for src_dir, accept, renamer in sources:
    if src_dir:
      copy_with_rename(src_dir, dst_dir, accept_pred=accept, rename=renamer)
def main(argv):
    """Command-line entry point: parse argv and build the svg directory.

    argv is the list of command-line arguments without the program name.
    The flag and emoji source directories default to paths that start with
    '[emoji]'; both values are passed through tool_utils.resolve_path before
    use (presumably this expands the '[emoji]' prefix -- confirm in
    tool_utils).
    """
    default_emoji_dir = '[emoji]/svg'
    default_flags_dir = '[emoji]/third_party/region-flags/svg'

    parser = argparse.ArgumentParser(
        description='Collect svg files into target directory with prefix.')
    parser.add_argument(
        'dst_dir', metavar='dir', help='Directory to hold copied files.')
    parser.add_argument(
        '--clean', '-c', action='store_true', help='Replace target directory')
    parser.add_argument(
        '--flags_dir', '-f', metavar='dir', default=default_flags_dir,
        help='directory containing flag svg, default %s' % default_flags_dir)
    parser.add_argument(
        '--emoji_dir', '-e', metavar='dir', default=default_emoji_dir,
        help='directory containing emoji svg, default %s' % default_emoji_dir)
    parser.add_argument(
        '-l', '--loglevel', default='warning', help='log level name/value')

    options = parser.parse_args(argv)
    tool_utils.setup_logging(options.loglevel)

    # Resolve flags before emoji, matching the order the paths are consumed
    # by the helper calls below.
    resolved_flags_dir = tool_utils.resolve_path(options.flags_dir)
    resolved_emoji_dir = tool_utils.resolve_path(options.emoji_dir)

    build_svg_dir(
        options.dst_dir,
        clean=options.clean,
        emoji_dir=resolved_emoji_dir,
        flags_dir=resolved_flags_dir)
if __name__ == '__main__':
    # Run as a script: pass everything after the program name to main().
    cli_args = sys.argv[1:]
    main(cli_args)

0
derived/accordion.svg Normal file
View file

View file

View file

View file

View file

View file

0
derived/artist.svg Normal file
View file

View file

View file

View file

0
derived/astronaut.svg Normal file
View file

0
derived/beaver.svg Normal file
View file

0
derived/beetle.svg Normal file
View file

0
derived/bell pepper.svg Normal file
View file

0
derived/bison.svg Normal file
View file

0
derived/black cat.svg Normal file
View file

0
derived/blueberries.svg Normal file
View file

0
derived/boomerang.svg Normal file
View file

View file

0
derived/bubble tea.svg Normal file
View file

0
derived/bucket.svg Normal file
View file

View file

0
derived/cockroach.svg Normal file
View file

0
derived/coin.svg Normal file
View file

View file

View file

View file

View file

View file

View file

0
derived/cook.svg Normal file
View file

View file

Some files were not shown because too many files have changed in this diff Show more