From 0d0db39af03db246ea66bcbd23d6ee7ac86fb92e Mon Sep 17 00:00:00 2001 From: Doug Felt Date: Tue, 7 Mar 2017 14:28:21 -0800 Subject: [PATCH 1/3] Update emoji html page generation. - Support --ignore_missing flag to skip missing data on output. When all_emoji is set, this skips sequences for which we have no image files. When all_emoji is not set, this skips sequences for which we have image files but that are not in the canonical sequence list (e.g. older sequences for which we included skin tone variants that later versions of Unicode decided should not exist). - Use alias information to add alias sequences when not using all_emoji and we have an image for the target sequence. - Use alias information to mark missing images with '-alias-' when we expect an alias (note, not only when we actually have one) - Embed tool name, date, and arguments in a comment in the generated html. --- add_aliases.py | 2 +- generate_emoji_html.py | 81 ++++++++++++++++++++++++++++++++---------- 2 files changed, 63 insertions(+), 20 deletions(-) diff --git a/add_aliases.py b/add_aliases.py index fd523b278..4f00acbf8 100755 --- a/add_aliases.py +++ b/add_aliases.py @@ -35,7 +35,7 @@ def seq_to_str(seq): return '_'.join('%04x' % cp for cp in seq) -def read_emoji_aliases(): +def read_default_emoji_aliases(): alias_path = path.join(DATA_ROOT, 'emoji_aliases.txt') return read_emoji_aliases(alias_path) diff --git a/generate_emoji_html.py b/generate_emoji_html.py index 056593738..e40d666ff 100755 --- a/generate_emoji_html.py +++ b/generate_emoji_html.py @@ -23,6 +23,7 @@ builds an html page presenting the images along with their composition import argparse import codecs import collections +import datetime import glob import os from os import path @@ -33,6 +34,8 @@ import sys from nototools import tool_utils from nototools import unicode_data +import add_aliases + _default_dir = 'png/128' _default_ext = 'png' _default_prefix = 'emoji_u' @@ -54,12 +57,14 @@ def _merge_keys(dicts): return 
frozenset(keys) -def _generate_row_cells(key, font, dir_infos, basepaths, colors): +def _generate_row_cells(key, font, aliases, dir_infos, basepaths, colors): CELL_PREFIX = '' indices = range(len(basepaths)) def _cell(info, basepath): if key in info.filemap: return '' % path.join(basepath, info.filemap[key]) + if key in aliases: + return '-alias-' return '-missing-' def _text_cell(text_dir): @@ -164,7 +169,7 @@ def _collect_aux_info(dir_infos, keys): def _generate_content( - basedir, font, dir_infos, keys, annotations, standalone, colors): + basedir, font, dir_infos, keys, aliases, annotations, standalone, colors): """Generate an html table for the infos. Basedir is the parent directory of the content, filenames will be made relative to this if underneath it, else absolute. If font is not none, generate columns for the text rendered in the @@ -230,7 +235,7 @@ def _generate_content( lines.append(''.join(header_row)) for key in keys: - row = _generate_row_cells(key, font, dir_infos, basepaths, colors) + row = _generate_row_cells(key, font, aliases, dir_infos, basepaths, colors) row.append(_get_desc(key, dir_infos, basepaths)) row.append(_get_name(key, annotations)) lines.append(''.join(row)) @@ -323,18 +328,38 @@ def _get_dir_infos( return infos -def _get_keys(dir_infos, limit, all_emoji, emoji_sort): +def _add_aliases(keys, aliases): + to_add = {k for k, v in aliases.iteritems() if k not in keys and v in keys} + return keys | to_add + + +def _get_keys(dir_infos, aliases, limit, all_emoji, emoji_sort, ignore_missing): """Return a list of the key tuples to display. If all_emoji is - True, returns all emoji sequences, else the sequences available + true, start with all emoji sequences, else the sequences available in dir_infos (limited to the first dir_info if limit is True). + If ignore_missing is true and all_emoji is false, ignore sequences + that are not valid (e.g. skin tone variants of wrestlers). 
If + ignore_missing is true and all_emoji is true, ignore sequences + for which we have no assets (e.g. newly defined emoji). If not using + all_emoji, aliases are included if we have a target for them. The result is in emoji order if emoji_sort is true, else in unicode codepoint order.""" - if all_emoji: - keys = unicode_data.get_emoji_sequences() - elif len(dir_infos) == 1 or limit: - keys = frozenset(dir_infos[0].filemap.keys()) + + if all_emoji or ignore_missing: + all_keys = unicode_data.get_emoji_sequences() + if not all_emoji or ignore_missing: + if len(dir_infos) == 1 or limit: + avail_keys = frozenset(dir_infos[0].filemap.keys()) + else: + avail_keys = _merge_keys([info.filemap for info in dir_infos]) + if aliases: + avail_keys = _add_aliases(avail_keys, aliases) + + if not ignore_missing: + keys = all_keys if all_emoji else avail_keys else: - keys = _merge_keys([info.filemap for info in dir_infos]) + keys = set(all_keys) & avail_keys + if emoji_sort: sorted_keys = unicode_data.get_sorted_emoji_sequences(keys) else: @@ -342,6 +367,13 @@ def _get_keys(dir_infos, limit, all_emoji, emoji_sort): return sorted_keys +def _generate_info_text(args): + lines = ['%s: %r' % t for t in sorted(args.__dict__.iteritems())] + lines.append('generated by %s on %s' % ( + path.basename(__file__), datetime.datetime.now())) + return '\n '.join(lines) + + def _parse_annotation_file(afile): """Parse file and return a map from sequences to one of 'ok', 'warning', or 'error'. @@ -409,6 +441,10 @@ TEMPLATE = """ + +

{{title}}

{{content}} @@ -431,8 +467,8 @@ STYLE = """ """ def write_html_page( - filename, page_title, font, dir_infos, keys, annotations, standalone, - colors): + filename, page_title, font, dir_infos, keys, aliases, annotations, + standalone, colors, info): out_dir = path.dirname(filename) if font: @@ -457,13 +493,13 @@ def write_html_page( font = path.normpath(path.join(common_prefix, rel_font)) content = _generate_content( - path.dirname(filename), font, dir_infos, keys, annotations, standalone, - colors) + path.dirname(filename), font, dir_infos, keys, aliases, annotations, + standalone, colors) N_STYLE = STYLE if font: FONT_FACE_STYLE = """ """ % font N_STYLE += ' span.efont { font-family: "Emoji"; font-size:32pt }\n' else: @@ -478,7 +514,7 @@ def write_html_page( text = _instantiate_template( TEMPLATE, { 'title': page_title, 'fontFaceStyle': FONT_FACE_STYLE, - 'style': N_STYLE, 'content': content}) + 'style': N_STYLE, 'content': content, 'info':info}) with codecs.open(filename, 'w', 'utf-8') as f: f.write(text) @@ -526,6 +562,9 @@ def main(): '--all_emoji', help='use all emoji sequences', action='store_true') parser.add_argument( '--emoji_sort', help='use emoji sort order', action='store_true') + parser.add_argument( + '--ignore_missing', help='do not include missing emoji', + action='store_true') args = parser.parse_args() file_parts = path.splitext(args.outfile) @@ -548,12 +587,16 @@ def main(): args.image_dirs, args.exts, args.prefixes, args.titles, args.default_ext, args.default_prefix) + aliases = add_aliases.read_default_emoji_aliases() keys = _get_keys( - dir_infos, args.limit, args.all_emoji, args.emoji_sort) + dir_infos, aliases, args.limit, args.all_emoji, args.emoji_sort, + args.ignore_missing) + + info = _generate_info_text(args) write_html_page( - args.outfile, args.page_title, args.font, dir_infos, keys, - annotations, args.standalone, args.colors) + args.outfile, args.page_title, args.font, dir_infos, keys, aliases, + annotations, args.standalone, 
args.colors, info) if __name__ == "__main__": From d4da27eef8e93286388a7130036db51f41a395c0 Mon Sep 17 00:00:00 2001 From: Doug Felt Date: Tue, 7 Mar 2017 16:59:50 -0800 Subject: [PATCH 2/3] Canonicalize aliases. Forgot to canonicalize the aliases, so most of them wouldn't get used because the keys against which they're compared are canonical. Fixed that. Also report unused aliases. --- generate_emoji_html.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/generate_emoji_html.py b/generate_emoji_html.py index e40d666ff..4f3bad735 100755 --- a/generate_emoji_html.py +++ b/generate_emoji_html.py @@ -329,6 +329,14 @@ def _get_dir_infos( def _add_aliases(keys, aliases): + for k, v in sorted(aliases.iteritems()): + k_str = unicode_data.seq_to_string(k) + v_str = unicode_data.seq_to_string(v) + if k in keys: + msg = '' if v in keys else ' but it\'s not present' + print 'have alias image %s, should use %s%s' % (k_str, v_str, msg) + elif v not in keys: + print 'can\'t use alias %s, no image matching %s' % (k_str, v_str) to_add = {k for k, v in aliases.iteritems() if k not in keys and v in keys} return keys | to_add @@ -519,6 +527,13 @@ def write_html_page( f.write(text) +def _get_canonical_aliases(): + def canon(seq): + return unicode_data.get_canonical_emoji_sequence(seq) or seq + aliases = add_aliases.read_default_emoji_aliases() + return {canon(k): canon(v) for k, v in aliases.iteritems()} + + def main(): parser = argparse.ArgumentParser() parser.add_argument( @@ -587,7 +602,7 @@ def main(): args.image_dirs, args.exts, args.prefixes, args.titles, args.default_ext, args.default_prefix) - aliases = add_aliases.read_default_emoji_aliases() + aliases = _get_canonical_aliases() keys = _get_keys( dir_infos, aliases, args.limit, args.all_emoji, args.emoji_sort, args.ignore_missing) From 0d36d125aaf5f9e43ff291cedafec806d1c0ddd7 Mon Sep 17 00:00:00 2001 From: Doug Felt Date: Tue, 7 Mar 2017 17:54:41 -0800 Subject: [PATCH 3/3] Fix display of 
'parts' of sequences in the sequence column. When relying on aliasing, a number of single character emoji can be replaced by sequence emoji (in particular, gendered variants). If these images aren't present, the current code that displays a sequence 'visually' fails to find an image for one of the parts, so bails and there's no visual presentation for those sequences. To fix this, we first canonicalize the part we're looking for, and try to find an image for that, and if we fail we check for an alias and try to find an image for that. --- generate_emoji_html.py | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/generate_emoji_html.py b/generate_emoji_html.py index 4f3bad735..c6712c73b 100755 --- a/generate_emoji_html.py +++ b/generate_emoji_html.py @@ -87,16 +87,30 @@ def _generate_row_cells(key, font, aliases, dir_infos, basepaths, colors): return row_cells -def _get_desc(key_tuple, dir_infos, basepaths): +def _get_desc(key_tuple, aliases, dir_infos, basepaths): CELL_PREFIX = '' def _get_filepath(cp): + def get_key_filepath(key): + for i in range(len(dir_infos)): + info = dir_infos[i] + if key in info.filemap: + basepath = basepaths[i] + return path.join(basepath, info.filemap[key]) + return None + cp_key = tuple([cp]) - for i in range(len(dir_infos)): - info = dir_infos[i] - if cp_key in info.filemap: - basepath = basepaths[i] - return path.join(basepath, info.filemap[cp_key]) - return None + cp_key = unicode_data.get_canonical_emoji_sequence(cp_key) or cp_key + fp = get_key_filepath(cp_key) + if not fp: + if cp_key in aliases: + fp = get_key_filepath(aliases[cp_key]) + else: + print 'no alias for %s' % unicode_data.seq_to_string(cp_key) + if not fp: + print 'no part for %s in %s' % ( + unicode_data.seq_to_string(cp_key), + unicode_data.seq_to_string(key_tuple)) + return fp def _get_part(cp): if cp == 0x200d: # zwj, common so replace with '+' @@ -236,7 +250,7 @@ def _generate_content( for key in keys: row = 
_generate_row_cells(key, font, aliases, dir_infos, basepaths, colors) - row.append(_get_desc(key, dir_infos, basepaths)) + row.append(_get_desc(key, aliases, dir_infos, basepaths)) row.append(_get_name(key, annotations)) lines.append(''.join(row))