From 0d0db39af03db246ea66bcbd23d6ee7ac86fb92e Mon Sep 17 00:00:00 2001
From: Doug Felt <dougfelt@google.com>
Date: Tue, 7 Mar 2017 14:28:21 -0800
Subject: [PATCH 1/3] Update emoji html page generation.

- Support --ignore_missing flag to skip missing data on output.
  When --all_emoji is set, this skips sequences for which we have
  no image files.  When --all_emoji is not set, this skips sequences
  for which we have image files but which are not in the canonical
  sequence list (e.g. older sequences for which we included skin
  tone variants but which later versions of Unicode decided should
  not have them).
- Use alias information to add alias sequences when not using
  --all_emoji and we have an image for the target sequence.
- Use alias information to mark missing images with '-alias-' when
  we expect an alias (note: whenever an alias is defined for the
  sequence, not only when an image for its target is actually present).
- Embed tool name, date, and arguments in a comment in the generated
  html.
---
 add_aliases.py         |  2 +-
 generate_emoji_html.py | 81 ++++++++++++++++++++++++++++++++----------
 2 files changed, 63 insertions(+), 20 deletions(-)
diff --git a/add_aliases.py b/add_aliases.py
index fd523b278..4f00acbf8 100755
--- a/add_aliases.py
+++ b/add_aliases.py
@@ -35,7 +35,7 @@ def seq_to_str(seq):
   return '_'.join('%04x' % cp for cp in seq)
 
 
-def read_emoji_aliases():
+def read_default_emoji_aliases():
   alias_path = path.join(DATA_ROOT, 'emoji_aliases.txt')
   return read_emoji_aliases(alias_path)
 
diff --git a/generate_emoji_html.py b/generate_emoji_html.py
index 056593738..e40d666ff 100755
--- a/generate_emoji_html.py
+++ b/generate_emoji_html.py
@@ -23,6 +23,7 @@ builds an html page presenting the images along with their composition
 import argparse
 import codecs
 import collections
+import datetime
 import glob
 import os
 from os import path
@@ -33,6 +34,8 @@ import sys
 from nototools import tool_utils
 from nototools import unicode_data
 
+import add_aliases
+
 _default_dir = 'png/128'
 _default_ext = 'png'
 _default_prefix = 'emoji_u'
@@ -54,12 +57,14 @@ def _merge_keys(dicts):
   return frozenset(keys)
 
 
-def _generate_row_cells(key, font, dir_infos, basepaths, colors):
+def _generate_row_cells(key, font, aliases, dir_infos, basepaths, colors):
   CELL_PREFIX = '<td>'
   indices = range(len(basepaths))
   def _cell(info, basepath):
     if key in info.filemap:
       return '<img src="%s">' % path.join(basepath, info.filemap[key])
+    if key in aliases:
+      return '-alias-'
     return '-missing-'
 
   def _text_cell(text_dir):
@@ -164,7 +169,7 @@ def _collect_aux_info(dir_infos, keys):
 
 
 def _generate_content(
-    basedir, font, dir_infos, keys, annotations, standalone, colors):
+    basedir, font, dir_infos, keys, aliases, annotations, standalone, colors):
   """Generate an html table for the infos.  Basedir is the parent directory of
   the content, filenames will be made relative to this if underneath it, else
   absolute. If font is not none, generate columns for the text rendered in the
@@ -230,7 +235,7 @@ def _generate_content(
   lines.append('<th>'.join(header_row))
 
   for key in keys:
-    row = _generate_row_cells(key, font, dir_infos, basepaths, colors)
+    row = _generate_row_cells(key, font, aliases, dir_infos, basepaths, colors)
     row.append(_get_desc(key, dir_infos, basepaths))
     row.append(_get_name(key, annotations))
     lines.append(''.join(row))
@@ -323,18 +328,38 @@ def _get_dir_infos(
   return infos
 
 
-def _get_keys(dir_infos, limit, all_emoji, emoji_sort):
+def _add_aliases(keys, aliases):
+  to_add = {k for k, v in aliases.iteritems() if k not in keys and v in keys}
+  return keys | to_add
+
+
+def _get_keys(dir_infos, aliases, limit, all_emoji, emoji_sort, ignore_missing):
   """Return a list of the key tuples to display.  If all_emoji is
-  True, returns all emoji sequences, else the sequences available
+  true, start with all emoji sequences, else the sequences available
   in dir_infos (limited to the first dir_info if limit is True).
+  If ignore_missing is true and all_emoji is false, ignore sequences
+  that are not valid (e.g. skin tone variants of wrestlers).  If
+  ignore_missing is true and all_emoji is true, ignore sequences
+  for which we have no assets (e.g. newly defined emoji).  If not using
+  all_emoji, aliases are included if we have a target for them.
   The result is in emoji order if emoji_sort is true, else in
   unicode codepoint order."""
-  if all_emoji:
-    keys = unicode_data.get_emoji_sequences()
-  elif len(dir_infos) == 1 or limit:
-    keys = frozenset(dir_infos[0].filemap.keys())
+
+  if all_emoji or ignore_missing:
+    all_keys = unicode_data.get_emoji_sequences()
+  if not all_emoji or ignore_missing:
+    if len(dir_infos) == 1 or limit:
+      avail_keys = frozenset(dir_infos[0].filemap.keys())
+    else:
+      avail_keys = _merge_keys([info.filemap for info in dir_infos])
+    if aliases:
+      avail_keys = _add_aliases(avail_keys, aliases)
+
+  if not ignore_missing:
+    keys = all_keys if all_emoji else avail_keys
   else:
-    keys = _merge_keys([info.filemap for info in dir_infos])
+    keys = set(all_keys) & avail_keys
+
   if emoji_sort:
     sorted_keys = unicode_data.get_sorted_emoji_sequences(keys)
   else:
@@ -342,6 +367,13 @@ def _get_keys(dir_infos, limit, all_emoji, emoji_sort):
   return sorted_keys
 
 
+def _generate_info_text(args):
+  lines = ['%s: %r' % t for t in sorted(args.__dict__.iteritems())]
+  lines.append('generated by %s on %s' % (
+      path.basename(__file__), datetime.datetime.now()))
+  return '\n  '.join(lines)
+
+
 def _parse_annotation_file(afile):
   """Parse file and return a map from sequences to one of 'ok', 'warning',
   or 'error'.
@@ -409,6 +441,10 @@ TEMPLATE = """<!DOCTYPE html>
     <style>{{style}}</style>
   </head>
   <body>
+  <!--
+  {{info}}
+  -->
+  <h3>{{title}}</h3>
   {{content}}
   </body>
 </html>
@@ -431,8 +467,8 @@ STYLE = """
 """
 
 def write_html_page(
-    filename, page_title, font, dir_infos, keys, annotations, standalone,
-    colors):
+    filename, page_title, font, dir_infos, keys, aliases, annotations,
+    standalone, colors, info):
 
   out_dir = path.dirname(filename)
   if font:
@@ -457,13 +493,13 @@ def write_html_page(
         font = path.normpath(path.join(common_prefix, rel_font))
 
   content = _generate_content(
-      path.dirname(filename), font, dir_infos, keys, annotations, standalone,
-      colors)
+      path.dirname(filename), font, dir_infos, keys, aliases, annotations,
+      standalone, colors)
   N_STYLE = STYLE
   if font:
     FONT_FACE_STYLE = """
     <style>@font-face {
-      font-family: "Emoji"; src: url("%s");
+      font-family: "Emoji"; src: local("Noto Color Emoji"), url("%s");
     }</style>""" % font
     N_STYLE += '      span.efont { font-family: "Emoji"; font-size:32pt }\n'
   else:
@@ -478,7 +514,7 @@ def write_html_page(
   text = _instantiate_template(
       TEMPLATE, {
           'title': page_title, 'fontFaceStyle': FONT_FACE_STYLE,
-          'style': N_STYLE, 'content': content})
+          'style': N_STYLE, 'content': content, 'info':info})
   with codecs.open(filename, 'w', 'utf-8') as f:
     f.write(text)
 
@@ -526,6 +562,9 @@ def main():
       '--all_emoji', help='use all emoji sequences', action='store_true')
   parser.add_argument(
       '--emoji_sort', help='use emoji sort order', action='store_true')
+  parser.add_argument(
+      '--ignore_missing', help='do not include missing emoji',
+      action='store_true')
 
   args = parser.parse_args()
   file_parts = path.splitext(args.outfile)
@@ -548,12 +587,16 @@ def main():
       args.image_dirs, args.exts, args.prefixes, args.titles,
       args.default_ext, args.default_prefix)
 
+  aliases = add_aliases.read_default_emoji_aliases()
   keys = _get_keys(
-      dir_infos, args.limit, args.all_emoji, args.emoji_sort)
+      dir_infos, aliases, args.limit, args.all_emoji, args.emoji_sort,
+      args.ignore_missing)
+
+  info = _generate_info_text(args)
 
   write_html_page(
-      args.outfile, args.page_title, args.font, dir_infos, keys,
-      annotations, args.standalone, args.colors)
+      args.outfile, args.page_title, args.font, dir_infos, keys, aliases,
+      annotations, args.standalone, args.colors, info)
 
 
 if __name__ == "__main__":

From d4da27eef8e93286388a7130036db51f41a395c0 Mon Sep 17 00:00:00 2001
From: Doug Felt <dougfelt@google.com>
Date: Tue, 7 Mar 2017 16:59:50 -0800
Subject: [PATCH 2/3] Canonicalize aliases.

Forgot to canonicalize the aliases, so most of them wouldn't get used
because the keys against which they're compared are canonical.  Fixed
that.

Also report unused aliases.
---
 generate_emoji_html.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/generate_emoji_html.py b/generate_emoji_html.py
index e40d666ff..4f3bad735 100755
--- a/generate_emoji_html.py
+++ b/generate_emoji_html.py
@@ -329,6 +329,14 @@ def _get_dir_infos(
 
 
 def _add_aliases(keys, aliases):
+  for k, v in sorted(aliases.iteritems()):
+    k_str = unicode_data.seq_to_string(k)
+    v_str = unicode_data.seq_to_string(v)
+    if k in keys:
+      msg = '' if v in keys else ' but it\'s not present'
+      print 'have alias image %s, should use %s%s' % (k_str, v_str, msg)
+    elif v not in keys:
+      print 'can\'t use alias %s, no image matching %s' % (k_str, v_str)
   to_add = {k for k, v in aliases.iteritems() if k not in keys and v in keys}
   return keys | to_add
 
@@ -519,6 +527,13 @@ def write_html_page(
     f.write(text)
 
 
+def _get_canonical_aliases():
+  def canon(seq):
+    return unicode_data.get_canonical_emoji_sequence(seq) or seq
+  aliases = add_aliases.read_default_emoji_aliases()
+  return {canon(k): canon(v) for k, v in aliases.iteritems()}
+
+
 def main():
   parser = argparse.ArgumentParser()
   parser.add_argument(
@@ -587,7 +602,7 @@ def main():
       args.image_dirs, args.exts, args.prefixes, args.titles,
       args.default_ext, args.default_prefix)
 
-  aliases = add_aliases.read_default_emoji_aliases()
+  aliases = _get_canonical_aliases()
   keys = _get_keys(
       dir_infos, aliases, args.limit, args.all_emoji, args.emoji_sort,
       args.ignore_missing)

From 0d36d125aaf5f9e43ff291cedafec806d1c0ddd7 Mon Sep 17 00:00:00 2001
From: Doug Felt <dougfelt@google.com>
Date: Tue, 7 Mar 2017 17:54:41 -0800
Subject: [PATCH 3/3] Fix display of 'parts' of sequences in the sequence
 column.

When relying on aliasing, a number of single-character emoji can be
replaced by sequence emoji (in particular, gendered variants).  If
the images for these aren't present, the current code that displays a
sequence 'visually' fails to find an image for one of the parts, so it
bails and there's no visual presentation for those sequences.

To fix this, we first canonicalize the part we're looking for, and try
to find an image for that, and if we fail we check for an alias and
try to find an image for that.
---
 generate_emoji_html.py | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/generate_emoji_html.py b/generate_emoji_html.py
index 4f3bad735..c6712c73b 100755
--- a/generate_emoji_html.py
+++ b/generate_emoji_html.py
@@ -87,16 +87,30 @@ def _generate_row_cells(key, font, aliases, dir_infos, basepaths, colors):
   return row_cells
 
 
-def _get_desc(key_tuple, dir_infos, basepaths):
+def _get_desc(key_tuple, aliases, dir_infos, basepaths):
   CELL_PREFIX = '<td>'
   def _get_filepath(cp):
+    def get_key_filepath(key):
+      for i in range(len(dir_infos)):
+        info = dir_infos[i]
+        if key in info.filemap:
+          basepath = basepaths[i]
+          return path.join(basepath, info.filemap[key])
+      return None
+
     cp_key = tuple([cp])
-    for i in range(len(dir_infos)):
-      info = dir_infos[i]
-      if cp_key in info.filemap:
-        basepath = basepaths[i]
-        return path.join(basepath, info.filemap[cp_key])
-    return None
+    cp_key = unicode_data.get_canonical_emoji_sequence(cp_key) or cp_key
+    fp = get_key_filepath(cp_key)
+    if not fp:
+      if cp_key in aliases:
+        fp = get_key_filepath(aliases[cp_key])
+      else:
+        print 'no alias for %s' % unicode_data.seq_to_string(cp_key)
+    if not fp:
+      print 'no part for %s in %s' % (
+          unicode_data.seq_to_string(cp_key),
+          unicode_data.seq_to_string(key_tuple))
+    return fp
 
   def _get_part(cp):
     if cp == 0x200d:  # zwj, common so replace with '+'
@@ -236,7 +250,7 @@ def _generate_content(
 
   for key in keys:
     row = _generate_row_cells(key, font, aliases, dir_infos, basepaths, colors)
-    row.append(_get_desc(key, dir_infos, basepaths))
+    row.append(_get_desc(key, aliases, dir_infos, basepaths))
     row.append(_get_name(key, annotations))
     lines.append(''.join(row))