Add check for emoji sequence coverage.

2025-07-23 15:59:35 +00:00 · 2017-01-19 10:50:40 -08:00 · 2017-01-19 10:50:40 -08:00 · dd2244a9b8
commit dd2244a9b8
parent 2a6c35a1bd
1 changed files with 41 additions and 1 deletions
--- a/check_emoji_sequences.py
+++ b/check_emoji_sequences.py
@ -26,6 +26,9 @@ import sys
 from nototools import unicode_data
 ZWJ = 0x200d
 EMOJI_VS = 0xfe0f
 def _is_regional_indicator(cp):
  return 0x1f1e6 <= cp <= 0x1f1ff
@ -128,11 +131,47 @@ def _check_skintone(sorted_seq_to_filepath):
          base_to_modifiers[cp] = set()
  for cp, modifiers in sorted(base_to_modifiers.iteritems()):
    if len(modifiers) != 5:
-      print 'emoji base %04x has %d modifiers defined (%s) in %s' % (
+      print >> sys.stderr, 'emoji base %04x has %d modifiers defined (%s) in %s' % (
          cp, len(modifiers),
          ', '.join('%04x' % cp for cp in sorted(modifiers)), fp)
 def _check_zwj_sequences(seq_to_filepath):
  """Verify that zwj sequences are valid."""
  zwj_sequence_to_type = unicode_data.get_emoji_zwj_sequences()
  # strip emoji variant selectors and add these back in
  zwj_sequence_without_vs_to_type_canonical = {}
  for seq, seq_type in zwj_sequence_to_type.iteritems():
    if EMOJI_VS in seq:
      stripped_seq = tuple(s for s in seq if s != EMOJI_VS)
      zwj_sequence_without_vs_to_type_canonical[stripped_seq] = (seq_type, seq)
  zwj_seq_to_filepath = {
      seq: fp for seq, fp in seq_to_filepath.iteritems()
      if ZWJ in seq}
  for seq, fp in zwj_seq_to_filepath.iteritems():
    if seq not in zwj_sequence_to_type:
      if seq not in zwj_sequence_without_vs_to_type_canonical:
        print >> sys.stderr, 'zwj sequence not defined: %s' % fp
      else:
        _, can = zwj_sequence_without_vs_to_type_canonical[seq]
        print >> sys.stderr, 'canonical sequence %s contains vs: %s' % (
            _seq_string(can), fp)
  # check that all zwj sequences are covered
  for seq in zwj_seq_to_filepath:
    if seq in zwj_sequence_to_type:
      del zwj_sequence_to_type[seq]
    elif seq in zwj_sequence_without_vs_to_type_canonical:
      canon_seq = zwj_sequence_without_vs_to_type_canonical[seq][1]
      del zwj_sequence_to_type[canon_seq]
  if zwj_sequence_to_type:
    print >> sys.stderr, 'missing %d zwj sequences' % len(zwj_sequence_to_type)
    for seq, seq_type in sorted(zwj_sequence_to_type.items()):
      print >> sys.stderr, '  %s: %s' % (_seq_string(seq), seq_type)
 def check_sequence_to_filepath(seq_to_filepath):
  sorted_seq_to_filepath = collections.OrderedDict(
      sorted(seq_to_filepath.items()))
@ -140,6 +179,7 @@ def check_sequence_to_filepath(seq_to_filepath):
  _check_zwj(sorted_seq_to_filepath)
  _check_flags(sorted_seq_to_filepath)
  _check_skintone(sorted_seq_to_filepath)
  _check_zwj_sequences(sorted_seq_to_filepath)
 def create_sequence_to_filepath(name_to_dirpath, prefix, suffix):