From 1f2cde6c5eb91292daebc6c6534e2cf5153ece83 Mon Sep 17 00:00:00 2001 From: Doug Felt Date: Tue, 21 Feb 2017 11:39:26 -0800 Subject: [PATCH] Map to canonical keys when we read in file name mappings. It's a bit cleaner to canonicalize the keys when we read the file names. This means we can just use the one canonical key, instead of using the original to get the file and the canonical one to render text and show the decoding. --- generate_emoji_html.py | 59 ++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/generate_emoji_html.py b/generate_emoji_html.py index 5d3a80b99..594382882 100755 --- a/generate_emoji_html.py +++ b/generate_emoji_html.py @@ -53,30 +53,31 @@ def _merge_keys(dicts): keys.extend(d.keys()) return frozenset(keys) -def _generate_row_cells(key, canonical_key, font, dir_infos, basepaths, colors): + +def _generate_row_cells(key, font, dir_infos, basepaths, colors): CELL_PREFIX = '' indices = range(len(basepaths)) - def _cell(key, info, basepath): + def _cell(info, basepath): if key in info.filemap: - return '' % path.join( - basepath, info.filemap[key]) + return '' % path.join(basepath, info.filemap[key]) return '-missing-' - def _text_cell(key, text_dir): - text = ''.join(unichr(cp) for cp in canonical_key) + + def _text_cell(text_dir): + text = ''.join(unichr(cp) for cp in key) return '%s' % (text_dir, text) if font: row_cells = [ - CELL_PREFIX + _text_cell(key, text_dir) + CELL_PREFIX + _text_cell(text_dir) for text_dir in ('ltr', 'rtl')] else: row_cells = [] row_cells.extend( - [CELL_PREFIX + _cell(key, dir_infos[i], basepaths[i]) + [CELL_PREFIX + _cell(dir_infos[i], basepaths[i]) for i in indices]) if len(colors) > 1: ix = indices[-1] - extension = CELL_PREFIX + _cell(key, dir_infos[ix], basepaths[ix]) + extension = CELL_PREFIX + _cell(dir_infos[ix], basepaths[ix]) row_cells.extend([extension] * (len(colors) - 1)) return row_cells @@ -230,25 +231,16 @@ def _generate_content( lines.append(''.join(header_row)) for key in sorted(all_keys): - row = [] - canonical_key = unicode_data.get_canonical_emoji_sequence(key) - if not canonical_key: - canonical_key = key + row = _generate_row_cells(key, font, dir_infos, basepaths, colors) + row.append(_get_desc(key, dir_infos, basepaths)) + row.append(_get_name(key, annotate)) + lines.append(''.join(row)) - row.extend( - _generate_row_cells( - key, canonical_key, font, dir_infos, basepaths, colors)) - row.append(_get_desc(canonical_key, dir_infos, basepaths)) - row.append(_get_name(canonical_key, annotate)) - try: - lines.append(''.join(row)) - except: - raise Exception('couldn\'t decode %s' % row) return '\n '.join(lines) + '\n' def _get_image_data(image_dir, ext, prefix): - """Return a map from a tuple of cp sequences to a filename. + """Return a map from a canonical tuple of cp sequences to a filename. This filters by file extension, and expects the rest of the files to match the prefix followed by a sequence of hex codepoints separated @@ -267,19 +259,24 @@ def _get_image_data(image_dir, ext, prefix): fails.append('"%s" did not match: "%s"' % (expect_re.pattern, filename)) continue seq = m.group(1) + this_failed = False try: cps = tuple(int(s, 16) for s in seq.split('_')) + for cp in cps: + if (cp > 0x10ffff): + fails.append('cp out of range: ' + filename) + this_failed = True + break + if this_failed: + continue + canonical_cps = unicode_data.get_canonical_emoji_sequence(cps) + if canonical_cps: + # if it is unrecognized, just leave it alone, else replace with + # canonical sequence. + cps = canonical_cps except: fails.append('bad cp sequence: ' + filename) continue - this_failed = False - for cp in cps: - if (cp > 0x10ffff): - fails.append('cp out of range: ' + filename) - this_failed = True - break - if this_failed: - continue if cps in result: fails.append('duplicate sequence: %s and %s' (result[cps], filename)) continue