From 0d603f5af52f14d948adbfcebd67a87d06c9393f Mon Sep 17 00:00:00 2001
From: Doug Felt <dougfelt@google.com>
Date: Tue, 23 May 2017 11:29:15 -0700
Subject: [PATCH] Update svg_cleaner.

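Some of the SVGs we're getting (exported from Adobe Illustrator) aren't
clean, so tweak the cleaner to handle this: drop the Adobe namespace
declarations on 'svg', 'i:extraneous' attributes on 'g', and 'i:pgf' and
'foreignObject' elements; unwrap single-child 'switch' elements; keep
linefeeds in element text (for 'style'); and write width/height (on
'svg') or id/class (elsewhere) before the other attributes.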
---
 svg_cleaner.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/svg_cleaner.py b/svg_cleaner.py
index bf0057c93..f61ccf7bc 100755
--- a/svg_cleaner.py
+++ b/svg_cleaner.py
@@ -15,6 +15,16 @@
 #
 # Google Author(s): Doug Felt
 
+"""Clean SVG.
+
+svgo could do this, but we're fussy.  Also, emacs doesn't understand
+that the 'type' of a 'style' element defaults to 'text/css', and svgo
+strips that attribute out by default.
+
+The files we get are exported from Adobe Illustrator (AI), which embeds lots
+of extra data so that AI can reimport the svg; we don't need that data."""
+
+
 import argparse
 import codecs
 import logging
@@ -125,13 +135,19 @@ class SvgCleaner(object):
       for k, v in node.attrs.items():
         if node.name == 'svg' and k in [
             'x', 'y', 'id', 'version', 'viewBox', 'width', 'height',
-            'enable-background', 'xml:space']:
+            'enable-background', 'xml:space', 'xmlns:graph', 'xmlns:i',
+            'xmlns:x']:
           if k == 'viewBox':
             viewBox = v
           elif k == 'width':
             width = v
           elif k == 'height':
             height = v
+          elif k.startswith('xmlns:') and 'ns.adobe.com' not in v:
+            # keep if not an adobe namespace
+            logging.debug('keep "%s" = "%s"' % (k, v))
+            nattrs[k] = v
+          logging.debug('removing %s=%s' % (k, v))
           continue
         v = re.sub('\s+', ' ', v)
         nattrs[k] = v
@@ -145,7 +161,6 @@ class SvgCleaner(object):
         nattrs['height'] = height
       node.attrs = nattrs
 
-
       # scan contents. remove any empty text nodes, or empty 'g' element nodes.
       # if a 'g' element has no attrs and only one subnode, replace it with the
       # subnode.
@@ -157,8 +172,19 @@ class SvgCleaner(object):
         elif n.name == 'g':
           if not n.contents:
             continue
+          if 'i:extraneous' in n.attrs:
+            del n.attrs['i:extraneous']
           if not n.attrs and len(n.contents) == 1:
             n = n.contents[0]
+        elif n.name == 'i:pgf' or n.name == 'foreignObject':
+          continue
+        elif n.name =='switch' and len(n.contents) == 1:
+          n = n.contents[0]
+        elif n.name == 'style':
+          # some emacsen don't default 'style' properly, so leave this in.
+          if False and n.attrs.get('type') == 'text/css':
+            del n.attrs['type']
+
         node.contents[wpos] = n
         wpos += 1
       if wpos < len(node.contents):
@@ -168,7 +194,9 @@ class SvgCleaner(object):
       text = node.text.strip()
       # common case is text is empty (line endings between elements)
       if text:
-        text = re.sub(r'\s+', ' ', text)
+        # main goal here is to leave linefeeds in for style elements
+        text = re.sub(r'[ \t]*\n+[ \t]*', '\n', text)
+        text = re.sub(r'[ \t]+', ' ', text)
       node.text = text
 
     def clean(self, node):
@@ -197,7 +225,20 @@ class SvgCleaner(object):
         margin = '  ' * indent
         line = [margin]
         line.append('<%s' % node.name)
-        for k in sorted(node.attrs.keys()):
+        # custom sort attributes of svg, yes this is a hack
+        if node.name == 'svg':
+          def svgsort(k):
+            if k == 'width': return (0, None)
+            elif k == 'height': return (1, None)
+            else: return (2, k)
+          ks = sorted(node.attrs.keys(), key=svgsort)
+        else:
+          def defsort(k):
+            if k == 'id': return (0, None)
+            elif k == 'class': return (1, None)
+            else: return (2, k)
+          ks = sorted(node.attrs.keys(), key=defsort)
+        for k in ks:
           v = node.attrs[k]
           line.append(' %s=%s' % (k, saxutils.quoteattr(v)))
         if node.contents:
@@ -250,6 +291,7 @@ def clean_svg_files(in_dir, out_dir, match_pat=None, clean=False):
     if regex and not regex.match(file_name):
       continue
     in_path = os.path.join(in_dir, file_name)
+    logging.debug('read: %s', in_path)
     with open(in_path) as in_fp:
       result = cleaner.clean_svg(in_fp.read())
     out_path = os.path.join(out_dir, file_name)