Commit 16bedb73 authored by Kazuhiko Shiozaki

patches/ppml: speed up convert().

* For bulk substitution, a single re.sub() pass is much faster than joining
  the results of a per-byte dict lookup.
parent bab79d26
...@@ -38,11 +38,14 @@ reprs2={} ...@@ -38,11 +38,14 @@ reprs2={}
reprs2['<'] = "\\074" reprs2['<'] = "\\074"
reprs2['>'] = "\\076" reprs2['>'] = "\\076"
reprs2['&'] = "\\046" reprs2['&'] = "\\046"
### patch begin: create a conversion table for [\x00-\xff]. this table is
### used for a binary string. reprs_re = re.compile('|'.join(re.escape(k) for k in reprs.keys()))
reprs3 = reprs.copy() def sub_reprs(m):
for c in map(chr,range(32, 256)): reprs3[c] = reprs.get(c, repr(c)[1:-1]) return reprs[m.group(0)]
### patch end
# One alternation pattern that matches any single key of reprs2.
reprs2_re = re.compile('|'.join(re.escape(key) for key in reprs2))


def sub_reprs2(m):
    """Return the reprs2 replacement for the text matched by *m*.

    Used as the replacement callback for ``reprs2_re.sub``.
    """
    matched = m.group(0)
    return reprs2[matched]
def convert(S): def convert(S):
new = '' new = ''
...@@ -55,15 +58,15 @@ def convert(S): ...@@ -55,15 +58,15 @@ def convert(S):
except UnicodeDecodeError: except UnicodeDecodeError:
return 'base64', base64.encodestring(S)[:-1] return 'base64', base64.encodestring(S)[:-1]
else: else:
new = ''.join([reprs.get(x, x) for x in S]) new = reprs_re.sub(sub_reprs, S)
### patch end ### patch end
if len(new) > (1.4*len(S)): if len(new) > (1.4*len(S)):
return 'base64', base64.encodestring(S)[:-1] return 'base64', base64.encodestring(S)[:-1]
elif '>' in new or '<' in S or '&' in S: elif '>' in new or '<' in S or '&' in S:
if not ']]>' in S: if not ']]>' in S:
return 'cdata', '<![CDATA[\n\n%s\n\n]]>' % new return 'cdata', '<![CDATA[\n\n' + new + '\n\n]]>'
else: else:
return 'repr', ''.join([reprs2.get(x, x) for x in new]) return 'repr', reprs2_re.sub(sub_reprs2, new)
return 'repr', new return 'repr', new
ppml.convert = convert ppml.convert = convert
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment