Commit fe6cdd14 authored by Ezio Melotti's avatar Ezio Melotti

Merged revisions 78729 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r78729 | ezio.melotti | 2010-03-06 17:24:08 +0200 (Sat, 06 Mar 2010) | 1 line

  #6509: fix re.sub to work properly when the pattern, the string, and the replacement were all bytes. Patch by Antoine Pitrou.
........
parent f6c2191d
......@@ -786,12 +786,18 @@ def parse_template(source, pattern):
groups = []
groupsappend = groups.append
literals = [None] * len(p)
if isinstance(source, str):
encode = lambda x: x
else:
# The tokenizer implicitly decodes bytes objects as latin-1, we must
# therefore re-encode the final representation.
encode = lambda x: x.encode('latin1')
for c, s in p:
if c is MARK:
groupsappend((i, s))
# literal[i] is already None
else:
literals[i] = s
literals[i] = encode(s)
i = i + 1
return groups, literals
......
......@@ -696,6 +696,24 @@ class ReTests(unittest.TestCase):
self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
self.assertRaises(ValueError, re.compile, '(?au)\w')
def test_bug_6509(self):
# Replacement strings of both types must parse properly.
# all strings
pat = re.compile('a(\w)')
self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
pat = re.compile('a(.)')
self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
pat = re.compile('..')
self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
# all bytes
pat = re.compile(b'a(\w)')
self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
pat = re.compile(b'a(.)')
self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
pat = re.compile(b'..')
self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
def test_dealloc(self):
# issue 3299: check for segfault in debug build
import _sre
......
......@@ -100,6 +100,9 @@ Core and Builtins
Library
-------
- Issue #6509: fix re.sub to work properly when the pattern, the string, and
the replacement were all bytes. Patch by Antoine Pitrou.
- Issue #1054943: Fix unicodedata.normalize('NFC', text) for the Public Review
Issue #29
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment