Commit b92ed7cf authored by Ezio Melotti

#6509: fix re.sub to work properly when the pattern, the string, and the...

#6509: fix re.sub to work properly when the pattern, the string, and the replacement were all bytes. Patch by Antoine Pitrou.
parent 64fb18e1
...@@ -786,12 +786,18 @@ def parse_template(source, pattern): ...@@ -786,12 +786,18 @@ def parse_template(source, pattern):
groups = [] groups = []
groupsappend = groups.append groupsappend = groups.append
literals = [None] * len(p) literals = [None] * len(p)
if isinstance(source, str):
encode = lambda x: x
else:
# The tokenizer implicitly decodes bytes objects as latin-1, we must
# therefore re-encode the final representation.
encode = lambda x: x.encode('latin1')
for c, s in p: for c, s in p:
if c is MARK: if c is MARK:
groupsappend((i, s)) groupsappend((i, s))
# literal[i] is already None # literal[i] is already None
else: else:
literals[i] = s literals[i] = encode(s)
i = i + 1 i = i + 1
return groups, literals return groups, literals
......
...@@ -717,6 +717,24 @@ class ReTests(unittest.TestCase): ...@@ -717,6 +717,24 @@ class ReTests(unittest.TestCase):
self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE) self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
self.assertRaises(ValueError, re.compile, '(?au)\w') self.assertRaises(ValueError, re.compile, '(?au)\w')
def test_bug_6509(self):
    """Regression test for issue #6509 (re.sub with str and with bytes).

    Exercises ``pattern.sub`` with a template replacement containing a
    group reference and with a callable replacement, once where the
    pattern, replacement and subject are all str and once where they are
    all bytes.
    """
    # Replacement strings of both types must parse properly.
    # Patterns are raw literals so that ``\w`` reaches the regex engine
    # without being treated as an (invalid) string escape sequence.
    # all strings
    pat = re.compile(r'a(\w)')
    self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
    pat = re.compile('a(.)')
    self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
    pat = re.compile('..')
    self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')

    # all bytes
    pat = re.compile(br'a(\w)')
    self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
    pat = re.compile(b'a(.)')
    self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
    pat = re.compile(b'..')
    self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')

def test_dealloc(self): def test_dealloc(self):
# issue 3299: check for segfault in debug build # issue 3299: check for segfault in debug build
import _sre import _sre
......
...@@ -268,6 +268,9 @@ C-API ...@@ -268,6 +268,9 @@ C-API
Library Library
------- -------
- Issue #6509: fix re.sub to work properly when the pattern, the string, and
the replacement were all bytes. Patch by Antoine Pitrou.
- The sqlite3 module was updated to pysqlite 2.6.0. This fixes several obscure - The sqlite3 module was updated to pysqlite 2.6.0. This fixes several obscure
bugs and allows loading SQLite extensions from shared libraries. bugs and allows loading SQLite extensions from shared libraries.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment