Issue #19327: Fixed regular expressions with very large character sets.

22fb0dec · Serhiy Storchaka · 2147857d · 22fb0dec · 22fb0dec · 22fb0dec
Commit 22fb0dec authored Oct 24, 2013 by Serhiy Storchaka
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 1 deletion

Lib/sre_compile.py Lib/sre_compile.py +1 -1

Lib/test/test_re.py Lib/test/test_re.py +2 -0

Misc/NEWS Misc/NEWS +2 -0

No files found.
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -343,7 +343,7 @@ def _optimize_unicode(charset, fixup):
    else:
        code = 'I'
    # Convert block indices to byte array of 256 bytes
-    mapping = array.array('b', mapping).tostring()
+    mapping = array.array('B', mapping).tostring()
    # Convert byte array to word array
    mapping = array.array(code, mapping)
    assert mapping.itemsize == _sre.CODESIZE

--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -427,6 +427,8 @@ class ReTests(unittest.TestCase):
                                  u"\u2222").group(1), u"\u2222")
        self.assertEqual(re.match(u"([\u2222\u2223])",
                                  u"\u2222", re.UNICODE).group(1), u"\u2222")
+        r = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255)))
+        self.assertEqual(re.match(r, u"\uff01", re.UNICODE).group(), u"\uff01")
    def test_big_codesize(self):
        # Issue #1160

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -40,6 +40,8 @@ Core and Builtins
 Library
 -------
+- Issue #19327: Fixed regular expressions with very large character sets.
 - Issue #19350: Increasing the test coverage of macurl2path. Patch by Colin
  Williams.