Commit e5162bd9 authored by Serhiy Storchaka

Issue #19327: Fixed the handling of regular expressions with very large character sets.

parents 775d111a f2e07046
...@@ -339,7 +339,7 @@ def _optimize_unicode(charset, fixup): ...@@ -339,7 +339,7 @@ def _optimize_unicode(charset, fixup):
else: else:
code = 'I' code = 'I'
# Convert block indices to byte array of 256 bytes # Convert block indices to byte array of 256 bytes
mapping = array.array('b', mapping).tobytes() mapping = array.array('B', mapping).tobytes()
# Convert byte array to word array # Convert byte array to word array
mapping = array.array(code, mapping) mapping = array.array(code, mapping)
assert mapping.itemsize == _sre.CODESIZE assert mapping.itemsize == _sre.CODESIZE
......
...@@ -482,6 +482,9 @@ class ReTests(unittest.TestCase): ...@@ -482,6 +482,9 @@ class ReTests(unittest.TestCase):
"\u2222").group(1), "\u2222") "\u2222").group(1), "\u2222")
self.assertEqual(re.match("([\u2222\u2223])", self.assertEqual(re.match("([\u2222\u2223])",
"\u2222", re.UNICODE).group(1), "\u2222") "\u2222", re.UNICODE).group(1), "\u2222")
r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255)))
self.assertEqual(re.match(r,
"\uff01", re.UNICODE).group(), "\uff01")
def test_big_codesize(self): def test_big_codesize(self):
# Issue #1160 # Issue #1160
......
...@@ -19,6 +19,8 @@ Core and Builtins ...@@ -19,6 +19,8 @@ Core and Builtins
Library Library
------- -------
- Issue #19327: Fixed the working of regular expressions with too big charset.
- Issue #17400: New 'is_global' attribute for ipaddress to tell if an address - Issue #17400: New 'is_global' attribute for ipaddress to tell if an address
is allocated by IANA for global or private networks. is allocated by IANA for global or private networks.
......
...@@ -447,7 +447,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch) ...@@ -447,7 +447,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
count = *(set++); count = *(set++);
if (sizeof(SRE_CODE) == 2) { if (sizeof(SRE_CODE) == 2) {
block = ((char*)set)[ch >> 8]; block = ((unsigned char*)set)[ch >> 8];
set += 128; set += 128;
if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15))) if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
return ok; return ok;
...@@ -457,7 +457,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch) ...@@ -457,7 +457,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
* warnings when c's type supports only numbers < N+1 */ * warnings when c's type supports only numbers < N+1 */
if (!(ch & ~65535)) if (!(ch & ~65535))
block = ((char*)set)[ch >> 8]; block = ((unsigned char*)set)[ch >> 8];
else else
block = -1; block = -1;
set += 64; set += 64;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment