Commit c1b59d45 authored by Serhiy Storchaka

Issue #16688: Fix backreferences making case-insensitive regexes fail on non-ASCII strings.

Patch by Matthew Barnett.
parent 2d8298dc
...@@ -968,6 +968,11 @@ class ReTests(unittest.TestCase): ...@@ -968,6 +968,11 @@ class ReTests(unittest.TestCase):
self.assertEqual(r, s) self.assertEqual(r, s)
self.assertEqual(n, size + 1) self.assertEqual(n, size + 1)
def test_bug_16688(self):
# Issue 16688: Backreferences make case-insensitive regex fail on
# non-ASCII strings.
# The subject string contains U+0100, a character outside Latin-1; the
# case-insensitive backreference \1 must still match the second "a",
# so findall() returns the single captured group.
self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
# A bounded repeat over the same kind of string must consume both
# U+0100 characters, giving a match span of (0, 2).
self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
def run_re_tests(): def run_re_tests():
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
......
...@@ -70,6 +70,7 @@ Anton Barkovsky ...@@ -70,6 +70,7 @@ Anton Barkovsky
Nick Barnes Nick Barnes
Quentin Barnes Quentin Barnes
David Barnett David Barnett
Matthew Barnett
Richard Barran Richard Barran
Cesar Eduardo Barros Cesar Eduardo Barros
Des Barry Des Barry
......
...@@ -124,6 +124,9 @@ Core and Builtins ...@@ -124,6 +124,9 @@ Core and Builtins
Library Library
------- -------
- Issue #16688: Fix backreferences did make case-insensitive regex fail on
non-ASCII strings. Patch by Matthew Barnett.
- Issue #16485: Fix file descriptor not being closed if file header patching - Issue #16485: Fix file descriptor not being closed if file header patching
fails on closing of aifc file. fails on closing of aifc file.
......
...@@ -492,7 +492,7 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount) ...@@ -492,7 +492,7 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
Py_ssize_t i; Py_ssize_t i;
/* adjust end */ /* adjust end */
if (maxcount < end - ptr && maxcount != 65535) if (maxcount < (end - ptr) / state->charsize && maxcount != 65535)
end = ptr + maxcount*state->charsize; end = ptr + maxcount*state->charsize;
switch (pattern[0]) { switch (pattern[0]) {
...@@ -583,7 +583,7 @@ SRE_INFO(SRE_STATE* state, SRE_CODE* pattern) ...@@ -583,7 +583,7 @@ SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
Py_ssize_t i; Py_ssize_t i;
/* check minimal length */ /* check minimal length */
if (pattern[3] && (end - ptr) < pattern[3]) if (pattern[3] && (end - ptr)/state->charsize < pattern[3])
return 0; return 0;
/* check known prefix */ /* check known prefix */
...@@ -801,7 +801,7 @@ entrance: ...@@ -801,7 +801,7 @@ entrance:
/* <INFO> <1=skip> <2=flags> <3=min> ... */ /* <INFO> <1=skip> <2=flags> <3=min> ... */
if (ctx->pattern[3] && (end - ctx->ptr)/state->charsize < ctx->pattern[3]) { if (ctx->pattern[3] && (end - ctx->ptr)/state->charsize < ctx->pattern[3]) {
TRACE(("reject (got %d chars, need %d)\n", TRACE(("reject (got %d chars, need %d)\n",
(end - ctx->ptr), ctx->pattern[3])); (end - ctx->ptr)/state->charsize, ctx->pattern[3]));
RETURN_FAILURE; RETURN_FAILURE;
} }
ctx->pattern += ctx->pattern[1] + 1; ctx->pattern += ctx->pattern[1] + 1;
...@@ -1329,9 +1329,10 @@ entrance: ...@@ -1329,9 +1329,10 @@ entrance:
RETURN_FAILURE; RETURN_FAILURE;
while (p < e) { while (p < e) {
if (ctx->ptr >= end || if (ctx->ptr >= end ||
state->lower(SRE_CHARGET(state, ctx->ptr, 0)) != state->lower(*p)) state->lower(SRE_CHARGET(state, ctx->ptr, 0)) !=
state->lower(SRE_CHARGET(state, p, 0)))
RETURN_FAILURE; RETURN_FAILURE;
p++; p += state->charsize;
ctx->ptr += state->charsize; ctx->ptr += state->charsize;
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment