Commit 4e7be06a authored by Gustavo Niemeyer

Fixed bug #470582, using a modified version of patch #527371,

from Greg Chapman.

* Modules/_sre.c
  (lastmark_restore): New function, implementing algorithm to restore
  a state to a given lastmark. In addition to the similar algorithm used
  in a few places of SRE_MATCH, restore lastindex when restoring lastmark.
  (SRE_MATCH): Replace the inline lastmark restoring code with calls to
  lastmark_restore(). Also call it where it was missing. In SRE_OP_MARK, set lastindex
  only if i > lastmark.

* Lib/test/re_tests.py
* Lib/test/test_sre.py
  Included regression tests for the fixed bugs.

* Misc/NEWS
  Mention fixes.
parent 3c2c4334
...@@ -646,6 +646,8 @@ xyzabc ...@@ -646,6 +646,8 @@ xyzabc
(r'a[^>]*?b', 'a>b', FAIL), (r'a[^>]*?b', 'a>b', FAIL),
# bug 490573: minimizing repeat problem # bug 490573: minimizing repeat problem
(r'^a*?$', 'foo', FAIL), (r'^a*?$', 'foo', FAIL),
# bug 470582: nested groups problem
(r'^((a)c)?(ab)$', 'ab', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-ab'),
] ]
try: try:
......
...@@ -78,6 +78,11 @@ test(r"""sre.match(r'(a)|(b)', 'b').start(1)""", -1) ...@@ -78,6 +78,11 @@ test(r"""sre.match(r'(a)|(b)', 'b').start(1)""", -1)
test(r"""sre.match(r'(a)|(b)', 'b').end(1)""", -1) test(r"""sre.match(r'(a)|(b)', 'b').end(1)""", -1)
test(r"""sre.match(r'(a)|(b)', 'b').span(1)""", (-1, -1)) test(r"""sre.match(r'(a)|(b)', 'b').span(1)""", (-1, -1))
# bug described in patch 527371
test(r"""sre.match(r'(a)?a','a').lastindex""", None)
test(r"""sre.match(r'(a)(b)?b','ab').lastindex""", 1)
test(r"""sre.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup""", 'a')
if verbose: if verbose:
print 'Running tests on sre.sub' print 'Running tests on sre.sub'
......
...@@ -352,6 +352,13 @@ Extension modules ...@@ -352,6 +352,13 @@ Extension modules
to the value of the PYTHON_API_VERSION macro with which the to the value of the PYTHON_API_VERSION macro with which the
interpreter was compiled. interpreter was compiled.
- Fixed bug #470582: sre module would return a tuple (None, 'a', 'ab')
when applying the regular expression '^((a)c)?(ab)$' on 'ab'. It now
returns (None, None, 'ab'), as expected. Also fixed handling of
lastindex/lastgroup match attributes in similar cases. For example,
when running the expression r'(a)(b)?b' over 'ab', lastindex must be
1, not 2.
Library Library
------- -------
......
...@@ -353,6 +353,18 @@ mark_restore(SRE_STATE* state, int lo, int hi) ...@@ -353,6 +353,18 @@ mark_restore(SRE_STATE* state, int lo, int hi)
return 0; return 0;
} }
/* Roll the match state back to a previously saved lastmark value.
 *
 * state    -- match state whose mark bookkeeping is rewound
 * lastmark -- the lastmark value to return to
 *
 * Nothing happens unless state->lastmark has advanced past the given
 * lastmark.  Otherwise every mark slot above `lastmark` (up to and
 * including the current state->lastmark) is zeroed, state->lastmark is
 * reset, and state->lastindex is recomputed from the restored lastmark:
 * -1 when lastmark is 0, else (lastmark-1)/2+1.
 */
void lastmark_restore(SRE_STATE *state, int lastmark)
{
int stale;

/* Nothing was marked beyond the saved point: leave the state alone. */
if (state->lastmark <= lastmark)
return;

/* Number of mark slots recorded after the saved point. */
stale = state->lastmark - lastmark;
memset(&state->mark[lastmark + 1], 0, stale * sizeof(void*));

state->lastmark = lastmark;

/* Keep lastindex in step with the restored lastmark. */
if (lastmark == 0)
state->lastindex = -1;
else
state->lastindex = (lastmark - 1) / 2 + 1;
}
/* generate 8-bit version */ /* generate 8-bit version */
#define SRE_CHAR unsigned char #define SRE_CHAR unsigned char
...@@ -860,10 +872,11 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level) ...@@ -860,10 +872,11 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
/* <MARK> <gid> */ /* <MARK> <gid> */
TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0])); TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
i = pattern[0]; i = pattern[0];
if (i & 1) if (i > state->lastmark) {
state->lastindex = i/2 + 1;
if (i > state->lastmark)
state->lastmark = i; state->lastmark = i;
if (i & 1)
state->lastindex = i/2 + 1;
}
state->mark[i] = ptr; state->mark[i] = ptr;
pattern++; pattern++;
break; break;
...@@ -920,13 +933,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level) ...@@ -920,13 +933,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
i = SRE_MATCH(state, pattern + 1, level + 1); i = SRE_MATCH(state, pattern + 1, level + 1);
if (i) if (i)
return i; return i;
if (state->lastmark > lastmark) { lastmark_restore(state, lastmark);
memset(
state->mark + lastmark + 1, 0,
(state->lastmark - lastmark) * sizeof(void*)
);
state->lastmark = lastmark;
}
} }
return 0; return 0;
...@@ -997,13 +1004,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level) ...@@ -997,13 +1004,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
return i; return i;
ptr--; ptr--;
count--; count--;
if (state->lastmark > lastmark) { lastmark_restore(state, lastmark);
memset(
state->mark + lastmark + 1, 0,
(state->lastmark - lastmark) * sizeof(void*)
);
state->lastmark = lastmark;
}
} }
} }
return 0; return 0;
...@@ -1071,9 +1072,9 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level) ...@@ -1071,9 +1072,9 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
if (i) if (i)
return i; return i;
i = mark_restore(state, 0, lastmark); i = mark_restore(state, 0, lastmark);
state->lastmark = lastmark;
if (i < 0) if (i < 0)
return i; return i;
lastmark_restore(state, lastmark);
rp->count = count - 1; rp->count = count - 1;
state->ptr = ptr; state->ptr = ptr;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment