Commit 21009b9c authored by Fredrik Lundh

an SRE bugfix a day keeps Guido away...

#462270: sub-tle difference between pre.sub and sre.sub.  PRE ignored
an empty match at the previous location, SRE didn't.

also synced with Secret Labs "sreopen" codebase.
parent 18d8d5a7
...@@ -45,7 +45,7 @@ The special characters are: ...@@ -45,7 +45,7 @@ The special characters are:
"|" A|B, creates an RE that will match either A or B. "|" A|B, creates an RE that will match either A or B.
(...) Matches the RE inside the parentheses. (...) Matches the RE inside the parentheses.
The contents can be retrieved or matched later in the string. The contents can be retrieved or matched later in the string.
(?iLmsx) Set the I, L, M, S, or X flag for the RE. (?iLmsx) Set the I, L, M, S, or X flag for the RE (see below).
(?:...) Non-grouping version of regular parentheses. (?:...) Non-grouping version of regular parentheses.
(?P<name>...) The substring matched by the group is accessible by name. (?P<name>...) The substring matched by the group is accessible by name.
(?P=name) Matches the text matched earlier by the group named name. (?P=name) Matches the text matched earlier by the group named name.
...@@ -80,7 +80,6 @@ This module exports the following functions: ...@@ -80,7 +80,6 @@ This module exports the following functions:
findall Find all occurrences of a pattern in a string. findall Find all occurrences of a pattern in a string.
compile Compile a pattern into a RegexObject. compile Compile a pattern into a RegexObject.
purge Clear the regular expression cache. purge Clear the regular expression cache.
template Compile a template pattern, returning a pattern object.
escape Backslash all non-alphanumerics in a string. escape Backslash all non-alphanumerics in a string.
Some of the functions in this module takes flags as optional parameters: Some of the functions in this module takes flags as optional parameters:
...@@ -90,11 +89,12 @@ Some of the functions in this module takes flags as optional parameters: ...@@ -90,11 +89,12 @@ Some of the functions in this module takes flags as optional parameters:
"$" matches the end of lines as well as the string. "$" matches the end of lines as well as the string.
S DOTALL "." matches any character at all, including the newline. S DOTALL "." matches any character at all, including the newline.
X VERBOSE Ignore whitespace and comments for nicer looking RE's. X VERBOSE Ignore whitespace and comments for nicer looking RE's.
U UNICODE Use unicode locale. U UNICODE Make \w, \W, \b, \B, dependent on the Unicode locale.
This module also defines an exception 'error'. This module also defines an exception 'error'.
""" """
import sre_compile import sre_compile
import sre_parse import sre_parse
...@@ -104,7 +104,7 @@ __all__ = [ "match", "search", "sub", "subn", "split", "findall", ...@@ -104,7 +104,7 @@ __all__ = [ "match", "search", "sub", "subn", "split", "findall",
"U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
"UNICODE", "error" ] "UNICODE", "error" ]
__version__ = "2.1b2" __version__ = "2.1.1"
# this module works under 1.5.2 and later. don't use string methods # this module works under 1.5.2 and later. don't use string methods
import string import string
...@@ -269,6 +269,9 @@ def _subn(pattern, template, text, count=0, sub=0): ...@@ -269,6 +269,9 @@ def _subn(pattern, template, text, count=0, sub=0):
b, e = m.span() b, e = m.span()
if i < b: if i < b:
append(text[i:b]) append(text[i:b])
elif i == b == e and n:
append(text[i:b])
continue # ignore empty match at previous position
append(filter(m)) append(filter(m))
i = e i = e
n = n + 1 n = n + 1
......
...@@ -123,6 +123,10 @@ test(r"""sre.sub('\r\n', r'\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n') ...@@ -123,6 +123,10 @@ test(r"""sre.sub('\r\n', r'\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n')
test(r"""sre.sub(r'\r\n', '\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n') test(r"""sre.sub(r'\r\n', '\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n')
test(r"""sre.sub('\r\n', '\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n') test(r"""sre.sub('\r\n', '\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n')
# Test for empty sub() behaviour, see SF bug #462270
test(r"""sre.sub('x*', '-', 'abxd')""", '-a-b-d-')
test(r"""sre.sub('x+', '-', 'abxd')""", 'ab-d')
if verbose: if verbose:
print 'Running tests on symbolic references' print 'Running tests on symbolic references'
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
* 2001-04-28 fl added __copy__ methods (work in progress) * 2001-04-28 fl added __copy__ methods (work in progress)
* 2001-05-14 fl fixes for 1.5.2 * 2001-05-14 fl fixes for 1.5.2
* 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
* 2001-09-18 fl
* *
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
* *
...@@ -133,6 +134,8 @@ static char copyright[] = ...@@ -133,6 +134,8 @@ static char copyright[] =
#define SRE_ALNUM_MASK 8 #define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16 #define SRE_WORD_MASK 16
/* FIXME: this assumes ASCII. create tables in init_sre() instead */
static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
...@@ -1141,6 +1144,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level) ...@@ -1141,6 +1144,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
} }
/* can't end up here */ /* can't end up here */
/* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */
} }
LOCAL(int) LOCAL(int)
...@@ -2624,16 +2628,17 @@ init_sre(void) ...@@ -2624,16 +2628,17 @@ init_sre(void)
m = Py_InitModule("_" SRE_MODULE, _functions); m = Py_InitModule("_" SRE_MODULE, _functions);
d = PyModule_GetDict(m); d = PyModule_GetDict(m);
PyDict_SetItemString( x = PyInt_FromLong(SRE_MAGIC);
d, "MAGIC", (x = (PyObject*) PyInt_FromLong(SRE_MAGIC)) if (x) {
); PyDict_SetItemString(d, "MAGIC", x);
Py_XDECREF(x); Py_DECREF(x);
}
PyDict_SetItemString(
d, "copyright", (x = (PyObject*)PyString_FromString(copyright))
);
Py_XDECREF(x);
x = PyString_FromString(copyright);
if (x) {
PyDict_SetItemString(d, "copyright", x);
Py_DECREF(x);
}
} }
#endif /* !defined(SRE_RECURSIVE) */ #endif /* !defined(SRE_RECURSIVE) */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment