Commit 77b36573 authored by Georg Brandl

Add re.fullmatch() function and regex.fullmatch() method, which anchor the

pattern at both ends of the string to match.

Patch by Matthew Barnett.
Closes #16203.
parent a440d2b9
......@@ -584,6 +584,16 @@ form.
instead (see also :ref:`search-vs-match`).
.. function:: fullmatch(pattern, string, flags=0)
If the whole *string* matches the regular expression *pattern*, return a
corresponding :ref:`match object <match-objects>`. Return ``None`` if the
string does not match the pattern; note that this is different from a
zero-length match.
.. versionadded:: 3.4
.. function:: split(pattern, string, maxsplit=0, flags=0)
Split *string* by the occurrences of *pattern*. If capturing parentheses are
......@@ -778,6 +788,24 @@ attributes:
:meth:`~regex.search` instead (see also :ref:`search-vs-match`).
.. method:: regex.fullmatch(string[, pos[, endpos]])
If the whole *string* matches this regular expression, return a corresponding
:ref:`match object <match-objects>`. Return ``None`` if the string does not
match the pattern; note that this is different from a zero-length match.
The optional *pos* and *endpos* parameters have the same meaning as for the
:meth:`~regex.search` method.
>>> pattern = re.compile("o[gh]")
>>> pattern.fullmatch("dog") # No match as "o" is not at the start of "dog".
>>> pattern.fullmatch("ogre") # No match as the pattern does not match the whole string.
>>> pattern.fullmatch("doggie", 1, 3) # Matches within given limits.
<_sre.SRE_Match object at ...>
.. versionadded:: 3.4
.. method:: regex.split(string, maxsplit=0)
Identical to the :func:`split` function, using the compiled pattern.
......
......@@ -86,6 +86,7 @@ resulting RE will match the second character.
This module exports the following functions:
match Match a regular expression pattern to the beginning of a string.
fullmatch Match a regular expression pattern to all of a string.
search Search a string for the presence of a pattern.
sub Substitute occurrences of a pattern found in a string.
subn Same as sub, but also return the number of substitutions made.
......@@ -123,7 +124,7 @@ import sre_compile
import sre_parse
# public symbols
__all__ = [ "match", "search", "sub", "subn", "split", "findall",
__all__ = [ "match", "fullmatch", "search", "sub", "subn", "split", "findall",
"compile", "purge", "template", "escape", "A", "I", "L", "M", "S", "X",
"U", "ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
"UNICODE", "error" ]
......@@ -154,6 +155,11 @@ def match(pattern, string, flags=0):
a match object, or None if no match was found."""
return _compile(pattern, flags).match(string)
def fullmatch(pattern, string, flags=0):
    """Apply the pattern to the whole of the string.

    Returns a match object, or None if no match was found."""
    # Compile (or fetch the compiled form of) the pattern first, then
    # delegate to the compiled object's own fullmatch() method.
    compiled = _compile(pattern, flags)
    return compiled.fullmatch(string)
def search(pattern, string, flags=0):
"""Scan through string looking for a match to the pattern, returning
a match object, or None if no match was found."""
......
......@@ -1061,6 +1061,30 @@ class ReTests(unittest.TestCase):
self.assertEqual(m.group(1), "")
self.assertEqual(m.group(2), "y")
def test_fullmatch(self):
# Issue 16203: Proposal: add re.fullmatch() method.
self.assertEqual(re.fullmatch(r"a", "a").span(), (0, 1))
self.assertEqual(re.fullmatch(r"a|ab", "ab").span(), (0, 2))
self.assertEqual(re.fullmatch(r".*?$", "abc").span(), (0, 3))
self.assertEqual(re.fullmatch(r".*?", "abc").span(), (0, 3))
self.assertEqual(re.fullmatch(r"a.*?b", "ab").span(), (0, 2))
self.assertEqual(re.fullmatch(r"a.*?b", "abb").span(), (0, 3))
self.assertEqual(re.fullmatch(r"a.*?b", "axxb").span(), (0, 4))
self.assertEqual(re.fullmatch(r"abc$", "abc\n"), None)
self.assertEqual(re.fullmatch(r"abc\Z", "abc\n"), None)
self.assertEqual(re.fullmatch(r"(?m)abc$", "abc\n"), None)
self.assertEqual(re.fullmatch(r"ab(?=c)cd", "abcd").span(), (0, 4))
self.assertEqual(re.fullmatch(r"ab(?<=b)cd", "abcd").span(), (0, 4))
self.assertEqual(re.fullmatch(r"(?=a|ab)ab", "ab").span(), (0, 2))
self.assertEqual(
re.compile(r"bc").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
self.assertEqual(
re.compile(r".*?$").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
self.assertEqual(
re.compile(r".*?").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
def run_re_tests():
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
if verbose:
......
......@@ -22,6 +22,7 @@
* 2002-11-09 fl fixed empty sub/subn return type
* 2003-04-18 mvl fully support 4-byte codes
* 2003-10-17 gn implemented non recursive scheme
* 2013-02-04 mrab added fullmatch primitive
*
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
......@@ -746,11 +747,12 @@ do { \
#define JUMP_ASSERT 12
#define JUMP_ASSERT_NOT 13
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
#define DO_JUMP(jumpvalue, jumplabel, nextpattern, matchall) \
DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
nextctx->last_ctx_pos = ctx_pos; \
nextctx->jump = jumpvalue; \
nextctx->pattern = nextpattern; \
nextctx->match_all = matchall; \
ctx_pos = alloc_pos; \
ctx = nextctx; \
goto entrance; \
......@@ -769,6 +771,7 @@ typedef struct {
SRE_CODE chr;
SRE_REPEAT* rep;
} u;
int match_all;
} SRE_MATCH_CONTEXT;
/* check if string matches the given pattern. returns <0 for
......@@ -791,6 +794,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
ctx->last_ctx_pos = -1;
ctx->jump = JUMP_NONE;
ctx->pattern = pattern;
ctx->match_all = state->match_all;
ctx_pos = alloc_pos;
entrance:
......@@ -864,6 +868,8 @@ entrance:
case SRE_OP_SUCCESS:
/* end of pattern */
TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
if (ctx->match_all && ctx->ptr != state->end)
RETURN_FAILURE;
state->ptr = ctx->ptr;
RETURN_SUCCESS;
......@@ -972,7 +978,7 @@ entrance:
!SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) SRE_CHARGET(state, ctx->ptr, 0))))
continue;
state->ptr = ctx->ptr;
DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1, ctx->match_all);
if (ret) {
if (ctx->u.rep)
MARK_POP_DISCARD(ctx->lastmark);
......@@ -1019,7 +1025,8 @@ entrance:
if (ctx->count < (Py_ssize_t) ctx->pattern[1])
RETURN_FAILURE;
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
(!ctx->match_all || ctx->ptr == state->end)) {
/* tail is empty. we're finished */
state->ptr = ctx->ptr;
RETURN_SUCCESS;
......@@ -1042,7 +1049,7 @@ entrance:
break;
state->ptr = ctx->ptr;
DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
ctx->pattern+ctx->pattern[0]);
ctx->pattern+ctx->pattern[0], ctx->match_all);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
......@@ -1059,7 +1066,7 @@ entrance:
while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
state->ptr = ctx->ptr;
DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
ctx->pattern+ctx->pattern[0]);
ctx->pattern+ctx->pattern[0], ctx->match_all);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
......@@ -1104,7 +1111,8 @@ entrance:
ctx->ptr += state->charsize * ctx->count;
}
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
(!ctx->match_all || ctx->ptr == state->end)) {
/* tail is empty. we're finished */
state->ptr = ctx->ptr;
RETURN_SUCCESS;
......@@ -1116,7 +1124,7 @@ entrance:
|| ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
state->ptr = ctx->ptr;
DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
ctx->pattern+ctx->pattern[0]);
ctx->pattern+ctx->pattern[0], ctx->match_all);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
......@@ -1155,7 +1163,7 @@ entrance:
state->repeat = ctx->u.rep;
state->ptr = ctx->ptr;
DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0], ctx->match_all);
state->repeat = ctx->u.rep->prev;
PyObject_FREE(ctx->u.rep);
......@@ -1187,7 +1195,7 @@ entrance:
/* not enough matches */
ctx->u.rep->count = ctx->count;
DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
ctx->u.rep->pattern+3);
ctx->u.rep->pattern+3, ctx->match_all);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
......@@ -1209,7 +1217,7 @@ entrance:
DATA_PUSH(&ctx->u.rep->last_ptr);
ctx->u.rep->last_ptr = state->ptr;
DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
ctx->u.rep->pattern+3);
ctx->u.rep->pattern+3, ctx->match_all);
DATA_POP(&ctx->u.rep->last_ptr);
if (ret) {
MARK_POP_DISCARD(ctx->lastmark);
......@@ -1225,7 +1233,7 @@ entrance:
/* cannot match more repeated items here. make sure the
tail matches */
state->repeat = ctx->u.rep->prev;
DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern, ctx->match_all);
RETURN_ON_SUCCESS(ret);
state->repeat = ctx->u.rep;
state->ptr = ctx->ptr;
......@@ -1250,7 +1258,7 @@ entrance:
/* not enough matches */
ctx->u.rep->count = ctx->count;
DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
ctx->u.rep->pattern+3);
ctx->u.rep->pattern+3, ctx->match_all);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
......@@ -1264,7 +1272,7 @@ entrance:
/* see if the tail matches */
state->repeat = ctx->u.rep->prev;
DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern, ctx->match_all);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
......@@ -1285,7 +1293,7 @@ entrance:
DATA_PUSH(&ctx->u.rep->last_ptr);
ctx->u.rep->last_ptr = state->ptr;
DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
ctx->u.rep->pattern+3);
ctx->u.rep->pattern+3, ctx->match_all);
DATA_POP(&ctx->u.rep->last_ptr);
if (ret) {
RETURN_ON_ERROR(ret);
......@@ -1378,7 +1386,7 @@ entrance:
state->ptr = ctx->ptr - state->charsize * ctx->pattern[1];
if (state->ptr < state->beginning)
RETURN_FAILURE;
DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2, 0);
RETURN_ON_FAILURE(ret);
ctx->pattern += ctx->pattern[0];
break;
......@@ -1390,7 +1398,7 @@ entrance:
ctx->ptr, ctx->pattern[1]));
state->ptr = ctx->ptr - state->charsize * ctx->pattern[1];
if (state->ptr >= state->beginning) {
DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2, 0);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_FAILURE;
......@@ -1909,6 +1917,44 @@ pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
return pattern_new_match(self, &state, status);
}
/* Pattern method fullmatch(string[, pos[, endpos]]).
 *
 * Runs the matcher with state.match_all set, starting at state.start, and
 * hands the resulting status to pattern_new_match() to build the result.
 *
 * Returns a new reference produced by pattern_new_match(), or NULL with an
 * exception set if argument parsing, state initialisation or the matcher
 * itself reported an error.
 */
static PyObject*
pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw)
{
    SRE_STATE state;
    Py_ssize_t status;
    PyObject* match;

    PyObject* string;
    Py_ssize_t start = 0;
    Py_ssize_t end = PY_SSIZE_T_MAX;
    /* NOTE(review): the first keyword is spelled "pattern" although the
       method's docstring documents the argument as "string" -- confirm
       whether the keyword name should follow the documentation. */
    static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
    if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:fullmatch", kwlist,
                                     &string, &start, &end))
        return NULL;

    string = state_init(&state, self, string, start, end);
    if (!string)
        return NULL;

    /* Ask the matcher to accept only matches that consume all input. */
    state.match_all = 1;
    state.ptr = state.start;

    TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));

    if (state.logical_charsize == 1) {
        status = sre_match(&state, PatternObject_GetCode(self));
    } else {
        status = sre_umatch(&state, PatternObject_GetCode(self));
    }

    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));

    /* state_init() succeeded above, so every exit from here on must be
       paired with state_fini() -- including the error exit. */
    if (PyErr_Occurred()) {
        state_fini(&state);
        return NULL;
    }

    /* Build the result while the state is still initialised, and only
       then tear the state down. */
    match = pattern_new_match(self, &state, status);
    state_fini(&state);
    return match;
}
static PyObject*
pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
{
......@@ -2530,6 +2576,10 @@ PyDoc_STRVAR(pattern_match_doc,
"match(string[, pos[, endpos]]) -> match object or None.\n\
Matches zero or more characters at the beginning of the string");
/* Docstring attached to the "fullmatch" entry of pattern_methods. */
PyDoc_STRVAR(pattern_fullmatch_doc,
"fullmatch(string[, pos[, endpos]]) -> match object or None.\n\
Matches against all of the string");
PyDoc_STRVAR(pattern_search_doc,
"search(string[, pos[, endpos]]) -> match object or None.\n\
Scan through string looking for a match, and return a corresponding\n\
......@@ -2565,6 +2615,8 @@ PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
static PyMethodDef pattern_methods[] = {
{"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
pattern_match_doc},
{"fullmatch", (PyCFunction) pattern_fullmatch, METH_VARARGS|METH_KEYWORDS,
pattern_fullmatch_doc},
{"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
pattern_search_doc},
{"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
......
......@@ -89,6 +89,7 @@ typedef struct {
SRE_REPEAT *repeat;
/* hooks */
SRE_TOLOWER_HOOK lower;
int match_all;
} SRE_STATE;
typedef struct {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment