Commit 51fe8e78 authored by Gustavo Niemeyer's avatar Gustavo Niemeyer

Implemented non-recursive SRE matching.

parent b9db49db
......@@ -297,6 +297,15 @@ assertions, the contained pattern must only match strings of some
fixed length. Patterns which start with negative lookbehind
assertions may match at the beginning of the string being searched.
\item[\code{(?(\var{id/name})yes-pattern|no-pattern)}] Will try to match
with \regexp{yes-pattern} if the group with given \var{id} or \var{name}
exists, and with \regexp{no-pattern} if it doesn't. \regexp{|no-pattern}
is optional and can be omitted. For example,
\regexp{(<)?(\e w+@\e w+(?:\e .\e w+)+)(?(1)>)} is a poor email matching
pattern, which will match with \code{'<user@host.com>'} as well as
\code{'user@host.com'}, but not with \code{'<user@host.com'}.
\versionadded{2.3}
\end{list}
The special sequences consist of \character{\e} and a character from the
......
......@@ -145,6 +145,19 @@ def _compile(code, pattern, flags):
else:
emit(OPCODES[op])
emit(av-1)
elif op is GROUPREF_EXISTS:
emit(OPCODES[op])
emit((av[0]-1)*2)
skipyes = len(code); emit(0)
_compile(code, av[1], flags)
if av[2]:
emit(OPCODES[JUMP])
skipno = len(code); emit(0)
code[skipyes] = len(code) - skipyes + 1
_compile(code, av[2], flags)
code[skipno] = len(code) - skipno
else:
code[skipyes] = len(code) - skipyes + 1
else:
raise ValueError, ("unsupported operand type", op)
......
......@@ -13,7 +13,7 @@
# update when constants are added or removed
MAGIC = 20030419
MAGIC = 20031017
# max code word in this release
......@@ -42,6 +42,7 @@ CATEGORY = "category"
CHARSET = "charset"
GROUPREF = "groupref"
GROUPREF_IGNORE = "groupref_ignore"
GROUPREF_EXISTS = "groupref_exists"
IN = "in"
IN_IGNORE = "in_ignore"
INFO = "info"
......@@ -108,7 +109,7 @@ OPCODES = [
CALL,
CATEGORY,
CHARSET, BIGCHARSET,
GROUPREF, GROUPREF_IGNORE,
GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
IN, IN_IGNORE,
INFO,
JUMP,
......
......@@ -364,6 +364,20 @@ def _parse_sub(source, state, nested=1):
subpattern.append((BRANCH, (None, items)))
return subpattern
def _parse_sub_cond(source, state, condgroup):
item_yes = _parse(source, state)
if source.match("|"):
item_no = _parse(source, state)
if source.match("|"):
raise error, "conditional backref with more than two branches"
else:
item_no = None
if source.next and not source.match(")", 0):
raise error, "pattern not properly closed"
subpattern = SubPattern(state)
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
return subpattern
def _parse(source, state):
# parse a simple pattern
......@@ -499,6 +513,7 @@ def _parse(source, state):
elif this == "(":
group = 1
name = None
condgroup = None
if source.match("?"):
group = 0
# options
......@@ -568,6 +583,26 @@ def _parse(source, state):
else:
subpattern.append((ASSERT_NOT, (dir, p)))
continue
elif source.match("("):
# conditional backreference group
condname = ""
while 1:
char = source.get()
if char is None:
raise error, "unterminated name"
if char == ")":
break
condname = condname + char
group = 2
if isname(condname):
condgroup = state.groupdict.get(condname)
if condgroup is None:
raise error, "unknown group name"
else:
try:
condgroup = atoi(condname)
except ValueError:
raise error, "bad character in group name"
else:
# flags
if not source.next in FLAGS:
......@@ -581,7 +616,10 @@ def _parse(source, state):
group = None
else:
group = state.opengroup(name)
p = _parse_sub(source, state)
if condgroup:
p = _parse_sub_cond(source, state, condgroup)
else:
p = _parse_sub(source, state)
if not source.match(")"):
raise error, "unbalanced parenthesis"
if group is not None:
......
......@@ -169,7 +169,6 @@ class ReTests(unittest.TestCase):
self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
def test_re_groupref_exists(self):
return # not yet
self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
('(', 'a'))
self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
......@@ -405,19 +404,20 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
20003)
self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
# non-simple '*?' still recurses and hits the recursion limit
self.assertRaises(RuntimeError, re.search, '(a|b)*?c', 10000*'ab'+'cd')
# non-simple '*?' still used to hit the recursion limit, before the
# non-recursive scheme was implemented.
self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
def test_bug_612074(self):
pat=u"["+re.escape(u"\u2039")+u"]"
self.assertEqual(re.compile(pat) and 1, 1)
def test_stack_overflow(self):
# nasty case that overflows the straightforward recursive
# nasty cases that used to overflow the straightforward recursive
# implementation of repeated groups.
self.assertRaises(RuntimeError, re.match, '(x)*', 50000*'x')
self.assertRaises(RuntimeError, re.match, '(x)*y', 50000*'x'+'y')
self.assertRaises(RuntimeError, re.match, '(x)*?y', 50000*'x'+'y')
self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
def test_scanner(self):
def s_ident(scanner, token): return token
......
......@@ -61,6 +61,10 @@ Extension modules
- Bug #814613: INET_ADDRSTRLEN fix needed for all compilers on SGI
- Implemented non-recursive SRE matching scheme (#757624).
- Implemented (?(id/name)yes|no) support in SRE (#572936).
Library
-------
......
......@@ -21,6 +21,7 @@
* 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
* 2002-11-09 fl fixed empty sub/subn return type
* 2003-04-18 mvl fully support 4-byte codes
* 2003-10-17 gn implemented non recursive scheme
*
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
......@@ -91,6 +92,9 @@ static char copyright[] =
#endif
#endif
/* enables usage of recursive scheme */
#undef USE_RECURSION
/* enables fast searching */
#define USE_FAST_SEARCH
......@@ -275,82 +279,33 @@ sre_category(SRE_CODE category, unsigned int ch)
/* helpers */
static void
mark_fini(SRE_STATE* state)
data_stack_dealloc(SRE_STATE* state)
{
if (state->mark_stack) {
free(state->mark_stack);
state->mark_stack = NULL;
if (state->data_stack) {
free(state->data_stack);
state->data_stack = NULL;
}
state->mark_stack_size = state->mark_stack_base = 0;
state->data_stack_size = state->data_stack_base = 0;
}
static int
mark_save(SRE_STATE* state, int lo, int hi, int *mark_stack_base)
data_stack_grow(SRE_STATE* state, int size)
{
void* stack;
int size;
int minsize, newsize;
if (hi <= lo)
return 0;
size = (hi - lo) + 1;
newsize = state->mark_stack_size;
minsize = state->mark_stack_base + size;
if (newsize < minsize) {
/* create new stack */
if (!newsize) {
newsize = 512;
if (newsize < minsize)
newsize = minsize;
TRACE(("allocate stack %d\n", newsize));
stack = malloc(sizeof(void*) * newsize);
} else {
/* grow the stack */
while (newsize < minsize)
newsize += newsize;
TRACE(("grow stack to %d\n", newsize));
stack = realloc(state->mark_stack, sizeof(void*) * newsize);
}
int minsize, cursize;
minsize = state->data_stack_base+size;
cursize = state->data_stack_size;
if (cursize < minsize) {
void* stack;
cursize = minsize+minsize/4+1024;
TRACE(("allocate/grow stack %d\n", cursize));
stack = realloc(state->data_stack, cursize);
if (!stack) {
mark_fini(state);
data_stack_dealloc(state);
return SRE_ERROR_MEMORY;
}
state->mark_stack = stack;
state->mark_stack_size = newsize;
state->data_stack = stack;
state->data_stack_size = cursize;
}
TRACE(("copy %d:%d to %d (%d)\n", lo, hi, state->mark_stack_base, size));
memcpy(state->mark_stack + state->mark_stack_base, state->mark + lo,
size * sizeof(void*));
state->mark_stack_base += size;
*mark_stack_base = state->mark_stack_base;
return 0;
}
static int
mark_restore(SRE_STATE* state, int lo, int hi, int *mark_stack_base)
{
int size;
if (hi <= lo)
return 0;
size = (hi - lo) + 1;
state->mark_stack_base = *mark_stack_base - size;
TRACE(("copy %d:%d from %d\n", lo, hi, state->mark_stack_base));
memcpy(state->mark + lo, state->mark_stack + state->mark_stack_base,
size * sizeof(void*));
return 0;
}
......@@ -362,6 +317,7 @@ mark_restore(SRE_STATE* state, int lo, int hi, int *mark_stack_base)
#define SRE_CHARSET sre_charset
#define SRE_INFO sre_info
#define SRE_MATCH sre_match
#define SRE_MATCH_CONTEXT sre_match_context
#define SRE_SEARCH sre_search
#define SRE_LITERAL_TEMPLATE sre_literal_template
......@@ -374,6 +330,7 @@ mark_restore(SRE_STATE* state, int lo, int hi, int *mark_stack_base)
#undef SRE_LITERAL_TEMPLATE
#undef SRE_SEARCH
#undef SRE_MATCH
#undef SRE_MATCH_CONTEXT
#undef SRE_INFO
#undef SRE_CHARSET
#undef SRE_COUNT
......@@ -388,6 +345,7 @@ mark_restore(SRE_STATE* state, int lo, int hi, int *mark_stack_base)
#define SRE_CHARSET sre_ucharset
#define SRE_INFO sre_uinfo
#define SRE_MATCH sre_umatch
#define SRE_MATCH_CONTEXT sre_umatch_context
#define SRE_SEARCH sre_usearch
#define SRE_LITERAL_TEMPLATE sre_uliteral_template
#endif
......@@ -500,6 +458,9 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
for (;;) {
switch (*set++) {
case SRE_OP_FAILURE:
return !ok;
case SRE_OP_LITERAL:
/* <LITERAL> <code> */
if (ch == set[0])
......@@ -507,11 +468,11 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
set++;
break;
case SRE_OP_RANGE:
/* <RANGE> <lower> <upper> */
if (set[0] <= ch && ch <= set[1])
case SRE_OP_CATEGORY:
/* <CATEGORY> <code> */
if (sre_category(set[0], (int) ch))
return ok;
set += 2;
set += 1;
break;
case SRE_OP_CHARSET:
......@@ -529,6 +490,17 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
}
break;
case SRE_OP_RANGE:
/* <RANGE> <lower> <upper> */
if (set[0] <= ch && ch <= set[1])
return ok;
set += 2;
break;
case SRE_OP_NEGATE:
ok = !ok;
break;
case SRE_OP_BIGCHARSET:
/* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
{
......@@ -556,20 +528,6 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
break;
}
case SRE_OP_CATEGORY:
/* <CATEGORY> <code> */
if (sre_category(set[0], (int) ch))
return ok;
set += 1;
break;
case SRE_OP_NEGATE:
ok = !ok;
break;
case SRE_OP_FAILURE:
return !ok;
default:
/* internal error -- there's not much we can do about it
here, so let's just pretend it didn't match... */
......@@ -594,6 +552,13 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
switch (pattern[0]) {
case SRE_OP_IN:
/* repeated set */
TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
ptr++;
break;
case SRE_OP_ANY:
/* repeated dot wildcard. */
TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
......@@ -640,13 +605,6 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
ptr++;
break;
case SRE_OP_IN:
/* repeated set */
TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
ptr++;
break;
default:
/* repeated single character pattern */
TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
......@@ -724,35 +682,173 @@ SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
*/
#define LASTMARK_SAVE() \
do { \
lastmark = state->lastmark; \
lastindex = state->lastindex; \
ctx->lastmark = state->lastmark; \
ctx->lastindex = state->lastindex; \
} while (0)
#define LASTMARK_RESTORE() \
do { \
if (state->lastmark > lastmark) { \
memset(state->mark + lastmark + 1, 0, \
(state->lastmark - lastmark) * sizeof(void*)); \
state->lastmark = lastmark; \
state->lastindex = lastindex; \
} \
state->lastmark = ctx->lastmark; \
state->lastindex = ctx->lastindex; \
} while (0)
#define RETURN_ERROR(i) do { return i; } while(0)
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
#define RETURN_ON_ERROR(i) \
do { if (i < 0) RETURN_ERROR(i); } while (0)
#define RETURN_ON_SUCCESS(i) \
do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
#define RETURN_ON_FAILURE(i) \
do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
#define SFY(x) #x
#define DATA_STACK_ALLOC(state, type, ptr) \
do { \
alloc_pos = state->data_stack_base; \
TRACE(("allocating %s in %d (%d)\n", \
SFY(type), alloc_pos, sizeof(type))); \
if (state->data_stack_size < alloc_pos+sizeof(type)) { \
int j = data_stack_grow(state, sizeof(type)); \
if (j < 0) return j; \
if (ctx_pos != -1) \
DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
} \
ptr = (type*)(state->data_stack+alloc_pos); \
state->data_stack_base += sizeof(type); \
} while (0)
#define DATA_STACK_LOOKUP(state, type, ptr) \
do { \
TRACE(("looking up %s in %d (%d)\n", SFY(type), \
state->data_stack_base-sizeof(type), sizeof(type))); \
ptr = (type*)(state->data_stack+state->data_stack_base-sizeof(type)); \
} while (0)
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
do { \
TRACE(("looking up %s at %d\n", SFY(type), pos)); \
ptr = (type*)(state->data_stack+pos); \
} while (0)
#define DATA_STACK_PUSH(state, data, size) \
do { \
TRACE(("copy data in %p to %d (%d)\n", \
data, state->data_stack_base, size)); \
if (state->data_stack_size < state->data_stack_base+size) { \
int j = data_stack_grow(state, size); \
if (j < 0) return j; \
if (ctx_pos != -1) \
DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
} \
memcpy(state->data_stack+state->data_stack_base, data, size); \
state->data_stack_base += size; \
} while (0)
#define DATA_STACK_POP(state, data, size, discard) \
do { \
TRACE(("copy data to %p from %d (%d)\n", \
data, state->data_stack_base-size, size)); \
memcpy(data, state->data_stack+state->data_stack_base-size, size); \
if (discard) \
state->data_stack_base -= size; \
} while (0)
#define DATA_STACK_POP_DISCARD(state, size) \
do { \
TRACE(("discard data from %d (%d)\n", \
state->data_stack_base-size, size)); \
state->data_stack_base -= size; \
} while(0)
#define DATA_PUSH(x) \
DATA_STACK_PUSH(state, (x), sizeof(*(x)))
#define DATA_POP(x) \
DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
#define DATA_POP_KEEP(x) \
DATA_STACK_POP(state, (x), sizeof(*(x)), 0)
#define DATA_POP_DISCARD(x) \
DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
#define DATA_ALLOC(t,p) \
DATA_STACK_ALLOC(state, t, p)
#define DATA_LOOKUP(t,p) \
DATA_STACK_LOOKUP(state, t, p)
#define DATA_LOOKUP_AT(t,p,pos) \
DATA_STACK_LOOKUP_AT(state,t,p,pos)
#define MARK_PUSH(lastmark) \
do if (lastmark > 0) { \
i = lastmark; /* ctx->lastmark may change if reallocated */ \
DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
} while (0)
#define MARK_POP(lastmark) \
do if (lastmark > 0) { \
DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
} while (0)
#define MARK_POP_KEEP(lastmark) \
do if (lastmark > 0) { \
DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
} while (0)
#define MARK_POP_DISCARD(lastmark) \
do if (lastmark > 0) { \
DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
} while (0)
#define JUMP_NONE 0
#define JUMP_MAX_UNTIL_1 1
#define JUMP_MAX_UNTIL_2 2
#define JUMP_MAX_UNTIL_3 3
#define JUMP_MIN_UNTIL_1 4
#define JUMP_MIN_UNTIL_2 5
#define JUMP_MIN_UNTIL_3 6
#define JUMP_REPEAT 7
#define JUMP_REPEAT_ONE_1 8
#define JUMP_REPEAT_ONE_2 9
#define JUMP_MIN_REPEAT_ONE 10
#define JUMP_BRANCH 11
#define JUMP_ASSERT 12
#define JUMP_ASSERT_NOT 13
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
nextctx->last_ctx_pos = ctx_pos; \
nextctx->jump = jumpvalue; \
nextctx->pattern = nextpattern; \
ctx_pos = alloc_pos; \
ctx = nextctx; \
goto entrance; \
jumplabel: \
while (0) /* gcc doesn't like labels at end of scopes */ \
typedef struct {
int last_ctx_pos;
int jump;
SRE_CHAR* ptr;
SRE_CODE* pattern;
int count;
int lastmark;
int lastindex;
union {
SRE_CODE chr;
SRE_REPEAT* rep;
} u;
} SRE_MATCH_CONTEXT;
/* check if string matches the given pattern. returns <0 for
error, 0 for failure, and 1 for success */
LOCAL(int)
SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
{
/* check if string matches the given pattern. returns <0 for
error, 0 for failure, and 1 for success */
SRE_CHAR* end = state->end;
SRE_CHAR* ptr = state->ptr;
int i, count;
SRE_REPEAT* rp;
int lastmark, lastindex, mark_stack_base;
SRE_CODE chr;
int alloc_pos, ctx_pos = -1;
int i, ret = 0;
int jump;
SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
SRE_MATCH_CONTEXT* ctx;
SRE_MATCH_CONTEXT* nextctx;
TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
TRACE(("|%p|%p|ENTER %d\n", pattern, state->ptr, level));
#if defined(USE_STACKCHECK)
if (level % 10 == 0 && PyOS_CheckStack())
......@@ -764,241 +860,204 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
return SRE_ERROR_RECURSION_LIMIT;
#endif
if (pattern[0] == SRE_OP_INFO) {
DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
ctx->last_ctx_pos = -1;
ctx->jump = JUMP_NONE;
ctx->pattern = pattern;
ctx_pos = alloc_pos;
entrance:
ctx->ptr = state->ptr;
if (ctx->pattern[0] == SRE_OP_INFO) {
/* optimization info block */
/* <INFO> <1=skip> <2=flags> <3=min> ... */
if (pattern[3] && (end - ptr) < pattern[3]) {
if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
TRACE(("reject (got %d chars, need %d)\n",
(end - ptr), pattern[3]));
return 0;
(end - ctx->ptr), ctx->pattern[3]));
RETURN_FAILURE;
}
pattern += pattern[1] + 1;
ctx->pattern += ctx->pattern[1] + 1;
}
for (;;) {
switch (*pattern++) {
switch (*ctx->pattern++) {
case SRE_OP_FAILURE:
/* immediate failure */
TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
return 0;
case SRE_OP_MARK:
/* set mark */
/* <MARK> <gid> */
TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
i = ctx->pattern[0];
if (i & 1)
state->lastindex = i/2 + 1;
if (i > state->lastmark) {
/* state->lastmark is the highest valid index in the
state->mark array. If it is increased by more than 1,
the intervening marks must be set to NULL to signal
that these marks have not been encountered. */
int j = state->lastmark + 1;
while (j < i)
state->mark[j++] = NULL;
state->lastmark = i;
}
state->mark[i] = ctx->ptr;
ctx->pattern++;
break;
case SRE_OP_LITERAL:
/* match literal string */
/* <LITERAL> <code> */
TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
ctx->ptr, *ctx->pattern));
if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_NOT_LITERAL:
/* match anything that is not literal character */
/* <NOT_LITERAL> <code> */
TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
ctx->ptr, *ctx->pattern));
if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_SUCCESS:
/* end of pattern */
TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
state->ptr = ptr;
return 1;
TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
state->ptr = ctx->ptr;
RETURN_SUCCESS;
case SRE_OP_AT:
/* match at given position */
/* <AT> <code> */
TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
if (!SRE_AT(state, ptr, *pattern))
return 0;
pattern++;
TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
RETURN_FAILURE;
ctx->pattern++;
break;
case SRE_OP_CATEGORY:
/* match at given category */
/* <CATEGORY> <code> */
TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
if (ptr >= end || !sre_category(pattern[0], ptr[0]))
return 0;
pattern++;
ptr++;
break;
case SRE_OP_LITERAL:
/* match literal string */
/* <LITERAL> <code> */
TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
return 0;
pattern++;
ptr++;
break;
case SRE_OP_NOT_LITERAL:
/* match anything that is not literal character */
/* <NOT_LITERAL> <code> */
TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
return 0;
pattern++;
ptr++;
TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
ctx->ptr, *ctx->pattern));
if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_ANY:
/* match anything (except a newline) */
/* <ANY> */
TRACE(("|%p|%p|ANY\n", pattern, ptr));
if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
return 0;
ptr++;
TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
RETURN_FAILURE;
ctx->ptr++;
break;
case SRE_OP_ANY_ALL:
/* match anything */
/* <ANY_ALL> */
TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
if (ptr >= end)
return 0;
ptr++;
TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
if (ctx->ptr >= end)
RETURN_FAILURE;
ctx->ptr++;
break;
case SRE_OP_IN:
/* match set member (or non_member) */
/* <IN> <skip> <set> */
TRACE(("|%p|%p|IN\n", pattern, ptr));
if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
return 0;
pattern += pattern[0];
ptr++;
break;
case SRE_OP_GROUPREF:
/* match backreference */
TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
i = pattern[0];
{
SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
if (!p || !e || e < p)
return 0;
while (p < e) {
if (ptr >= end || *ptr != *p)
return 0;
p++; ptr++;
}
}
pattern++;
break;
case SRE_OP_GROUPREF_IGNORE:
/* match backreference */
TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
i = pattern[0];
{
SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
if (!p || !e || e < p)
return 0;
while (p < e) {
if (ptr >= end ||
state->lower(*ptr) != state->lower(*p))
return 0;
p++; ptr++;
}
}
pattern++;
TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
RETURN_FAILURE;
ctx->pattern += ctx->pattern[0];
ctx->ptr++;
break;
case SRE_OP_LITERAL_IGNORE:
TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
if (ptr >= end ||
state->lower(*ptr) != state->lower(*pattern))
return 0;
pattern++;
ptr++;
TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
ctx->pattern, ctx->ptr, ctx->pattern[0]));
if (ctx->ptr >= end ||
state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_NOT_LITERAL_IGNORE:
TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
if (ptr >= end ||
state->lower(*ptr) == state->lower(*pattern))
return 0;
pattern++;
ptr++;
TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
ctx->pattern, ctx->ptr, *ctx->pattern));
if (ctx->ptr >= end ||
state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
RETURN_FAILURE;
ctx->pattern++;
ctx->ptr++;
break;
case SRE_OP_IN_IGNORE:
TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
if (ptr >= end
|| !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
return 0;
pattern += pattern[0];
ptr++;
break;
case SRE_OP_MARK:
/* set mark */
/* <MARK> <gid> */
TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
i = pattern[0];
if (i & 1)
state->lastindex = i/2 + 1;
if (i > state->lastmark)
state->lastmark = i;
state->mark[i] = ptr;
pattern++;
TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
if (ctx->ptr >= end
|| !SRE_CHARSET(ctx->pattern+1,
(SRE_CODE)state->lower(*ctx->ptr)))
RETURN_FAILURE;
ctx->pattern += ctx->pattern[0];
ctx->ptr++;
break;
case SRE_OP_JUMP:
case SRE_OP_INFO:
/* jump forward */
/* <JUMP> <offset> */
TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
pattern += pattern[0];
break;
case SRE_OP_ASSERT:
/* assert subpattern */
/* <ASSERT> <skip> <back> <pattern> */
TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
state->ptr = ptr - pattern[1];
if (state->ptr < state->beginning)
return 0;
i = SRE_MATCH(state, pattern + 2, level + 1);
if (i <= 0)
return i;
pattern += pattern[0];
break;
case SRE_OP_ASSERT_NOT:
/* assert not subpattern */
/* <ASSERT_NOT> <skip> <back> <pattern> */
TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
state->ptr = ptr - pattern[1];
if (state->ptr >= state->beginning) {
i = SRE_MATCH(state, pattern + 2, level + 1);
if (i < 0)
return i;
if (i)
return 0;
}
pattern += pattern[0];
TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
ctx->pattern += ctx->pattern[0];
break;
case SRE_OP_BRANCH:
/* alternation */
/* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
LASTMARK_SAVE();
if (state->repeat) {
i = mark_save(state, 0, lastmark, &mark_stack_base);
if (i < 0)
return i;
}
for (; pattern[0]; pattern += pattern[0]) {
if (pattern[1] == SRE_OP_LITERAL &&
(ptr >= end || (SRE_CODE) *ptr != pattern[2]))
ctx->u.rep = state->repeat;
if (ctx->u.rep)
MARK_PUSH(ctx->lastmark);
for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
if (ctx->pattern[1] == SRE_OP_LITERAL &&
(ctx->ptr >= end ||
(SRE_CODE) *ctx->ptr != ctx->pattern[2]))
continue;
if (pattern[1] == SRE_OP_IN &&
(ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
if (ctx->pattern[1] == SRE_OP_IN &&
(ctx->ptr >= end ||
!SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
continue;
state->ptr = ptr;
i = SRE_MATCH(state, pattern + 1, level + 1);
if (i)
return i;
if (state->repeat) {
i = mark_restore(state, 0, lastmark, &mark_stack_base);
if (i < 0)
return i;
state->ptr = ctx->ptr;
#ifdef USE_RECURSION
ret = SRE_MATCH(state, ctx->pattern+1, level+1);
#else
DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
#endif
if (ret) {
if (ctx->u.rep)
MARK_POP_DISCARD(ctx->lastmark);
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
if (ctx->u.rep)
MARK_POP_KEEP(ctx->lastmark);
LASTMARK_RESTORE();
}
return 0;
if (ctx->u.rep)
MARK_POP_DISCARD(ctx->lastmark);
RETURN_FAILURE;
case SRE_OP_REPEAT_ONE:
/* match repeated sequence (maximizing regexp) */
......@@ -1010,70 +1069,88 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
/* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
pattern[1], pattern[2]));
TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
ctx->pattern[1], ctx->pattern[2]));
if (ptr + pattern[1] > end)
return 0; /* cannot match */
if (ctx->ptr + ctx->pattern[1] > end)
RETURN_FAILURE; /* cannot match */
state->ptr = ptr;
state->ptr = ctx->ptr;
count = SRE_COUNT(state, pattern + 3, pattern[2], level + 1);
if (count < 0)
return count;
ctx->count = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2],
level+1);
RETURN_ON_ERROR(ctx->count);
ptr += count;
ctx->ptr += ctx->count;
/* when we arrive here, count contains the number of
matches, and ptr points to the tail of the target
matches, and ctx->ptr points to the tail of the target
string. check if the rest of the pattern matches,
and backtrack if not. */
if (count < (int) pattern[1])
return 0;
if (ctx->count < (int) ctx->pattern[1])
RETURN_FAILURE;
if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
/* tail is empty. we're finished */
state->ptr = ptr;
return 1;
state->ptr = ctx->ptr;
RETURN_SUCCESS;
}
LASTMARK_SAVE();
if (pattern[pattern[0]] == SRE_OP_LITERAL) {
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
/* tail starts with a literal. skip positions where
the rest of the pattern cannot possibly match */
chr = pattern[pattern[0]+1];
ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
for (;;) {
while (count >= (int) pattern[1] &&
(ptr >= end || *ptr != chr)) {
ptr--;
count--;
while (ctx->count >= (int) ctx->pattern[1] &&
(ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
ctx->ptr--;
ctx->count--;
}
if (count < (int) pattern[1])
if (ctx->count < (int) ctx->pattern[1])
break;
state->ptr = ptr;
i = SRE_MATCH(state, pattern + pattern[0], level + 1);
if (i)
return i;
ptr--;
count--;
state->ptr = ctx->ptr;
#ifdef USE_RECURSION
ret = SRE_MATCH(state, ctx->pattern+ctx->pattern[0],
level+1);
#else
DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
ctx->pattern+ctx->pattern[0]);
#endif
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
LASTMARK_RESTORE();
ctx->ptr--;
ctx->count--;
}
} else {
/* general case */
while (count >= (int) pattern[1]) {
state->ptr = ptr;
i = SRE_MATCH(state, pattern + pattern[0], level + 1);
if (i)
return i;
ptr--;
count--;
while (ctx->count >= (int) ctx->pattern[1]) {
state->ptr = ctx->ptr;
#ifdef USE_RECURSION
ret = SRE_MATCH(state, ctx->pattern+ctx->pattern[0],
level+1);
#else
DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
ctx->pattern+ctx->pattern[0]);
#endif
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
ctx->ptr--;
ctx->count--;
LASTMARK_RESTORE();
}
}
return 0;
RETURN_FAILURE;
case SRE_OP_MIN_REPEAT_ONE:
/* match repeated sequence (minimizing regexp) */
......@@ -1085,76 +1162,92 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
/* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
pattern[1], pattern[2]));
TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
ctx->pattern[1], ctx->pattern[2]));
if (ptr + pattern[1] > end)
return 0; /* cannot match */
if (ctx->ptr + ctx->pattern[1] > end)
RETURN_FAILURE; /* cannot match */
state->ptr = ptr;
state->ptr = ctx->ptr;
if (pattern[1] == 0)
count = 0;
if (ctx->pattern[1] == 0)
ctx->count = 0;
else {
/* count using pattern min as the maximum */
count = SRE_COUNT(state, pattern + 3, pattern[1], level + 1);
if (count < 0)
return count; /* exception */
if (count < (int) pattern[1])
return 0; /* did not match minimum number of times */
ptr += count; /* advance past minimum matches of repeat */
ctx->count = SRE_COUNT(state, ctx->pattern+3,
ctx->pattern[1], level+1);
RETURN_ON_ERROR(ctx->count);
if (ctx->count < (int) ctx->pattern[1])
/* didn't match minimum number of times */
RETURN_FAILURE;
/* advance past minimum matches of repeat */
ctx->ptr += ctx->count;
}
if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
/* tail is empty. we're finished */
state->ptr = ptr;
return 1;
state->ptr = ctx->ptr;
RETURN_SUCCESS;
} else {
/* general case */
int matchmax = ((int)pattern[2] == 65535);
int c;
LASTMARK_SAVE();
while (matchmax || count <= (int) pattern[2]) {
state->ptr = ptr;
i = SRE_MATCH(state, pattern + pattern[0], level + 1);
if (i)
return i;
state->ptr = ptr;
c = SRE_COUNT(state, pattern+3, 1, level+1);
if (c < 0)
return c;
if (c == 0)
while ((int)ctx->pattern[2] == 65535
|| ctx->count <= (int)ctx->pattern[2]) {
state->ptr = ctx->ptr;
#ifdef USE_RECURSION
ret = SRE_MATCH(state, ctx->pattern+ctx->pattern[0],
level+1);
#else
DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
ctx->pattern+ctx->pattern[0]);
#endif
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
state->ptr = ctx->ptr;
ret = SRE_COUNT(state, ctx->pattern+3, 1, level+1);
RETURN_ON_ERROR(ret);
if (ret == 0)
break;
assert(c == 1);
ptr++;
count++;
assert(ret == 1);
ctx->ptr++;
ctx->count++;
LASTMARK_RESTORE();
}
}
return 0;
RETURN_FAILURE;
case SRE_OP_REPEAT:
/* create repeat context. all the hard work is done
by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
/* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
pattern[1], pattern[2]));
rep.count = -1;
rep.pattern = pattern;
TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
ctx->pattern[1], ctx->pattern[2]));
/* install new repeat context */
rep.prev = state->repeat;
state->repeat = &rep;
state->ptr = ptr;
i = SRE_MATCH(state, pattern + pattern[0], level + 1);
state->repeat = rep.prev;
ctx->u.rep = (SRE_REPEAT*) malloc(sizeof(*ctx->u.rep));
ctx->u.rep->count = -1;
ctx->u.rep->pattern = ctx->pattern;
ctx->u.rep->prev = state->repeat;
ctx->u.rep->last_ptr = NULL;
state->repeat = ctx->u.rep;
state->ptr = ctx->ptr;
#ifdef USE_RECURSION
ret = SRE_MATCH(state, ctx->pattern+ctx->pattern[0], level+1);
#else
DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
#endif
state->repeat = ctx->u.rep->prev;
free(ctx->u.rep);
return i;
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
RETURN_FAILURE;
case SRE_OP_MAX_UNTIL:
/* maximizing repeat */
......@@ -1163,119 +1256,328 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
/* FIXME: we probably need to deal with zero-width
matches in here... */
rp = state->repeat;
if (!rp)
return SRE_ERROR_STATE;
ctx->u.rep = state->repeat;
if (!ctx->u.rep)
RETURN_ERROR(SRE_ERROR_STATE);
state->ptr = ptr;
state->ptr = ctx->ptr;
count = rp->count + 1;
ctx->count = ctx->u.rep->count+1;
TRACE(("|%p|%p|MAX_UNTIL %d\n", pattern, ptr, count));
TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern,
ctx->ptr, ctx->count));
if (count < rp->pattern[1]) {
if (ctx->count < ctx->u.rep->pattern[1]) {
/* not enough matches */
rp->count = count;
ctx->u.rep->count = ctx->count;
#ifdef USE_RECURSION
/* RECURSIVE */
i = SRE_MATCH(state, rp->pattern + 3, level + 1);
if (i)
return i;
rp->count = count - 1;
state->ptr = ptr;
return 0;
ret = SRE_MATCH(state, ctx->u.rep->pattern+3, level+1);
#else
DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
ctx->u.rep->pattern+3);
#endif
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
ctx->u.rep->count = ctx->count-1;
state->ptr = ctx->ptr;
RETURN_FAILURE;
}
if (count < rp->pattern[2] || rp->pattern[2] == 65535) {
if ((ctx->count < ctx->u.rep->pattern[2] ||
ctx->u.rep->pattern[2] == 65535) &&
state->ptr != ctx->u.rep->last_ptr) {
/* we may have enough matches, but if we can
match another item, do so */
rp->count = count;
ctx->u.rep->count = ctx->count;
LASTMARK_SAVE();
i = mark_save(state, 0, lastmark, &mark_stack_base);
if (i < 0)
return i;
MARK_PUSH(ctx->lastmark);
/* zero-width match protection */
DATA_PUSH(&ctx->u.rep->last_ptr);
ctx->u.rep->last_ptr = state->ptr;
#ifdef USE_RECURSION
/* RECURSIVE */
i = SRE_MATCH(state, rp->pattern + 3, level + 1);
if (i)
return i;
i = mark_restore(state, 0, lastmark, &mark_stack_base);
if (i < 0)
return i;
ret = SRE_MATCH(state, ctx->u.rep->pattern+3, level+1);
#else
DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
ctx->u.rep->pattern+3);
#endif
DATA_POP(&ctx->u.rep->last_ptr);
if (ret) {
MARK_POP_DISCARD(ctx->lastmark);
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
MARK_POP(ctx->lastmark);
LASTMARK_RESTORE();
rp->count = count - 1;
state->ptr = ptr;
ctx->u.rep->count = ctx->count-1;
state->ptr = ctx->ptr;
}
/* cannot match more repeated items here. make sure the
tail matches */
state->repeat = rp->prev;
i = SRE_MATCH(state, pattern, level + 1);
if (i)
return i;
state->repeat = rp;
state->ptr = ptr;
return 0;
state->repeat = ctx->u.rep->prev;
#ifdef USE_RECURSION
ret = SRE_MATCH(state, ctx->pattern, level+1);
#else
DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
#endif
RETURN_ON_SUCCESS(ret);
state->repeat = ctx->u.rep;
state->ptr = ctx->ptr;
RETURN_FAILURE;
case SRE_OP_MIN_UNTIL:
/* minimizing repeat */
/* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
rp = state->repeat;
if (!rp)
return SRE_ERROR_STATE;
ctx->u.rep = state->repeat;
if (!ctx->u.rep)
RETURN_ERROR(SRE_ERROR_STATE);
state->ptr = ptr;
state->ptr = ctx->ptr;
count = rp->count + 1;
ctx->count = ctx->u.rep->count+1;
TRACE(("|%p|%p|MIN_UNTIL %d %p\n", pattern, ptr, count,
rp->pattern));
TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern,
ctx->ptr, ctx->count, ctx->u.rep->pattern));
if (count < rp->pattern[1]) {
if (ctx->count < ctx->u.rep->pattern[1]) {
/* not enough matches */
rp->count = count;
ctx->u.rep->count = ctx->count;
#ifdef USE_RECURSION
/* RECURSIVE */
i = SRE_MATCH(state, rp->pattern + 3, level + 1);
if (i)
return i;
rp->count = count-1;
state->ptr = ptr;
return 0;
ret = SRE_MATCH(state, ctx->u.rep->pattern+3, level+1);
#else
DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
ctx->u.rep->pattern+3);
#endif
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
ctx->u.rep->count = ctx->count-1;
state->ptr = ctx->ptr;
RETURN_FAILURE;
}
LASTMARK_SAVE();
/* see if the tail matches */
state->repeat = rp->prev;
i = SRE_MATCH(state, pattern, level + 1);
if (i)
return i;
state->ptr = ptr;
state->repeat = rp;
state->repeat = ctx->u.rep->prev;
#ifdef USE_RECURSION
ret = SRE_MATCH(state, ctx->pattern, level+1);
#else
DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
#endif
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
if (count >= rp->pattern[2] && rp->pattern[2] != 65535)
return 0;
state->repeat = ctx->u.rep;
state->ptr = ctx->ptr;
LASTMARK_RESTORE();
rp->count = count;
if (ctx->count >= ctx->u.rep->pattern[2]
&& ctx->u.rep->pattern[2] != 65535)
RETURN_FAILURE;
ctx->u.rep->count = ctx->count;
#ifdef USE_RECURSION
/* RECURSIVE */
i = SRE_MATCH(state, rp->pattern + 3, level + 1);
if (i)
return i;
rp->count = count - 1;
state->ptr = ptr;
ret = SRE_MATCH(state, ctx->u.rep->pattern+3, level+1);
#else
DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
ctx->u.rep->pattern+3);
#endif
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_SUCCESS;
}
ctx->u.rep->count = ctx->count-1;
state->ptr = ctx->ptr;
RETURN_FAILURE;
return 0;
case SRE_OP_GROUPREF:
/* match backreference */
TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
i = ctx->pattern[0];
{
int groupref = i+i;
if (groupref >= state->lastmark) {
RETURN_FAILURE;
} else {
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
if (!p || !e || e < p)
RETURN_FAILURE;
while (p < e) {
if (ctx->ptr >= end || *ctx->ptr != *p)
RETURN_FAILURE;
p++; ctx->ptr++;
}
}
}
ctx->pattern++;
break;
case SRE_OP_GROUPREF_IGNORE:
/* match backreference */
TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
i = ctx->pattern[0];
{
int groupref = i+i;
if (groupref >= state->lastmark) {
RETURN_FAILURE;
} else {
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
if (!p || !e || e < p)
RETURN_FAILURE;
while (p < e) {
if (ctx->ptr >= end ||
state->lower(*ctx->ptr) != state->lower(*p))
RETURN_FAILURE;
p++; ctx->ptr++;
}
}
}
ctx->pattern++;
break;
case SRE_OP_GROUPREF_EXISTS:
TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[0]));
/* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
i = ctx->pattern[0];
{
int groupref = i+i;
if (groupref >= state->lastmark) {
ctx->pattern += ctx->pattern[1];
break;
} else {
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
if (!p || !e || e < p) {
ctx->pattern += ctx->pattern[1];
break;
}
}
}
ctx->pattern += 2;
break;
case SRE_OP_ASSERT:
/* assert subpattern */
/* <ASSERT> <skip> <back> <pattern> */
TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[1]));
state->ptr = ctx->ptr - ctx->pattern[1];
if (state->ptr < state->beginning)
RETURN_FAILURE;
#ifdef USE_RECURSION
ret = SRE_MATCH(state, ctx->pattern+2, level+1);
#else
DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
#endif
RETURN_ON_FAILURE(ret);
ctx->pattern += ctx->pattern[0];
break;
case SRE_OP_ASSERT_NOT:
/* assert not subpattern */
/* <ASSERT_NOT> <skip> <back> <pattern> */
TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[1]));
state->ptr = ctx->ptr - ctx->pattern[1];
if (state->ptr >= state->beginning) {
#ifdef USE_RECURSION
ret = SRE_MATCH(state, ctx->pattern+2, level+1);
#else
DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
#endif
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_FAILURE;
}
}
ctx->pattern += ctx->pattern[0];
break;
case SRE_OP_FAILURE:
/* immediate failure */
TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
RETURN_FAILURE;
default:
TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, pattern[-1]));
return SRE_ERROR_ILLEGAL;
TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
ctx->pattern[-1]));
RETURN_ERROR(SRE_ERROR_ILLEGAL);
}
}
/* can't end up here */
/* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */
exit:
ctx_pos = ctx->last_ctx_pos;
jump = ctx->jump;
DATA_POP_DISCARD(ctx);
if (ctx_pos == -1)
return ret;
DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
#ifndef USE_RECURSION
switch (jump) {
case JUMP_MAX_UNTIL_2:
TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
goto jump_max_until_2;
case JUMP_MAX_UNTIL_3:
TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
goto jump_max_until_3;
case JUMP_MIN_UNTIL_2:
TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
goto jump_min_until_2;
case JUMP_MIN_UNTIL_3:
TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
goto jump_min_until_3;
case JUMP_BRANCH:
TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
goto jump_branch;
case JUMP_MAX_UNTIL_1:
TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
goto jump_max_until_1;
case JUMP_MIN_UNTIL_1:
TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
goto jump_min_until_1;
case JUMP_REPEAT:
TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
goto jump_repeat;
case JUMP_REPEAT_ONE_1:
TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
goto jump_repeat_one_1;
case JUMP_REPEAT_ONE_2:
TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
goto jump_repeat_one_2;
case JUMP_MIN_REPEAT_ONE:
TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
goto jump_min_repeat_one;
case JUMP_ASSERT:
TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
goto jump_assert;
case JUMP_ASSERT_NOT:
TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
goto jump_assert_not;
case JUMP_NONE:
TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret));
break;
}
#endif
return ret; /* should never get here */
}
LOCAL(int)
......@@ -1511,16 +1813,15 @@ sre_getlower(PyObject* self, PyObject* args)
LOCAL(void)
state_reset(SRE_STATE* state)
{
state->lastmark = 0;
/* FIXME: dynamic! */
memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);
/*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
state->lastmark = -1;
state->lastindex = -1;
state->repeat = NULL;
mark_fini(state);
data_stack_dealloc(state);
}
static void*
......@@ -1600,6 +1901,7 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
memset(state, 0, sizeof(SRE_STATE));
state->lastmark = -1;
state->lastindex = -1;
ptr = getstring(string, &length, &charsize);
......@@ -1647,7 +1949,7 @@ LOCAL(void)
state_fini(SRE_STATE* state)
{
Py_XDECREF(state->string);
mark_fini(state);
data_stack_dealloc(state);
}
/* calculate offset from start of string */
......@@ -1661,7 +1963,7 @@ state_getslice(SRE_STATE* state, int index, PyObject* string, int empty)
index = (index - 1) * 2;
if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
if (empty)
/* want empty string */
i = j = 0;
......
......@@ -55,6 +55,7 @@ typedef unsigned int (*SRE_TOLOWER_HOOK)(unsigned int ch);
typedef struct SRE_REPEAT_T {
int count;
SRE_CODE* pattern; /* points to REPEAT operator arguments */
void* last_ptr; /* helper to check for infinite loops */
struct SRE_REPEAT_T *prev; /* points to previous repeat context */
} SRE_REPEAT;
......@@ -74,10 +75,11 @@ typedef struct {
int lastmark;
void* mark[SRE_MARK_SIZE];
/* dynamically allocated stuff */
void** mark_stack;
int mark_stack_size;
int mark_stack_base;
SRE_REPEAT *repeat; /* current repeat context */
char* data_stack;
int data_stack_size;
int data_stack_base;
/* current repeat context */
SRE_REPEAT *repeat;
/* hooks */
SRE_TOLOWER_HOOK lower;
} SRE_STATE;
......
......@@ -11,7 +11,7 @@
* See the _sre.c file for information on usage and redistribution.
*/
#define SRE_MAGIC 20030419
#define SRE_MAGIC 20031017
#define SRE_OP_FAILURE 0
#define SRE_OP_SUCCESS 1
#define SRE_OP_ANY 2
......@@ -25,24 +25,25 @@
#define SRE_OP_CHARSET 10
#define SRE_OP_BIGCHARSET 11
#define SRE_OP_GROUPREF 12
#define SRE_OP_GROUPREF_IGNORE 13
#define SRE_OP_IN 14
#define SRE_OP_IN_IGNORE 15
#define SRE_OP_INFO 16
#define SRE_OP_JUMP 17
#define SRE_OP_LITERAL 18
#define SRE_OP_LITERAL_IGNORE 19
#define SRE_OP_MARK 20
#define SRE_OP_MAX_UNTIL 21
#define SRE_OP_MIN_UNTIL 22
#define SRE_OP_NOT_LITERAL 23
#define SRE_OP_NOT_LITERAL_IGNORE 24
#define SRE_OP_NEGATE 25
#define SRE_OP_RANGE 26
#define SRE_OP_REPEAT 27
#define SRE_OP_REPEAT_ONE 28
#define SRE_OP_SUBPATTERN 29
#define SRE_OP_MIN_REPEAT_ONE 30
#define SRE_OP_GROUPREF_EXISTS 13
#define SRE_OP_GROUPREF_IGNORE 14
#define SRE_OP_IN 15
#define SRE_OP_IN_IGNORE 16
#define SRE_OP_INFO 17
#define SRE_OP_JUMP 18
#define SRE_OP_LITERAL 19
#define SRE_OP_LITERAL_IGNORE 20
#define SRE_OP_MARK 21
#define SRE_OP_MAX_UNTIL 22
#define SRE_OP_MIN_UNTIL 23
#define SRE_OP_NOT_LITERAL 24
#define SRE_OP_NOT_LITERAL_IGNORE 25
#define SRE_OP_NEGATE 26
#define SRE_OP_RANGE 27
#define SRE_OP_REPEAT 28
#define SRE_OP_REPEAT_ONE 29
#define SRE_OP_SUBPATTERN 30
#define SRE_OP_MIN_REPEAT_ONE 31
#define SRE_AT_BEGINNING 0
#define SRE_AT_BEGINNING_LINE 1
#define SRE_AT_BEGINNING_STRING 2
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment