Commit dda6696b authored by Guido van Rossum

AMK's revised version of the previous patch.

parent 0ef1b079
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
*************************************************/ *************************************************/
#define PCRE_VERSION "1.07 16-Feb-1998" #define PCRE_VERSION "1.09 28-Apr-1998"
/* This is a library of functions to support regular expressions whose syntax /* This is a library of functions to support regular expressions whose syntax
...@@ -80,11 +80,12 @@ only some permitted at run or study time. */ ...@@ -80,11 +80,12 @@ only some permitted at run or study time. */
#ifdef FOR_PYTHON #ifdef FOR_PYTHON
#define PUBLIC_OPTIONS \ #define PUBLIC_OPTIONS \
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_LOCALE) PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY| \
PCRE_LOCALE)
#else #else
#define PUBLIC_OPTIONS \ #define PUBLIC_OPTIONS \
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA) PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY)
#endif #endif
#define PUBLIC_EXEC_OPTIONS \ #define PUBLIC_EXEC_OPTIONS \
(PCRE_CASELESS|PCRE_ANCHORED|PCRE_MULTILINE|PCRE_NOTBOL|PCRE_NOTEOL| \ (PCRE_CASELESS|PCRE_ANCHORED|PCRE_MULTILINE|PCRE_NOTBOL|PCRE_NOTEOL| \
......
...@@ -34,6 +34,7 @@ extern "C" { ...@@ -34,6 +34,7 @@ extern "C" {
#define PCRE_EXTRA 0x0040 #define PCRE_EXTRA 0x0040
#define PCRE_NOTBOL 0x0080 #define PCRE_NOTBOL 0x0080
#define PCRE_NOTEOL 0x0100 #define PCRE_NOTEOL 0x0100
#define PCRE_UNGREEDY 0x0400
#ifdef FOR_PYTHON #ifdef FOR_PYTHON
#define PCRE_LOCALE 0x0200 #define PCRE_LOCALE 0x0200
#endif #endif
......
...@@ -1216,6 +1216,7 @@ compile_branch(int options, int *brackets, uschar **codeptr, ...@@ -1216,6 +1216,7 @@ compile_branch(int options, int *brackets, uschar **codeptr,
int repeat_type, op_type; int repeat_type, op_type;
int repeat_min, repeat_max; int repeat_min, repeat_max;
int bravalue, length; int bravalue, length;
int greedy_default, greedy_non_default;
register int c; register int c;
register uschar *code = *codeptr; register uschar *code = *codeptr;
const uschar *ptr = *ptrptr; const uschar *ptr = *ptrptr;
...@@ -1224,6 +1225,11 @@ uschar *previous = NULL; ...@@ -1224,6 +1225,11 @@ uschar *previous = NULL;
uschar class[32]; uschar class[32];
uschar *class_flag; /* Pointer to the single-byte flag for OP_CLASS_L */ uschar *class_flag; /* Pointer to the single-byte flag for OP_CLASS_L */
/* Set up the default and non-default settings for greediness */
greedy_default = ((options & PCRE_UNGREEDY) != 0);
greedy_non_default = greedy_default ^ 1;
/* Switch on next character until the end of the branch */ /* Switch on next character until the end of the branch */
for (;; ptr++) for (;; ptr++)
...@@ -1536,10 +1542,13 @@ for (;; ptr++) ...@@ -1536,10 +1542,13 @@ for (;; ptr++)
goto FAILED; goto FAILED;
} }
/* If the next character is '?' this is a minimizing repeat. Advance to the /* If the next character is '?' this is a minimizing repeat, by default,
but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
next character. */ next character. */
if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0; if (ptr[1] == '?')
{ repeat_type = greedy_non_default; ptr++; }
else repeat_type = greedy_default;
/* If the maximum is zero then the minimum must also be zero; Perl allows /* If the maximum is zero then the minimum must also be zero; Perl allows
this case, so we do too - by simply omitting the item altogether. */ this case, so we do too - by simply omitting the item altogether. */
...@@ -1628,14 +1637,20 @@ for (;; ptr++) ...@@ -1628,14 +1637,20 @@ for (;; ptr++)
/* If the minimum is 1 and the previous item was a character string, /* If the minimum is 1 and the previous item was a character string,
we either have to put back the item that got cancelled if the string we either have to put back the item that got cancelled if the string
length was 1, or add the character back onto the end of a longer length was 1, or add the character back onto the end of a longer
string. For a character type nothing need be done; it will just get put string. For a character type nothing need be done; it will just get
back naturally. */ put back naturally. Note that the final character is always going to
get added below. */
else if (*previous == OP_CHARS) else if (*previous == OP_CHARS)
{ {
if (code == previous) code += 2; else previous[1]++; if (code == previous) code += 2; else previous[1]++;
} }
/* For a single negated character we also have to put back the
item that got cancelled. */
else if (*previous == OP_NOT) code++;
/* If the maximum is unlimited, insert an OP_STAR. */ /* If the maximum is unlimited, insert an OP_STAR. */
if (repeat_max < 0) if (repeat_max < 0)
...@@ -2484,7 +2499,7 @@ while ((c = *(++ptr)) != 0) ...@@ -2484,7 +2499,7 @@ while ((c = *(++ptr)) != 0)
ptr += 2; ptr += 2;
break; break;
} }
/* Else fall thourh */ /* Else fall through */
/* Else loop setting valid options until ) is met. Anything else is an /* Else loop setting valid options until ) is met. Anything else is an
error. */ error. */
...@@ -2725,14 +2740,15 @@ printf("Length = %d top_bracket = %d top_backref=%d\n", ...@@ -2725,14 +2740,15 @@ printf("Length = %d top_bracket = %d top_backref=%d\n",
if (re->options != 0) if (re->options != 0)
{ {
printf("%s%s%s%s%s%s%s\n", printf("%s%s%s%s%s%s%s%s\n",
((re->options & PCRE_ANCHORED) != 0)? "anchored " : "", ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
((re->options & PCRE_CASELESS) != 0)? "caseless " : "", ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
((re->options & PCRE_EXTENDED) != 0)? "extended " : "", ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
((re->options & PCRE_MULTILINE) != 0)? "multiline " : "", ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
((re->options & PCRE_DOTALL) != 0)? "dotall " : "", ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "", ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
((re->options & PCRE_EXTRA) != 0)? "extra " : ""); ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
} }
if ((re->options & PCRE_FIRSTSET) != 0) if ((re->options & PCRE_FIRSTSET) != 0)
...@@ -3070,7 +3086,7 @@ static int grow_stack(match_data *md) ...@@ -3070,7 +3086,7 @@ static int grow_stack(match_data *md)
if (md->offset_top == NULL || md->eptr == NULL || md->ecode == NULL || if (md->offset_top == NULL || md->eptr == NULL || md->ecode == NULL ||
md->off_num == NULL || md->r1 == NULL || md->r2 == NULL) md->off_num == NULL || md->r1 == NULL || md->r2 == NULL)
{ {
PyErr_SetString(PyExc_MemoryError, "Can't increase failure stack for re operation"); PyErr_NoMemory();
longjmp(md->error_env, 1); longjmp(md->error_env, 1);
} }
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment