Commit 03d6ee38 authored by Serhiy Storchaka

Issue #18684: Fixed reading out of the buffer in the re module.

parent 0357268d
...@@ -66,6 +66,8 @@ Core and Builtins ...@@ -66,6 +66,8 @@ Core and Builtins
Library Library
------- -------
- Issue #18684: Fixed reading out of the buffer in the re module.
- Issue #24259: tarfile now raises a ReadError if an archive is truncated - Issue #24259: tarfile now raises a ReadError if an archive is truncated
inside a data segment. inside a data segment.
......
...@@ -883,7 +883,7 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw) ...@@ -883,7 +883,7 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
} }
if (state.start == state.ptr) { if (state.start == state.ptr) {
if (last == state.end) if (last == state.end || state.ptr == state.end)
break; break;
/* skip one character */ /* skip one character */
state.start = (void*) ((char*) state.ptr + state.charsize); state.start = (void*) ((char*) state.ptr + state.charsize);
...@@ -1081,6 +1081,8 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string, ...@@ -1081,6 +1081,8 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
next: next:
/* move on */ /* move on */
if (state.ptr == state.end)
break;
if (state.ptr == state.start) if (state.ptr == state.start)
state.start = (void*) ((char*) state.ptr + state.charsize); state.start = (void*) ((char*) state.ptr + state.charsize);
else else
...@@ -2567,6 +2569,9 @@ scanner_match(ScannerObject* self, PyObject *unused) ...@@ -2567,6 +2569,9 @@ scanner_match(ScannerObject* self, PyObject *unused)
PyObject* match; PyObject* match;
Py_ssize_t status; Py_ssize_t status;
if (state->start == NULL)
Py_RETURN_NONE;
state_reset(state); state_reset(state);
state->ptr = state->start; state->ptr = state->start;
...@@ -2578,10 +2583,14 @@ scanner_match(ScannerObject* self, PyObject *unused) ...@@ -2578,10 +2583,14 @@ scanner_match(ScannerObject* self, PyObject *unused)
match = pattern_new_match((PatternObject*) self->pattern, match = pattern_new_match((PatternObject*) self->pattern,
state, status); state, status);
if (status == 0 || state->ptr == state->start) if (status == 0)
state->start = NULL;
else if (state->ptr != state->start)
state->start = state->ptr;
else if (state->ptr != state->end)
state->start = (void*) ((char*) state->ptr + state->charsize); state->start = (void*) ((char*) state->ptr + state->charsize);
else else
state->start = state->ptr; state->start = NULL;
return match; return match;
} }
...@@ -2594,6 +2603,9 @@ scanner_search(ScannerObject* self, PyObject *unused) ...@@ -2594,6 +2603,9 @@ scanner_search(ScannerObject* self, PyObject *unused)
PyObject* match; PyObject* match;
Py_ssize_t status; Py_ssize_t status;
if (state->start == NULL)
Py_RETURN_NONE;
state_reset(state); state_reset(state);
state->ptr = state->start; state->ptr = state->start;
...@@ -2605,10 +2617,14 @@ scanner_search(ScannerObject* self, PyObject *unused) ...@@ -2605,10 +2617,14 @@ scanner_search(ScannerObject* self, PyObject *unused)
match = pattern_new_match((PatternObject*) self->pattern, match = pattern_new_match((PatternObject*) self->pattern,
state, status); state, status);
if (status == 0 || state->ptr == state->start) if (status == 0)
state->start = NULL;
else if (state->ptr != state->start)
state->start = state->ptr;
else if (state->ptr != state->end)
state->start = (void*) ((char*) state->ptr + state->charsize); state->start = (void*) ((char*) state->ptr + state->charsize);
else else
state->start = state->ptr; state->start = NULL;
return match; return match;
} }
......
...@@ -30,7 +30,7 @@ SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at) ...@@ -30,7 +30,7 @@ SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
SRE_IS_LINEBREAK((int) ptr[-1])); SRE_IS_LINEBREAK((int) ptr[-1]));
case SRE_AT_END: case SRE_AT_END:
return (((void*) (ptr+1) == state->end && return (((SRE_CHAR *)state->end - ptr == 1 &&
SRE_IS_LINEBREAK((int) ptr[0])) || SRE_IS_LINEBREAK((int) ptr[0])) ||
((void*) ptr == state->end)); ((void*) ptr == state->end));
...@@ -1093,9 +1093,9 @@ entrance: ...@@ -1093,9 +1093,9 @@ entrance:
/* <ASSERT> <skip> <back> <pattern> */ /* <ASSERT> <skip> <back> <pattern> */
TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern, TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[1])); ctx->ptr, ctx->pattern[1]));
state->ptr = ctx->ptr - ctx->pattern[1]; if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1])
if (state->ptr < state->beginning)
RETURN_FAILURE; RETURN_FAILURE;
state->ptr = ctx->ptr - ctx->pattern[1];
DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2); DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2);
RETURN_ON_FAILURE(ret); RETURN_ON_FAILURE(ret);
ctx->pattern += ctx->pattern[0]; ctx->pattern += ctx->pattern[0];
...@@ -1106,8 +1106,8 @@ entrance: ...@@ -1106,8 +1106,8 @@ entrance:
/* <ASSERT_NOT> <skip> <back> <pattern> */ /* <ASSERT_NOT> <skip> <back> <pattern> */
TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern, TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
ctx->ptr, ctx->pattern[1])); ctx->ptr, ctx->pattern[1]));
state->ptr = ctx->ptr - ctx->pattern[1]; if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) {
if (state->ptr >= state->beginning) { state->ptr = ctx->ptr - ctx->pattern[1];
DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2); DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
if (ret) { if (ret) {
RETURN_ON_ERROR(ret); RETURN_ON_ERROR(ret);
...@@ -1199,12 +1199,20 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) ...@@ -1199,12 +1199,20 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
SRE_CODE* overlap = NULL; SRE_CODE* overlap = NULL;
int flags = 0; int flags = 0;
if (ptr > end)
return 0;
if (pattern[0] == SRE_OP_INFO) { if (pattern[0] == SRE_OP_INFO) {
/* optimization info block */ /* optimization info block */
/* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
flags = pattern[2]; flags = pattern[2];
if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
TRACE(("reject (got %u chars, need %u)\n",
(unsigned int)(end - ptr), pattern[3]));
return 0;
}
if (pattern[3] > 1) { if (pattern[3] > 1) {
/* adjust end point (but make sure we leave at least one /* adjust end point (but make sure we leave at least one
character in there, so literal search will work) */ character in there, so literal search will work) */
...@@ -1322,15 +1330,18 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) ...@@ -1322,15 +1330,18 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
break; break;
ptr++; ptr++;
} }
} else } else {
/* general case */ /* general case */
while (ptr <= end) { assert(ptr <= end);
while (1) {
TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
state->start = state->ptr = ptr++; state->start = state->ptr = ptr;
status = SRE(match)(state, pattern, 0); status = SRE(match)(state, pattern, 0);
if (status != 0) if (status != 0 || ptr >= end)
break; break;
ptr++;
} }
}
return status; return status;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment