Commit 730a9e57 authored by Fredrik Lundh

another major speedup: let sre.sub/subn check for escapes in the
template string, and don't call the template compiler if we can
avoid it.
parent 8ed6e497
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
* 2001-10-18 fl fixed group reset issue (from Matthew Mueller) * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
* 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1 * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
* 2001-10-21 fl added sub/subn primitive * 2001-10-21 fl added sub/subn primitive
* 2001-10-22 fl check for literal sub/subn templates
* *
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
* *
...@@ -359,6 +360,7 @@ mark_restore(SRE_STATE* state, int lo, int hi) ...@@ -359,6 +360,7 @@ mark_restore(SRE_STATE* state, int lo, int hi)
#define SRE_INFO sre_info #define SRE_INFO sre_info
#define SRE_MATCH sre_match #define SRE_MATCH sre_match
#define SRE_SEARCH sre_search #define SRE_SEARCH sre_search
#define SRE_LITERAL_TEMPLATE sre_literal_template
#if defined(HAVE_UNICODE) #if defined(HAVE_UNICODE)
...@@ -366,6 +368,7 @@ mark_restore(SRE_STATE* state, int lo, int hi) ...@@ -366,6 +368,7 @@ mark_restore(SRE_STATE* state, int lo, int hi)
#include "_sre.c" #include "_sre.c"
#undef SRE_RECURSIVE #undef SRE_RECURSIVE
#undef SRE_LITERAL_TEMPLATE
#undef SRE_SEARCH #undef SRE_SEARCH
#undef SRE_MATCH #undef SRE_MATCH
#undef SRE_INFO #undef SRE_INFO
...@@ -383,6 +386,7 @@ mark_restore(SRE_STATE* state, int lo, int hi) ...@@ -383,6 +386,7 @@ mark_restore(SRE_STATE* state, int lo, int hi)
#define SRE_INFO sre_uinfo #define SRE_INFO sre_uinfo
#define SRE_MATCH sre_umatch #define SRE_MATCH sre_umatch
#define SRE_SEARCH sre_usearch #define SRE_SEARCH sre_usearch
#define SRE_LITERAL_TEMPLATE sre_uliteral_template
#endif #endif
#endif /* SRE_RECURSIVE */ #endif /* SRE_RECURSIVE */
...@@ -1282,6 +1286,15 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern) ...@@ -1282,6 +1286,15 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
return status; return status;
} }
LOCAL(int)
SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len)
{
    /* Decide whether a substitution template is a plain literal,
       i.e. whether it is free of escapes.  The first len characters
       starting at ptr are scanned for a backslash.

       Returns 0 as soon as a backslash is seen, and 1 when none is
       found (which includes the case where len is zero or negative,
       since nothing is scanned then). */
    int i;

    for (i = 0; i < len; i++) {
        if (ptr[i] == '\\')
            return 0;
    }

    return 1;
}
#if !defined(SRE_RECURSIVE) #if !defined(SRE_RECURSIVE)
...@@ -1388,27 +1401,24 @@ state_reset(SRE_STATE* state) ...@@ -1388,27 +1401,24 @@ state_reset(SRE_STATE* state)
mark_fini(state); mark_fini(state);
} }
LOCAL(PyObject*) static void*
state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, getstring(PyObject* string, int* p_length, int* p_charsize)
int start, int end)
{ {
/* prepare state object */ /* given a python object, return a data pointer, a length (in
characters), and a character size. return NULL if the object
is not a string (or not compatible) */
PyBufferProcs *buffer; PyBufferProcs *buffer;
int size, bytes; int size, bytes, charsize;
void* ptr; void* ptr;
memset(state, 0, sizeof(SRE_STATE));
state->lastindex = -1;
#if defined(HAVE_UNICODE) #if defined(HAVE_UNICODE)
if (PyUnicode_Check(string)) { if (PyUnicode_Check(string)) {
/* unicode strings doesn't always support the buffer interface */ /* unicode strings doesn't always support the buffer interface */
ptr = (void*) PyUnicode_AS_DATA(string); ptr = (void*) PyUnicode_AS_DATA(string);
bytes = PyUnicode_GET_DATA_SIZE(string); bytes = PyUnicode_GET_DATA_SIZE(string);
size = PyUnicode_GET_SIZE(string); size = PyUnicode_GET_SIZE(string);
state->charsize = sizeof(Py_UNICODE); charsize = sizeof(Py_UNICODE);
} else { } else {
#endif #endif
...@@ -1436,10 +1446,10 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, ...@@ -1436,10 +1446,10 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
#endif #endif
if (PyString_Check(string) || bytes == size) if (PyString_Check(string) || bytes == size)
state->charsize = 1; charsize = 1;
#if defined(HAVE_UNICODE) #if defined(HAVE_UNICODE)
else if (bytes == (int) (size * sizeof(Py_UNICODE))) else if (bytes == (int) (size * sizeof(Py_UNICODE)))
state->charsize = sizeof(Py_UNICODE); charsize = sizeof(Py_UNICODE);
#endif #endif
else { else {
PyErr_SetString(PyExc_TypeError, "buffer size mismatch"); PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
...@@ -1450,16 +1460,42 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, ...@@ -1450,16 +1460,42 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
} }
#endif #endif
*p_length = size;
*p_charsize = charsize;
return ptr;
}
LOCAL(PyObject*)
state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
int start, int end)
{
/* prepare state object */
int length;
int charsize;
void* ptr;
memset(state, 0, sizeof(SRE_STATE));
state->lastindex = -1;
ptr = getstring(string, &length, &charsize);
if (!ptr)
return NULL;
/* adjust boundaries */ /* adjust boundaries */
if (start < 0) if (start < 0)
start = 0; start = 0;
else if (start > size) else if (start > length)
start = size; start = length;
if (end < 0) if (end < 0)
end = 0; end = 0;
else if (end > size) else if (end > length)
end = size; end = length;
state->charsize = charsize;
state->beginning = ptr; state->beginning = ptr;
...@@ -2038,6 +2074,7 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string, ...@@ -2038,6 +2074,7 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
PyObject* filter; PyObject* filter;
PyObject* args; PyObject* args;
PyObject* match; PyObject* match;
void* ptr;
int status; int status;
int n; int n;
int i, b, e; int i, b, e;
...@@ -2049,8 +2086,27 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string, ...@@ -2049,8 +2086,27 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
Py_INCREF(filter); Py_INCREF(filter);
filter_is_callable = 1; filter_is_callable = 1;
} else { } else {
/* if not callable, call the template compiler. it may return /* if not callable, check if it's a literal string */
either a filter function or a literal string */ int literal;
ptr = getstring(template, &n, &b);
if (ptr) {
if (b == 1) {
literal = sre_literal_template(ptr, n);
} else {
#if defined(HAVE_UNICODE)
literal = sre_uliteral_template(ptr, n);
#endif
}
} else {
PyErr_Clear();
literal = 0;
}
if (literal) {
filter = template;
Py_INCREF(filter);
filter_is_callable = 0;
} else {
/* not a literal; hand it over to the template compiler */
filter = call( filter = call(
SRE_MODULE, "_subx", SRE_MODULE, "_subx",
Py_BuildValue("OO", self, template) Py_BuildValue("OO", self, template)
...@@ -2059,6 +2115,7 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string, ...@@ -2059,6 +2115,7 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
return NULL; return NULL;
filter_is_callable = PyCallable_Check(filter); filter_is_callable = PyCallable_Check(filter);
} }
}
string = state_init(&state, self, string, 0, INT_MAX); string = state_init(&state, self, string, 0, INT_MAX);
if (!string) if (!string)
...@@ -2132,10 +2189,12 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string, ...@@ -2132,10 +2189,12 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
} }
/* add to list */ /* add to list */
if (item != Py_None) {
status = PyList_Append(list, item); status = PyList_Append(list, item);
Py_DECREF(item); Py_DECREF(item);
if (status < 0) if (status < 0)
goto error; goto error;
}
i = e; i = e;
n = n + 1; n = n + 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment