Commit 02758d66 authored by Andrew Dalke

Eked out another 3% or so of performance in split whitespace by cleaning up the algorithm.

parent 1d2b0e3f
...@@ -1460,7 +1460,7 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; ...@@ -1460,7 +1460,7 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
else \ else \
Py_DECREF(str); Py_DECREF(str);
#define SPLIT_ADD(data, left, right) \ #define SPLIT_ADD(data, left, right) { \
str = PyString_FromStringAndSize((data) + (left), \ str = PyString_FromStringAndSize((data) + (left), \
(right) - (left)); \ (right) - (left)); \
if (str == NULL) \ if (str == NULL) \
...@@ -1475,11 +1475,16 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; ...@@ -1475,11 +1475,16 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
else \ else \
Py_DECREF(str); \ Py_DECREF(str); \
} \ } \
count++; count++; }
/* Always force the list to the expected size. */ /* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)list)->ob_size = count; #define FIX_PREALLOC_SIZE(list) ((PyListObject *)list)->ob_size = count;
#define SKIP_SPACE(s, i, len) { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
#define RSKIP_SPACE(s, i) { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
#define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
static PyObject * static PyObject *
split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
{ {
...@@ -1490,23 +1495,22 @@ split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) ...@@ -1490,23 +1495,22 @@ split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
if (list == NULL) if (list == NULL)
return NULL; return NULL;
for (i = j = 0; i < len; ) { i = j = 0;
while (i < len && isspace(Py_CHARMASK(s[i])))
i++; while (maxsplit-- > 0) {
j = i; SKIP_SPACE(s, i, len);
while (i < len && !isspace(Py_CHARMASK(s[i]))) if (i==len) break;
i++; j = i; i++;
if (j < i) { SKIP_NONSPACE(s, i, len);
if (maxsplit-- <= 0) SPLIT_ADD(s, j, i);
break;
SPLIT_ADD(s, j, i);
while (i < len && isspace(Py_CHARMASK(s[i])))
i++;
j = i;
}
} }
if (j < len) {
SPLIT_ADD(s, j, len); if (i < len) {
/* Only occurs when maxsplit was reached */
/* Skip any remaining whitespace and copy to end of string */
SKIP_SPACE(s, i, len);
if (i != len)
SPLIT_ADD(s, i, len);
} }
FIX_PREALLOC_SIZE(list); FIX_PREALLOC_SIZE(list);
return list; return list;
...@@ -1680,23 +1684,22 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) ...@@ -1680,23 +1684,22 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
if (list == NULL) if (list == NULL)
return NULL; return NULL;
for (i = j = len - 1; i >= 0; ) { i = j = len-1;
while (i >= 0 && isspace(Py_CHARMASK(s[i])))
i--; while (maxsplit-- > 0) {
j = i; RSKIP_SPACE(s, i);
while (i >= 0 && !isspace(Py_CHARMASK(s[i]))) if (i<0) break;
i--; j = i; i--;
if (j > i) { RSKIP_NONSPACE(s, i);
if (maxsplit-- <= 0) SPLIT_ADD(s, i + 1, j + 1);
break; }
SPLIT_ADD(s, i + 1, j + 1); if (i >= 0) {
while (i >= 0 && isspace(Py_CHARMASK(s[i]))) /* Only occurs when maxsplit was reached */
i--; /* Skip any remaining whitespace and copy to beginning of string */
j = i; RSKIP_SPACE(s, i);
} if (i >= 0)
} SPLIT_ADD(s, 0, i + 1);
if (j >= 0) {
SPLIT_ADD(s, 0, j + 1);
} }
FIX_PREALLOC_SIZE(list); FIX_PREALLOC_SIZE(list);
if (PyList_Reverse(list) < 0) if (PyList_Reverse(list) < 0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment