Commit 26015494 authored by Skip Montanaro's avatar Skip Montanaro

When splitting, avoid making a copy of the string if the split doesn't find

anything (issue 1538).
parent 54662460
...@@ -12,6 +12,9 @@ What's New in Python 2.6 alpha 1? ...@@ -12,6 +12,9 @@ What's New in Python 2.6 alpha 1?
Core and builtins Core and builtins
----------------- -----------------
- Issue #1538: Avoid copying string in split/rsplit if the split
char is not found.
- Issue #1553: An erroneous __length_hint__ can make list() raise a - Issue #1553: An erroneous __length_hint__ can make list() raise a
SystemError. SystemError.
......
...@@ -1403,8 +1403,9 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; ...@@ -1403,8 +1403,9 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
#define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; } #define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
Py_LOCAL_INLINE(PyObject *) Py_LOCAL_INLINE(PyObject *)
split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
{ {
const char *s = PyString_AS_STRING(self);
Py_ssize_t i, j, count=0; Py_ssize_t i, j, count=0;
PyObject *str; PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit)); PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
...@@ -1419,6 +1420,13 @@ split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) ...@@ -1419,6 +1420,13 @@ split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
if (i==len) break; if (i==len) break;
j = i; i++; j = i; i++;
SKIP_NONSPACE(s, i, len); SKIP_NONSPACE(s, i, len);
if (j == 0 && i == len && PyString_CheckExact(self)) {
/* No whitespace in self, so just use it as list[0] */
Py_INCREF(self);
PyList_SET_ITEM(list, 0, (PyObject *)self);
count++;
break;
}
SPLIT_ADD(s, j, i); SPLIT_ADD(s, j, i);
} }
...@@ -1437,8 +1445,9 @@ split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) ...@@ -1437,8 +1445,9 @@ split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
} }
Py_LOCAL_INLINE(PyObject *) Py_LOCAL_INLINE(PyObject *)
split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{ {
const char *s = PyString_AS_STRING(self);
register Py_ssize_t i, j, count=0; register Py_ssize_t i, j, count=0;
PyObject *str; PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
...@@ -1457,7 +1466,13 @@ split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) ...@@ -1457,7 +1466,13 @@ split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
} }
} }
} }
if (i <= len) { if (i == 0 && count == 0 && PyString_CheckExact(self)) {
/* ch not in self, so just use self as list[0] */
Py_INCREF(self);
PyList_SET_ITEM(list, 0, (PyObject *)self);
count++;
}
else if (i <= len) {
SPLIT_ADD(s, i, len); SPLIT_ADD(s, i, len);
} }
FIX_PREALLOC_SIZE(list); FIX_PREALLOC_SIZE(list);
...@@ -1492,7 +1507,7 @@ string_split(PyStringObject *self, PyObject *args) ...@@ -1492,7 +1507,7 @@ string_split(PyStringObject *self, PyObject *args)
if (maxsplit < 0) if (maxsplit < 0)
maxsplit = PY_SSIZE_T_MAX; maxsplit = PY_SSIZE_T_MAX;
if (subobj == Py_None) if (subobj == Py_None)
return split_whitespace(s, len, maxsplit); return split_whitespace(self, len, maxsplit);
if (PyString_Check(subobj)) { if (PyString_Check(subobj)) {
sub = PyString_AS_STRING(subobj); sub = PyString_AS_STRING(subobj);
n = PyString_GET_SIZE(subobj); n = PyString_GET_SIZE(subobj);
...@@ -1509,7 +1524,7 @@ string_split(PyStringObject *self, PyObject *args) ...@@ -1509,7 +1524,7 @@ string_split(PyStringObject *self, PyObject *args)
return NULL; return NULL;
} }
else if (n == 1) else if (n == 1)
return split_char(s, len, sub[0], maxsplit); return split_char(self, len, sub[0], maxsplit);
list = PyList_New(PREALLOC_SIZE(maxsplit)); list = PyList_New(PREALLOC_SIZE(maxsplit));
if (list == NULL) if (list == NULL)
...@@ -1609,8 +1624,9 @@ string_rpartition(PyStringObject *self, PyObject *sep_obj) ...@@ -1609,8 +1624,9 @@ string_rpartition(PyStringObject *self, PyObject *sep_obj)
} }
Py_LOCAL_INLINE(PyObject *) Py_LOCAL_INLINE(PyObject *)
rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
{ {
const char *s = PyString_AS_STRING(self);
Py_ssize_t i, j, count=0; Py_ssize_t i, j, count=0;
PyObject *str; PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit)); PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
...@@ -1625,6 +1641,13 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) ...@@ -1625,6 +1641,13 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
if (i<0) break; if (i<0) break;
j = i; i--; j = i; i--;
RSKIP_NONSPACE(s, i); RSKIP_NONSPACE(s, i);
if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
/* No whitespace in self, so just use it as list[0] */
Py_INCREF(self);
PyList_SET_ITEM(list, 0, (PyObject *)self);
count++;
break;
}
SPLIT_ADD(s, i + 1, j + 1); SPLIT_ADD(s, i + 1, j + 1);
} }
if (i >= 0) { if (i >= 0) {
...@@ -1645,8 +1668,9 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) ...@@ -1645,8 +1668,9 @@ rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
} }
Py_LOCAL_INLINE(PyObject *) Py_LOCAL_INLINE(PyObject *)
rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{ {
const char *s = PyString_AS_STRING(self);
register Py_ssize_t i, j, count=0; register Py_ssize_t i, j, count=0;
PyObject *str; PyObject *str;
PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
...@@ -1664,7 +1688,13 @@ rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) ...@@ -1664,7 +1688,13 @@ rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
} }
} }
} }
if (j >= -1) { if (i < 0 && count == 0 && PyString_CheckExact(self)) {
/* ch not in self, so just use self as list[0] */
Py_INCREF(self);
PyList_SET_ITEM(list, 0, (PyObject *)self);
count++;
}
else if (j >= -1) {
SPLIT_ADD(s, 0, j + 1); SPLIT_ADD(s, 0, j + 1);
} }
FIX_PREALLOC_SIZE(list); FIX_PREALLOC_SIZE(list);
...@@ -1691,7 +1721,7 @@ string_rsplit(PyStringObject *self, PyObject *args) ...@@ -1691,7 +1721,7 @@ string_rsplit(PyStringObject *self, PyObject *args)
{ {
Py_ssize_t len = PyString_GET_SIZE(self), n, i, j; Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Py_ssize_t maxsplit = -1, count=0; Py_ssize_t maxsplit = -1, count=0;
const char *s = PyString_AS_STRING(self), *sub; const char *s, *sub;
PyObject *list, *str, *subobj = Py_None; PyObject *list, *str, *subobj = Py_None;
if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
...@@ -1699,7 +1729,7 @@ string_rsplit(PyStringObject *self, PyObject *args) ...@@ -1699,7 +1729,7 @@ string_rsplit(PyStringObject *self, PyObject *args)
if (maxsplit < 0) if (maxsplit < 0)
maxsplit = PY_SSIZE_T_MAX; maxsplit = PY_SSIZE_T_MAX;
if (subobj == Py_None) if (subobj == Py_None)
return rsplit_whitespace(s, len, maxsplit); return rsplit_whitespace(self, len, maxsplit);
if (PyString_Check(subobj)) { if (PyString_Check(subobj)) {
sub = PyString_AS_STRING(subobj); sub = PyString_AS_STRING(subobj);
n = PyString_GET_SIZE(subobj); n = PyString_GET_SIZE(subobj);
...@@ -1716,7 +1746,7 @@ string_rsplit(PyStringObject *self, PyObject *args) ...@@ -1716,7 +1746,7 @@ string_rsplit(PyStringObject *self, PyObject *args)
return NULL; return NULL;
} }
else if (n == 1) else if (n == 1)
return rsplit_char(s, len, sub[0], maxsplit); return rsplit_char(self, len, sub[0], maxsplit);
list = PyList_New(PREALLOC_SIZE(maxsplit)); list = PyList_New(PREALLOC_SIZE(maxsplit));
if (list == NULL) if (list == NULL)
...@@ -1725,6 +1755,7 @@ string_rsplit(PyStringObject *self, PyObject *args) ...@@ -1725,6 +1755,7 @@ string_rsplit(PyStringObject *self, PyObject *args)
j = len; j = len;
i = j - n; i = j - n;
s = PyString_AS_STRING(self);
while ( (i >= 0) && (maxsplit-- > 0) ) { while ( (i >= 0) && (maxsplit-- > 0) ) {
for (; i>=0; i--) { for (; i>=0; i--) {
if (Py_STRING_MATCH(s, i, sub, n)) { if (Py_STRING_MATCH(s, i, sub, n)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment