Commit 75c00efc authored by Hye-Shik Chang's avatar Hye-Shik Chang

[SF #866875] Add a specialized routine for one character

separators on str.split() and str.rsplit().
parent cb2117a8
...@@ -175,41 +175,82 @@ class CommonTest(unittest.TestCase): ...@@ -175,41 +175,82 @@ class CommonTest(unittest.TestCase):
def test_split(self): def test_split(self):
self.checkequal(['this', 'is', 'the', 'split', 'function'], self.checkequal(['this', 'is', 'the', 'split', 'function'],
'this is the split function', 'split') 'this is the split function', 'split')
self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|')
self.checkequal(['a', 'b', 'c|d'], 'a|b|c|d', 'split', '|', 2) # by whitespace
self.checkequal(['a', 'b', 'c', 'd'], 'a b c d ', 'split')
self.checkequal(['a', 'b c d'], 'a b c d', 'split', None, 1) self.checkequal(['a', 'b c d'], 'a b c d', 'split', None, 1)
self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', None, 2) self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', None, 2)
self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None, 3) self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None, 3)
self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None, 4) self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None, 4)
self.checkequal(['a b c d'], 'a b c d', 'split', None, 0) self.checkequal(['a b c d'], 'a b c d', 'split', None, 0)
self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', None, 2) self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', None, 2)
self.checkequal(['a', 'b', 'c', 'd'], 'a b c d ', 'split')
# by a char
self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|')
self.checkequal(['a', 'b|c|d'], 'a|b|c|d', 'split', '|', 1)
self.checkequal(['a', 'b', 'c|d'], 'a|b|c|d', 'split', '|', 2)
self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|', 3)
self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|', 4)
self.checkequal(['a|b|c|d'], 'a|b|c|d', 'split', '|', 0)
self.checkequal(['a', '', 'b||c||d'], 'a||b||c||d', 'split', '|', 2)
self.checkequal(['endcase ', ''], 'endcase |', 'split', '|')
self.checkequal(['a', '', 'b\x00c\x00d'], 'a\x00\x00b\x00c\x00d', 'split', '\x00', 2)
# by string
self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//') self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//')
self.checkequal(['a', 'b//c//d'], 'a//b//c//d', 'split', '//', 1)
self.checkequal(['a', 'b', 'c//d'], 'a//b//c//d', 'split', '//', 2)
self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//', 3)
self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//', 4)
self.checkequal(['a//b//c//d'], 'a//b//c//d', 'split', '//', 0)
self.checkequal(['a', '', 'b////c////d'], 'a////b////c////d', 'split', '//', 2)
self.checkequal(['endcase ', ''], 'endcase test', 'split', 'test') self.checkequal(['endcase ', ''], 'endcase test', 'split', 'test')
# mixed use of str and unicode
self.checkequal([u'a', u'b', u'c d'], 'a b c d', 'split', u' ', 2)
# argument type
self.checkraises(TypeError, 'hello', 'split', 42, 42, 42) self.checkraises(TypeError, 'hello', 'split', 42, 42, 42)
def test_rsplit(self): def test_rsplit(self):
self.checkequal(['this', 'is', 'the', 'rsplit', 'function'], self.checkequal(['this', 'is', 'the', 'rsplit', 'function'],
'this is the rsplit function', 'rsplit') 'this is the rsplit function', 'rsplit')
self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|')
self.checkequal(['a|b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 2) # by whitespace
self.checkequal(['a', 'b', 'c', 'd'], 'a b c d ', 'rsplit')
self.checkequal(['a b c', 'd'], 'a b c d', 'rsplit', None, 1) self.checkequal(['a b c', 'd'], 'a b c d', 'rsplit', None, 1)
self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2) self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2)
self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 3) self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 3)
self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 4) self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 4)
self.checkequal(['a b c d'], 'a b c d', 'rsplit', None, 0) self.checkequal(['a b c d'], 'a b c d', 'rsplit', None, 0)
self.checkequal(['a, b, c', 'd'], 'a, b, c, d', 'rsplit', ', ', 1)
self.checkequal(['a, b', 'c', 'd'], 'a, b, c, d', 'rsplit', ', ', 2)
self.checkequal(['a', 'b', 'c', 'd'], 'a, b, c, d', 'rsplit', ', ', 3)
self.checkequal(['a', 'b', 'c', 'd'], 'a, b, c, d', 'rsplit', ', ', 4)
self.checkequal(['a, b, c, d'], 'a, b, c, d', 'rsplit', ', ', 0)
self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2) self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2)
self.checkequal(['a\x00b', 'c'], 'a\x00b\x00c', 'rsplit', '\x00', 1)
self.checkequal(['', ''], 'abcd', 'rsplit', 'abcd') # by a char
self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|')
self.checkequal(['a|b|c', 'd'], 'a|b|c|d', 'rsplit', '|', 1)
self.checkequal(['a|b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 2)
self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 3)
self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 4)
self.checkequal(['a|b|c|d'], 'a|b|c|d', 'rsplit', '|', 0)
self.checkequal(['a||b||c', '', 'd'], 'a||b||c||d', 'rsplit', '|', 2)
self.checkequal(['', ' begincase'], '| begincase', 'rsplit', '|')
self.checkequal(['a\x00\x00b', 'c', 'd'], 'a\x00\x00b\x00c\x00d', 'rsplit', '\x00', 2)
# by string
self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//')
self.checkequal(['a//b//c', 'd'], 'a//b//c//d', 'rsplit', '//', 1)
self.checkequal(['a//b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 2)
self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 3)
self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 4)
self.checkequal(['a//b//c//d'], 'a//b//c//d', 'rsplit', '//', 0)
self.checkequal(['a////b////c', '', 'd'], 'a////b////c////d', 'rsplit', '//', 2)
self.checkequal(['', ' begincase'], 'test begincase', 'rsplit', 'test')
# mixed use of str and unicode
self.checkequal([u'a b', u'c', u'd'], 'a b c d', 'rsplit', u' ', 2) self.checkequal([u'a b', u'c', u'd'], 'a b c d', 'rsplit', u' ', 2)
self.checkequal(['', ' endcase'], '| endcase', 'rsplit', '|')
self.checkequal(['', ' endcase'], 'test endcase', 'rsplit', 'test') # argument type
self.checkraises(TypeError, 'hello', 'rsplit', 42, 42, 42)
def test_strip(self): def test_strip(self):
self.checkequal('hello', ' hello ', 'strip') self.checkequal('hello', ' hello ', 'strip')
......
...@@ -1282,12 +1282,35 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"}; ...@@ -1282,12 +1282,35 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
#define STRIPNAME(i) (stripformat[i]+3) #define STRIPNAME(i) (stripformat[i]+3)
/* Append the substring data[left:right] to `list` as a new string object.
 *
 * Relies on names from the enclosing function: `str` (scratch PyObject
 * pointer), `list` (the result list) and an `onError` label.  Jumps to
 * onError if the string cannot be created or the append fails; the
 * temporary reference held in `str` is released on both the failure and
 * the success path of the append.
 *
 * NOTE: expands to several statements and ends in an `else` arm, so it is
 * only safe as a standalone statement inside a braced block.
 */
#define SPLIT_APPEND(data, left, right) \
str = PyString_FromStringAndSize((data) + (left), \
(right) - (left)); \
if (str == NULL) \
goto onError; \
if (PyList_Append(list, str)) { \
Py_DECREF(str); \
goto onError; \
} \
else \
Py_DECREF(str);
/* Insert the substring data[left:right] at the front (index 0) of `list`
 * as a new string object.
 *
 * Relies on names from the enclosing function: `str` (scratch PyObject
 * pointer), `list` (the result list) and an `onError` label.  Jumps to
 * onError if the string cannot be created or the insert fails; the
 * temporary reference held in `str` is released on both the failure and
 * the success path of the insert.
 *
 * NOTE: expands to several statements and ends in an `else` arm, so it is
 * only safe as a standalone statement inside a braced block.
 */
#define SPLIT_INSERT(data, left, right) \
str = PyString_FromStringAndSize((data) + (left), \
(right) - (left)); \
if (str == NULL) \
goto onError; \
if (PyList_Insert(list, 0, str)) { \
Py_DECREF(str); \
goto onError; \
} \
else \
Py_DECREF(str);
static PyObject * static PyObject *
split_whitespace(const char *s, int len, int maxsplit) split_whitespace(const char *s, int len, int maxsplit)
{ {
int i, j, err; int i, j;
PyObject* item; PyObject *str;
PyObject *list = PyList_New(0); PyObject *list = PyList_New(0);
if (list == NULL) if (list == NULL)
...@@ -1302,33 +1325,49 @@ split_whitespace(const char *s, int len, int maxsplit) ...@@ -1302,33 +1325,49 @@ split_whitespace(const char *s, int len, int maxsplit)
if (j < i) { if (j < i) {
if (maxsplit-- <= 0) if (maxsplit-- <= 0)
break; break;
item = PyString_FromStringAndSize(s+j, (int)(i-j)); SPLIT_APPEND(s, j, i);
if (item == NULL)
goto finally;
err = PyList_Append(list, item);
Py_DECREF(item);
if (err < 0)
goto finally;
while (i < len && isspace(Py_CHARMASK(s[i]))) while (i < len && isspace(Py_CHARMASK(s[i])))
i++; i++;
j = i; j = i;
} }
} }
if (j < len) { if (j < len) {
item = PyString_FromStringAndSize(s+j, (int)(len - j)); SPLIT_APPEND(s, j, len);
if (item == NULL)
goto finally;
err = PyList_Append(list, item);
Py_DECREF(item);
if (err < 0)
goto finally;
} }
return list; return list;
finally: onError:
Py_DECREF(list); Py_DECREF(list);
return NULL; return NULL;
} }
/* Split the buffer s[0:len] on the single byte `ch`, scanning left to
 * right and performing at most `maxcount` splits.
 *
 * Returns a new list of string objects.  Adjacent separators yield empty
 * strings, and whatever follows the last split point (possibly empty) is
 * appended as the final item.  Returns NULL if the list cannot be created
 * or if building/appending a piece fails (the partial list is released).
 *
 * `str`, `list` and the `onError` label are referenced by SPLIT_APPEND and
 * therefore must keep exactly these names.
 */
static PyObject *
split_char(const char *s, int len, char ch, int maxcount)
{
    register int pos, start;
    PyObject *str;
    PyObject *list = PyList_New(0);

    if (list == NULL)
        return NULL;

    start = 0;
    pos = 0;
    while (pos < len) {
        if (s[pos] != ch) {
            /* Ordinary byte: keep scanning. */
            pos++;
            continue;
        }
        /* Separator found; the split budget is only consumed here. */
        if (maxcount-- <= 0)
            break;
        SPLIT_APPEND(s, start, pos);
        /* Resume just past the separator. */
        pos++;
        start = pos;
    }

    /* Emit the remainder after the last split point. */
    if (start <= len) {
        SPLIT_APPEND(s, start, len);
    }
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
PyDoc_STRVAR(split__doc__, PyDoc_STRVAR(split__doc__,
"S.split([sep [,maxsplit]]) -> list of strings\n\ "S.split([sep [,maxsplit]]) -> list of strings\n\
...@@ -1362,10 +1401,13 @@ string_split(PyStringObject *self, PyObject *args) ...@@ -1362,10 +1401,13 @@ string_split(PyStringObject *self, PyObject *args)
#endif #endif
else if (PyObject_AsCharBuffer(subobj, &sub, &n)) else if (PyObject_AsCharBuffer(subobj, &sub, &n))
return NULL; return NULL;
if (n == 0) { if (n == 0) {
PyErr_SetString(PyExc_ValueError, "empty separator"); PyErr_SetString(PyExc_ValueError, "empty separator");
return NULL; return NULL;
} }
else if (n == 1)
return split_char(s, len, sub[0], maxsplit);
list = PyList_New(0); list = PyList_New(0);
if (list == NULL) if (list == NULL)
...@@ -1406,8 +1448,8 @@ string_split(PyStringObject *self, PyObject *args) ...@@ -1406,8 +1448,8 @@ string_split(PyStringObject *self, PyObject *args)
static PyObject * static PyObject *
rsplit_whitespace(const char *s, int len, int maxsplit) rsplit_whitespace(const char *s, int len, int maxsplit)
{ {
int i, j, err; int i, j;
PyObject* item; PyObject *str;
PyObject *list = PyList_New(0); PyObject *list = PyList_New(0);
if (list == NULL) if (list == NULL)
...@@ -1422,33 +1464,49 @@ rsplit_whitespace(const char *s, int len, int maxsplit) ...@@ -1422,33 +1464,49 @@ rsplit_whitespace(const char *s, int len, int maxsplit)
if (j > i) { if (j > i) {
if (maxsplit-- <= 0) if (maxsplit-- <= 0)
break; break;
item = PyString_FromStringAndSize(s+i+1, (int)(j-i)); SPLIT_INSERT(s, i + 1, j + 1);
if (item == NULL)
goto finally;
err = PyList_Insert(list, 0, item);
Py_DECREF(item);
if (err < 0)
goto finally;
while (i >= 0 && isspace(Py_CHARMASK(s[i]))) while (i >= 0 && isspace(Py_CHARMASK(s[i])))
i--; i--;
j = i; j = i;
} }
} }
if (j >= 0) { if (j >= 0) {
item = PyString_FromStringAndSize(s, (int)(j + 1)); SPLIT_INSERT(s, 0, j + 1);
if (item == NULL)
goto finally;
err = PyList_Insert(list, 0, item);
Py_DECREF(item);
if (err < 0)
goto finally;
} }
return list; return list;
finally: onError:
Py_DECREF(list); Py_DECREF(list);
return NULL; return NULL;
} }
/* Split the buffer s[0:len] on the single byte `ch`, scanning right to
 * left and performing at most `maxcount` splits.
 *
 * Each piece is inserted at the front of the result list, so the returned
 * list is in left-to-right order.  Adjacent separators yield empty
 * strings, and whatever precedes the leftmost split point (possibly empty)
 * becomes the first item.  Returns NULL if the list cannot be created or
 * if building/inserting a piece fails (the partial list is released).
 *
 * `str`, `list` and the `onError` label are referenced by SPLIT_INSERT and
 * therefore must keep exactly these names.
 */
static PyObject *
rsplit_char(const char *s, int len, char ch, int maxcount)
{
    register int pos, last;
    PyObject *str;
    PyObject *list = PyList_New(0);

    if (list == NULL)
        return NULL;

    /* `last` is the index of the final byte of the piece being collected. */
    last = len - 1;
    pos = len - 1;
    while (pos >= 0) {
        if (s[pos] != ch) {
            /* Ordinary byte: keep scanning towards the start. */
            pos--;
            continue;
        }
        /* Separator found; the split budget is only consumed here. */
        if (maxcount-- <= 0)
            break;
        SPLIT_INSERT(s, pos + 1, last + 1);
        /* Resume just before the separator. */
        pos--;
        last = pos;
    }

    /* Emit the remainder before the leftmost split point. */
    if (last >= -1) {
        SPLIT_INSERT(s, 0, last + 1);
    }
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
PyDoc_STRVAR(rsplit__doc__, PyDoc_STRVAR(rsplit__doc__,
"S.rsplit([sep [,maxsplit]]) -> list of strings\n\ "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
...@@ -1483,10 +1541,13 @@ string_rsplit(PyStringObject *self, PyObject *args) ...@@ -1483,10 +1541,13 @@ string_rsplit(PyStringObject *self, PyObject *args)
#endif #endif
else if (PyObject_AsCharBuffer(subobj, &sub, &n)) else if (PyObject_AsCharBuffer(subobj, &sub, &n))
return NULL; return NULL;
if (n == 0) { if (n == 0) {
PyErr_SetString(PyExc_ValueError, "empty separator"); PyErr_SetString(PyExc_ValueError, "empty separator");
return NULL; return NULL;
} }
else if (n == 1)
return rsplit_char(s, len, sub[0], maxsplit);
list = PyList_New(0); list = PyList_New(0);
if (list == NULL) if (list == NULL)
...@@ -3104,17 +3165,6 @@ Return a list of the lines in S, breaking at line boundaries.\n\ ...@@ -3104,17 +3165,6 @@ Return a list of the lines in S, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\ Line breaks are not included in the resulting list unless keepends\n\
is given and true."); is given and true.");
/* Append the substring of `data` from offset `left` up to `right` to
 * `list` as a new string object.
 *
 * Relies on `str`, `list` and an `onError` label in the enclosing
 * function; jumps to onError if string creation or the append fails, and
 * releases the temporary reference in `str` on both append outcomes.
 *
 * NOTE: the macro arguments are substituted without parentheses, so pass
 * only simple expressions.
 */
#define SPLIT_APPEND(data, left, right) \
str = PyString_FromStringAndSize(data + left, right - left); \
if (!str) \
goto onError; \
if (PyList_Append(list, str)) { \
Py_DECREF(str); \
goto onError; \
} \
else \
Py_DECREF(str);
static PyObject* static PyObject*
string_splitlines(PyStringObject *self, PyObject *args) string_splitlines(PyStringObject *self, PyObject *args)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment