Commit c5f946e0 authored by Hye-Shik Chang

Add rsplit method for str and unicode builtin types.

SF feature request #801847.
The original patch was written by Sean Reifschneider.
parent f29da547
......@@ -694,6 +694,24 @@ The original string is returned if
\versionchanged[Support for the \var{fillchar} argument]{2.4}
\begin{methoddesc}[string]{rsplit}{\optional{sep\optional{, maxsplit}}}
Return a list of the words of the string, scanning the string from
the end and working toward the front. The resulting list of words is
in the same order as for \function{split()}. If the optional first
argument \var{sep} is absent or \code{None}, the words are separated by
arbitrary strings of whitespace characters (space, tab, newline,
return, formfeed). If \var{sep} is present and
not \code{None}, it specifies a string to be used as the word
separator. The returned list will then have one more item than the
number of non-overlapping occurrences of the separator in the string.
The optional second argument \var{maxsplit} defaults to \code{-1},
which means there is no limit on the number of splits. If it is
non-negative, at most \var{maxsplit} splits occur, counted from the
end of the string, and the remainder of the string is returned as the
first element of the list (thus, the list will have at most
\code{\var{maxsplit}+1} elements).
Return a copy of the string with trailing characters removed. If
\var{chars} is omitted or \code{None}, whitespace characters are
......@@ -215,6 +215,23 @@ The functions defined in this module are:
\begin{funcdesc}{rsplit}{s\optional{, sep\optional{, maxsplit}}}
Return a list of the words of the string \var{s}, scanning \var{s} from
the end and working toward the front. The resulting list of words is
in the same order as for \function{split()}. If the optional second
argument \var{sep} is absent or \code{None}, the words are separated by
arbitrary strings of whitespace characters (space, tab, newline, return,
formfeed). If the second argument \var{sep} is present and not
\code{None}, it specifies a string to be used as the word separator.
The returned list will then have one more item than the number of
non-overlapping occurrences of the separator in the string. The
optional third argument \var{maxsplit} defaults to \code{-1}, which
means there is no limit on the number of splits. If it is
non-negative, at most \var{maxsplit} splits occur, counted from the
end of the string, and the remainder of the string is returned
as the first element of the list (thus, the list will have at most
\code{\var{maxsplit}+1} elements).
\begin{funcdesc}{splitfields}{s\optional{, sep\optional{, maxsplit}}}
This function behaves identically to \function{split()}. (In the
past, \function{split()} was only used with one argument, while
......@@ -185,6 +185,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_Resize PyUnicodeUCS2_Resize
# define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
# define PyUnicode_Split PyUnicodeUCS2_Split
# define PyUnicode_RSplit PyUnicodeUCS2_RSplit
# define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
# define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
# define PyUnicode_Translate PyUnicodeUCS2_Translate
......@@ -959,6 +960,25 @@ PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
int keepends /* If true, line end markers are included */
/* Split a string giving a list of Unicode strings.
If sep is NULL, splitting will be done at all whitespace
substrings. Otherwise, splits occur at the given separator.
At most maxsplit splits will be done; if maxsplit is negative, no
limit is set. Unlike PyUnicode_Split, PyUnicode_RSplit splits from
the end of the string.
Separators are not included in the resulting list.
PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
PyObject *s, /* String to split */
PyObject *sep, /* String separator */
int maxsplit /* Maxsplit count */
/* Translate a string by applying a character mapping table to it and
return the resulting Unicode object.
......@@ -121,6 +121,18 @@ def split(s, sep=None, maxsplit=-1):
return s.split(sep, maxsplit)
splitfields = split
# Split a string into a list of words, working from the end of the string
def rsplit(s, sep=None, maxsplit=-1):
"""rsplit(s [,sep [,maxsplit]]) -> list of strings
Return a list of the words in the string s, using sep as the
delimiter string, starting at the end of the string and working
to the front. If maxsplit is given, at most maxsplit splits are
done. If sep is not specified or is None, any whitespace string
is a separator.
return s.rsplit(sep, maxsplit)
# Join fields with optional separator
def join(words, sep = ' '):
"""join(list [,sep]) -> string
......@@ -189,6 +189,26 @@ class CommonTest(unittest.TestCase):
self.checkraises(TypeError, 'hello', 'split', 42, 42, 42)
def test_rsplit(self):
# Exercises the 'rsplit' method through self.checkequal(expected, obj,
# methodname, *args).
# No arguments: whitespace splitting, words in left-to-right order.
self.checkequal(['this', 'is', 'the', 'rsplit', 'function'],
'this is the rsplit function', 'rsplit')
# Single-character separator, unlimited and limited to 2 splits; the
# unsplit remainder stays as the first element.
self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|')
self.checkequal(['a|b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 2)
# sep=None (whitespace) with maxsplit 1..4; a maxsplit larger than the
# number of separators gives the same result as a full split.
self.checkequal(['a b c', 'd'], 'a b c d', 'rsplit', None, 1)
self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2)
self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 3)
self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 4)
# maxsplit=0 performs no split at all.
self.checkequal(['a b c d'], 'a b c d', 'rsplit', None, 0)
# Multi-character separator ', ' with the same maxsplit progression.
self.checkequal(['a, b, c', 'd'], 'a, b, c, d', 'rsplit', ', ', 1)
self.checkequal(['a, b', 'c', 'd'], 'a, b, c, d', 'rsplit', ', ', 2)
self.checkequal(['a', 'b', 'c', 'd'], 'a, b, c, d', 'rsplit', ', ', 3)
self.checkequal(['a', 'b', 'c', 'd'], 'a, b, c, d', 'rsplit', ', ', 4)
self.checkequal(['a, b, c, d'], 'a, b, c, d', 'rsplit', ', ', 0)
# NOTE(review): this repeats the sep=None, maxsplit=2 assertion made above.
self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2)
# A NUL byte works as a separator.
self.checkequal(['a\x00b', 'c'], 'a\x00b\x00c', 'rsplit', '\x00', 1)
# Separator equal to the whole string yields two empty strings.
self.checkequal(['', ''], 'abcd', 'rsplit', 'abcd')
# Unicode separator: the expected items are unicode literals.
self.checkequal([u'a b', u'c', u'd'], 'a b c d', 'rsplit', u' ', 2)
def test_strip(self):
self.checkequal('hello', ' hello ', 'strip')
self.checkequal('hello ', ' hello ', 'lstrip')
......@@ -1407,6 +1407,129 @@ string_split(PyStringObject *self, PyObject *args)
return NULL;
/* Split the byte buffer s (length len) on whitespace, scanning from index
   len - 1 toward index 0.  Whitespace is detected with
   isspace(Py_CHARMASK(...)).  Each word is created with
   PyString_FromStringAndSize() and inserted at index 0 of a new list, so
   words found further left end up earlier in the result.  maxsplit is
   tested and decremented once per word found; whatever is left of the
   string, s[0..j], is inserted as a single leading item.  Returns the list,
   or NULL if the list could not be created.
   NOTE(review): this listing appears to be missing lines (braces, the index
   decrements of the scanning loops, the statement guarded by the maxsplit
   test, the `finally` label and its cleanup) -- confirm against the
   complete file. */
static PyObject *
rsplit_whitespace(const char *s, int len, int maxsplit)
int i, j, err;
PyObject* item;
PyObject *list = PyList_New(0);
if (list == NULL)
return NULL;
/* i is the scan position, j marks the right edge of the current word. */
for (i = j = len - 1; i >= 0; ) {
while (i >= 0 && isspace(Py_CHARMASK(s[i])))
j = i;
while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
if (j > i) {
if (maxsplit-- <= 0)
/* The word occupies s[i+1 .. j], i.e. j - i bytes. */
item = PyString_FromStringAndSize(s+i+1, (int)(j-i));
if (item == NULL)
goto finally;
/* Insert at the front to keep left-to-right order. */
err = PyList_Insert(list, 0, item);
if (err < 0)
goto finally;
while (i >= 0 && isspace(Py_CHARMASK(s[i])))
j = i;
/* Anything still unconsumed on the left becomes the first item. */
if (j >= 0) {
item = PyString_FromStringAndSize(s, (int)(j + 1));
if (item == NULL)
goto finally;
err = PyList_Insert(list, 0, item);
if (err < 0)
goto finally;
return list;
return NULL;
"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Return a list of the words in the string S, using sep as the\n\
delimiter string, starting at the end of the string and working\n\
to the front. If maxsplit is given, at most maxsplit splits are\n\
done. If sep is not specified or is None, any whitespace string\n\
is a separator.");
/* Implementation of the str "rsplit" method (registered in string_methods).
   Arguments are parsed with the format "|Oi:rsplit": an optional separator
   object (default None) and an optional int maxsplit (default -1).
   - A negative maxsplit is replaced by INT_MAX.
   - A None separator delegates to rsplit_whitespace().
   - A unicode separator delegates to PyUnicode_RSplit().
   - Any other non-str separator is read through PyObject_AsCharBuffer().
   - A zero-length separator sets ValueError("empty separator").
   Pieces are created with PyString_FromStringAndSize() and inserted at
   index 0 of the result list.
   NOTE(review): this listing appears to be missing lines (braces, the
   statement guarded by the maxsplit test, the non-matching branch of the
   scan loop, the `fail` label and its cleanup) -- confirm against the
   complete file. */
static PyObject *
string_rsplit(PyStringObject *self, PyObject *args)
int len = PyString_GET_SIZE(self), n, i, j, err;
int maxsplit = -1;
const char *s = PyString_AS_STRING(self), *sub;
PyObject *list, *item, *subobj = Py_None;
if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
return NULL;
/* Negative maxsplit means "no limit". */
if (maxsplit < 0)
maxsplit = INT_MAX;
if (subobj == Py_None)
return rsplit_whitespace(s, len, maxsplit);
if (PyString_Check(subobj)) {
sub = PyString_AS_STRING(subobj);
n = PyString_GET_SIZE(subobj);
else if (PyUnicode_Check(subobj))
return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
else if (PyObject_AsCharBuffer(subobj, &sub, &n))
return NULL;
if (n == 0) {
PyErr_SetString(PyExc_ValueError, "empty separator");
return NULL;
list = PyList_New(0);
if (list == NULL)
return NULL;
/* j is the right edge (exclusive) of the next piece; i is the candidate
   start of a separator, beginning at the last position where it fits. */
j = len;
i = j - n;
while (i >= 0) {
/* Cheap first-byte test before the full memcmp(). */
if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
if (maxsplit-- <= 0)
/* The piece lies between the end of this separator and j. */
item = PyString_FromStringAndSize(s+i+n, (int)(j-i-n));
if (item == NULL)
goto fail;
err = PyList_Insert(list, 0, item);
if (err < 0)
goto fail;
j = i;
i -= n;
/* The remaining prefix s[0..j) becomes the first item. */
item = PyString_FromStringAndSize(s, j);
if (item == NULL)
goto fail;
err = PyList_Insert(list, 0, item);
if (err < 0)
goto fail;
return list;
return NULL;
"S.join(sequence) -> string\n\
......@@ -3064,6 +3187,7 @@ string_methods[] = {
string.maketrans(). */
{"join", (PyCFunction)string_join, METH_O, join__doc__},
{"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
{"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
{"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
{"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
{"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
......@@ -4053,7 +4053,7 @@ PyUnicodeObject *pad(PyUnicodeObject *self,
#define SPLIT_APPEND(data, left, right) \
str = PyUnicode_FromUnicode(data + left, right - left); \
str = PyUnicode_FromUnicode((data) + (left), (right) - (left)); \
if (!str) \
goto onError; \
if (PyList_Append(list, str)) { \
......@@ -4063,6 +4063,17 @@ PyUnicodeObject *pad(PyUnicodeObject *self,
else \
/* SPLIT_INSERT(data, left, right): create a unicode object from
   data[left .. right) and insert it at index 0 of `list`.  Jumps to the
   `onError` label if the object cannot be created or the insert fails (the
   new object is released first in the latter case).  The expansion relies
   on `str`, `list` and an `onError` label existing at the point of use.
   NOTE(review): the macro ends on a dangling `else` in this listing; its
   final line appears to be missing -- confirm against the complete file. */
#define SPLIT_INSERT(data, left, right) \
str = PyUnicode_FromUnicode((data) + (left), (right) - (left)); \
if (!str) \
goto onError; \
if (PyList_Insert(list, 0, str)) { \
Py_DECREF(str); \
goto onError; \
} \
else \
PyObject *split_whitespace(PyUnicodeObject *self,
PyObject *list,
......@@ -4214,7 +4225,106 @@ PyObject *split_substring(PyUnicodeObject *self,
return NULL;
/* Split self on whitespace, scanning from the last character toward the
   first.  Whitespace is detected with Py_UNICODE_ISSPACE().  Each word is
   added to the caller-supplied list via SPLIT_INSERT, i.e. at index 0.
   maxcount is tested and decremented once per word found; the unconsumed
   prefix self->str[0 .. j] is inserted as the first item.  Returns list.
   NOTE(review): this listing appears to be missing lines (braces, the index
   decrements of the scanning loops, the statement guarded by the maxcount
   test, the `onError` label) -- confirm against the complete file. */
PyObject *rsplit_whitespace(PyUnicodeObject *self,
PyObject *list,
int maxcount)
register int i;
register int j;
int len = self->length;
PyObject *str;
for (i = j = len - 1; i >= 0; ) {
/* find a token */
while (i >= 0 && Py_UNICODE_ISSPACE(self->str[i]))
j = i;
while (i >= 0 && !Py_UNICODE_ISSPACE(self->str[i]))
if (j > i) {
if (maxcount-- <= 0)
/* The word spans indices i + 1 .. j inclusive. */
SPLIT_INSERT(self->str, i + 1, j + 1);
while (i >= 0 && Py_UNICODE_ISSPACE(self->str[i]))
j = i;
/* Whatever remains on the left becomes the first item. */
if (j >= 0) {
SPLIT_INSERT(self->str, 0, j + 1);
return list;
return NULL;
/* Split self on the single character `ch`, scanning from the last character
   toward the first.  On each match the text to the right of the separator,
   self->str[i + 1 .. j], is added at index 0 of the caller-supplied list
   via SPLIT_INSERT; the unconsumed prefix is inserted last as the first
   item.  maxcount is tested and decremented once per match.  Returns list.
   NOTE(review): `ch` is used below and passed by the caller in rsplit(),
   but it is absent from the visible parameter list; this listing appears
   to be missing lines (that parameter, braces, the statement guarded by
   the maxcount test, the else-branch body, the `onError` label) -- confirm
   against the complete file. */
PyObject *rsplit_char(PyUnicodeObject *self,
PyObject *list,
int maxcount)
register int i;
register int j;
int len = self->length;
PyObject *str;
for (i = j = len - 1; i >= 0; ) {
if (self->str[i] == ch) {
if (maxcount-- <= 0)
SPLIT_INSERT(self->str, i + 1, j + 1);
/* Step past the separator; both edges now sit just left of it. */
j = i = i - 1;
} else
if (j >= 0) {
SPLIT_INSERT(self->str, 0, j + 1);
return list;
return NULL;
/* Split self on occurrences of `substring`, scanning from the last position
   where the separator could fit (len - sublen) toward index 0.  Matches are
   tested with Py_UNICODE_MATCH().  On a match the text between the end of
   the separator and j is added at index 0 of the caller-supplied list via
   SPLIT_INSERT, j moves to the start of the match and i steps back by
   sublen.  The unconsumed prefix self->str[0 .. j) is inserted as the first
   item.  maxcount is tested and decremented once per match.  Returns list.
   NOTE(review): this listing appears to be missing lines (braces, the
   statement guarded by the maxcount test, the else-branch body, the
   `onError` label) -- confirm against the complete file. */
PyObject *rsplit_substring(PyUnicodeObject *self,
PyObject *list,
PyUnicodeObject *substring,
int maxcount)
register int i;
register int j;
int len = self->length;
int sublen = substring->length;
PyObject *str;
/* j is the exclusive right edge of the next piece. */
for (i = len - sublen, j = len; i >= 0; ) {
if (Py_UNICODE_MATCH(self, i, substring)) {
if (maxcount-- <= 0)
SPLIT_INSERT(self->str, i + sublen, j);
j = i;
i -= sublen;
} else
if (j >= 0) {
SPLIT_INSERT(self->str, 0, j);
return list;
return NULL;
PyObject *split(PyUnicodeObject *self,
......@@ -4245,6 +4355,35 @@ PyObject *split(PyUnicodeObject *self,
return split_substring(self,list,substring,maxcount);
/* Dispatcher for right-to-left splitting of a unicode object.
   A negative maxcount is replaced by INT_MAX (no limit).  A new empty list
   is created and handed to one of the helpers:
   - substring == NULL:     rsplit_whitespace()
   - substring length 1:    rsplit_char() with that character
   - substring length 0:    sets ValueError("empty separator"), returns NULL
   - otherwise:             rsplit_substring()
   Returns NULL if the list cannot be created.
   NOTE(review): in this listing the empty-separator path returns NULL with
   no visible release of `list`; lines (including braces) appear to be
   missing -- confirm against the complete file that the list is released. */
PyObject *rsplit(PyUnicodeObject *self,
PyUnicodeObject *substring,
int maxcount)
PyObject *list;
if (maxcount < 0)
maxcount = INT_MAX;
list = PyList_New(0);
if (!list)
return NULL;
if (substring == NULL)
return rsplit_whitespace(self,list,maxcount);
else if (substring->length == 1)
return rsplit_char(self,list,substring->str[0],maxcount);
else if (substring->length == 0) {
PyErr_SetString(PyExc_ValueError, "empty separator");
return NULL;
return rsplit_substring(self,list,substring,maxcount);
PyObject *replace(PyUnicodeObject *self,
PyUnicodeObject *str1,
......@@ -5675,6 +5814,56 @@ unicode_split(PyUnicodeObject *self, PyObject *args)
return PyUnicode_Split((PyObject *)self, substring, maxcount);
/* Public entry point: split s from the right.
   s is converted with PyUnicode_FromObject(); sep is converted the same way
   when it is not NULL.  If either conversion fails, NULL is returned.  The
   converted objects and maxsplit are passed to rsplit(), whose result is
   returned; a NULL sep reaches rsplit() as NULL.
   NOTE(review): no release of the converted `s` / `sep` references is
   visible in this listing; lines (including braces) appear to be missing
   -- confirm against the complete file. */
PyObject *PyUnicode_RSplit(PyObject *s,
PyObject *sep,
int maxsplit)
PyObject *result;
s = PyUnicode_FromObject(s);
if (s == NULL)
return NULL;
if (sep != NULL) {
sep = PyUnicode_FromObject(sep);
if (sep == NULL) {
return NULL;
result = rsplit((PyUnicodeObject *)s, (PyUnicodeObject *)sep, maxsplit);
return result;
"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Return a list of the words in S, using sep as the\n\
delimiter string, starting at the end of the string and\n\
working to the front. If maxsplit is given, at most maxsplit\n\
splits are done. If sep is not specified, any whitespace string\n\
is a separator.");
/* Implementation of the unicode "rsplit" method (registered in
   unicode_methods).  Arguments are parsed with the format "|Oi:rsplit": an
   optional separator object (default None) and an optional int maxcount
   (default -1).
   - None separator:     rsplit(self, NULL, maxcount)
   - unicode separator:  rsplit() is called with it directly
   - anything else:      PyUnicode_RSplit(), which converts the separator
                         with PyUnicode_FromObject()
   Returns NULL if argument parsing fails.
   NOTE(review): braces appear to be missing from this listing. */
static PyObject*
unicode_rsplit(PyUnicodeObject *self, PyObject *args)
PyObject *substring = Py_None;
int maxcount = -1;
if (!PyArg_ParseTuple(args, "|Oi:rsplit", &substring, &maxcount))
return NULL;
if (substring == Py_None)
return rsplit(self, NULL, maxcount);
else if (PyUnicode_Check(substring))
return rsplit(self, (PyUnicodeObject *)substring, maxcount);
return PyUnicode_RSplit((PyObject *)self, substring, maxcount);
"S.splitlines([keepends]]) -> list of strings\n\
......@@ -5870,6 +6059,7 @@ static PyMethodDef unicode_methods[] = {
{"encode", (PyCFunction) unicode_encode, METH_VARARGS, encode__doc__},
{"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__},
{"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__},
{"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__},
{"join", (PyCFunction) unicode_join, METH_O, join__doc__},
{"capitalize", (PyCFunction) unicode_capitalize, METH_NOARGS, capitalize__doc__},
{"title", (PyCFunction) unicode_title, METH_NOARGS, title__doc__},
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment