Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
75c00efc
Commit
75c00efc
authored
Jan 05, 2004
by
Hye-Shik Chang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[SF #866875] Add a specialized routine for one-character
separators on str.split() and str.rsplit().
parent
cb2117a8
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
150 additions
and
59 deletions
+150
-59
Lib/test/string_tests.py
Lib/test/string_tests.py
+55
-14
Objects/stringobject.c
Objects/stringobject.c
+95
-45
No files found.
Lib/test/string_tests.py
View file @
75c00efc
...
@@ -175,41 +175,82 @@ class CommonTest(unittest.TestCase):
...
@@ -175,41 +175,82 @@ class CommonTest(unittest.TestCase):
def
test_split
(
self
):
def
test_split
(
self
):
self
.
checkequal
([
'this'
,
'is'
,
'the'
,
'split'
,
'function'
],
self
.
checkequal
([
'this'
,
'is'
,
'the'
,
'split'
,
'function'
],
'this is the split function'
,
'split'
)
'this is the split function'
,
'split'
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a|b|c|d'
,
'split'
,
'|'
)
self
.
checkequal
([
'a'
,
'b'
,
'c|d'
],
'a|b|c|d'
,
'split'
,
'|'
,
2
)
# by whitespace
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a b c d '
,
'split'
)
self
.
checkequal
([
'a'
,
'b c d'
],
'a b c d'
,
'split'
,
None
,
1
)
self
.
checkequal
([
'a'
,
'b c d'
],
'a b c d'
,
'split'
,
None
,
1
)
self
.
checkequal
([
'a'
,
'b'
,
'c d'
],
'a b c d'
,
'split'
,
None
,
2
)
self
.
checkequal
([
'a'
,
'b'
,
'c d'
],
'a b c d'
,
'split'
,
None
,
2
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a b c d'
,
'split'
,
None
,
3
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a b c d'
,
'split'
,
None
,
3
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a b c d'
,
'split'
,
None
,
4
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a b c d'
,
'split'
,
None
,
4
)
self
.
checkequal
([
'a b c d'
],
'a b c d'
,
'split'
,
None
,
0
)
self
.
checkequal
([
'a b c d'
],
'a b c d'
,
'split'
,
None
,
0
)
self
.
checkequal
([
'a'
,
'b'
,
'c d'
],
'a b c d'
,
'split'
,
None
,
2
)
self
.
checkequal
([
'a'
,
'b'
,
'c d'
],
'a b c d'
,
'split'
,
None
,
2
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a b c d '
,
'split'
)
# by a char
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a|b|c|d'
,
'split'
,
'|'
)
self
.
checkequal
([
'a'
,
'b|c|d'
],
'a|b|c|d'
,
'split'
,
'|'
,
1
)
self
.
checkequal
([
'a'
,
'b'
,
'c|d'
],
'a|b|c|d'
,
'split'
,
'|'
,
2
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a|b|c|d'
,
'split'
,
'|'
,
3
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a|b|c|d'
,
'split'
,
'|'
,
4
)
self
.
checkequal
([
'a|b|c|d'
],
'a|b|c|d'
,
'split'
,
'|'
,
0
)
self
.
checkequal
([
'a'
,
''
,
'b||c||d'
],
'a||b||c||d'
,
'split'
,
'|'
,
2
)
self
.
checkequal
([
'endcase '
,
''
],
'endcase |'
,
'split'
,
'|'
)
self
.
checkequal
([
'a'
,
''
,
'b
\
x00
c
\
x00
d'
],
'a
\
x00
\
x00
b
\
x00
c
\
x00
d'
,
'split'
,
'
\
x00
'
,
2
)
# by string
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a//b//c//d'
,
'split'
,
'//'
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a//b//c//d'
,
'split'
,
'//'
)
self
.
checkequal
([
'a'
,
'b//c//d'
],
'a//b//c//d'
,
'split'
,
'//'
,
1
)
self
.
checkequal
([
'a'
,
'b'
,
'c//d'
],
'a//b//c//d'
,
'split'
,
'//'
,
2
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a//b//c//d'
,
'split'
,
'//'
,
3
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a//b//c//d'
,
'split'
,
'//'
,
4
)
self
.
checkequal
([
'a//b//c//d'
],
'a//b//c//d'
,
'split'
,
'//'
,
0
)
self
.
checkequal
([
'a'
,
''
,
'b////c////d'
],
'a////b////c////d'
,
'split'
,
'//'
,
2
)
self
.
checkequal
([
'endcase '
,
''
],
'endcase test'
,
'split'
,
'test'
)
self
.
checkequal
([
'endcase '
,
''
],
'endcase test'
,
'split'
,
'test'
)
# mixed use of str and unicode
self
.
checkequal
([
u'a'
,
u'b'
,
u'c d'
],
'a b c d'
,
'split'
,
u' '
,
2
)
# argument type
self
.
checkraises
(
TypeError
,
'hello'
,
'split'
,
42
,
42
,
42
)
self
.
checkraises
(
TypeError
,
'hello'
,
'split'
,
42
,
42
,
42
)
def
test_rsplit
(
self
):
def
test_rsplit
(
self
):
self
.
checkequal
([
'this'
,
'is'
,
'the'
,
'rsplit'
,
'function'
],
self
.
checkequal
([
'this'
,
'is'
,
'the'
,
'rsplit'
,
'function'
],
'this is the rsplit function'
,
'rsplit'
)
'this is the rsplit function'
,
'rsplit'
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a|b|c|d'
,
'rsplit'
,
'|'
)
self
.
checkequal
([
'a|b'
,
'c'
,
'd'
],
'a|b|c|d'
,
'rsplit'
,
'|'
,
2
)
# by whitespace
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a b c d '
,
'rsplit'
)
self
.
checkequal
([
'a b c'
,
'd'
],
'a b c d'
,
'rsplit'
,
None
,
1
)
self
.
checkequal
([
'a b c'
,
'd'
],
'a b c d'
,
'rsplit'
,
None
,
1
)
self
.
checkequal
([
'a b'
,
'c'
,
'd'
],
'a b c d'
,
'rsplit'
,
None
,
2
)
self
.
checkequal
([
'a b'
,
'c'
,
'd'
],
'a b c d'
,
'rsplit'
,
None
,
2
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a b c d'
,
'rsplit'
,
None
,
3
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a b c d'
,
'rsplit'
,
None
,
3
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a b c d'
,
'rsplit'
,
None
,
4
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a b c d'
,
'rsplit'
,
None
,
4
)
self
.
checkequal
([
'a b c d'
],
'a b c d'
,
'rsplit'
,
None
,
0
)
self
.
checkequal
([
'a b c d'
],
'a b c d'
,
'rsplit'
,
None
,
0
)
self
.
checkequal
([
'a, b, c'
,
'd'
],
'a, b, c, d'
,
'rsplit'
,
', '
,
1
)
self
.
checkequal
([
'a, b'
,
'c'
,
'd'
],
'a, b, c, d'
,
'rsplit'
,
', '
,
2
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a, b, c, d'
,
'rsplit'
,
', '
,
3
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a, b, c, d'
,
'rsplit'
,
', '
,
4
)
self
.
checkequal
([
'a, b, c, d'
],
'a, b, c, d'
,
'rsplit'
,
', '
,
0
)
self
.
checkequal
([
'a b'
,
'c'
,
'd'
],
'a b c d'
,
'rsplit'
,
None
,
2
)
self
.
checkequal
([
'a b'
,
'c'
,
'd'
],
'a b c d'
,
'rsplit'
,
None
,
2
)
self
.
checkequal
([
'a
\
x00
b'
,
'c'
],
'a
\
x00
b
\
x00
c'
,
'rsplit'
,
'
\
x00
'
,
1
)
self
.
checkequal
([
''
,
''
],
'abcd'
,
'rsplit'
,
'abcd'
)
# by a char
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a|b|c|d'
,
'rsplit'
,
'|'
)
self
.
checkequal
([
'a|b|c'
,
'd'
],
'a|b|c|d'
,
'rsplit'
,
'|'
,
1
)
self
.
checkequal
([
'a|b'
,
'c'
,
'd'
],
'a|b|c|d'
,
'rsplit'
,
'|'
,
2
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a|b|c|d'
,
'rsplit'
,
'|'
,
3
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a|b|c|d'
,
'rsplit'
,
'|'
,
4
)
self
.
checkequal
([
'a|b|c|d'
],
'a|b|c|d'
,
'rsplit'
,
'|'
,
0
)
self
.
checkequal
([
'a||b||c'
,
''
,
'd'
],
'a||b||c||d'
,
'rsplit'
,
'|'
,
2
)
self
.
checkequal
([
''
,
' begincase'
],
'| begincase'
,
'rsplit'
,
'|'
)
self
.
checkequal
([
'a
\
x00
\
x00
b'
,
'c'
,
'd'
],
'a
\
x00
\
x00
b
\
x00
c
\
x00
d'
,
'rsplit'
,
'
\
x00
'
,
2
)
# by string
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a//b//c//d'
,
'rsplit'
,
'//'
)
self
.
checkequal
([
'a//b//c'
,
'd'
],
'a//b//c//d'
,
'rsplit'
,
'//'
,
1
)
self
.
checkequal
([
'a//b'
,
'c'
,
'd'
],
'a//b//c//d'
,
'rsplit'
,
'//'
,
2
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a//b//c//d'
,
'rsplit'
,
'//'
,
3
)
self
.
checkequal
([
'a'
,
'b'
,
'c'
,
'd'
],
'a//b//c//d'
,
'rsplit'
,
'//'
,
4
)
self
.
checkequal
([
'a//b//c//d'
],
'a//b//c//d'
,
'rsplit'
,
'//'
,
0
)
self
.
checkequal
([
'a////b////c'
,
''
,
'd'
],
'a////b////c////d'
,
'rsplit'
,
'//'
,
2
)
self
.
checkequal
([
''
,
' begincase'
],
'test begincase'
,
'rsplit'
,
'test'
)
# mixed use of str and unicode
self
.
checkequal
([
u'a b'
,
u'c'
,
u'd'
],
'a b c d'
,
'rsplit'
,
u' '
,
2
)
self
.
checkequal
([
u'a b'
,
u'c'
,
u'd'
],
'a b c d'
,
'rsplit'
,
u' '
,
2
)
self
.
checkequal
([
''
,
' endcase'
],
'| endcase'
,
'rsplit'
,
'|'
)
self
.
checkequal
([
''
,
' endcase'
],
'test endcase'
,
'rsplit'
,
'test'
)
# argument type
self
.
checkraises
(
TypeError
,
'hello'
,
'rsplit'
,
42
,
42
,
42
)
def
test_strip
(
self
):
def
test_strip
(
self
):
self
.
checkequal
(
'hello'
,
' hello '
,
'strip'
)
self
.
checkequal
(
'hello'
,
' hello '
,
'strip'
)
...
...
Objects/stringobject.c
View file @
75c00efc
...
@@ -1282,12 +1282,35 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
...
@@ -1282,12 +1282,35 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
#define STRIPNAME(i) (stripformat[i]+3)
#define STRIPNAME(i) (stripformat[i]+3)
/* SPLIT_APPEND(data, left, right)
 *
 * Create a string object from the bytes data[left:right] and append it
 * to `list`.  On any failure control jumps to the `onError` label.
 *
 * The macro is not self-contained: the function that expands it must
 * declare `PyObject *str` (scratch slot for the new string) and
 * `PyObject *list`, and must define an `onError:` label.
 *
 * The body is wrapped in do { ... } while (0) so that the expansion is
 * exactly one statement: it can be used safely as the body of an
 * unbraced if/else and must be followed by a semicolon at the call site.
 */
#define SPLIT_APPEND(data, left, right) \
    do { \
        str = PyString_FromStringAndSize((data) + (left), \
                                         (right) - (left)); \
        if (str == NULL) \
            goto onError; \
        if (PyList_Append(list, str)) { \
            Py_DECREF(str); \
            goto onError; \
        } \
        /* drop our reference now that the append has succeeded */ \
        Py_DECREF(str); \
    } while (0)
/* SPLIT_INSERT(data, left, right)
 *
 * Create a string object from the bytes data[left:right] and insert it
 * at index 0 of `list`.  On any failure control jumps to the `onError`
 * label.
 *
 * The macro is not self-contained: the function that expands it must
 * declare `PyObject *str` (scratch slot for the new string) and
 * `PyObject *list`, and must define an `onError:` label.
 *
 * The body is wrapped in do { ... } while (0) so that the expansion is
 * exactly one statement: it can be used safely as the body of an
 * unbraced if/else and must be followed by a semicolon at the call site.
 */
#define SPLIT_INSERT(data, left, right) \
    do { \
        str = PyString_FromStringAndSize((data) + (left), \
                                         (right) - (left)); \
        if (str == NULL) \
            goto onError; \
        if (PyList_Insert(list, 0, str)) { \
            Py_DECREF(str); \
            goto onError; \
        } \
        /* drop our reference now that the insert has succeeded */ \
        Py_DECREF(str); \
    } while (0)
static
PyObject
*
static
PyObject
*
split_whitespace
(
const
char
*
s
,
int
len
,
int
maxsplit
)
split_whitespace
(
const
char
*
s
,
int
len
,
int
maxsplit
)
{
{
int
i
,
j
,
err
;
int
i
,
j
;
PyObject
*
item
;
PyObject
*
str
;
PyObject
*
list
=
PyList_New
(
0
);
PyObject
*
list
=
PyList_New
(
0
);
if
(
list
==
NULL
)
if
(
list
==
NULL
)
...
@@ -1302,33 +1325,49 @@ split_whitespace(const char *s, int len, int maxsplit)
...
@@ -1302,33 +1325,49 @@ split_whitespace(const char *s, int len, int maxsplit)
if
(
j
<
i
)
{
if
(
j
<
i
)
{
if
(
maxsplit
--
<=
0
)
if
(
maxsplit
--
<=
0
)
break
;
break
;
item
=
PyString_FromStringAndSize
(
s
+
j
,
(
int
)(
i
-
j
));
SPLIT_APPEND
(
s
,
j
,
i
);
if
(
item
==
NULL
)
goto
finally
;
err
=
PyList_Append
(
list
,
item
);
Py_DECREF
(
item
);
if
(
err
<
0
)
goto
finally
;
while
(
i
<
len
&&
isspace
(
Py_CHARMASK
(
s
[
i
])))
while
(
i
<
len
&&
isspace
(
Py_CHARMASK
(
s
[
i
])))
i
++
;
i
++
;
j
=
i
;
j
=
i
;
}
}
}
}
if
(
j
<
len
)
{
if
(
j
<
len
)
{
item
=
PyString_FromStringAndSize
(
s
+
j
,
(
int
)(
len
-
j
));
SPLIT_APPEND
(
s
,
j
,
len
);
if
(
item
==
NULL
)
goto
finally
;
err
=
PyList_Append
(
list
,
item
);
Py_DECREF
(
item
);
if
(
err
<
0
)
goto
finally
;
}
}
return
list
;
return
list
;
finally
:
onError
:
Py_DECREF
(
list
);
Py_DECREF
(
list
);
return
NULL
;
return
NULL
;
}
}
/* Split the buffer s[0:len] at occurrences of the single byte ch,
 * scanning left to right.
 *
 * At most maxcount separators are honoured; once that budget is used up
 * the remainder of the buffer becomes the final piece.  Adjacent
 * separators yield empty pieces.
 *
 * Returns a new list of string objects, or NULL (after releasing the
 * partially built list) if creating or appending a piece fails.
 */
static PyObject *
split_char(const char *s, int len, char ch, int maxcount)
{
    int piece_start = 0;    /* first byte of the piece being scanned */
    int pos = 0;            /* current scan position */
    PyObject *str;          /* scratch slot assigned by SPLIT_APPEND */
    PyObject *list;

    list = PyList_New(0);
    if (list == NULL)
        return NULL;

    while (pos < len) {
        if (s[pos] != ch) {
            pos++;
            continue;
        }
        /* Separator found: stop splitting once the budget is spent. */
        if (maxcount-- <= 0)
            break;
        SPLIT_APPEND(s, piece_start, pos);
        pos++;
        piece_start = pos;
    }

    /* Emit the trailing piece (possibly empty). */
    if (piece_start <= len) {
        SPLIT_APPEND(s, piece_start, len);
    }
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
PyDoc_STRVAR
(
split__doc__
,
PyDoc_STRVAR
(
split__doc__
,
"S.split([sep [,maxsplit]]) -> list of strings
\n
\
"S.split([sep [,maxsplit]]) -> list of strings
\n
\
...
@@ -1362,10 +1401,13 @@ string_split(PyStringObject *self, PyObject *args)
...
@@ -1362,10 +1401,13 @@ string_split(PyStringObject *self, PyObject *args)
#endif
#endif
else
if
(
PyObject_AsCharBuffer
(
subobj
,
&
sub
,
&
n
))
else
if
(
PyObject_AsCharBuffer
(
subobj
,
&
sub
,
&
n
))
return
NULL
;
return
NULL
;
if
(
n
==
0
)
{
if
(
n
==
0
)
{
PyErr_SetString
(
PyExc_ValueError
,
"empty separator"
);
PyErr_SetString
(
PyExc_ValueError
,
"empty separator"
);
return
NULL
;
return
NULL
;
}
}
else
if
(
n
==
1
)
return
split_char
(
s
,
len
,
sub
[
0
],
maxsplit
);
list
=
PyList_New
(
0
);
list
=
PyList_New
(
0
);
if
(
list
==
NULL
)
if
(
list
==
NULL
)
...
@@ -1406,8 +1448,8 @@ string_split(PyStringObject *self, PyObject *args)
...
@@ -1406,8 +1448,8 @@ string_split(PyStringObject *self, PyObject *args)
static
PyObject
*
static
PyObject
*
rsplit_whitespace
(
const
char
*
s
,
int
len
,
int
maxsplit
)
rsplit_whitespace
(
const
char
*
s
,
int
len
,
int
maxsplit
)
{
{
int
i
,
j
,
err
;
int
i
,
j
;
PyObject
*
item
;
PyObject
*
str
;
PyObject
*
list
=
PyList_New
(
0
);
PyObject
*
list
=
PyList_New
(
0
);
if
(
list
==
NULL
)
if
(
list
==
NULL
)
...
@@ -1422,33 +1464,49 @@ rsplit_whitespace(const char *s, int len, int maxsplit)
...
@@ -1422,33 +1464,49 @@ rsplit_whitespace(const char *s, int len, int maxsplit)
if
(
j
>
i
)
{
if
(
j
>
i
)
{
if
(
maxsplit
--
<=
0
)
if
(
maxsplit
--
<=
0
)
break
;
break
;
item
=
PyString_FromStringAndSize
(
s
+
i
+
1
,
(
int
)(
j
-
i
));
SPLIT_INSERT
(
s
,
i
+
1
,
j
+
1
);
if
(
item
==
NULL
)
goto
finally
;
err
=
PyList_Insert
(
list
,
0
,
item
);
Py_DECREF
(
item
);
if
(
err
<
0
)
goto
finally
;
while
(
i
>=
0
&&
isspace
(
Py_CHARMASK
(
s
[
i
])))
while
(
i
>=
0
&&
isspace
(
Py_CHARMASK
(
s
[
i
])))
i
--
;
i
--
;
j
=
i
;
j
=
i
;
}
}
}
}
if
(
j
>=
0
)
{
if
(
j
>=
0
)
{
item
=
PyString_FromStringAndSize
(
s
,
(
int
)(
j
+
1
));
SPLIT_INSERT
(
s
,
0
,
j
+
1
);
if
(
item
==
NULL
)
goto
finally
;
err
=
PyList_Insert
(
list
,
0
,
item
);
Py_DECREF
(
item
);
if
(
err
<
0
)
goto
finally
;
}
}
return
list
;
return
list
;
finally
:
onError
:
Py_DECREF
(
list
);
Py_DECREF
(
list
);
return
NULL
;
return
NULL
;
}
}
/* Split the buffer s[0:len] at occurrences of the single byte ch,
 * scanning right to left.
 *
 * At most maxcount separators are honoured; once that budget is used up
 * the remaining left part of the buffer becomes the first piece.
 * Adjacent separators yield empty pieces.
 *
 * Pieces are discovered from right to left.  They are appended as they
 * are found and the list is reversed once at the end, instead of
 * inserting every piece at index 0, which costs time quadratic in the
 * number of pieces.  The resulting list is the same.
 *
 * Returns a new list of string objects in left-to-right order, or NULL
 * (after releasing the partially built list) on failure.
 */
static PyObject *
rsplit_char(const char *s, int len, char ch, int maxcount)
{
    register int i, j;      /* i: scan position; j: last byte of piece */
    PyObject *str;          /* scratch slot assigned by SPLIT_APPEND */
    PyObject *list = PyList_New(0);

    if (list == NULL)
        return NULL;

    for (i = j = len - 1; i >= 0; ) {
        if (s[i] == ch) {
            /* Separator found: stop once the budget is spent. */
            if (maxcount-- <= 0)
                break;
            SPLIT_APPEND(s, i + 1, j + 1);
            j = i = i - 1;
        }
        else
            i--;
    }

    /* Emit the leftmost piece (possibly empty). */
    if (j >= -1) {
        SPLIT_APPEND(s, 0, j + 1);
    }

    /* Pieces were collected right to left; restore source order. */
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}
PyDoc_STRVAR
(
rsplit__doc__
,
PyDoc_STRVAR
(
rsplit__doc__
,
"S.rsplit([sep [,maxsplit]]) -> list of strings
\n
\
"S.rsplit([sep [,maxsplit]]) -> list of strings
\n
\
...
@@ -1483,10 +1541,13 @@ string_rsplit(PyStringObject *self, PyObject *args)
...
@@ -1483,10 +1541,13 @@ string_rsplit(PyStringObject *self, PyObject *args)
#endif
#endif
else
if
(
PyObject_AsCharBuffer
(
subobj
,
&
sub
,
&
n
))
else
if
(
PyObject_AsCharBuffer
(
subobj
,
&
sub
,
&
n
))
return
NULL
;
return
NULL
;
if
(
n
==
0
)
{
if
(
n
==
0
)
{
PyErr_SetString
(
PyExc_ValueError
,
"empty separator"
);
PyErr_SetString
(
PyExc_ValueError
,
"empty separator"
);
return
NULL
;
return
NULL
;
}
}
else
if
(
n
==
1
)
return
rsplit_char
(
s
,
len
,
sub
[
0
],
maxsplit
);
list
=
PyList_New
(
0
);
list
=
PyList_New
(
0
);
if
(
list
==
NULL
)
if
(
list
==
NULL
)
...
@@ -3104,17 +3165,6 @@ Return a list of the lines in S, breaking at line boundaries.\n\
...
@@ -3104,17 +3165,6 @@ Return a list of the lines in S, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends
\n
\
Line breaks are not included in the resulting list unless keepends
\n
\
is given and true."
);
is given and true."
);
/* SPLIT_APPEND(data, left, right)
 *
 * Create a string object from the bytes data[left:right] and append it
 * to `list`.  On any failure control jumps to the `onError` label.
 *
 * The expanding function must declare `PyObject *str` and
 * `PyObject *list` and define an `onError:` label.
 *
 * Every macro argument is parenthesised so that compound expressions
 * (e.g. `a ? b : c`, or `x - y` as `left`) are evaluated as a unit, and
 * the body is wrapped in do { ... } while (0) so the expansion is a
 * single statement that must be followed by a semicolon.
 */
#define SPLIT_APPEND(data, left, right) \
    do { \
        str = PyString_FromStringAndSize((data) + (left), \
                                         (right) - (left)); \
        if (!str) \
            goto onError; \
        if (PyList_Append(list, str)) { \
            Py_DECREF(str); \
            goto onError; \
        } \
        /* drop our reference now that the append has succeeded */ \
        Py_DECREF(str); \
    } while (0)
static
PyObject
*
static
PyObject
*
string_splitlines
(
PyStringObject
*
self
,
PyObject
*
args
)
string_splitlines
(
PyStringObject
*
self
,
PyObject
*
args
)
{
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment