Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
06a69dd8
Commit
06a69dd8
authored
May 26, 2006
by
Fredrik Lundh
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
needforspeed: partition implementation, part two.
feel free to improve the documentation and the docstrings.
parent
19bebf2e
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
143 additions
and
78 deletions
+143
-78
Doc/lib/libstdtypes.tex
Doc/lib/libstdtypes.tex
+8
-0
Include/unicodeobject.h
Include/unicodeobject.h
+9
-0
Lib/test/string_tests.py
Lib/test/string_tests.py
+15
-0
Objects/stringobject.c
Objects/stringobject.c
+15
-15
Objects/unicodeobject.c
Objects/unicodeobject.c
+96
-63
No files found.
Doc/lib/libstdtypes.tex
View file @
06a69dd8
...
...
@@ -727,6 +727,14 @@ a prefix; rather, all combinations of its values are stripped:
\versionchanged
[Support for the \var{chars} argument]
{
2.2.2
}
\end{methoddesc}
\begin{methoddesc}
[string]
{
partition
}{
sep
}
Split the string at
\var
{
sep
}
, and return a 3-tuple containing
the part before the separator, the separator itself, and the part
after the separator. If the separator is not found, return a 3-tuple
containing the string itself, followed by two empty strings.
\versionadded
{
2.5
}
\end{methoddesc}
\begin{methoddesc}
[string]
{
replace
}{
old, new
\optional
{
, count
}}
Return a copy of the string with all occurrences of substring
\var
{
old
}
replaced by
\var
{
new
}
. If the optional argument
...
...
Include/unicodeobject.h
View file @
06a69dd8
...
...
@@ -184,6 +184,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
# define PyUnicode_Join PyUnicodeUCS2_Join
# define PyUnicode_Partition PyUnicodeUCS2_Partition
# define PyUnicode_Replace PyUnicodeUCS2_Replace
# define PyUnicode_Resize PyUnicodeUCS2_Resize
# define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
...
...
@@ -259,6 +260,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
# define PyUnicode_Join PyUnicodeUCS4_Join
# define PyUnicode_Partition PyUnicodeUCS4_Partition
# define PyUnicode_Replace PyUnicodeUCS4_Replace
# define PyUnicode_Resize PyUnicodeUCS4_Resize
# define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
...
...
@@ -1018,6 +1020,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
int
keepends
/* If true, line end markers are included */
);
/* Partition a string using a given separator.

   Returns a 3-tuple holding the part before the separator, the separator
   itself, and the part after it.  If the separator is not found, the
   tuple holds the string followed by two empty strings.  Returns NULL
   with an exception set on failure (for example, an empty separator). */
PyAPI_FUNC
(
PyObject
*
)
PyUnicode_Partition
(
PyObject
*
s
,
/* String to partition */
PyObject
*
sep
/* String separator */
);
/* Split a string giving a list of Unicode strings.
If sep is NULL, splitting will be done at all whitespace
...
...
Lib/test/string_tests.py
View file @
06a69dd8
...
...
@@ -900,6 +900,21 @@ class MixinStrUnicodeUserStringTest:
self
.
checkequal
(
'A'
,
'a'
,
'title'
)
self
.
checkequal
(
True
,
'a'
,
'islower'
)
def
test_partition
(
self
):
self
.
checkequal
((
'this'
,
' is '
,
'the partition method'
),
'this is the partition method'
,
'partition'
,
' is '
)
# from raymond's original specification
S
=
'http://www.python.org'
self
.
checkequal
((
'http'
,
'://'
,
'www.python.org'
),
S
,
'partition'
,
'://'
)
self
.
checkequal
((
'http://www.python.org'
,
''
,
''
),
S
,
'partition'
,
'?'
)
self
.
checkequal
((
''
,
'http://'
,
'www.python.org'
),
S
,
'partition'
,
'http://'
)
self
.
checkequal
((
'http://www.python.'
,
'org'
,
''
),
S
,
'partition'
,
'org'
)
self
.
checkraises
(
ValueError
,
S
,
'partition'
,
''
)
self
.
checkraises
(
TypeError
,
S
,
'partition'
,
None
)
class
MixinStrStringUserStringTest
:
# Additional tests for 8bit strings, i.e. str, UserString and
...
...
Objects/stringobject.c
View file @
06a69dd8
...
...
@@ -1610,20 +1610,20 @@ string_partition(PyStringObject *self, PyObject *args)
{
Py_ssize_t
len
=
PyString_GET_SIZE
(
self
),
sep_len
,
pos
;
const
char
*
str
=
PyString_AS_STRING
(
self
),
*
sep
;
PyObject
*
sepobj
;
PyObject
*
sep
_
obj
;
PyObject
*
out
;
if
(
!
PyArg_ParseTuple
(
args
,
"O:partition"
,
&
sepobj
))
if
(
!
PyArg_ParseTuple
(
args
,
"O:partition"
,
&
sep
_
obj
))
return
NULL
;
if
(
PyString_Check
(
sepobj
))
{
sep
=
PyString_AS_STRING
(
sepobj
);
sep_len
=
PyString_GET_SIZE
(
sepobj
);
if
(
PyString_Check
(
sep
_
obj
))
{
sep
=
PyString_AS_STRING
(
sep
_
obj
);
sep_len
=
PyString_GET_SIZE
(
sep
_
obj
);
}
#ifdef Py_USING_UNICODE
_NOTYET
else
if
(
PyUnicode_Check
(
sepobj
))
return
PyUnicode_Partition
((
PyObject
*
)
self
,
sepobj
);
#ifdef Py_USING_UNICODE
else
if
(
PyUnicode_Check
(
sep
_
obj
))
return
PyUnicode_Partition
((
PyObject
*
)
self
,
sep
_
obj
);
#endif
else
if
(
PyObject_AsCharBuffer
(
sepobj
,
&
sep
,
&
sep_len
))
else
if
(
PyObject_AsCharBuffer
(
sep
_
obj
,
&
sep
,
&
sep_len
))
return
NULL
;
if
(
sep_len
==
0
)
{
...
...
@@ -1644,13 +1644,13 @@ string_partition(PyStringObject *self, PyObject *args)
Py_INCREF
(
nullstring
);
PyTuple_SET_ITEM
(
out
,
2
,
(
PyObject
*
)
nullstring
);
}
else
{
Py
_INCREF
(
sepobj
)
;
Py
Object
*
obj
;
PyTuple_SET_ITEM
(
out
,
0
,
PyString_FromStringAndSize
(
str
,
pos
));
Py
Tuple_SET_ITEM
(
out
,
1
,
sep
obj
);
PyTuple_SET_ITEM
(
out
,
2
,
PyString_FromStringAndSize
(
str
+
sep_len
+
pos
,
len
-
sep_len
-
pos
)
);
Py
_INCREF
(
sep_
obj
);
PyTuple_SET_ITEM
(
out
,
1
,
sep_obj
);
pos
+=
sep_len
;
obj
=
PyString_FromStringAndSize
(
str
+
pos
,
len
-
pos
);
PyTuple_SET_ITEM
(
out
,
2
,
obj
);
if
(
PyErr_Occurred
())
{
Py_DECREF
(
out
);
return
NULL
;
...
...
Objects/unicodeobject.c
View file @
06a69dd8
...
...
@@ -4,6 +4,9 @@ Unicode implementation based on original code by Fredrik Lundh,
modified by Marc-Andre Lemburg <mal@lemburg.com> according to the
Unicode Integration Proposal (see file Misc/unicode.txt).
Major speed upgrades to the method implementations at the Reykjavik
NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
Copyright (c) Corporation for National Research Initiatives.
--------------------------------------------------------------------
...
...
@@ -193,6 +196,7 @@ int unicode_resize(register PyUnicodeObject *unicode,
/* Resizing shared object (unicode_empty or single character
objects) in-place is not allowed. Use PyUnicode_Resize()
instead ! */
if
(
unicode
==
unicode_empty
||
(
unicode
->
length
==
1
&&
unicode
->
str
[
0
]
<
256U
&&
...
...
@@ -202,8 +206,11 @@ int unicode_resize(register PyUnicodeObject *unicode,
return
-
1
;
}
/* We allocate one more byte to make sure the string is
Ux0000 terminated -- XXX is this needed ? */
/* We allocate one more byte to make sure the string is Ux0000 terminated.
The overallocation is also used by fastsearch, which assumes that it's
safe to look at str[length] (without making any assumptions about what
it contains). */
oldstr
=
unicode
->
str
;
PyMem_RESIZE
(
unicode
->
str
,
Py_UNICODE
,
length
+
1
);
if
(
!
unicode
->
str
)
{
...
...
@@ -3859,8 +3866,6 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
/* --- Helpers ------------------------------------------------------------ */
#define USE_FAST
/* experimental fast search implementation */
/* fast search/count implementation, based on a mix between boyer-
moore and horspool, with a few more bells and whistles on the top.
for some more background, see: http://effbot.org/stringlib */
...
...
@@ -3936,10 +3941,8 @@ fastsearch(Py_UNICODE* s, Py_ssize_t n, Py_UNICODE* p, Py_ssize_t m, int mode)
/* miss: check if next character is part of pattern */
if
(
!
(
mask
&
(
1
<<
(
s
[
i
+
m
]
&
0x1F
))))
i
=
i
+
m
;
else
{
else
i
=
i
+
skip
;
continue
;
}
}
else
{
/* skip: check if next character is part of pattern */
if
(
!
(
mask
&
(
1
<<
(
s
[
i
+
m
]
&
0x1F
))))
...
...
@@ -3973,23 +3976,13 @@ LOCAL(Py_ssize_t) count(PyUnicodeObject *self,
if
(
substring
->
length
==
0
)
return
(
end
-
start
+
1
);
#ifdef USE_FAST
count
=
fastsearch
(
PyUnicode_AS_UNICODE
(
self
)
+
start
,
end
-
start
,
substring
->
str
,
substring
->
length
,
FAST_COUNT
);
if
(
count
<
0
)
count
=
0
;
/* no match */
#else
end
-=
substring
->
length
;
while
(
start
<=
end
)
if
(
Py_UNICODE_MATCH
(
self
,
start
,
substring
))
{
count
++
;
start
+=
substring
->
length
;
}
else
start
++
;
#endif
return
count
;
}
...
...
@@ -4040,30 +4033,19 @@ static Py_ssize_t findstring(PyUnicodeObject *self,
if
(
substring
->
length
==
0
)
return
(
direction
>
0
)
?
start
:
end
;
#ifdef USE_FAST
if
(
direction
>
0
)
{
Py_ssize_t
pos
=
fastsearch
(
PyUnicode_AS_UNICODE
(
self
)
+
start
,
end
-
start
,
substring
->
str
,
substring
->
length
,
FAST_SEARCH
);
if
(
pos
<
0
)
return
pos
;
return
pos
+
start
;
}
#endif
end
-=
substring
->
length
;
if
(
direction
<
0
)
{
if
(
pos
>=
0
)
return
pos
+
start
;
}
else
{
end
-=
substring
->
length
;
for
(;
end
>=
start
;
end
--
)
if
(
Py_UNICODE_MATCH
(
self
,
end
,
substring
))
return
end
;
}
else
{
for
(;
start
<=
end
;
start
++
)
if
(
Py_UNICODE_MATCH
(
self
,
start
,
substring
))
return
start
;
}
return
-
1
;
}
...
...
@@ -5167,11 +5149,8 @@ int PyUnicode_Contains(PyObject *container,
PyObject
*
element
)
{
PyUnicodeObject
*
u
,
*
v
;
int
result
;
Py_ssize_t
size
;
#ifdef USE_FAST
Py_ssize_t
pos
;
#endif
/* Coerce the two arguments */
v
=
(
PyUnicodeObject
*
)
PyUnicode_FromObject
(
element
);
...
...
@@ -5189,44 +5168,19 @@ int PyUnicode_Contains(PyObject *container,
size
=
PyUnicode_GET_SIZE
(
v
);
if
(
!
size
)
{
result
=
1
;
pos
=
0
;
goto
done
;
}
#ifdef USE_FAST
pos
=
fastsearch
(
PyUnicode_AS_UNICODE
(
u
),
PyUnicode_GET_SIZE
(
u
),
PyUnicode_AS_UNICODE
(
v
),
size
,
FAST_SEARCH
);
result
=
(
pos
!=
-
1
);
#else
result
=
0
;
if
(
size
==
1
)
{
Py_UNICODE
chr
=
PyUnicode_AS_UNICODE
(
v
)[
0
];
Py_UNICODE
*
ptr
=
PyUnicode_AS_UNICODE
(
u
);
Py_UNICODE
*
end
=
ptr
+
PyUnicode_GET_SIZE
(
u
);
for
(;
ptr
<
end
;
ptr
++
)
{
if
(
*
ptr
==
chr
)
{
result
=
1
;
break
;
}
}
}
else
{
Py_ssize_t
start
=
0
;
Py_ssize_t
end
=
PyUnicode_GET_SIZE
(
u
)
-
size
;
for
(;
start
<=
end
;
start
++
)
if
(
Py_UNICODE_MATCH
(
u
,
start
,
v
))
{
result
=
1
;
break
;
}
}
#endif
done:
Py_DECREF
(
u
);
Py_DECREF
(
v
);
return
result
;
return
(
pos
!=
-
1
)
;
}
/* Concat to string or Unicode object giving a new Unicode object. */
...
...
@@ -6335,6 +6289,84 @@ unicode_split(PyUnicodeObject *self, PyObject *args)
return
PyUnicode_Split
((
PyObject
*
)
self
,
substring
,
maxcount
);
}
/* Partition a string around a separator.

   Both arguments are coerced with PyUnicode_FromObject().  On success a
   new 3-tuple is returned: (head, sep, tail) when the separator is found,
   or (string, empty, empty) when it is not.  On failure NULL is returned
   with an exception set; an empty separator raises ValueError.

   The references obtained from PyUnicode_FromObject() are owned by this
   function and are released on every exit path; the result tuple only
   keeps the extra references taken explicitly with Py_INCREF(). */
PyObject *
PyUnicode_Partition(PyObject *str_in, PyObject *sep_in)
{
    PyObject *str_obj;
    PyObject *sep_obj;
    Py_UNICODE *str, *sep;
    Py_ssize_t len, sep_len, pos;
    PyObject *out;

    str_obj = PyUnicode_FromObject(str_in);
    if (!str_obj)
        return NULL;
    sep_obj = PyUnicode_FromObject(sep_in);
    if (!sep_obj)
        goto error;

    str = PyUnicode_AS_UNICODE(str_obj);
    len = PyUnicode_GET_SIZE(str_obj);

    sep = PyUnicode_AS_UNICODE(sep_obj);
    sep_len = PyUnicode_GET_SIZE(sep_obj);

    if (sep_len == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        goto error;
    }

    out = PyTuple_New(3);
    if (!out)
        goto error;

    pos = fastsearch(str, len, sep, sep_len, FAST_SEARCH);

    if (pos < 0) {
        /* Separator not found: (string, empty, empty). */
        Py_INCREF(str_obj);
        PyTuple_SET_ITEM(out, 0, (PyObject *) str_obj);
        Py_INCREF(unicode_empty);
        PyTuple_SET_ITEM(out, 1, (PyObject *) unicode_empty);
        Py_INCREF(unicode_empty);
        PyTuple_SET_ITEM(out, 2, (PyObject *) unicode_empty);
    }
    else {
        PyObject *head;
        PyObject *tail;

        head = PyUnicode_FromUnicode(str, pos);
        PyTuple_SET_ITEM(out, 0, head);
        Py_INCREF(sep_obj);
        PyTuple_SET_ITEM(out, 1, sep_obj);
        pos += sep_len;
        tail = PyUnicode_FromUnicode(str + pos, len - pos);
        PyTuple_SET_ITEM(out, 2, tail);

        /* Either slice may have failed to allocate; releasing the tuple
           also releases whichever items were stored successfully. */
        if (!head || !tail) {
            Py_DECREF(out);
            goto error;
        }
    }

    /* Drop our own references to the coerced inputs; without this every
       successful call leaked one reference to each of them. */
    Py_DECREF(sep_obj);
    Py_DECREF(str_obj);
    return out;

error:
    /* sep_obj is NULL here when its coercion was what failed. */
    Py_XDECREF(sep_obj);
    Py_DECREF(str_obj);
    return NULL;
}
/* Docstring for the "partition" method; it is referenced from the
   "partition" entry of unicode_methods[]. */
PyDoc_STRVAR
(
partition__doc__
,
"S.partition(sep) -> (head, sep, tail)
\n
\
\n
\
Searches for the separator sep in S, and returns the part before it,
\n
\
the separator itself, and the part after it. If the separator is not
\n
\
found, returns S and two empty strings."
);
/* Method wrapper for S.partition(sep): unpack the single positional
   argument and delegate to PyUnicode_Partition().  Returns NULL if the
   argument tuple cannot be parsed. */
static PyObject *
unicode_partition(PyUnicodeObject *self, PyObject *args)
{
    PyObject *sep_arg;

    if (PyArg_ParseTuple(args, "O:partition", &sep_arg))
        return PyUnicode_Partition((PyObject *) self, sep_arg);

    return NULL;
}
PyObject
*
PyUnicode_RSplit
(
PyObject
*
s
,
PyObject
*
sep
,
Py_ssize_t
maxsplit
)
...
...
@@ -6588,6 +6620,7 @@ static PyMethodDef unicode_methods[] = {
{
"count"
,
(
PyCFunction
)
unicode_count
,
METH_VARARGS
,
count__doc__
},
{
"expandtabs"
,
(
PyCFunction
)
unicode_expandtabs
,
METH_VARARGS
,
expandtabs__doc__
},
{
"find"
,
(
PyCFunction
)
unicode_find
,
METH_VARARGS
,
find__doc__
},
{
"partition"
,
(
PyCFunction
)
unicode_partition
,
METH_VARARGS
,
partition__doc__
},
{
"index"
,
(
PyCFunction
)
unicode_index
,
METH_VARARGS
,
index__doc__
},
{
"ljust"
,
(
PyCFunction
)
unicode_ljust
,
METH_VARARGS
,
ljust__doc__
},
{
"lower"
,
(
PyCFunction
)
unicode_lower
,
METH_NOARGS
,
lower__doc__
},
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment