Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
e2cef885
Commit
e2cef885
authored
Apr 13, 2013
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #16061: Speed up str.replace() for replacing 1-character strings.
parent
a707f299
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
102 additions
and
26 deletions
+102
-26
Makefile.pre.in
Makefile.pre.in
+1
-0
Misc/NEWS
Misc/NEWS
+2
-0
Objects/stringlib/replace.h
Objects/stringlib/replace.h
+53
-0
Objects/unicodeobject.c
Objects/unicodeobject.c
+38
-26
PC/VS9.0/pythoncore.vcproj
PC/VS9.0/pythoncore.vcproj
+4
-0
PCbuild/pythoncore.vcxproj
PCbuild/pythoncore.vcxproj
+1
-0
PCbuild/pythoncore.vcxproj.filters
PCbuild/pythoncore.vcxproj.filters
+3
-0
No files found.
Makefile.pre.in
View file @
e2cef885
...
...
@@ -726,6 +726,7 @@ UNICODE_DEPS = \
$(srcdir)
/Objects/stringlib/find_max_char.h
\
$(srcdir)
/Objects/stringlib/localeutil.h
\
$(srcdir)
/Objects/stringlib/partition.h
\
$(srcdir)
/Objects/stringlib/replace.h
\
$(srcdir)
/Objects/stringlib/split.h
\
$(srcdir)
/Objects/stringlib/ucs1lib.h
\
$(srcdir)
/Objects/stringlib/ucs2lib.h
\
...
...
Misc/NEWS
View file @
e2cef885
...
...
@@ -10,6 +10,8 @@ What's New in Python 3.4.0 Alpha 1?
Core and Builtins
-----------------
- Issue #16061: Speed up str.replace() for replacing 1-character strings.
- Issue #17715: Fix segmentation fault from raising an exception in a __trunc__
method.
...
...
Objects/stringlib/replace.h
0 → 100644
View file @
e2cef885
/* stringlib: replace implementation */
#ifndef STRINGLIB_FASTSEARCH_H
#error must include "stringlib/fastsearch.h" before including this module
#endif
/* Replace occurrences of the character u1 with u2, in place, inside the
   buffer [s, end).

   The element at s is overwritten unconditionally before any comparison is
   made, so s is presumably expected to already point at an occurrence of u1
   (verify against the caller).  Scanning then continues towards end and
   stops once maxcount characters have been written in total (that first
   write included) or the buffer is exhausted.

   NOTE(review): maxcount is pre-decremented and compared with zero, so the
   code appears to assume maxcount >= 1 on entry; a value of 0 would not
   trip the limit. */
Py_LOCAL_INLINE(void)
STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end,
                                 Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
{
    *s = u2;

    for (;;) {
        int budget;

        /* Stop when the replacement quota is used up ... */
        if (--maxcount == 0)
            return;
        /* ... or when there is nothing left to look at. */
        if (++s == end)
            return;

        if (*s == u1) {
            /* Adjacent occurrence: no search needed. */
            *s = u2;
            continue;
        }

        /* Locate the next character to replace.
           When u1 is frequent, a short inline scan is the cheapest way to
           find it.  When u1 is rare, a library/function-call search wins
           because the call overhead is spread over a long stretch of
           characters.  So: scan inline for a bounded number of steps, and
           only then hand over to memchr() / FASTSEARCH(). */
        budget = 10;
        for (;;) {
            ++s;
            if (s == end)
                return;
            if (*s == u1)
                break;
            if (--budget == 0) {
                /* The inline scan did not find u1 within its budget;
                   search the remainder of the buffer in one call. */
#if STRINGLIB_SIZEOF_CHAR == 1
                ++s;
                s = memchr(s, u1, end - s);
                if (!s)
                    return;
#else
                Py_ssize_t offset;
                STRINGLIB_CHAR needle = (STRINGLIB_CHAR) u1;
                ++s;
                offset = FASTSEARCH(s, end - s, &needle, 1, 0, FAST_SEARCH);
                if (offset < 0)
                    return;
                s += offset;
#endif
                /* s now points at the match; leave the scan loop. */
                break;
            }
        }

        *s = u2;
    }
}
Objects/unicodeobject.c
View file @
e2cef885
...
...
@@ -605,6 +605,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
#include "stringlib/split.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/replace.h"
#include "stringlib/find_max_char.h"
#include "stringlib/localeutil.h"
#include "stringlib/undef.h"
...
...
@@ -615,6 +616,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
#include "stringlib/split.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/replace.h"
#include "stringlib/find_max_char.h"
#include "stringlib/localeutil.h"
#include "stringlib/undef.h"
...
...
@@ -625,6 +627,7 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
#include "stringlib/split.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/replace.h"
#include "stringlib/find_max_char.h"
#include "stringlib/localeutil.h"
#include "stringlib/undef.h"
...
...
@@ -9927,6 +9930,31 @@ anylib_count(int kind, PyObject *sstr, void* sbuf, Py_ssize_t slen,
return
0
;
}
/* Replace the character u1 with u2 inside the unicode object u, in place,
   starting at index pos.

   The string's kind selects the matching width-specific stringlib routine
   (ucs1lib / ucs2lib / ucs4lib).  Each routine receives a pointer to element
   pos of the data buffer, a pointer one past the last element, and u1, u2
   and maxcount passed through unchanged. */
static void
replace_1char_inplace(PyObject *u, Py_ssize_t pos,
                      Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
{
    int ukind = PyUnicode_KIND(u);
    void *base = PyUnicode_DATA(u);
    Py_ssize_t length = PyUnicode_GET_LENGTH(u);

    if (ukind == PyUnicode_1BYTE_KIND) {
        Py_UCS1 *buf = (Py_UCS1 *)base;

        ucs1lib_replace_1char_inplace(buf + pos, buf + length,
                                      u1, u2, maxcount);
        return;
    }

    if (ukind == PyUnicode_2BYTE_KIND) {
        Py_UCS2 *buf = (Py_UCS2 *)base;

        ucs2lib_replace_1char_inplace(buf + pos, buf + length,
                                      u1, u2, maxcount);
        return;
    }

    /* Only the 4-byte representation is left. */
    assert(ukind == PyUnicode_4BYTE_KIND);
    {
        Py_UCS4 *buf = (Py_UCS4 *)base;

        ucs4lib_replace_1char_inplace(buf + pos, buf + length,
                                      u1, u2, maxcount);
    }
}
static
PyObject
*
replace
(
PyObject
*
self
,
PyObject
*
str1
,
PyObject
*
str2
,
Py_ssize_t
maxcount
)
...
...
@@ -9943,7 +9971,7 @@ replace(PyObject *self, PyObject *str1,
Py_ssize_t
len1
=
PyUnicode_GET_LENGTH
(
str1
);
Py_ssize_t
len2
=
PyUnicode_GET_LENGTH
(
str2
);
int
mayshrink
;
Py_UCS4
maxchar
,
maxchar_str2
;
Py_UCS4
maxchar
,
maxchar_str
1
,
maxchar_str
2
;
if
(
maxcount
<
0
)
maxcount
=
PY_SSIZE_T_MAX
;
...
...
@@ -9952,15 +9980,16 @@ replace(PyObject *self, PyObject *str1,
if
(
str1
==
str2
)
goto
nothing
;
if
(
skind
<
kind1
)
/* substring too wide to be present */
goto
nothing
;
maxchar
=
PyUnicode_MAX_CHAR_VALUE
(
self
);
maxchar_str1
=
PyUnicode_MAX_CHAR_VALUE
(
str1
);
if
(
maxchar
<
maxchar_str1
)
/* substring too wide to be present */
goto
nothing
;
maxchar_str2
=
PyUnicode_MAX_CHAR_VALUE
(
str2
);
/* Replacing str1 with str2 may cause a maxchar reduction in the
result string. */
mayshrink
=
(
maxchar_str2
<
maxchar
);
mayshrink
=
(
maxchar_str2
<
maxchar
_str1
)
&&
(
maxchar
==
maxchar_str1
);
maxchar
=
MAX_MAXCHAR
(
maxchar
,
maxchar_str2
);
if
(
len1
==
len2
)
{
...
...
@@ -9970,36 +9999,19 @@ replace(PyObject *self, PyObject *str1,
if
(
len1
==
1
)
{
/* replace characters */
Py_UCS4
u1
,
u2
;
int
rkind
;
Py_ssize_t
index
,
pos
;
char
*
src
,
*
rbuf
;
Py_ssize_t
pos
;
u1
=
PyUnicode_READ
(
kind1
,
buf1
,
0
);
pos
=
findchar
(
sbuf
,
PyUnicode_KIND
(
self
)
,
slen
,
u1
,
1
);
pos
=
findchar
(
sbuf
,
skind
,
slen
,
u1
,
1
);
if
(
pos
<
0
)
goto
nothing
;
u2
=
PyUnicode_READ
(
kind2
,
buf2
,
0
);
u
=
PyUnicode_New
(
slen
,
maxchar
);
if
(
!
u
)
goto
error
;
_PyUnicode_FastCopyCharacters
(
u
,
0
,
self
,
0
,
slen
);
rkind
=
PyUnicode_KIND
(
u
);
rbuf
=
PyUnicode_DATA
(
u
);
PyUnicode_WRITE
(
rkind
,
rbuf
,
pos
,
u2
);
index
=
0
;
src
=
sbuf
;
while
(
--
maxcount
)
{
pos
++
;
src
+=
pos
*
PyUnicode_KIND
(
self
);
slen
-=
pos
;
index
+=
pos
;
pos
=
findchar
(
src
,
PyUnicode_KIND
(
self
),
slen
,
u1
,
1
);
if
(
pos
<
0
)
break
;
PyUnicode_WRITE
(
rkind
,
rbuf
,
index
+
pos
,
u2
);
}
_PyUnicode_FastCopyCharacters
(
u
,
0
,
self
,
0
,
slen
);
replace_1char_inplace
(
u
,
pos
,
u1
,
u2
,
maxcount
);
}
else
{
int
rkind
=
skind
;
...
...
PC/VS9.0/pythoncore.vcproj
View file @
e2cef885
...
...
@@ -1586,6 +1586,10 @@
RelativePath=
"..\..\Objects\rangeobject.c"
>
</File>
<File
RelativePath=
"..\..\Objects\stringlib\replace.h"
>
</File>
<File
RelativePath=
"..\..\Objects\setobject.c"
>
...
...
PCbuild/pythoncore.vcxproj
View file @
e2cef885
...
...
@@ -475,6 +475,7 @@
<ClInclude
Include=
"..\Objects\stringlib\fastsearch.h"
/>
<ClInclude
Include=
"..\Objects\stringlib\find.h"
/>
<ClInclude
Include=
"..\Objects\stringlib\partition.h"
/>
<ClInclude
Include=
"..\Objects\stringlib\replace.h"
/>
<ClInclude
Include=
"..\Objects\stringlib\split.h"
/>
<ClInclude
Include=
"..\Objects\unicodetype_db.h"
/>
<ClInclude
Include=
"..\Parser\parser.h"
/>
...
...
PCbuild/pythoncore.vcxproj.filters
View file @
e2cef885
...
...
@@ -378,6 +378,9 @@
<ClInclude
Include=
"..\Objects\stringlib\partition.h"
>
<Filter>
Objects
</Filter>
</ClInclude>
<ClInclude
Include=
"..\Objects\stringlib\replace.h"
>
<Filter>
Objects
</Filter>
</ClInclude>
<ClInclude
Include=
"..\Objects\stringlib\split.h"
>
<Filter>
Objects
</Filter>
</ClInclude>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment