Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
c3289b5f
Commit
c3289b5f
authored
Aug 07, 2008
by
Antoine Pitrou
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
issue #3460: PyUnicode_Join() implementation can be simplified in py3k
parent
94743105
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
58 additions
and
90 deletions
+58
-90
Misc/NEWS
Misc/NEWS
+4
-0
Objects/unicodeobject.c
Objects/unicodeobject.c
+54
-90
No files found.
Misc/NEWS
View file @
c3289b5f
...
@@ -22,6 +22,10 @@ Core and Builtins
...
@@ -22,6 +22,10 @@ Core and Builtins
If you need to access the UTF-8 representation of a Unicode object
If you need to access the UTF-8 representation of a Unicode object
as bytes string, please use PyUnicode_AsUTF8String() instead.
as bytes string, please use PyUnicode_AsUTF8String() instead.
- Issue #3460: PyUnicode_Join() implementation is 10% to 80% faster thanks
to Python 3.0's stricter semantics, which make it possible to avoid successive
reallocations of the result string (this also affects str.join()).
Library
Library
-------
-------
...
...
Objects/unicodeobject.c
View file @
c3289b5f
...
@@ -5619,78 +5619,70 @@ int fixtitle(PyUnicodeObject *self)
...
@@ -5619,78 +5619,70 @@ int fixtitle(PyUnicodeObject *self)
PyObject
*
PyObject
*
PyUnicode_Join
(
PyObject
*
separator
,
PyObject
*
seq
)
PyUnicode_Join
(
PyObject
*
separator
,
PyObject
*
seq
)
{
{
PyObject
*
internal_separator
=
NULL
;
const
Py_UNICODE
blank
=
' '
;
const
Py_UNICODE
blank
=
' '
;
const
Py_UNICODE
*
sep
=
&
blank
;
const
Py_UNICODE
*
sep
=
&
blank
;
Py_ssize_t
seplen
=
1
;
Py_ssize_t
seplen
=
1
;
PyUnicodeObject
*
res
=
NULL
;
/* the result */
PyUnicodeObject
*
res
=
NULL
;
/* the result */
Py_ssize_t
res_alloc
=
100
;
/* # allocated bytes for string in res */
Py_ssize_t
res_used
;
/* # used bytes */
Py_UNICODE
*
res_p
;
/* pointer to free byte in res's string area */
Py_UNICODE
*
res_p
;
/* pointer to free byte in res's string area */
PyObject
*
fseq
;
/* PySequence_Fast(seq) */
PyObject
*
fseq
;
/* PySequence_Fast(seq) */
Py_ssize_t
seqlen
;
/* len(fseq) -- number of items in sequence */
Py_ssize_t
seqlen
;
/* len(fseq) -- number of items in sequence */
PyObject
**
items
;
PyObject
*
item
;
PyObject
*
item
;
Py_ssize_t
i
;
Py_ssize_t
sz
,
i
;
fseq
=
PySequence_Fast
(
seq
,
""
);
fseq
=
PySequence_Fast
(
seq
,
""
);
if
(
fseq
==
NULL
)
{
if
(
fseq
==
NULL
)
{
return
NULL
;
return
NULL
;
}
}
/* Grrrr. A codec may be invoked to convert str objects to
/* NOTE: the following code can't call back into Python code,
* Unicode, and so it's possible to call back into Python code
* so we are sure that fseq won't be mutated.
* during PyUnicode_FromObject(), and so it's possible for a sick
* codec to change the size of fseq (if seq is a list). Therefore
* we have to keep refetching the size -- can't assume seqlen
* is invariant.
*/
*/
seqlen
=
PySequence_Fast_GET_SIZE
(
fseq
);
seqlen
=
PySequence_Fast_GET_SIZE
(
fseq
);
/* If empty sequence, return u"". */
/* If empty sequence, return u"". */
if
(
seqlen
==
0
)
{
if
(
seqlen
==
0
)
{
res
=
_PyUnicode_New
(
0
);
/* empty sequence; return u"" */
res
=
_PyUnicode_New
(
0
);
/* empty sequence; return u"" */
goto
Done
;
goto
Done
;
}
}
items
=
PySequence_Fast_ITEMS
(
fseq
);
/* If singleton sequence with an exact Unicode, return that. */
/* If singleton sequence with an exact Unicode, return that. */
if
(
seqlen
==
1
)
{
if
(
seqlen
==
1
)
{
item
=
PySequence_Fast_GET_ITEM
(
fseq
,
0
)
;
item
=
items
[
0
]
;
if
(
PyUnicode_CheckExact
(
item
))
{
if
(
PyUnicode_CheckExact
(
item
))
{
Py_INCREF
(
item
);
Py_INCREF
(
item
);
res
=
(
PyUnicodeObject
*
)
item
;
res
=
(
PyUnicodeObject
*
)
item
;
goto
Done
;
goto
Done
;
}
}
}
}
else
{
/* At least two items to join, or one that isn't exact Unicode. */
/* Set up sep and seplen */
if
(
seqlen
>
1
)
{
if
(
separator
==
NULL
)
{
/* Set up sep and seplen -- they're needed. */
sep
=
&
blank
;
if
(
separator
==
NULL
)
{
seplen
=
1
;
sep
=
&
blank
;
seplen
=
1
;
}
}
else
{
else
{
internal_separator
=
PyUnicode_FromObject
(
separator
);
if
(
!
PyUnicode_Check
(
separator
))
{
if
(
internal_separator
==
NULL
)
PyErr_Format
(
PyExc_TypeError
,
goto
onError
;
"separator: expected str instance,"
sep
=
PyUnicode_AS_UNICODE
(
internal_separator
);
" %.80s found"
,
seplen
=
PyUnicode_GET_SIZE
(
internal_separator
);
Py_TYPE
(
separator
)
->
tp_name
);
/* In case PyUnicode_FromObject() mutated seq. */
goto
onError
;
seqlen
=
PySequence_Fast_GET_SIZE
(
fseq
);
}
sep
=
PyUnicode_AS_UNICODE
(
separator
);
seplen
=
PyUnicode_GET_SIZE
(
separator
);
}
}
}
}
/* Get space. */
/* There are at least two things to join, or else we have a subclass
res
=
_PyUnicode_New
(
res_alloc
);
* of str in the sequence.
if
(
res
==
NULL
)
* Do a pre-pass to figure out the total amount of space we'll
goto
onError
;
* need (sz), and see whether all arguments are strings.
res_p
=
PyUnicode_AS_UNICODE
(
res
);
*/
res_used
=
0
;
sz
=
0
;
for
(
i
=
0
;
i
<
seqlen
;
i
++
)
{
for
(
i
=
0
;
i
<
seqlen
;
++
i
)
{
const
Py_ssize_t
old_sz
=
sz
;
Py_ssize_t
itemlen
;
item
=
items
[
i
];
Py_ssize_t
new_res_used
;
item
=
PySequence_Fast_GET_ITEM
(
fseq
,
i
);
/* Convert item to Unicode. */
if
(
!
PyUnicode_Check
(
item
))
{
if
(
!
PyUnicode_Check
(
item
))
{
PyErr_Format
(
PyExc_TypeError
,
PyErr_Format
(
PyExc_TypeError
,
"sequence item %zd: expected str instance,"
"sequence item %zd: expected str instance,"
...
@@ -5698,68 +5690,40 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
...
@@ -5698,68 +5690,40 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
i
,
Py_TYPE
(
item
)
->
tp_name
);
i
,
Py_TYPE
(
item
)
->
tp_name
);
goto
onError
;
goto
onError
;
}
}
item
=
PyUnicode_FromObject
(
item
);
sz
+=
PyUnicode_GET_SIZE
(
item
);
if
(
item
==
NULL
)
if
(
i
!=
0
)
goto
onError
;
sz
+=
seplen
;
/* We own a reference to item from here on. */
if
(
sz
<
old_sz
||
sz
>
PY_SSIZE_T_MAX
)
{
PyErr_SetString
(
PyExc_OverflowError
,
/* In case PyUnicode_FromObject() mutated seq. */
"join() result is too long for a Python string"
);
seqlen
=
PySequence_Fast_GET_SIZE
(
fseq
);
goto
onError
;
}
/* Make sure we have enough space for the separator and the item. */
}
itemlen
=
PyUnicode_GET_SIZE
(
item
);
new_res_used
=
res_used
+
itemlen
;
if
(
new_res_used
<
0
)
goto
Overflow
;
if
(
i
<
seqlen
-
1
)
{
new_res_used
+=
seplen
;
if
(
new_res_used
<
0
)
goto
Overflow
;
}
if
(
new_res_used
>
res_alloc
)
{
/* double allocated size until it's big enough */
do
{
res_alloc
+=
res_alloc
;
if
(
res_alloc
<=
0
)
goto
Overflow
;
}
while
(
new_res_used
>
res_alloc
);
if
(
_PyUnicode_Resize
(
&
res
,
res_alloc
)
<
0
)
{
Py_DECREF
(
item
);
goto
onError
;
}
res_p
=
PyUnicode_AS_UNICODE
(
res
)
+
res_used
;
}
res
=
_PyUnicode_New
(
sz
);
if
(
res
==
NULL
)
goto
onError
;
/* Catenate everything. */
res_p
=
PyUnicode_AS_UNICODE
(
res
);
for
(
i
=
0
;
i
<
seqlen
;
++
i
)
{
Py_ssize_t
itemlen
;
item
=
items
[
i
];
itemlen
=
PyUnicode_GET_SIZE
(
item
);
/* Copy item, and maybe the separator. */
/* Copy item, and maybe the separator. */
Py_UNICODE_COPY
(
res_p
,
PyUnicode_AS_UNICODE
(
item
),
itemlen
);
if
(
i
)
{
res_p
+=
itemlen
;
if
(
i
<
seqlen
-
1
)
{
Py_UNICODE_COPY
(
res_p
,
sep
,
seplen
);
Py_UNICODE_COPY
(
res_p
,
sep
,
seplen
);
res_p
+=
seplen
;
res_p
+=
seplen
;
}
}
Py_
DECREF
(
item
);
Py_
UNICODE_COPY
(
res_p
,
PyUnicode_AS_UNICODE
(
item
),
itemlen
);
res_
used
=
new_res_used
;
res_
p
+=
itemlen
;
}
}
/* Shrink res to match the used area; this probably can't fail,
* but it's cheap to check.
*/
if
(
_PyUnicode_Resize
(
&
res
,
res_used
)
<
0
)
goto
onError
;
Done:
Done:
Py_XDECREF
(
internal_separator
);
Py_DECREF
(
fseq
);
Py_DECREF
(
fseq
);
return
(
PyObject
*
)
res
;
return
(
PyObject
*
)
res
;
Overflow:
PyErr_SetString
(
PyExc_OverflowError
,
"join() result is too long for a Python string"
);
Py_DECREF
(
item
);
/* fall through */
onError:
onError:
Py_XDECREF
(
internal_separator
);
Py_DECREF
(
fseq
);
Py_DECREF
(
fseq
);
Py_XDECREF
(
res
);
Py_XDECREF
(
res
);
return
NULL
;
return
NULL
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment