Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
89a76abf
Commit
89a76abf
authored
Apr 05, 2014
by
Victor Stinner
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #21118: Optimize str.translate() for ASCII => ASCII translation
parent
5a29f258
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
120 additions
and
1 deletion
+120
-1
Objects/unicodeobject.c
Objects/unicodeobject.c
+120
-1
No files found.
Objects/unicodeobject.c
View file @
89a76abf
...
...
@@ -8545,6 +8545,116 @@ charmaptranslate_output(Py_UCS4 ch, PyObject *mapping,
return
1
;
}
static
int
unicode_fast_translate_lookup
(
PyObject
*
mapping
,
Py_UCS1
ch
,
Py_UCS1
*
translate
)
{
PyObject
*
item
;
int
ret
=
0
;
item
=
NULL
;
if
(
charmaptranslate_lookup
(
ch
,
mapping
,
&
item
))
{
return
-
1
;
}
if
(
item
==
Py_None
)
{
/* deletion: skip fast translate */
goto
exit
;
}
if
(
item
==
NULL
)
{
/* not found => default to 1:1 mapping */
translate
[
ch
]
=
ch
;
return
1
;
}
if
(
PyLong_Check
(
item
))
{
long
replace
=
(
Py_UCS4
)
PyLong_AS_LONG
(
item
);
if
(
replace
==
-
1
)
{
Py_DECREF
(
item
);
return
-
1
;
}
if
(
replace
<
0
||
127
<
replace
)
{
/* invalid character or character outside ASCII:
skip the fast translate */
goto
exit
;
}
translate
[
ch
]
=
(
Py_UCS1
)
replace
;
}
else
if
(
PyUnicode_Check
(
item
))
{
Py_UCS4
replace
;
if
(
PyUnicode_READY
(
item
)
==
-
1
)
{
Py_DECREF
(
item
);
return
-
1
;
}
if
(
PyUnicode_GET_LENGTH
(
item
)
!=
1
)
goto
exit
;
replace
=
PyUnicode_READ_CHAR
(
item
,
0
);
if
(
replace
>
127
)
goto
exit
;
translate
[
ch
]
=
(
Py_UCS1
)
replace
;
}
else
{
/* not a long or unicode */
goto
exit
;
}
Py_DECREF
(
item
);
item
=
NULL
;
ret
=
1
;
exit:
Py_XDECREF
(
item
);
return
ret
;
}
/* Fast path for ascii => ascii translation. Return 1 if the whole string
was translated into writer, return 0 if the input string was partially
translated into writer, raise an exception and return -1 on error. */
static
int
unicode_fast_translate
(
PyObject
*
input
,
PyObject
*
mapping
,
_PyUnicodeWriter
*
writer
)
{
Py_UCS1
translate
[
128
],
ch
,
ch2
;
Py_ssize_t
len
;
Py_UCS1
*
in
,
*
end
,
*
out
;
int
res
;
if
(
PyUnicode_READY
(
input
)
==
-
1
)
return
-
1
;
if
(
!
PyUnicode_IS_ASCII
(
input
))
return
0
;
len
=
PyUnicode_GET_LENGTH
(
input
);
memset
(
translate
,
0xff
,
128
);
in
=
PyUnicode_1BYTE_DATA
(
input
);
end
=
in
+
len
;
assert
(
PyUnicode_IS_ASCII
(
writer
->
buffer
));
assert
(
PyUnicode_GET_LENGTH
(
writer
->
buffer
)
==
len
);
out
=
PyUnicode_1BYTE_DATA
(
writer
->
buffer
);
for
(;
in
<
end
;
in
++
,
out
++
)
{
ch
=
*
in
;
ch2
=
translate
[
ch
];
if
(
ch2
==
0xff
)
{
res
=
unicode_fast_translate_lookup
(
mapping
,
ch
,
translate
);
if
(
res
<
0
)
return
-
1
;
if
(
res
==
0
)
{
writer
->
pos
=
in
-
PyUnicode_1BYTE_DATA
(
input
);
return
0
;
}
ch2
=
translate
[
ch
];
}
*
out
=
ch2
;
}
writer
->
pos
=
len
;
return
1
;
}
PyObject
*
_PyUnicode_TranslateCharmap
(
PyObject
*
input
,
PyObject
*
mapping
,
...
...
@@ -8561,6 +8671,7 @@ _PyUnicode_TranslateCharmap(PyObject *input,
PyObject
*
errorHandler
=
NULL
;
PyObject
*
exc
=
NULL
;
int
ignore
;
int
res
;
if
(
mapping
==
NULL
)
{
PyErr_BadArgument
();
...
...
@@ -8584,9 +8695,17 @@ _PyUnicode_TranslateCharmap(PyObject *input,
if
(
_PyUnicodeWriter_Prepare
(
&
writer
,
size
,
127
)
==
-
1
)
goto
onError
;
res
=
unicode_fast_translate
(
input
,
mapping
,
&
writer
);
if
(
res
<
0
)
{
_PyUnicodeWriter_Dealloc
(
&
writer
);
return
NULL
;
}
if
(
res
==
1
)
return
_PyUnicodeWriter_Finish
(
&
writer
);
ignore
=
(
errors
!=
NULL
&&
strcmp
(
errors
,
"ignore"
)
==
0
);
i
=
0
;
i
=
writer
.
pos
;
while
(
i
<
size
)
{
/* try to encode it */
int
translate
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment