Commit ae72c680 authored by Stefan Behnel's avatar Stefan Behnel

optimise unicode.encode() call with constant encoding parameters

parent 115ee21a
...@@ -9,10 +9,12 @@ import Symtab ...@@ -9,10 +9,12 @@ import Symtab
import Options import Options
from Code import UtilityCode from Code import UtilityCode
from StringEncoding import EncodedString from StringEncoding import EncodedString, BytesLiteral
from Errors import error from Errors import error
from ParseTreeTransforms import SkipDeclarations from ParseTreeTransforms import SkipDeclarations
import codecs
try: try:
reduce reduce
except NameError: except NameError:
...@@ -784,6 +786,112 @@ class OptimizeBuiltinCalls(Visitor.VisitorTransform): ...@@ -784,6 +786,112 @@ class OptimizeBuiltinCalls(Visitor.VisitorTransform):
node, "PyList_Reverse", self.single_param_func_type, node, "PyList_Reverse", self.single_param_func_type,
'reverse', is_unbound_method, args) 'reverse', is_unbound_method, args)
PyUnicode_AsEncodedString_func_type = PyrexTypes.CFuncType(
Builtin.bytes_type, [
PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None),
PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_char_ptr_type, None),
PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_char_ptr_type, None),
],
exception_value = "NULL")
PyUnicode_AsXyzString_func_type = PyrexTypes.CFuncType(
Builtin.bytes_type, [
PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None),
],
exception_value = "NULL")
_special_encodings = ['UTF8', 'UTF16', 'Latin1', 'ASCII',
'unicode_escape', 'raw_unicode_escape']
_special_encoders = [ (name, codecs.getencoder(name))
for name in _special_encodings ]
def _handle_simple_method_unicode_encode(self, node, args, is_unbound_method):
if len(args) < 1 or len(args) > 3:
error(node.pos, "unicode.encode(...) called with wrong number of args, found %d" %
len(args))
return node
null_node = ExprNodes.NullNode(node.pos)
string_node = args[0]
if len(args) == 1:
return self._substitute_method_call(
node, "PyUnicode_AsEncodedString",
self.PyUnicode_AsEncodedString_func_type,
'encode', is_unbound_method, [string_node, null_node, null_node])
encoding_node = args[1]
if isinstance(encoding_node, ExprNodes.CoerceToPyTypeNode):
encoding_node = encoding_node.arg
if not isinstance(encoding_node, (ExprNodes.UnicodeNode, ExprNodes.StringNode)):
return node
encoding = encoding_node.value
encoding_node = ExprNodes.StringNode(encoding_node.pos, value=encoding,
type=PyrexTypes.c_char_ptr_type)
if len(args) == 3:
error_handling_node = args[2]
if isinstance(error_handling_node, ExprNodes.CoerceToPyTypeNode):
error_handling_node = error_handling_node.arg
if not isinstance(error_handling_node,
(ExprNodes.UnicodeNode, ExprNodes.StringNode)):
return node
error_handling = error_handling_node.value
if error_handling == 'strict':
error_handling_node = null_node
else:
error_handling_node = ExprNodes.StringNode(
error_handling_node.pos, value=error_handling,
type=PyrexTypes.c_char_ptr_type)
else:
error_handling = 'strict'
error_handling_node = null_node
if isinstance(string_node, ExprNodes.UnicodeNode):
# constant, so try to do the encoding at compile time
try:
value = string_node.value.encode(encoding, error_handling)
except:
# well, looks like we can't
pass
else:
value = BytesLiteral(value)
value.encoding = encoding
return ExprNodes.StringNode(
string_node.pos, value=value, type=Builtin.bytes_type)
if error_handling == 'strict':
# try to find a specific encoder function
try: requested_encoder = codecs.getencoder(encoding)
except: pass
else:
encode_function = None
for name, encoder in self._special_encoders:
if encoder == requested_encoder:
if '_' in name:
name = ''.join([ s.capitalize()
for s in name.split('_')])
encode_function = "PyUnicode_As%sString" % name
break
if encode_function is not None:
return self._substitute_method_call(
node, encode_function,
self.PyUnicode_AsXyzString_func_type,
'encode', is_unbound_method, [string_node])
if len(args) == 2:
return self._substitute_method_call(
node, "PyUnicode_AsEncodedString",
self.PyUnicode_AsEncodedString_func_type,
'encode', is_unbound_method, [string_node, encoding_node])
return self._substitute_method_call(
node, "PyUnicode_AsEncodedString",
self.PyUnicode_AsEncodedString_func_type,
'encode', is_unbound_method,
[string_node, encoding_node, error_handling_node])
def _substitute_method_call(self, node, name, func_type, def _substitute_method_call(self, node, name, func_type,
attr_name, is_unbound_method, args=()): attr_name, is_unbound_method, args=()):
args = list(args) args = list(args)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment