Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Boxiang Sun
cython
Commits
a664239a
Commit
a664239a
authored
Mar 03, 2013
by
Nikita Nemkin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Basic support for Py_UNICODE* strings.
parent
0d651b18
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
150 additions
and
16 deletions
+150
-16
Cython/Compiler/Code.py
Cython/Compiler/Code.py
+28
-0
Cython/Compiler/ExprNodes.py
Cython/Compiler/ExprNodes.py
+60
-13
Cython/Compiler/Optimize.py
Cython/Compiler/Optimize.py
+13
-1
Cython/Compiler/PyrexTypes.py
Cython/Compiler/PyrexTypes.py
+14
-2
Cython/Compiler/StringEncoding.py
Cython/Compiler/StringEncoding.py
+12
-0
Cython/Utility/StringTools.c
Cython/Utility/StringTools.c
+14
-0
Cython/Utility/TypeConversion.c
Cython/Utility/TypeConversion.c
+9
-0
No files found.
Cython/Compiler/Code.py
View file @
a664239a
...
@@ -778,6 +778,16 @@ class StringConst(object):
...
@@ -778,6 +778,16 @@ class StringConst(object):
self.py_strings[key] = py_string
self.py_strings[key] = py_string
return py_string
return py_string
class UnicodeConst(object):
    """Global info about a Py_UNICODE[] constant held by GlobalState.

    A plain record pairing the constant's C name with the text it holds.
    """
    # cname    string                     C identifier of the constant
    # text     EncodedString (unicode)    the literal's text

    def __init__(self, cname, text):
        self.text = text
        self.cname = cname
class PyStringConst(object):
class PyStringConst(object):
"""Global info about a Python string constant held by GlobalState.
"""Global info about a Python string constant held by GlobalState.
"""
"""
...
@@ -873,6 +883,7 @@ class GlobalState(object):
...
@@ -873,6 +883,7 @@ class GlobalState(object):
self.const_cname_counter = 1
self.const_cname_counter = 1
self.string_const_index = {}
self.string_const_index = {}
self.unicode_const_index = {}
self.int_const_index = {}
self.int_const_index = {}
self.py_constants = []
self.py_constants = []
...
@@ -1016,6 +1027,16 @@ class GlobalState(object):
...
@@ -1016,6 +1027,16 @@ class GlobalState(object):
c
.
add_py_version
(
py_version
)
c
.
add_py_version
(
py_version
)
return
c
return
c
def get_unicode_const(self, text):
    """Return the Py_UNICODE[] constant for *text*, creating it if necessary.

    Constants are cached in ``self.unicode_const_index`` keyed by *text*, so
    repeated requests for the same text return the same UnicodeConst.  A new
    constant gets its C name from ``self.new_const_cname()``.
    """
    assert text.is_unicode
    index = self.unicode_const_index
    if text in index:
        return index[text]
    # First request for this text: allocate a fresh cname and remember it.
    const = index[text] = UnicodeConst(self.new_const_cname(), text)
    return const
def
get_py_string_const
(
self
,
text
,
identifier
=
None
,
def
get_py_string_const
(
self
,
text
,
identifier
=
None
,
is_str
=
False
,
unicode_value
=
None
):
is_str
=
False
,
unicode_value
=
None
):
# return a Python string constant, creating a new one if necessary
# return a Python string constant, creating a new one if necessary
...
@@ -1141,6 +1162,10 @@ class GlobalState(object):
...
@@ -1141,6 +1162,10 @@ class GlobalState(object):
for
py_string
in
c
.
py_strings
.
values
():
for
py_string
in
c
.
py_strings
.
values
():
py_strings
.
append
((
c
.
cname
,
len
(
py_string
.
cname
),
py_string
))
py_strings
.
append
((
c
.
cname
,
len
(
py_string
.
cname
),
py_string
))
for
c
in
self
.
unicode_const_index
.
values
():
decls_writer
.
putln
(
'static Py_UNICODE %s[] = { %s };'
%
(
c
.
cname
,
StringEncoding
.
encode_py_unicode_string
(
c
.
text
)))
if
py_strings
:
if
py_strings
:
self
.
use_utility_code
(
UtilityCode
.
load_cached
(
"InitStrings"
,
"StringTools.c"
))
self
.
use_utility_code
(
UtilityCode
.
load_cached
(
"InitStrings"
,
"StringTools.c"
))
py_strings
.
sort
()
py_strings
.
sort
()
...
@@ -1435,6 +1460,9 @@ class CCodeWriter(object):
...
@@ -1435,6 +1460,9 @@ class CCodeWriter(object):
def
get_string_const
(
self
,
text
):
def
get_string_const
(
self
,
text
):
return
self
.
globalstate
.
get_string_const
(
text
).
cname
return
self
.
globalstate
.
get_string_const
(
text
).
cname
def get_unicode_const(self, text):
    """Return the C name of the Py_UNICODE[] constant for *text*.

    Delegates to ``self.globalstate.get_unicode_const()`` and exposes only
    the resulting constant's ``cname``.
    """
    const = self.globalstate.get_unicode_const(text)
    return const.cname
def
get_py_string_const
(
self
,
text
,
identifier
=
None
,
def
get_py_string_const
(
self
,
text
,
identifier
=
None
,
is_str
=
False
,
unicode_value
=
None
):
is_str
=
False
,
unicode_value
=
None
):
return
self
.
globalstate
.
get_py_string_const
(
return
self
.
globalstate
.
get_py_string_const
(
...
...
Cython/Compiler/ExprNodes.py
View file @
a664239a
...
@@ -63,14 +63,16 @@ coercion_error_dict = {
...
@@ -63,14 +63,16 @@ coercion_error_dict = {
# string related errors
# string related errors
(
Builtin
.
unicode_type
,
Builtin
.
bytes_type
)
:
"Cannot convert Unicode string to 'bytes' implicitly, encoding required."
,
(
Builtin
.
unicode_type
,
Builtin
.
bytes_type
)
:
"Cannot convert Unicode string to 'bytes' implicitly, encoding required."
,
(
Builtin
.
unicode_type
,
Builtin
.
str_type
)
:
"Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding."
,
(
Builtin
.
unicode_type
,
Builtin
.
str_type
)
:
"Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding."
,
(
Builtin
.
unicode_type
,
PyrexTypes
.
c_char_ptr_type
)
:
"Unicode objects
do not support coercion to C types
."
,
(
Builtin
.
unicode_type
,
PyrexTypes
.
c_char_ptr_type
)
:
"Unicode objects
only support coercion to Py_UNICODE*
."
,
(
Builtin
.
unicode_type
,
PyrexTypes
.
c_uchar_ptr_type
)
:
"Unicode objects
do not support coercion to C types
."
,
(
Builtin
.
unicode_type
,
PyrexTypes
.
c_uchar_ptr_type
)
:
"Unicode objects
only support coercion to Py_UNICODE*
."
,
(
Builtin
.
bytes_type
,
Builtin
.
unicode_type
)
:
"Cannot convert 'bytes' object to unicode implicitly, decoding required"
,
(
Builtin
.
bytes_type
,
Builtin
.
unicode_type
)
:
"Cannot convert 'bytes' object to unicode implicitly, decoding required"
,
(
Builtin
.
bytes_type
,
Builtin
.
str_type
)
:
"Cannot convert 'bytes' object to str implicitly. This is not portable to Py3."
,
(
Builtin
.
bytes_type
,
Builtin
.
str_type
)
:
"Cannot convert 'bytes' object to str implicitly. This is not portable to Py3."
,
(
Builtin
.
bytes_type
,
PyrexTypes
.
c_py_unicode_ptr_type
)
:
"Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'."
,
(
Builtin
.
str_type
,
Builtin
.
unicode_type
)
:
"str objects do not support coercion to unicode, use a unicode string literal instead (u'')"
,
(
Builtin
.
str_type
,
Builtin
.
unicode_type
)
:
"str objects do not support coercion to unicode, use a unicode string literal instead (u'')"
,
(
Builtin
.
str_type
,
Builtin
.
bytes_type
)
:
"Cannot convert 'str' to 'bytes' implicitly. This is not portable."
,
(
Builtin
.
str_type
,
Builtin
.
bytes_type
)
:
"Cannot convert 'str' to 'bytes' implicitly. This is not portable."
,
(
Builtin
.
str_type
,
PyrexTypes
.
c_char_ptr_type
)
:
"'str' objects do not support coercion to C types (use 'bytes'?)."
,
(
Builtin
.
str_type
,
PyrexTypes
.
c_char_ptr_type
)
:
"'str' objects do not support coercion to C types (use 'bytes'?)."
,
(
Builtin
.
str_type
,
PyrexTypes
.
c_uchar_ptr_type
)
:
"'str' objects do not support coercion to C types (use 'bytes'?)."
,
(
Builtin
.
str_type
,
PyrexTypes
.
c_uchar_ptr_type
)
:
"'str' objects do not support coercion to C types (use 'bytes'?)."
,
(
Builtin
.
str_type
,
PyrexTypes
.
c_py_unicode_ptr_type
)
:
"'str' objects do not support coercion to C types (use 'unicode'?)."
,
(
PyrexTypes
.
c_char_ptr_type
,
Builtin
.
unicode_type
)
:
"Cannot convert 'char*' to unicode implicitly, decoding required"
,
(
PyrexTypes
.
c_char_ptr_type
,
Builtin
.
unicode_type
)
:
"Cannot convert 'char*' to unicode implicitly, decoding required"
,
(
PyrexTypes
.
c_uchar_ptr_type
,
Builtin
.
unicode_type
)
:
"Cannot convert 'char*' to unicode implicitly, decoding required"
,
(
PyrexTypes
.
c_uchar_ptr_type
,
Builtin
.
unicode_type
)
:
"Cannot convert 'char*' to unicode implicitly, decoding required"
,
}
}
...
@@ -1171,8 +1173,8 @@ class BytesNode(ConstNode):
...
@@ -1171,8 +1173,8 @@ class BytesNode(ConstNode):
return
self
.
result_code
return
self
.
result_code
class
UnicodeNode
(
Py
ConstNode
):
class
UnicodeNode
(
ConstNode
):
# A Py
thon unicode object
# A Py
_UNICODE* or unicode literal
#
#
# value EncodedString
# value EncodedString
# bytes_value BytesLiteral the literal parsed as bytes string ('-3' unicode literals only)
# bytes_value BytesLiteral the literal parsed as bytes string ('-3' unicode literals only)
...
@@ -1213,7 +1215,11 @@ class UnicodeNode(PyConstNode):
...
@@ -1213,7 +1215,11 @@ class UnicodeNode(PyConstNode):
if
dst_type
.
is_string
and
self
.
bytes_value
is
not
None
:
if
dst_type
.
is_string
and
self
.
bytes_value
is
not
None
:
# special case: '-3' enforced unicode literal used in a C char* context
# special case: '-3' enforced unicode literal used in a C char* context
return
BytesNode
(
self
.
pos
,
value
=
self
.
bytes_value
).
coerce_to
(
dst_type
,
env
)
return
BytesNode
(
self
.
pos
,
value
=
self
.
bytes_value
).
coerce_to
(
dst_type
,
env
)
error
(
self
.
pos
,
"Unicode literals do not support coercion to C types other than Py_UNICODE or Py_UCS4."
)
if
dst_type
.
is_unicode
:
node
=
UnicodeNode
(
self
.
pos
,
value
=
self
.
value
)
node
.
type
=
dst_type
return
node
error
(
self
.
pos
,
"Unicode literals do not support coercion to C types other than Py_UNICODE/Py_UCS4 (for characters) or Py_UNICODE* (for strings)."
)
elif
dst_type
is
not
py_object_type
:
elif
dst_type
is
not
py_object_type
:
if
not
self
.
check_for_coercion_error
(
dst_type
,
env
):
if
not
self
.
check_for_coercion_error
(
dst_type
,
env
):
self
.
fail_assignment
(
dst_type
)
self
.
fail_assignment
(
dst_type
)
...
@@ -1225,11 +1231,20 @@ class UnicodeNode(PyConstNode):
...
@@ -1225,11 +1231,20 @@ class UnicodeNode(PyConstNode):
## and (0xD800 <= self.value[0] <= 0xDBFF)
## and (0xD800 <= self.value[0] <= 0xDBFF)
## and (0xDC00 <= self.value[1] <= 0xDFFF))
## and (0xDC00 <= self.value[1] <= 0xDFFF))
def coerce_to_boolean(self, env):
    """Fold the literal to a constant BoolNode.

    The truth value is ``bool(self.value)``; it is passed to the BoolNode
    both as its ``value`` and as its ``constant_result``.  *env* is unused.
    """
    truth = bool(self.value)
    return BoolNode(self.pos, value=truth, constant_result=truth)
def
contains_surrogates
(
self
):
def
contains_surrogates
(
self
):
return
_string_contains_surrogates
(
self
.
value
)
return
_string_contains_surrogates
(
self
.
value
)
def
generate_evaluation_code
(
self
,
code
):
def
generate_evaluation_code
(
self
,
code
):
if
self
.
type
.
is_pyobject
:
self
.
result_code
=
code
.
get_py_string_const
(
self
.
value
)
self
.
result_code
=
code
.
get_py_string_const
(
self
.
value
)
else
:
if
self
.
contains_surrogates
():
warning
(
self
.
pos
,
"Py_UNICODE* literals with characters outside BMP are not portable."
,
level
=
1
);
self
.
result_code
=
code
.
get_unicode_const
(
self
.
value
)
def
calculate_result_code
(
self
):
def
calculate_result_code
(
self
):
return
self
.
result_code
return
self
.
result_code
...
@@ -2633,6 +2648,9 @@ class IndexNode(ExprNode):
...
@@ -2633,6 +2648,9 @@ class IndexNode(ExprNode):
if
base_type
.
is_string
:
if
base_type
.
is_string
:
# sliced C strings must coerce to Python
# sliced C strings must coerce to Python
return
bytes_type
return
bytes_type
elif
base_type
.
is_unicode
:
# sliced Py_UNICODE* strings must coerce to Python
return
unicode_type
elif
base_type
in
(
unicode_type
,
bytes_type
,
str_type
,
list_type
,
tuple_type
):
elif
base_type
in
(
unicode_type
,
bytes_type
,
str_type
,
list_type
,
tuple_type
):
# slicing these returns the same type
# slicing these returns the same type
return
base_type
return
base_type
...
@@ -3446,6 +3464,8 @@ class SliceIndexNode(ExprNode):
...
@@ -3446,6 +3464,8 @@ class SliceIndexNode(ExprNode):
base_type
=
self
.
base
.
infer_type
(
env
)
base_type
=
self
.
base
.
infer_type
(
env
)
if
base_type
.
is_string
or
base_type
.
is_cpp_class
:
if
base_type
.
is_string
or
base_type
.
is_cpp_class
:
return
bytes_type
return
bytes_type
elif
base_type
.
is_unicode
:
return
unicode_type
elif
base_type
in
(
bytes_type
,
str_type
,
unicode_type
,
elif
base_type
in
(
bytes_type
,
str_type
,
unicode_type
,
list_type
,
tuple_type
):
list_type
,
tuple_type
):
return
base_type
return
base_type
...
@@ -3510,6 +3530,8 @@ class SliceIndexNode(ExprNode):
...
@@ -3510,6 +3530,8 @@ class SliceIndexNode(ExprNode):
base_type
=
self
.
base
.
type
base_type
=
self
.
base
.
type
if
base_type
.
is_string
or
base_type
.
is_cpp_string
:
if
base_type
.
is_string
or
base_type
.
is_cpp_string
:
self
.
type
=
default_str_type
(
env
)
self
.
type
=
default_str_type
(
env
)
elif
base_type
.
is_unicode
:
self
.
type
=
unicode_type
elif
base_type
.
is_ptr
:
elif
base_type
.
is_ptr
:
self
.
type
=
base_type
self
.
type
=
base_type
elif
base_type
.
is_array
:
elif
base_type
.
is_array
:
...
@@ -3578,6 +3600,27 @@ class SliceIndexNode(ExprNode):
...
@@ -3578,6 +3600,27 @@ class SliceIndexNode(ExprNode):
stop_code
,
stop_code
,
start_code
,
start_code
,
code
.
error_goto_if_null
(
result
,
self
.
pos
)))
code
.
error_goto_if_null
(
result
,
self
.
pos
)))
elif
self
.
base
.
type
.
is_unicode
:
base_result
=
self
.
base
.
result
()
if
self
.
base
.
type
!=
PyrexTypes
.
c_py_unicode_ptr_type
:
base_result
=
'((const Py_UNICODE*)%s)'
%
base_result
if
self
.
stop
is
None
:
code
.
putln
(
"%s = __Pyx_PyUnicode_FromUnicode(%s + %s); %s"
%
(
result
,
base_result
,
start_code
,
code
.
error_goto_if_null
(
result
,
self
.
pos
)))
else
:
code
.
putln
(
"%s = __Pyx_PyUnicode_FromUnicodeAndLength(%s + %s, %s - %s); %s"
%
(
result
,
base_result
,
start_code
,
stop_code
,
start_code
,
code
.
error_goto_if_null
(
result
,
self
.
pos
)))
elif
self
.
base
.
type
is
unicode_type
:
elif
self
.
base
.
type
is
unicode_type
:
code
.
globalstate
.
use_utility_code
(
code
.
globalstate
.
use_utility_code
(
UtilityCode
.
load_cached
(
"PyUnicode_Substring"
,
"StringTools.c"
))
UtilityCode
.
load_cached
(
"PyUnicode_Substring"
,
"StringTools.c"
))
...
@@ -4903,11 +4946,11 @@ class AttributeNode(ExprNode):
...
@@ -4903,11 +4946,11 @@ class AttributeNode(ExprNode):
self
.
is_py_attr
=
0
self
.
is_py_attr
=
0
self
.
member
=
self
.
attribute
self
.
member
=
self
.
attribute
if
obj_type
is
None
:
if
obj_type
is
None
:
if
self
.
obj
.
type
.
is_string
:
if
self
.
obj
.
type
.
is_string
or
self
.
obj
.
type
.
is_unicode
:
self
.
obj
=
self
.
obj
.
coerce_to_pyobject
(
env
)
self
.
obj
=
self
.
obj
.
coerce_to_pyobject
(
env
)
obj_type
=
self
.
obj
.
type
obj_type
=
self
.
obj
.
type
else
:
else
:
if
obj_type
.
is_string
:
if
obj_type
.
is_string
or
obj_type
.
is_unicode
:
obj_type
=
py_object_type
obj_type
=
py_object_type
if
obj_type
.
is_ptr
or
obj_type
.
is_array
:
if
obj_type
.
is_ptr
or
obj_type
.
is_array
:
obj_type
=
obj_type
.
base_type
obj_type
=
obj_type
.
base_type
...
@@ -8337,8 +8380,12 @@ class BinopNode(ExprNode):
...
@@ -8337,8 +8380,12 @@ class BinopNode(ExprNode):
if
self
.
is_py_operation_types
(
type1
,
type2
):
if
self
.
is_py_operation_types
(
type1
,
type2
):
if
type2
.
is_string
:
if
type2
.
is_string
:
type2
=
Builtin
.
bytes_type
type2
=
Builtin
.
bytes_type
elif
type2
.
is_unicode
:
type2
=
Builtin
.
unicode_type
if
type1
.
is_string
:
if
type1
.
is_string
:
type1
=
Builtin
.
bytes_type
type1
=
Builtin
.
bytes_type
elif
type1
.
is_unicode
:
type1
=
Builtin
.
unicode_type
elif
self
.
operator
==
'%'
\
elif
self
.
operator
==
'%'
\
and
type1
in
(
Builtin
.
str_type
,
Builtin
.
unicode_type
):
and
type1
in
(
Builtin
.
str_type
,
Builtin
.
unicode_type
):
# note that b'%s' % b'abc' doesn't work in Py3
# note that b'%s' % b'abc' doesn't work in Py3
...
@@ -8587,7 +8634,7 @@ class AddNode(NumBinopNode):
...
@@ -8587,7 +8634,7 @@ class AddNode(NumBinopNode):
# '+' operator.
# '+' operator.
def
is_py_operation_types
(
self
,
type1
,
type2
):
def
is_py_operation_types
(
self
,
type1
,
type2
):
if
type1
.
is_string
and
type2
.
is_string
:
if
type1
.
is_string
and
type2
.
is_string
or
type1
.
is_unicode
and
type2
.
is_unicode
:
return
1
return
1
else
:
else
:
return
NumBinopNode
.
is_py_operation_types
(
self
,
type1
,
type2
)
return
NumBinopNode
.
is_py_operation_types
(
self
,
type1
,
type2
)
...
@@ -9950,7 +9997,7 @@ class CoerceToPyTypeNode(CoercionNode):
...
@@ -9950,7 +9997,7 @@ class CoerceToPyTypeNode(CoercionNode):
# be specific about some known types
# be specific about some known types
if
arg
.
type
.
is_string
or
arg
.
type
.
is_cpp_string
:
if
arg
.
type
.
is_string
or
arg
.
type
.
is_cpp_string
:
self
.
type
=
default_str_type
(
env
)
self
.
type
=
default_str_type
(
env
)
elif
arg
.
type
.
is_unicode_char
:
elif
arg
.
type
.
is_unicode
or
arg
.
type
.
is_unicode
_char
:
self
.
type
=
unicode_type
self
.
type
=
unicode_type
elif
arg
.
type
.
is_complex
:
elif
arg
.
type
.
is_complex
:
self
.
type
=
Builtin
.
complex_type
self
.
type
=
Builtin
.
complex_type
...
@@ -10065,13 +10112,13 @@ class CoerceFromPyTypeNode(CoercionNode):
...
@@ -10065,13 +10112,13 @@ class CoerceFromPyTypeNode(CoercionNode):
if
not
result_type
.
create_from_py_utility_code
(
env
):
if
not
result_type
.
create_from_py_utility_code
(
env
):
error
(
arg
.
pos
,
error
(
arg
.
pos
,
"Cannot convert Python object to '%s'"
%
result_type
)
"Cannot convert Python object to '%s'"
%
result_type
)
if
self
.
type
.
is_string
:
if
self
.
type
.
is_string
or
self
.
type
.
is_unicode
:
if
self
.
arg
.
is_ephemeral
():
if
self
.
arg
.
is_ephemeral
():
error
(
arg
.
pos
,
error
(
arg
.
pos
,
"Obtaining
char* from temporary Python value"
)
"Obtaining
'%s' from temporary Python value"
%
result_type
)
elif
self
.
arg
.
is_name
and
self
.
arg
.
entry
and
self
.
arg
.
entry
.
is_pyglobal
:
elif
self
.
arg
.
is_name
and
self
.
arg
.
entry
and
self
.
arg
.
entry
.
is_pyglobal
:
warning
(
arg
.
pos
,
warning
(
arg
.
pos
,
"Obtaining
char* from externally modifiable global Python value"
,
"Obtaining
'%s' from externally modifiable global Python value"
%
result_type
,
level
=
1
)
level
=
1
)
def
analyse_types
(
self
,
env
):
def
analyse_types
(
self
,
env
):
...
...
Cython/Compiler/Optimize.py
View file @
a664239a
...
@@ -1977,6 +1977,11 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
...
@@ -1977,6 +1977,11 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
PyrexTypes
.
CFuncTypeArg
(
"bytes"
,
PyrexTypes
.
c_char_ptr_type
,
None
)
PyrexTypes
.
CFuncTypeArg
(
"bytes"
,
PyrexTypes
.
c_char_ptr_type
,
None
)
])
])
Pyx_Py_UNICODE_strlen_func_type
=
PyrexTypes
.
CFuncType
(
PyrexTypes
.
c_size_t_type
,
[
PyrexTypes
.
CFuncTypeArg
(
"unicode"
,
PyrexTypes
.
c_py_unicode_ptr_type
,
None
)
])
PyObject_Size_func_type
=
PyrexTypes
.
CFuncType
(
PyObject_Size_func_type
=
PyrexTypes
.
CFuncType
(
PyrexTypes
.
c_py_ssize_t_type
,
[
PyrexTypes
.
c_py_ssize_t_type
,
[
PyrexTypes
.
CFuncTypeArg
(
"obj"
,
PyrexTypes
.
py_object_type
,
None
)
PyrexTypes
.
CFuncTypeArg
(
"obj"
,
PyrexTypes
.
py_object_type
,
None
)
...
@@ -1996,7 +2001,8 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
...
@@ -1996,7 +2001,8 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
_ext_types_with_pysize
=
set
([
"cpython.array.array"
])
_ext_types_with_pysize
=
set
([
"cpython.array.array"
])
def
_handle_simple_function_len
(
self
,
node
,
pos_args
):
def
_handle_simple_function_len
(
self
,
node
,
pos_args
):
"""Replace len(char*) by the equivalent call to strlen() and
"""Replace len(char*) by the equivalent call to strlen(),
len(Py_UNICODE*) by the equivalent Py_UNICODE_strlen() and
len(known_builtin_type) by an equivalent C-API call.
len(known_builtin_type) by an equivalent C-API call.
"""
"""
if
len
(
pos_args
)
!=
1
:
if
len
(
pos_args
)
!=
1
:
...
@@ -2011,6 +2017,12 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
...
@@ -2011,6 +2017,12 @@ class OptimizeBuiltinCalls(Visitor.MethodDispatcherTransform):
args
=
[
arg
],
args
=
[
arg
],
is_temp
=
node
.
is_temp
,
is_temp
=
node
.
is_temp
,
utility_code
=
UtilityCode
.
load_cached
(
"IncludeStringH"
,
"StringTools.c"
))
utility_code
=
UtilityCode
.
load_cached
(
"IncludeStringH"
,
"StringTools.c"
))
elif
arg
.
type
.
is_unicode
:
new_node
=
ExprNodes
.
PythonCapiCallNode
(
node
.
pos
,
"__Pyx_Py_UNICODE_strlen"
,
self
.
Pyx_Py_UNICODE_strlen_func_type
,
args
=
[
arg
],
is_temp
=
node
.
is_temp
,
utility_code
=
UtilityCode
.
load_cached
(
"py_unicode_strlen"
,
"StringTools.c"
))
elif
arg
.
type
.
is_pyobject
:
elif
arg
.
type
.
is_pyobject
:
cfunc_name
=
self
.
_map_to_capi_len_function
(
arg
.
type
)
cfunc_name
=
self
.
_map_to_capi_len_function
(
arg
.
type
)
if
cfunc_name
is
None
:
if
cfunc_name
is
None
:
...
...
Cython/Compiler/PyrexTypes.py
View file @
a664239a
...
@@ -145,6 +145,7 @@ class PyrexType(BaseType):
...
@@ -145,6 +145,7 @@ class PyrexType(BaseType):
# is_enum boolean Is a C enum type
# is_enum boolean Is a C enum type
# is_typedef boolean Is a typedef type
# is_typedef boolean Is a typedef type
# is_string boolean Is a C char * type
# is_string boolean Is a C char * type
# is_unicode boolean Is a C Py_UNICODE * type
# is_cpp_string boolean Is a C++ std::string type
# is_cpp_string boolean Is a C++ std::string type
# is_unicode_char boolean Is either Py_UCS4 or Py_UNICODE
# is_unicode_char boolean Is either Py_UCS4 or Py_UNICODE
# is_returncode boolean Is used only to signal exceptions
# is_returncode boolean Is used only to signal exceptions
...
@@ -202,6 +203,7 @@ class PyrexType(BaseType):
...
@@ -202,6 +203,7 @@ class PyrexType(BaseType):
is_enum
=
0
is_enum
=
0
is_typedef
=
0
is_typedef
=
0
is_string
=
0
is_string
=
0
is_unicode
=
0
is_unicode_char
=
0
is_unicode_char
=
0
is_returncode
=
0
is_returncode
=
0
is_error
=
0
is_error
=
0
...
@@ -871,7 +873,7 @@ class PyObjectType(PyrexType):
...
@@ -871,7 +873,7 @@ class PyObjectType(PyrexType):
def
assignable_from
(
self
,
src_type
):
def
assignable_from
(
self
,
src_type
):
# except for pointers, conversion will be attempted
# except for pointers, conversion will be attempted
return
not
src_type
.
is_ptr
or
src_type
.
is_string
return
not
src_type
.
is_ptr
or
src_type
.
is_string
or
src_type
.
is_unicode
def
declaration_code
(
self
,
entity_code
,
def
declaration_code
(
self
,
entity_code
,
for_display
=
0
,
dll_linkage
=
None
,
pyrex
=
0
):
for_display
=
0
,
dll_linkage
=
None
,
pyrex
=
0
):
...
@@ -1161,7 +1163,7 @@ class CType(PyrexType):
...
@@ -1161,7 +1163,7 @@ class CType(PyrexType):
def
error_condition
(
self
,
result_code
):
def
error_condition
(
self
,
result_code
):
conds
=
[]
conds
=
[]
if
self
.
is_string
:
if
self
.
is_string
or
self
.
is_unicode
:
conds
.
append
(
"(!%s)"
%
result_code
)
conds
.
append
(
"(!%s)"
%
result_code
)
elif
self
.
exception_value
is
not
None
:
elif
self
.
exception_value
is
not
None
:
conds
.
append
(
"(%s == (%s)%s)"
%
(
result_code
,
self
.
sign_and_name
(),
self
.
exception_value
))
conds
.
append
(
"(%s == (%s)%s)"
%
(
result_code
,
self
.
sign_and_name
(),
self
.
exception_value
))
...
@@ -2178,6 +2180,9 @@ class CPointerBaseType(CType):
...
@@ -2178,6 +2180,9 @@ class CPointerBaseType(CType):
if
base_type
.
same_as
(
char_type
):
if
base_type
.
same_as
(
char_type
):
self
.
is_string
=
1
self
.
is_string
=
1
break
break
else
:
if
base_type
.
same_as
(
c_py_unicode_type
):
self
.
is_unicode
=
1
if
self
.
is_string
and
not
base_type
.
is_error
:
if
self
.
is_string
and
not
base_type
.
is_error
:
if
base_type
.
signed
:
if
base_type
.
signed
:
...
@@ -2189,10 +2194,17 @@ class CPointerBaseType(CType):
...
@@ -2189,10 +2194,17 @@ class CPointerBaseType(CType):
if
self
.
is_ptr
:
if
self
.
is_ptr
:
self
.
from_py_function
=
"__Pyx_PyObject_AsUString"
self
.
from_py_function
=
"__Pyx_PyObject_AsUString"
self
.
exception_value
=
"NULL"
self
.
exception_value
=
"NULL"
elif
self
.
is_unicode
and
not
base_type
.
is_error
:
self
.
to_py_function
=
"__Pyx_PyUnicode_FromUnicode"
if
self
.
is_ptr
:
self
.
from_py_function
=
"__Pyx_PyUnicode_AsUnicode"
self
.
exception_value
=
"NULL"
def
py_type_name
(
self
):
def
py_type_name
(
self
):
if
self
.
is_string
:
if
self
.
is_string
:
return
"bytes"
return
"bytes"
elif
self
.
is_unicode
:
return
"unicode"
else
:
else
:
return
super
(
CPointerBaseType
,
self
).
py_type_name
()
return
super
(
CPointerBaseType
,
self
).
py_type_name
()
...
...
Cython/Compiler/StringEncoding.py
View file @
a664239a
...
@@ -4,6 +4,7 @@
...
@@ -4,6 +4,7 @@
import
re
import
re
import
sys
import
sys
import
array
if
sys
.
version_info
[
0
]
>=
3
:
if
sys
.
version_info
[
0
]
>=
3
:
_unicode
,
_str
,
_bytes
=
str
,
str
,
bytes
_unicode
,
_str
,
_bytes
=
str
,
str
,
bytes
...
@@ -262,3 +263,14 @@ def split_string_literal(s, limit=2000):
...
@@ -262,3 +263,14 @@ def split_string_literal(s, limit=2000):
chunks
.
append
(
s
[
start
:
end
])
chunks
.
append
(
s
[
start
:
end
])
start
=
end
start
=
end
return
'""'
.
join
(
chunks
)
return
'""'
.
join
(
chunks
)
def encode_py_unicode_string(s):
    """Create Py_UNICODE[] representation of a given unicode string.

    Returns the UTF-16 code units of *s* as comma-separated decimal numbers
    followed by a terminating 0, ready to be used as a C array initialiser
    (e.g. u"ab" -> u"97,98,0"; the empty string -> u"0").
    """
    # Non-BMP characters will appear as surrogates, which is not compatible with
    # wide (UTF-32) Python builds. UnicodeNode will warn the user about this.
    #
    # The code units are computed by hand rather than through
    # s.encode('UTF-16') + array('H'): that route depends on the codec's BOM,
    # the platform byte order and the array item size, and it needed the
    # 'unicode' builtin, which does not exist on Python 3.
    code_units = []
    for char in s:
        code_point = ord(char)
        if code_point >= 0x10000:
            # Outside the BMP: split into a high/low surrogate pair.
            offset = code_point - 0x10000
            code_units.append(0xD800 + (offset >> 10))
            code_units.append(0xDC00 + (offset & 0x3FF))
        else:
            # BMP code points map 1:1; surrogate halves that are already
            # separate items of the string pass through unchanged.
            code_units.append(code_point)
    code_units.append(0)  # Add NULL terminator
    return u",".join([u"%d" % unit for unit in code_units])
Cython/Utility/StringTools.c
View file @
a664239a
...
@@ -604,3 +604,17 @@ static CYTHON_INLINE char __Pyx_PyBytes_GetItemInt(PyObject* bytes, Py_ssize_t i
...
@@ -604,3 +604,17 @@ static CYTHON_INLINE char __Pyx_PyBytes_GetItemInt(PyObject* bytes, Py_ssize_t i
index
+=
PyBytes_GET_SIZE
(
bytes
);
index
+=
PyBytes_GET_SIZE
(
bytes
);
return
PyBytes_AS_STRING
(
bytes
)[
index
];
return
PyBytes_AS_STRING
(
bytes
)[
index
];
}
}
/////////////// py_unicode_strlen.proto ///////////////
#if PY_VERSION_HEX < 0x03000000
/* strlen() for Py_UNICODE*: number of units before the terminating 0. */
static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u)
{
    const Py_UNICODE *cursor = u;
    while (*cursor != 0) {
        ++cursor;
    }
    return (size_t)(cursor - u);
}
#else
/* From Python 3.0 on, alias Py_UNICODE_strlen() instead of defining our own. */
#define __Pyx_Py_UNICODE_strlen Py_UNICODE_strlen
#endif
Cython/Utility/TypeConversion.c
View file @
a664239a
/////////////// TypeConversions.proto ///////////////
/////////////// TypeConversions.proto ///////////////
// @requires: py_unicode_strlen
/* Type Conversion Predeclarations */
/* Type Conversion Predeclarations */
...
@@ -24,6 +25,14 @@ static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char*);
...
@@ -24,6 +25,14 @@ static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char*);
#define __Pyx_PyStr_FromUString(s) __Pyx_PyStr_FromString((char*)s)
#define __Pyx_PyStr_FromUString(s) __Pyx_PyStr_FromString((char*)s)
#define __Pyx_PyUnicode_FromUString(s) __Pyx_PyUnicode_FromString((char*)s)
#define __Pyx_PyUnicode_FromUString(s) __Pyx_PyUnicode_FromString((char*)s)
#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u))
#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode
#if CYTHON_PEP393_ENABLED
#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode
#else
#define __Pyx_PyUnicode_AsUnicode PyUnicode_AS_UNICODE
#endif
#define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None)
#define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None)
#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False))
#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False))
static
CYTHON_INLINE
int
__Pyx_PyObject_IsTrue
(
PyObject
*
);
static
CYTHON_INLINE
int
__Pyx_PyObject_IsTrue
(
PyObject
*
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment