Commit 074362b4 authored by da-woods's avatar da-woods Committed by Stefan Behnel

unicode imports (#3119)

* Handle normalization of unicode identifiers
* Support unicode characters in module names
  (Only valid under Python 3)
parent 7e233ab0
......@@ -783,6 +783,8 @@ def create_extension_list(patterns, exclude=None, ctx=None, aliases=None, quiet=
create_extension = ctx.options.create_extension or default_create_extension
for pattern in patterns:
if not isinstance(pattern, (Extension_distutils, Extension_setuptools)):
pattern = encode_filename_in_py2(pattern)
if isinstance(pattern, str):
filepattern = pattern
template = Extension(pattern, []) # Fake Extension without sources
......
......@@ -32,8 +32,23 @@ from .. import Utils
from . import Options
from .Options import CompilationOptions, default_options
from .CmdLine import parse_command_line
from .Lexicon import (unicode_start_ch_any, unicode_continuation_ch_any,
unicode_start_ch_range, unicode_continuation_ch_range)
def _make_range_re(chrs):
out = []
for i in range(0, len(chrs), 2):
out.append(u"{0}-{1}".format(chrs[i], chrs[i+1]))
return u"".join(out)
# py2 version looked like r"[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*$"
module_name_pattern = u"[{0}{1}][{0}{2}{1}{3}]*".format(
unicode_start_ch_any, _make_range_re(unicode_start_ch_range),
unicode_continuation_ch_any,
_make_range_re(unicode_continuation_ch_range))
module_name_pattern = re.compile(u"{0}(\\.{0})*$".format(module_name_pattern))
module_name_pattern = re.compile(r"[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*$")
standard_include_path = os.path.abspath(
os.path.join(os.path.dirname(os.path.dirname(__file__)), 'Includes'))
......@@ -158,7 +173,7 @@ class Context(object):
if not module_name_pattern.match(qualified_name):
raise CompileError(pos or (module_name, 0, 0),
"'%s' is not a valid module name" % module_name)
u"'%s' is not a valid module name" % module_name)
if relative_to:
if debug_find_module:
......@@ -433,6 +448,12 @@ def create_default_resultobj(compilation_source, options):
def run_pipeline(source, options, full_module_name=None, context=None):
from . import Pipeline
# ensure that the inputs are unicode (for Python 2)
if sys.version_info[0] == 2:
source = source.decode(sys.getfilesystemencoding())
if full_module_name:
full_module_name = full_module_name.decode("utf-8")
source_ext = os.path.splitext(source)[1]
options.configure_language_defaults(source_ext[1:]) # py/pyx
if context is None:
......@@ -442,6 +463,7 @@ def run_pipeline(source, options, full_module_name=None, context=None):
cwd = os.getcwd()
abs_path = os.path.abspath(source)
full_module_name = full_module_name or context.extract_module_name(source, options)
full_module_name = EncodedString(full_module_name)
Utils.raise_error_if_module_name_forbidden(full_module_name)
......@@ -611,7 +633,6 @@ def search_include_directories(dirs, qualified_name, suffix, pos, include=False)
The 'include' option will disable package dereferencing.
"""
if pos:
file_desc = pos[0]
if not isinstance(file_desc, FileSourceDescriptor):
......@@ -662,7 +683,6 @@ def search_include_directories(dirs, qualified_name, suffix, pos, include=False)
# search for namespaces second - PEP420
for package_dir in namespace_dirs:
# matches modules of the form: <dir>/foo/bar.pxd
path = os.path.join(package_dir, module_filename)
if os.path.exists(path):
......
This diff is collapsed.
......@@ -59,7 +59,7 @@ convert_func_prefix = pyrex_prefix + "convert_"
closure_scope_prefix = pyrex_prefix + "scope_"
closure_class_prefix = pyrex_prefix + "scope_struct_"
lambda_func_prefix = pyrex_prefix + "lambda_"
module_is_main = pyrex_prefix + "module_is_main_"
module_is_main = pyrex_prefix + "module_is_main"
defaults_struct_prefix = pyrex_prefix + "defaults"
dynamic_args_cname = pyrex_prefix + "dynamic_args"
......@@ -163,8 +163,11 @@ exc_vars = (exc_type_name, exc_value_name, exc_tb_name)
api_name = pyrex_prefix + "capi__"
h_guard_prefix = "__PYX_HAVE__"
api_guard_prefix = "__PYX_HAVE_API__"
# the h and api guards get changed to:
# __PYX_HAVE__FILENAME (for ascii filenames)
# __PYX_HAVE_U_PUNYCODEFILENAME (for non-ascii filenames)
h_guard_prefix = "__PYX_HAVE_"
api_guard_prefix = "__PYX_HAVE_API_"
api_func_guard = "__PYX_HAVE_API_FUNC_"
PYX_NAN = "__PYX_NAN()"
......
......@@ -146,6 +146,16 @@ class EncodedString(_unicode):
s = bytes_literal(self.byteencode(), self.encoding)
return s.as_c_string_literal()
if not hasattr(_unicode, "isascii"):
    def isascii(self):
        """Return True if this string can be encoded as ASCII, else False.

        Only defined when the unicode base type has no ``isascii`` of its
        own (Python 3.7+ already provides one).
        """
        try:
            self.encode("ascii")
            return True
        except UnicodeEncodeError:
            return False
def string_contains_surrogates(ustring):
"""
......@@ -191,6 +201,11 @@ class BytesLiteral(_bytes):
value = split_string_literal(escape_byte_string(self))
return '"%s"' % value
if not hasattr(_bytes, "isascii"):
    def isascii(self):
        """Return True if every byte is in the ASCII range (< 0x80).

        Fallback for interpreters whose bytes type lacks ``isascii``
        (it is already defined for Python 3.7+).  Mirrors the built-in:
        an empty byte string counts as ASCII.
        """
        # bytearray() yields integers on both Python 2 and Python 3,
        # whereas iterating a Python 2 byte string yields 1-char strings.
        return all(byte < 0x80 for byte in bytearray(self))
def bytes_literal(s, encoding):
assert isinstance(s, bytes)
......@@ -206,6 +221,12 @@ def encoded_string(s, encoding):
s.encoding = encoding
return s
def encoded_string_or_bytes_literal(s, encoding):
    """Wrap ``s`` with the helper that matches its type.

    ``bytes`` input is passed to ``bytes_literal()``; anything else is
    passed to ``encoded_string()``.  ``encoding`` is forwarded unchanged.
    """
    wrap = bytes_literal if isinstance(s, bytes) else encoded_string
    return wrap(s, encoding)
char_from_escape_sequence = {
r'\a' : u'\a',
......
......@@ -39,6 +39,7 @@ try:
from cStringIO import StringIO
except ImportError:
from io import StringIO
import sys
class StringIOTree(object):
......
......@@ -18,7 +18,7 @@ cdef {{struct_type}} {{funcname}}(obj) except *:
value = obj['{{member.name}}']
except KeyError:
raise ValueError("No value specified for struct attribute '{{member.name}}'")
result.{{member.cname}} = value
result.{{member.name}} = value
{{endfor}}
return result
......
......@@ -167,3 +167,19 @@ def test_nested_obj_to_struct(NestedStruct nested):
nested.mystruct.s.decode('UTF-8'),
nested.d)
cdef struct OverriddenCname:
int x "not_x"
def test_obj_to_struct_cnames(OverriddenCname s):
"""
>>> test_obj_to_struct_cnames({ 'x': 1 })
1
"""
print(s.x)
def test_struct_to_obj_cnames():
"""
>>> test_struct_to_obj_cnames()
{'x': 2}
"""
return OverriddenCname(2)
......@@ -51,10 +51,15 @@ if sys.version_info[0]>2:
'NormalClassΓΓ.εxciting_function.<locals>.nestεd'
Do kwargs work?
>>> unicode_kwarg(αrg=5)
>>> unicode_kwarg(αrγ=5)
5
>>> unicode_kwarg_from_cy()
1
Normalization of attributes
(The cdef class version is testable in Python 2 too)
>>> NormalizeAttrPy().get()
5
"""
else:
__doc__ = ""
......@@ -169,8 +174,8 @@ cdef class Derived(Γναμε2):
cdef Γναμε2 global_ναμε3 = Γναμε2()
def function_taking_fancy_argument(Γναμε2 αrg):
return αrg
def function_taking_fancy_argument(Γναμε2 αrγ):
return αrγ
class NormalClassΓΓ(Γναμε2):
"""
......@@ -190,19 +195,23 @@ class NormalClassΓΓ(Γναμε2):
pass
return nestεd
def unicode_kwarg(*,αrg):
return αrg
def unicode_kwarg(*, αrγ):
return αrγ
def unicode_kwarg_from_cy():
return unicode_kwarg(αrg=1)
return unicode_kwarg(αrγ=1)
cdef class NormalizeAttrCdef:
class NormalizeAttrPy:
"""Python normalizes identifier names before they are used;
therefore ﬁ and fi should access the same attribute.
A more comprehensive version of this is in "unicode_identifiers_normalize.py"
comparing the behaviour to Python. The version here shows it
behaves the same in a cdef class and is tested with Python 2
therefore ﬁ and fi should access the same attribute"""
def __init__(self):
self.fi = 5 # note unicode ligature symbol
def get(self):
return self.fi
cdef class NormalizeAttrCdef:
"""Python normalizes identifier names before they are used;
therefore ﬁ and fi should access the same attribute
>>> NormalizeAttrCdef().get()
5
"""
......
# -*- coding: utf-8 -*-
# tag: py3, pep489
PYTHON setup.py build_ext --inplace
PYTHON -m mydoctest
########### mydoctest.py #######
import sys
if (sys.version_info[0] < 3 or
(sys.version_info[0] == 3 and sys.version_info[1] < 5)):
# The module is only Cythonized and not built for these versions
# so don't run the tests
exit()
import doctest
import from_py
val = doctest.testmod(from_py)[0]
import from_cy
val += doctest.testmod(from_cy)[0]
exit(val)
########### setup.py ########
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import sys
from Cython.Build import cythonize
files = ["mymoδ.pyx", "from_cy.pyx"]
# For Python 2 and Python <= 3.4 just run pyx->c;
# don't compile the C file
modules = cythonize(files)
if sys.version_info >= (3, 5):
from distutils.core import setup
setup(
ext_modules = modules
)
############ mymoδ.pyx #########
def f():
return True
cdef public api void cdef_func():
pass
############ pxd_moδ.pxd ##########
cdef struct S:
int x
cdef public api void cdef_func() # just to test generation of headers
############ from_py.py #########
# -*- coding: utf-8 -*-
import mymoδ
from mymoδ import f
__doc__ = """
>>> mymoδ.f()
True
>>> f()
True
"""
######### from_cy.pyx ##########
# -*- coding: utf-8 -*-
import mymoδ
from mymoδ import f
cimport pxd_moδ
from pxd_moδ cimport S
def test_imported():
"""
>>> test_imported()
True
"""
return mymoδ.f() and f() # True and True
def test_cimported():
"""
>>> test_cimported()
3
"""
cdef pxd_moδ.S v1
v1.x = 1
cdef S v2
v2.x = 2
return v1.x + v2.x
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment