Commit e2dcfb58, authored by Stefan Behnel, committed by GitHub

Fix unicode normalisation test on Windows. (GH-3194)

* Rewrite the test code generation for the unicode normalisation test, making sure that we always write UTF-8 source files.

* Fix failures to print the compilation status for modules with non-ascii names (on Windows).

* Help with remote debugging environment encoding problems in test runs, by extending the output of the "buildenv" pseudo-test.

* Explicitly set I/O encoding for subprocesses in parallel cythonize() runs to make the test runner workers inherit it (instead of defaulting to ASCII in Py2).

* Use a Latin-1 Unicode character in the test for Unicode module names to make it more compatible with Windows file system character sets (such as CP-1252).

* Properly decode source and module file name from the FS encoding in Py2. Previously, with ASCII module names, UTF-8 decoding always worked, but wasn't correct.

* Hack around a distutils 3.[5678] bug on Windows for unicode module names.
https://bugs.python.org/issue39432

* Try to fix cython.inline() on Windows with Py3.8+ where the DLL loading requires an explicit registration of the extension output directory.
Closes GH-3450.
parent 66a8a8e8
......@@ -539,7 +539,7 @@ class DependencyTree(object):
all.add(include_path)
all.update(self.included_files(include_path))
elif not self.quiet:
print("Unable to locate '%s' referenced from '%s'" % (filename, include))
print(u"Unable to locate '%s' referenced from '%s'" % (filename, include))
return all
@cached_method
......@@ -797,9 +797,9 @@ def create_extension_list(patterns, exclude=None, ctx=None, aliases=None, quiet=
if cython_sources:
filepattern = cython_sources[0]
if len(cython_sources) > 1:
print("Warning: Multiple cython sources found for extension '%s': %s\n"
"See https://cython.readthedocs.io/en/latest/src/userguide/sharing_declarations.html "
"for sharing declarations among Cython files." % (pattern.name, cython_sources))
print(u"Warning: Multiple cython sources found for extension '%s': %s\n"
u"See https://cython.readthedocs.io/en/latest/src/userguide/sharing_declarations.html "
u"for sharing declarations among Cython files." % (pattern.name, cython_sources))
else:
# ignore non-cython modules
module_list.append(pattern)
......@@ -873,7 +873,7 @@ def create_extension_list(patterns, exclude=None, ctx=None, aliases=None, quiet=
m.sources.remove(target_file)
except ValueError:
# never seen this in the wild, but probably better to warn about this unexpected case
print("Warning: Cython source file not found in sources list, adding %s" % file)
print(u"Warning: Cython source file not found in sources list, adding %s" % file)
m.sources.insert(0, file)
seen.add(name)
return module_list, module_metadata
......@@ -973,6 +973,9 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False,
exclude_failures=exclude_failures,
language=language,
aliases=aliases)
fix_windows_unicode_modules(module_list)
deps = create_dependency_tree(ctx, quiet=quiet)
build_dir = getattr(options, 'build_dir', None)
......@@ -1041,9 +1044,12 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False,
if force or c_timestamp < dep_timestamp:
if not quiet and not force:
if source == dep:
print("Compiling %s because it changed." % source)
print(u"Compiling %s because it changed." % Utils.decode_filename(source))
else:
print("Compiling %s because it depends on %s." % (source, dep))
print(u"Compiling %s because it depends on %s." % (
Utils.decode_filename(source),
Utils.decode_filename(dep),
))
if not force and options.cache:
fingerprint = deps.transitive_fingerprint(source, m, options)
else:
......@@ -1114,7 +1120,7 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False,
if failed_modules:
for module in failed_modules:
module_list.remove(module)
print("Failed compilations: %s" % ', '.join(sorted([
print(u"Failed compilations: %s" % ', '.join(sorted([
module.name for module in failed_modules])))
if options.cache:
......@@ -1125,6 +1131,41 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False,
return module_list
def fix_windows_unicode_modules(module_list):
    """Work around a distutils bug for non-ASCII module names on Windows.

    Hack around a distutils 3.[5678] bug on Windows for unicode module names.
    https://bugs.python.org/issue39432

    For every extension in ``module_list`` whose ``name`` is not pure ASCII,
    ``export_symbols`` is replaced by a list subclass that claims to already
    contain ``PyInit_<last name component>``.  That stops distutils from
    "helpfully" adding the symbol itself, which causes link errors.  Cython
    does not need the export entry because it already defines PyInit with
    the correct linkage.

    The function is a no-op on other platforms and outside the affected
    Python versions (3.5 up to, but not including, 3.8.2).  Extensions are
    modified in place; nothing is returned.
    """
    if sys.platform != "win32":
        return
    if sys.version_info < (3, 5) or sys.version_info >= (3, 8, 2):
        return

    def make_filtered_list(ignored_symbol, old_entries):
        class FilteredExportSymbols(list):
            # Temporary fix: pretend that the init symbol is already listed
            # so that distutils does not append it on its own.
            def __contains__(self, val):
                return val == ignored_symbol or list.__contains__(self, val)

        # Build the list exactly once from the filtered entries.  Seeding it
        # with all old entries and then extending it with the filtered ones
        # would duplicate every symbol and keep the ignored one in the list.
        return FilteredExportSymbols(
            name for name in (old_entries or ()) if name != ignored_symbol)

    for m in module_list:
        # TODO: use m.name.isascii() in Py3.7+
        try:
            m.name.encode("ascii")
        except UnicodeEncodeError:
            # Only non-ASCII module names trigger the distutils bug.
            m.export_symbols = make_filtered_list(
                "PyInit_" + m.name.rsplit(".", 1)[-1],
                m.export_symbols,
            )
if os.environ.get('XML_RESULTS'):
compile_result_dir = os.environ['XML_RESULTS']
def record_results(func):
......@@ -1180,7 +1221,7 @@ def cythonize_one(pyx_file, c_file, fingerprint, quiet, options=None,
zip_fingerprint_file = fingerprint_file_base + '.zip'
if os.path.exists(gz_fingerprint_file) or os.path.exists(zip_fingerprint_file):
if not quiet:
print("%sFound compiled %s in cache" % (progress, pyx_file))
print(u"%sFound compiled %s in cache" % (progress, pyx_file))
if os.path.exists(gz_fingerprint_file):
os.utime(gz_fingerprint_file, None)
with contextlib.closing(gzip_open(gz_fingerprint_file, 'rb')) as g:
......@@ -1194,7 +1235,7 @@ def cythonize_one(pyx_file, c_file, fingerprint, quiet, options=None,
z.extract(artifact, os.path.join(dirname, artifact))
return
if not quiet:
print("%sCythonizing %s" % (progress, pyx_file))
print(u"%sCythonizing %s" % (progress, Utils.decode_filename(pyx_file)))
if options is None:
options = CompilationOptions(default_options)
options.output_file = c_file
......
......@@ -264,6 +264,15 @@ def __invoke(%(params)s):
build_extension.build_lib = lib_dir
build_extension.run()
# On Windows, we need to add the library output directory to the DLL load path (Py3.8+).
# https://github.com/cython/cython/issues/3450
try:
add_dll_directory = os.add_dll_directory
except AttributeError:
pass
else:
add_dll_directory(os.path.dirname(module_path))
module = load_dynamic(module_name, module_path)
_cython_inline_cache[orig_code, arg_sigs, key_hash] = module.__invoke
......
......@@ -450,9 +450,9 @@ def run_pipeline(source, options, full_module_name=None, context=None):
# ensure that the inputs are unicode (for Python 2)
if sys.version_info[0] == 2:
source = source.decode(sys.getfilesystemencoding())
source = Utils.decode_filename(source)
if full_module_name:
full_module_name = full_module_name.decode("utf-8")
full_module_name = Utils.decode_filename(full_module_name)
source_ext = os.path.splitext(source)[1]
options.configure_language_defaults(source_ext[1:]) # py/pyx
......
......@@ -5,6 +5,7 @@ import unittest
import shlex
import sys
import tempfile
from io import open
from .Compiler import Errors
from .CodeWriter import CodeWriter
......@@ -196,23 +197,23 @@ def unpack_source_tree(tree_file, workdir, cython_root):
if workdir is None:
workdir = tempfile.mkdtemp()
header, cur_file = [], None
with open(tree_file) as f:
with open(tree_file, 'rb') as f:
try:
for line in f:
if line.startswith('#####'):
filename = line.strip().strip('#').strip().replace('/', os.path.sep)
if line[:5] == b'#####':
filename = line.strip().strip(b'#').strip().decode('utf8').replace('/', os.path.sep)
path = os.path.join(workdir, filename)
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
if cur_file is not None:
to_close, cur_file = cur_file, None
to_close.close()
cur_file = open(path, 'w')
cur_file = open(path, 'wb')
elif cur_file is not None:
cur_file.write(line)
elif line.strip() and not line.lstrip().startswith('#'):
if line.strip() not in ('"""', "'''"):
command = shlex.split(line)
elif line.strip() and not line.lstrip().startswith(b'#'):
if line.strip() not in (b'"""', b"'''"):
command = shlex.split(line.decode('utf8'))
if not command: continue
# In Python 3: prog, *args = command
prog, args = command[0], command[1:]
......
......@@ -13,10 +13,12 @@ environment:
- PYTHON: "C:\\Python27"
PYTHON_VERSION: "2.7"
PYTHON_ARCH: "32"
PYTHONIOENCODING: "utf-8"
- PYTHON: "C:\\Python27-x64"
PYTHON_VERSION: "2.7"
PYTHON_ARCH: "64"
PYTHONIOENCODING: "utf-8"
- PYTHON: "C:\\Python38"
PYTHON_VERSION: "3.8"
......@@ -67,6 +69,7 @@ environment:
PYTHON_VERSION: "2.7"
PYTHON_ARCH: "64"
BACKEND: cpp
PYTHONIOENCODING: "utf-8"
clone_depth: 5
......@@ -92,6 +95,7 @@ build_script:
test: off
test_script:
- "%PYTHON%\\Scripts\\pip.exe install -r test-requirements.txt"
- "%PYTHON%\\Scripts\\pip.exe install win_unicode_console"
- "set CFLAGS=/Od /W3"
- "%WITH_ENV% %PYTHON%\\python.exe runtests.py -vv --backend=%BACKEND% --no-code-style -j5"
......
......@@ -2197,6 +2197,9 @@ def main():
else:
keep_alive_interval = None
if options.shard_count > 1 and options.shard_num == -1:
if "PYTHONIOENCODING" not in os.environ:
# Make sure subprocesses can print() Unicode text.
os.environ["PYTHONIOENCODING"] = sys.stdout.encoding or sys.getdefaultencoding()
import multiprocessing
pool = multiprocessing.Pool(options.shard_count)
tasks = [(options, cmd_args, shard_num) for shard_num in range(options.shard_count)]
......@@ -2340,6 +2343,14 @@ def runtests(options, cmd_args, coverage=None):
else:
faulthandler.enable()
if sys.platform == "win32" and sys.version_info < (3, 6):
# enable Unicode console output, if possible
try:
import win_unicode_console
except ImportError:
pass
else:
win_unicode_console.enable()
WITH_CYTHON = options.with_cython
ROOTDIR = os.path.abspath(options.root_dir)
......
......@@ -130,4 +130,11 @@ CFLAGS (distutils) = {config_var('CFLAGS')}
CFLAGS (env) = {get_env('CFLAGS', '')}
LINKCC (distutils) = {config_var('LINKCC')}
LINKCC (env) = {get_env('LINKCC', '')}
Encodings:
LANG (env) = {get_env('LANG', '')}
PYTHONIOENCODING (env) = {get_env('PYTHONIOENCODING', '')}
sys stdout encoding = {sys.stdout.encoding}
sys default encoding = {sys.getdefaultencoding()}
sys FS encoding = {sys.getfilesystemencoding()}
""")
......@@ -72,10 +72,12 @@ def test():
return {1}
"""]
for idx in range(len(example_code)):
with open("test{0}.py".format(idx),"w") as f:
if sys.version_info[0] > 2:
from io import open
for idx, (code, strings) in enumerate(zip(example_code, string_pairs)):
with open("test{0}.py".format(idx), "w", encoding="utf8") as f:
code = code.format(*strings)
f.write("# -*- coding: utf-8 -*-\n")
f.write(example_code[idx].format(*string_pairs[idx]))
else:
f.write("\n") # code isn't Python 2 compatible - write a dummy file
# The code isn't Py2 compatible. Only write actual code in Py3+.
if sys.version_info[0] > 2:
f.write(code)
......@@ -7,8 +7,7 @@ PYTHON -m mydoctest
########### mydoctest.py #######
import sys
if (sys.version_info[0] < 3 or
(sys.version_info[0] == 3 and sys.version_info[1] < 5)):
if sys.version_info < (3, 5):
# The module is only Cythonized and not build for these versions
# so don't run the tests
exit()
......@@ -28,9 +27,19 @@ exit(val)
from __future__ import unicode_literals
import sys
# enable Unicode console output, if possible
if sys.platform == "win32" and sys.version_info < (3, 6):
try:
import win_unicode_console
except ImportError:
pass
else:
win_unicode_console.enable()
from Cython.Build import cythonize
files = ["mymoδ.pyx", "from_cy.pyx"]
files = ["mymoð.pyx", "from_cy.pyx"]
# For Python 2 and Python <= 3.4 just run pyx->c;
......@@ -44,7 +53,7 @@ if sys.version_info >= (3, 5):
ext_modules = modules
)
############ mymoδ.pyx #########
############ mymoð.pyx #########
def f():
return True
......@@ -52,7 +61,7 @@ def f():
cdef public api void cdef_func():
pass
############ pxd_moδ.pxd ##########
############ pxd_moð.pxd ##########
cdef struct S:
int x
......@@ -63,11 +72,11 @@ cdef public api void cdef_func() # just to test generation of headers
# -*- coding: utf-8 -*-
import mymoδ
from mymoδ import f
import mymoð
from mymoð import f
__doc__ = """
>>> mymoδ.f()
>>> mymoð.f()
True
>>> f()
True
......@@ -77,12 +86,12 @@ True
# -*- coding: utf-8 -*-
import mymoδ
import mymoð
from mymoδ import f
from mymoð import f
cimport pxd_moδ
from pxd_moδ cimport S
cimport pxd_moð
from pxd_moð cimport S
def test_imported():
......@@ -90,14 +99,15 @@ def test_imported():
>>> test_imported()
True
"""
return mymoδ.f() and f() # True and True
return mymoð.f() and f() # True and True
def test_cimported():
"""
>>> test_cimported()
3
"""
cdef pxd_moδ.S v1
cdef pxd_moð.S v1
v1.x = 1
cdef S v2
v2.x = 2
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment