Commit 3d85a6fa authored by Victor Stinner

Set TESTFN_UNENCODEABLE on non-Windows OSes

 * Use 0xff byte on non-Windows OSes
 * mbcs is now really strict by default: I closed the issue #850997, so use the
   filesystem encoding and not Latin-1
 * Rename TESTFN_UNICODE_UNENCODEABLE to TESTFN_UNENCODEABLE
parent 994addc4
...@@ -382,29 +382,38 @@ TESTFN = "{}_{}_tmp".format(TESTFN, os.getpid()) ...@@ -382,29 +382,38 @@ TESTFN = "{}_{}_tmp".format(TESTFN, os.getpid())
# file system encoding, but *not* with the default (ascii) encoding # file system encoding, but *not* with the default (ascii) encoding
TESTFN_UNICODE = TESTFN + "-\xe0\xf2" TESTFN_UNICODE = TESTFN + "-\xe0\xf2"
TESTFN_ENCODING = sys.getfilesystemencoding() TESTFN_ENCODING = sys.getfilesystemencoding()
# TESTFN_UNICODE_UNENCODEABLE is a filename that should *not* be
# able to be encoded by *either* the default or filesystem encoding. # TESTFN_UNENCODEABLE is a filename (str type) that should *not* be able to be
# This test really only makes sense on Windows NT platforms # encoded by the filesystem encoding (in strict mode). It can be None if we
# which have special Unicode support in posixmodule. # cannot generate such filename.
if (not hasattr(sys, "getwindowsversion") or if os.name in ('nt', 'ce'):
sys.getwindowsversion()[3] < 2): # 0=win32s or 1=9x/ME if sys.getwindowsversion().platform < 2:
TESTFN_UNICODE_UNENCODEABLE = None # win32s (0) or Windows 9x/ME (1)
TESTFN_UNENCODEABLE = None
else:
# Japanese characters (I think - from bug 846133)
TESTFN_UNENCODEABLE = TESTFN + "-\u5171\u6709\u3055\u308c\u308b"
try:
TESTFN_UNENCODEABLE.encode(TESTFN_ENCODING)
except UnicodeEncodeError:
pass
else:
print('WARNING: The filename %r CAN be encoded by the filesystem encoding (%s). '
'Unicode filename tests may not be effective'
% (TESTFN_UNENCODEABLE, TESTFN_ENCODING))
TESTFN_UNENCODEABLE = None
else: else:
# Japanese characters (I think - from bug 846133)
TESTFN_UNICODE_UNENCODEABLE = TESTFN + "-\u5171\u6709\u3055\u308c\u308b"
try: try:
# XXX - Note - should be using TESTFN_ENCODING here - but for # ascii and utf-8 cannot encode the byte 0xff
# Windows, "mbcs" currently always operates as if in b'\xff'.decode(TESTFN_ENCODING)
# errors=ignore' mode - hence we get '?' characters rather than except UnicodeDecodeError:
# the exception. 'Latin1' operates as we expect - ie, fails. # 0xff will be encoded using the surrogate character u+DCFF
# See [ 850997 ] mbcs encoding ignores errors TESTFN_UNENCODEABLE = TESTFN_UNICODE \
TESTFN_UNICODE_UNENCODEABLE.encode("Latin1") + b'-\xff'.decode(TESTFN_ENCODING, 'surrogateescape')
except UnicodeEncodeError:
pass
else: else:
print('WARNING: The filename %r CAN be encoded by the filesystem. ' # File system encoding (eg. ISO-8859-* encodings) can encode
'Unicode filename tests may not be effective' # the byte 0xff. Skip some unicode filename tests.
% TESTFN_UNICODE_UNENCODEABLE) TESTFN_UNENCODEABLE = None
# Save the initial cwd # Save the initial cwd
SAVEDCWD = os.getcwd() SAVEDCWD = os.getcwd()
......
...@@ -5,8 +5,9 @@ import os, glob, time, shutil ...@@ -5,8 +5,9 @@ import os, glob, time, shutil
import unicodedata import unicodedata
import unittest import unittest
from test.support import run_unittest, TESTFN_UNICODE, rmtree from test.support import (run_unittest, rmtree,
from test.support import TESTFN_ENCODING, TESTFN_UNICODE_UNENCODEABLE TESTFN_ENCODING, TESTFN_UNICODE, TESTFN_UNENCODEABLE)
try: try:
TESTFN_UNICODE.encode(TESTFN_ENCODING) TESTFN_UNICODE.encode(TESTFN_ENCODING)
except (UnicodeError, TypeError): except (UnicodeError, TypeError):
...@@ -146,8 +147,8 @@ class TestUnicodeFiles(unittest.TestCase): ...@@ -146,8 +147,8 @@ class TestUnicodeFiles(unittest.TestCase):
# _test functions with each of the filename combinations we wish to test # _test functions with each of the filename combinations we wish to test
def test_single_files(self): def test_single_files(self):
self._test_single(TESTFN_UNICODE) self._test_single(TESTFN_UNICODE)
if TESTFN_UNICODE_UNENCODEABLE is not None: if TESTFN_UNENCODEABLE is not None:
self._test_single(TESTFN_UNICODE_UNENCODEABLE) self._test_single(TESTFN_UNENCODEABLE)
def test_directories(self): def test_directories(self):
# For all 'equivalent' combinations: # For all 'equivalent' combinations:
...@@ -156,9 +157,9 @@ class TestUnicodeFiles(unittest.TestCase): ...@@ -156,9 +157,9 @@ class TestUnicodeFiles(unittest.TestCase):
ext = ".dir" ext = ".dir"
self._do_directory(TESTFN_UNICODE+ext, TESTFN_UNICODE+ext, False) self._do_directory(TESTFN_UNICODE+ext, TESTFN_UNICODE+ext, False)
# Our directory name that can't use a non-unicode name. # Our directory name that can't use a non-unicode name.
if TESTFN_UNICODE_UNENCODEABLE is not None: if TESTFN_UNENCODEABLE is not None:
self._do_directory(TESTFN_UNICODE_UNENCODEABLE+ext, self._do_directory(TESTFN_UNENCODEABLE+ext,
TESTFN_UNICODE_UNENCODEABLE+ext, TESTFN_UNENCODEABLE+ext,
False) False)
def test_main(): def test_main():
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment