Set TESTFN_UNENCODEABLE on non-Windows OSes

* Use the 0xff byte on non-Windows OSes
* mbcs is now really strict by default: I closed issue #850997, so use the filesystem encoding and not Latin-1
* Rename TESTFN_UNICODE_UNENCODEABLE to TESTFN_UNENCODEABLE

Set TESTFN_UNENCODEABLE on non-Windows OSes
* Use the 0xff byte on non-Windows OSes
* mbcs is now really strict by default: I closed issue #850997, so use the filesystem encoding and not Latin-1
* Rename TESTFN_UNICODE_UNENCODEABLE to TESTFN_UNENCODEABLE
3d85a6fa · Victor Stinner · 994addc4 · 3d85a6fa · 3d85a6fa
Commit 3d85a6fa authored Aug 13, 2010 by Victor Stinner
Hide whitespace changes
Inline Side-by-side

Showing with 37 additions and 27 deletions

Lib/test/support.py Lib/test/support.py +29 -20

Lib/test/test_unicode_file.py Lib/test/test_unicode_file.py +8 -7

No files found.
--- a/Lib/test/support.py
+++ b/Lib/test/support.py
@@ -382,29 +382,38 @@ TESTFN = "{}_{}_tmp".format(TESTFN, os.getpid())
 # file system encoding, but *not* with the default (ascii) encoding
 TESTFN_UNICODE = TESTFN + "-\xe0\xf2"
 TESTFN_ENCODING = sys.getfilesystemencoding()
-# TESTFN_UNICODE_UNENCODEABLE is a filename that should *not* be
-# able to be encoded by *either* the default or filesystem encoding.
+# TESTFN_UNENCODEABLE is a filename (str type) that should *not* be able to be
-# This test really only makes sense on Windows NT platforms
+# encoded by the filesystem encoding (in strict mode). It can be None if we
-# which have special Unicode support in posixmodule.
+# cannot generate such a filename.
-if (not hasattr(sys, "getwindowsversion") or
+if os.name in ('nt', 'ce'):
-        sys.getwindowsversion()[3] < 2): #  0=win32s or 1=9x/ME
+    if sys.getwindowsversion().platform < 2:
-    TESTFN_UNICODE_UNENCODEABLE = None
+        # win32s (0) or Windows 9x/ME (1)
+        TESTFN_UNENCODEABLE = None
+    else:
+        # Japanese characters (I think - from bug 846133)
+        TESTFN_UNENCODEABLE = TESTFN + "-\u5171\u6709\u3055\u308c\u308b"
+        try:
+            TESTFN_UNENCODEABLE.encode(TESTFN_ENCODING)
+        except UnicodeEncodeError:
+            pass
+        else:
+            print('WARNING: The filename %r CAN be encoded by the filesystem encoding (%s). '
+                  'Unicode filename tests may not be effective'
+                  % (TESTFN_UNENCODEABLE, TESTFN_ENCODING))
+            TESTFN_UNENCODEABLE = None
 else:
-    # Japanese characters (I think - from bug 846133)
-    TESTFN_UNICODE_UNENCODEABLE = TESTFN + "-\u5171\u6709\u3055\u308c\u308b"
    try:
-        # XXX - Note - should be using TESTFN_ENCODING here - but for
+        # ascii and utf-8 cannot decode the byte 0xff
-        # Windows, "mbcs" currently always operates as if in
+        b'\xff'.decode(TESTFN_ENCODING)
-        # errors=ignore' mode - hence we get '?' characters rather than
+    except UnicodeDecodeError:
-        # the exception.  'Latin1' operates as we expect - ie, fails.
+        # 0xff will be encoded using the surrogate character u+DCFF
-        # See [ 850997 ] mbcs encoding ignores errors
+        TESTFN_UNENCODEABLE = TESTFN_UNICODE \
-        TESTFN_UNICODE_UNENCODEABLE.encode("Latin1")
+            + b'-\xff'.decode(TESTFN_ENCODING, 'surrogateescape')
-    except UnicodeEncodeError:
-        pass
    else:
-        print('WARNING: The filename %r CAN be encoded by the filesystem.  '
+        # File system encoding (eg. ISO-8859-* encodings) can encode
-              'Unicode filename tests may not be effective'
+        # the byte 0xff. Skip some unicode filename tests.
-              % TESTFN_UNICODE_UNENCODEABLE)
+        TESTFN_UNENCODEABLE = None
 # Save the initial cwd
 SAVEDCWD = os.getcwd()

--- a/Lib/test/test_unicode_file.py
+++ b/Lib/test/test_unicode_file.py
@@ -5,8 +5,9 @@ import os, glob, time, shutil
 import unicodedata
 import unittest
-from test.support import run_unittest, TESTFN_UNICODE, rmtree
+from test.support import (run_unittest, rmtree,
-from test.support import TESTFN_ENCODING, TESTFN_UNICODE_UNENCODEABLE
+    TESTFN_ENCODING, TESTFN_UNICODE, TESTFN_UNENCODEABLE)
 try:
    TESTFN_UNICODE.encode(TESTFN_ENCODING)
 except (UnicodeError, TypeError):
@@ -146,8 +147,8 @@ class TestUnicodeFiles(unittest.TestCase):
    # _test functions with each of the filename combinations we wish to test
    def test_single_files(self):
        self._test_single(TESTFN_UNICODE)
-        if TESTFN_UNICODE_UNENCODEABLE is not None:
+        if TESTFN_UNENCODEABLE is not None:
-            self._test_single(TESTFN_UNICODE_UNENCODEABLE)
+            self._test_single(TESTFN_UNENCODEABLE)
    def test_directories(self):
        # For all 'equivalent' combinations:
@@ -156,9 +157,9 @@ class TestUnicodeFiles(unittest.TestCase):
        ext = ".dir"
        self._do_directory(TESTFN_UNICODE+ext, TESTFN_UNICODE+ext, False)
        # Our directory name that can't use a non-unicode name.
-        if TESTFN_UNICODE_UNENCODEABLE is not None:
+        if TESTFN_UNENCODEABLE is not None:
-            self._do_directory(TESTFN_UNICODE_UNENCODEABLE+ext,
+            self._do_directory(TESTFN_UNENCODEABLE+ext,
-                               TESTFN_UNICODE_UNENCODEABLE+ext,
+                               TESTFN_UNENCODEABLE+ext,
                               False)
 def test_main():