Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
2944d8c3
Commit
2944d8c3
authored
Nov 12, 2012
by
Victor Stinner
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #16218, #16444: Backport improvements to tests for non-ASCII characters
parent
f76e5ed4
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
105 additions
and
10 deletions
+105
-10
Lib/test/support.py
Lib/test/support.py
+75
-0
Lib/test/test_cmd_line.py
Lib/test/test_cmd_line.py
+3
-3
Lib/test/test_cmd_line_script.py
Lib/test/test_cmd_line_script.py
+23
-7
Lib/test/test_os.py
Lib/test/test_os.py
+4
-0
No files found.
Lib/test/support.py
View file @
2944d8c3
...
...
@@ -603,6 +603,49 @@ else:
# module name.
TESTFN
=
"{}_{}_tmp"
.
format
(
TESTFN
,
os
.
getpid
())
# FS_NONASCII: non-ASCII character encodable by os.fsencode(),
# or None if there is no such character.
FS_NONASCII
=
None
for
character
in
(
# First try printable and common characters to have a readable filename.
# For each character, the listed encodings are just examples of encodings able
# to encode the character (the list is not exhaustive).
# U+00E6 (Latin Small Letter Ae): cp1252, iso-8859-1
'
\
u00E6
'
,
# U+0130 (Latin Capital Letter I With Dot Above): cp1254, iso8859_3
'
\
u0130
'
,
# U+0141 (Latin Capital Letter L With Stroke): cp1250, cp1257
'
\
u0141
'
,
# U+03C6 (Greek Small Letter Phi): cp1253
'
\
u03C6
'
,
# U+041A (Cyrillic Capital Letter Ka): cp1251
'
\
u041A
'
,
# U+05D0 (Hebrew Letter Alef): cp424
'
\
u05D0
'
,
# U+060C (Arabic Comma): cp864, cp1006, iso8859_6, mac_arabic
'
\
u060C
'
,
# U+062A (Arabic Letter Teh): cp720
'
\
u062A
'
,
# U+0E01 (Thai Character Ko Kai): cp874
'
\
u0E01
'
,
# Then try more "special" characters. "special" because they may be
# interpreted or displayed differently depending on the exact locale
# encoding and the font.
# U+00A0 (No-Break Space)
'
\
u00A0
'
,
# U+20AC (Euro Sign)
'
\
u20AC
'
,
):
try
:
os
.
fsdecode
(
os
.
fsencode
(
character
))
except
UnicodeError
:
pass
else
:
FS_NONASCII
=
character
break
# TESTFN_UNICODE is a non-ascii filename
TESTFN_UNICODE
=
TESTFN
+
"-
\
xe0
\
xf2
\
u0258
\
u0141
\
u011f
"
...
...
@@ -647,6 +690,38 @@ elif sys.platform != 'darwin':
# the byte 0xff. Skip some unicode filename tests.
pass
# TESTFN_UNDECODABLE is a filename (bytes type) that should *not* be able to be
# decoded from the filesystem encoding (in strict mode). It can be None if we
# cannot generate such a filename (e.g. the latin1 encoding can decode any byte
# sequence). On UNIX, TESTFN_UNDECODABLE can be decoded by os.fsdecode() thanks
# to the surrogateescape error handler (PEP 383), but not from the filesystem
# encoding in strict mode.
TESTFN_UNDECODABLE
=
None
for
name
in
(
# b'\xff' is not decodable by os.fsdecode() with code page 932. Windows
# either accepts it when creating a file or a directory, or refuses to enter
# such a directory (when the bytes name is used). So test b'\xe7' first: it is
# not decodable from cp932.
b'
\
xe7
w
\
xf0
'
,
# undecodable from ASCII, UTF-8
b'
\
xff
'
,
# undecodable from iso8859-3, iso8859-6, iso8859-7, cp424, iso8859-8, cp856
# and cp857
b'
\
xae
\
xd5
'
,
# undecodable from UTF-8 (UNIX and Mac OS X)
b'
\
xed
\
xb2
\
x80
'
,
b'
\
xed
\
xb4
\
x80
'
,
):
try
:
name
.
decode
(
TESTFN_ENCODING
)
except
UnicodeDecodeError
:
TESTFN_UNDECODABLE
=
os
.
fsencode
(
TESTFN
)
+
name
break
if
FS_NONASCII
:
TESTFN_NONASCII
=
TESTFN
+
'-'
+
FS_NONASCII
else
:
TESTFN_NONASCII
=
None
# Save the initial cwd
SAVEDCWD
=
os
.
getcwd
()
...
...
Lib/test/test_cmd_line.py
View file @
2944d8c3
...
...
@@ -93,15 +93,15 @@ class CmdLineTest(unittest.TestCase):
# All good if execution is successful
assert_python_ok
(
'-c'
,
'pass'
)
@
unittest
.
skipIf
(
sys
.
getfilesystemencoding
()
==
'ascii'
,
'need a filesystem encoding different than ASCII'
)
@
unittest
.
skipUnless
(
test
.
support
.
FS_NONASCII
,
'need support.FS_NONASCII'
)
def
test_non_ascii
(
self
):
# Test handling of non-ascii data
if
test
.
support
.
verbose
:
import
locale
print
(
'locale encoding = %s, filesystem encoding = %s'
%
(
locale
.
getpreferredencoding
(),
sys
.
getfilesystemencoding
()))
command
=
"assert(ord('
\
xe9
') == 0xe9)"
command
=
(
"assert(ord(%r) == %s)"
%
(
test
.
support
.
FS_NONASCII
,
ord
(
test
.
support
.
FS_NONASCII
)))
assert_python_ok
(
'-c'
,
command
)
# On Windows, passing bytes to subprocess doesn't test how Python decodes the
...
...
Lib/test/test_cmd_line_script.py
View file @
2944d8c3
...
...
@@ -363,14 +363,30 @@ class CmdLineTest(unittest.TestCase):
self
.
assertTrue
(
text
[
1
].
startswith
(
' File '
))
self
.
assertTrue
(
text
[
3
].
startswith
(
'NameError'
))
def
test_non_utf8
(
self
):
def
test_non_ascii
(
self
):
# Mac OS X denies the creation of a file with an invalid UTF-8 name.
# Windows allows to create a name with an arbitrary bytes name, but
# Python cannot pass an undecodable bytes argument to a subprocess.
#if (support.TESTFN_UNDECODABLE
#and sys.platform not in ('win32', 'darwin')):
# name = os.fsdecode(support.TESTFN_UNDECODABLE)
#elif support.TESTFN_NONASCII:
if
support
.
TESTFN_NONASCII
:
name
=
support
.
TESTFN_NONASCII
else
:
self
.
skipTest
(
"need support.TESTFN_NONASCII"
)
# Issue #16218
with
temp_dir
()
as
script_dir
:
script_name
=
_make_test_script
(
script_dir
,
'
\
udcf1
\
udcea
\
udcf0
\
udce8
\
udcef
\
udcf2
'
)
self
.
_check_script
(
script_name
,
script_name
,
script_name
,
script_dir
,
None
,
importlib
.
machinery
.
SourceFileLoader
)
source
=
'print(ascii(__file__))
\
n
'
script_name
=
_make_test_script
(
os
.
curdir
,
name
,
source
)
self
.
addCleanup
(
support
.
unlink
,
script_name
)
rc
,
stdout
,
stderr
=
assert_python_ok
(
script_name
)
self
.
assertEqual
(
ascii
(
script_name
),
stdout
.
rstrip
().
decode
(
'ascii'
),
'stdout=%r stderr=%r'
%
(
stdout
,
stderr
))
self
.
assertEqual
(
0
,
rc
)
def
test_main
():
support
.
run_unittest
(
CmdLineTest
)
...
...
Lib/test/test_os.py
View file @
2944d8c3
...
...
@@ -1243,6 +1243,8 @@ if sys.platform != 'win32':
def
setUp
(
self
):
if
support
.
TESTFN_UNENCODABLE
:
self
.
dir
=
support
.
TESTFN_UNENCODABLE
elif
support
.
TESTFN_NONASCII
:
self
.
dir
=
support
.
TESTFN_NONASCII
else
:
self
.
dir
=
support
.
TESTFN
self
.
bdir
=
os
.
fsencode
(
self
.
dir
)
...
...
@@ -1257,6 +1259,8 @@ if sys.platform != 'win32':
add_filename
(
support
.
TESTFN_UNICODE
)
if
support
.
TESTFN_UNENCODABLE
:
add_filename
(
support
.
TESTFN_UNENCODABLE
)
if
support
.
TESTFN_NONASCII
:
add_filename
(
support
.
TESTFN_NONASCII
)
if
not
bytesfn
:
self
.
skipTest
(
"couldn't create any non-ascii filename"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment