Commit 2ac9d311 authored by Serhiy Storchaka

Issue #6815: os.path.expandvars() now supports non-ASCII Unicode environment

variable names and values.
parent c77d4ba8
......@@ -294,6 +294,13 @@ def expandvars(path):
return path
import string
varchars = string.ascii_letters + string.digits + '_-'
if isinstance(path, unicode):
encoding = sys.getfilesystemencoding()
def getenv(var):
return os.environ[var.encode(encoding)].decode(encoding)
else:
def getenv(var):
return os.environ[var]
res = ''
index = 0
pathlen = len(path)
......@@ -322,9 +329,9 @@ def expandvars(path):
index = pathlen - 1
else:
var = path[:index]
if var in os.environ:
res = res + os.environ[var]
else:
try:
res = res + getenv(var)
except KeyError:
res = res + '%' + var + '%'
elif c == '$': # variable or '$$'
if path[index + 1:index + 2] == '$':
......@@ -336,9 +343,9 @@ def expandvars(path):
try:
index = path.index('}')
var = path[:index]
if var in os.environ:
res = res + os.environ[var]
else:
try:
res = res + getenv(var)
except KeyError:
res = res + '${' + var + '}'
except ValueError:
res = res + '${' + path
......@@ -351,9 +358,9 @@ def expandvars(path):
var = var + c
index = index + 1
c = path[index:index + 1]
if var in os.environ:
res = res + os.environ[var]
else:
try:
res = res + getenv(var)
except KeyError:
res = res + '$' + var
if c != '':
index = index - 1
......
......@@ -285,28 +285,43 @@ def expanduser(path):
# Non-existent variables are left unchanged.
_varprog = None
_uvarprog = None
def expandvars(path):
"""Expand shell variables of form $var and ${var}. Unknown variables
are left unchanged."""
global _varprog
global _varprog, _uvarprog
if '$' not in path:
return path
if not _varprog:
import re
_varprog = re.compile(r'\$(\w+|\{[^}]*\})')
if isinstance(path, _unicode):
if not _varprog:
import re
_varprog = re.compile(r'\$(\w+|\{[^}]*\})')
varprog = _varprog
encoding = sys.getfilesystemencoding()
else:
if not _uvarprog:
import re
_uvarprog = re.compile(_unicode(r'\$(\w+|\{[^}]*\})'), re.UNICODE)
varprog = _uvarprog
encoding = None
i = 0
while True:
m = _varprog.search(path, i)
m = varprog.search(path, i)
if not m:
break
i, j = m.span(0)
name = m.group(1)
if name.startswith('{') and name.endswith('}'):
name = name[1:-1]
if encoding:
name = name.encode(encoding)
if name in os.environ:
tail = path[j:]
path = path[:i] + os.environ[name]
value = os.environ[name]
if encoding:
value = value.decode(encoding)
path = path[:i] + value
i = len(path)
path += tail
else:
......
......@@ -199,13 +199,40 @@ class CommonTest(GenericTest):
self.assertEqual(expandvars("$[foo]bar"), "$[foo]bar")
self.assertEqual(expandvars("$bar bar"), "$bar bar")
self.assertEqual(expandvars("$?bar"), "$?bar")
self.assertEqual(expandvars("${foo}bar"), "barbar")
self.assertEqual(expandvars("$foo}bar"), "bar}bar")
self.assertEqual(expandvars("${foo"), "${foo")
self.assertEqual(expandvars("${{foo}}"), "baz1}")
self.assertEqual(expandvars("$foo$foo"), "barbar")
self.assertEqual(expandvars("$bar$bar"), "$bar$bar")
@unittest.skipUnless(test_support.FS_NONASCII, 'need test_support.FS_NONASCII')
def test_expandvars_nonascii(self):
    # expandvars() must handle a non-ASCII character in variable names and
    # values, for byte-string paths and for unicode paths alike.
    if self.pathmodule.__name__ == 'macpath':
        self.skipTest('macpath.expandvars is a stub')
    expandvars = self.pathmodule.expandvars
    encoding = sys.getfilesystemencoding()
    with test_support.EnvironmentVarGuard() as env:
        env.clear()
        unonascii = test_support.FS_NONASCII
        snonascii = unonascii.encode(encoding)
        # One variable with a non-ASCII value, one with a non-ASCII name.
        env['spam'] = snonascii
        env[snonascii] = 'ham' + snonascii
        cases = [
            # Byte-string paths.
            (snonascii, snonascii),
            ('$spam bar', '%s bar' % snonascii),
            ('${spam}bar', '%sbar' % snonascii),
            ('${%s}bar' % snonascii, 'ham%sbar' % snonascii),
            ('$bar%s bar' % snonascii, '$bar%s bar' % snonascii),
            ('$spam}bar', '%s}bar' % snonascii),
            # The same paths as unicode strings.
            (unonascii, unonascii),
            (u'$spam bar', u'%s bar' % unonascii),
            (u'${spam}bar', u'%sbar' % unonascii),
            (u'${%s}bar' % unonascii, u'ham%sbar' % unonascii),
            (u'$bar%s bar' % unonascii, u'$bar%s bar' % unonascii),
            (u'$spam}bar', u'%s}bar' % unonascii),
        ]
        for value, expected in cases:
            self.assertEqual(expandvars(value), expected)
def test_abspath(self):
self.assertIn("foo", self.pathmodule.abspath("foo"))
......
import ntpath
import os
import sys
from test.test_support import TestFailed
from test import test_support, test_genericpath
import unittest
def tester0(fn, wantResult):
    """Evaluate the expression string ``fn`` and compare it to ``wantResult``.

    ``fn`` is handed to ``eval()`` exactly as given; no backslash escaping is
    applied here.  Raises ``TestFailed``, reporting the repr of the expected
    and actual values, when the evaluated result differs from ``wantResult``.
    """
    actual = eval(fn)
    if wantResult != actual:
        details = (fn, wantResult, actual)
        raise TestFailed("%s should return: %r but returned: %r" % details)
def tester(fn, wantResult):
    """Check an expression string whose backslashes are written unescaped.

    Every backslash in ``fn`` is doubled before evaluation so that the string
    literals inside the expression keep their backslashes once ``eval()``
    parses them.  Evaluation and comparison are delegated to ``tester0()``,
    which raises ``TestFailed`` when the result differs from ``wantResult``.
    """
    fn = fn.replace("\\", "\\\\")
    # Delegate rather than repeating the eval/compare/raise logic here, so
    # the expression is evaluated once per call.
    tester0(fn, wantResult)
class TestNtpath(unittest.TestCase):
......@@ -173,7 +176,6 @@ class TestNtpath(unittest.TestCase):
tester('ntpath.expandvars("$[foo]bar")', "$[foo]bar")
tester('ntpath.expandvars("$bar bar")', "$bar bar")
tester('ntpath.expandvars("$?bar")', "$?bar")
tester('ntpath.expandvars("${foo}bar")', "barbar")
tester('ntpath.expandvars("$foo}bar")', "bar}bar")
tester('ntpath.expandvars("${foo")', "${foo")
tester('ntpath.expandvars("${{foo}}")', "baz1}")
......@@ -187,6 +189,30 @@ class TestNtpath(unittest.TestCase):
tester('ntpath.expandvars("%foo%%bar")', "bar%bar")
tester('ntpath.expandvars("\'%foo%\'%bar")', "\'%foo%\'%bar")
@unittest.skipUnless(test_support.FS_NONASCII, 'need test_support.FS_NONASCII')
def test_expandvars_nonascii(self):
    # Each case is run twice through tester0(): once with byte strings and
    # once with the same strings decoded using the filesystem encoding.
    encoding = sys.getfilesystemencoding()
    with test_support.EnvironmentVarGuard() as env:
        env.clear()
        unonascii = test_support.FS_NONASCII
        snonascii = unonascii.encode(encoding)
        # One variable with a non-ASCII value, one with a non-ASCII name.
        env['spam'] = snonascii
        env[snonascii] = 'ham' + snonascii
        cases = [
            # $name and ${name} forms.
            ('$spam bar', '%s bar' % snonascii),
            ('$%s bar' % snonascii, '$%s bar' % snonascii),
            ('${spam}bar', '%sbar' % snonascii),
            ('${%s}bar' % snonascii, 'ham%sbar' % snonascii),
            ('$spam}bar', '%s}bar' % snonascii),
            ('$%s}bar' % snonascii, '$%s}bar' % snonascii),
            # %name% form.
            ('%spam% bar', '%s bar' % snonascii),
            ('%{}% bar'.format(snonascii), 'ham%s bar' % snonascii),
            ('%spam%bar', '%sbar' % snonascii),
            ('%{}%bar'.format(snonascii), 'ham%sbar' % snonascii),
        ]
        for value, expected in cases:
            tester0("ntpath.expandvars(%r)" % value, expected)
            tester0("ntpath.expandvars(%r)" % value.decode(encoding),
                    expected.decode(encoding))
def test_abspath(self):
# ntpath.abspath() can only be used on a system with the "nt" module
# (reasonably), so we protect this test with "import nt". This allows
......
......@@ -465,6 +465,52 @@ except NameError:
is_jython = sys.platform.startswith('java')
# FS_NONASCII: non-ASCII Unicode character encodable by
# sys.getfilesystemencoding(), or None if there is no such character.
FS_NONASCII = None
# sys.getfilesystemencoding() may return None when the system default
# encoding is used; there is then no codec name to probe with, so FS_NONASCII
# is left as None instead of failing inside encode().
if have_unicode and sys.getfilesystemencoding():
    for character in (
        # First try printable and common characters to have a readable
        # filename.  For each character, the encoding list is just an example
        # of encodings able to encode the character (the list is not
        # exhaustive).

        # U+00E6 (Latin Small Letter Ae): cp1252, iso-8859-1
        unichr(0x00E6),
        # U+0130 (Latin Capital Letter I With Dot Above): cp1254, iso8859_3
        unichr(0x0130),
        # U+0141 (Latin Capital Letter L With Stroke): cp1250, cp1257
        unichr(0x0141),
        # U+03C6 (Greek Small Letter Phi): cp1253
        unichr(0x03C6),
        # U+041A (Cyrillic Capital Letter Ka): cp1251
        unichr(0x041A),
        # U+05D0 (Hebrew Letter Alef): Encodable to cp424
        unichr(0x05D0),
        # U+060C (Arabic Comma): cp864, cp1006, iso8859_6, mac_arabic
        unichr(0x060C),
        # U+062A (Arabic Letter Teh): cp720
        unichr(0x062A),
        # U+0E01 (Thai Character Ko Kai): cp874
        unichr(0x0E01),

        # Then try more "special" characters. "special" because they may be
        # interpreted or displayed differently depending on the exact locale
        # encoding and the font.

        # U+00A0 (No-Break Space)
        unichr(0x00A0),
        # U+20AC (Euro Sign)
        unichr(0x20AC),
    ):
        try:
            # A codec that substitutes a replacement character for
            # unencodable input (reported for Windows 'mbcs') does not
            # raise, so a clean encode/decode is not proof enough: the
            # round trip must also give back the original character.
            if character.encode(sys.getfilesystemencoding())\
                        .decode(sys.getfilesystemencoding()) != character:
                raise UnicodeError
        except UnicodeError:
            pass
        else:
            FS_NONASCII = character
            break
# Filename used for testing
if os.name == 'java':
# Jython disallows @ in module names
......
......@@ -39,6 +39,9 @@ Core and Builtins
Library
-------
- Issue #6815: os.path.expandvars() now supports non-ASCII Unicode environment
variable names and values.
- Issue #20635: Fixed grid_columnconfigure() and grid_rowconfigure() methods of
Tkinter widgets to work in wantobjects=True mode.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment