Commit d57e8768 authored by Amaury Forgeot d'Arc

Second part of #3187, for Windows:

os and os.path functions now accept both unicode and byte strings for file names.

Reviewed by Guido.
parent 4ac18c23
...@@ -19,6 +19,7 @@ __all__ = ["normcase","isabs","join","splitdrive","split","splitext", ...@@ -19,6 +19,7 @@ __all__ = ["normcase","isabs","join","splitdrive","split","splitext",
"extsep","devnull","realpath","supports_unicode_filenames","relpath"] "extsep","devnull","realpath","supports_unicode_filenames","relpath"]
# strings representing various path-related bits and pieces # strings representing various path-related bits and pieces
# These are primarily for export; internally, they are hardcoded.
curdir = '.' curdir = '.'
pardir = '..' pardir = '..'
extsep = '.' extsep = '.'
...@@ -33,6 +34,36 @@ elif 'os2' in sys.builtin_module_names: ...@@ -33,6 +34,36 @@ elif 'os2' in sys.builtin_module_names:
altsep = '/' altsep = '/'
devnull = 'nul' devnull = 'nul'
def _get_sep(path):
if isinstance(path, bytes):
return b'\\'
else:
return '\\'
def _get_altsep(path):
if isinstance(path, bytes):
return b'/'
else:
return '/'
def _get_bothseps(path):
if isinstance(path, bytes):
return b'\\/'
else:
return '\\/'
def _get_dot(path):
if isinstance(path, bytes):
return b'.'
else:
return '.'
def _get_colon(path):
if isinstance(path, bytes):
return b':'
else:
return ':'
# Normalize the case of a pathname and map slashes to backslashes. # Normalize the case of a pathname and map slashes to backslashes.
# Other normalizations (such as optimizing '../' away) are not done # Other normalizations (such as optimizing '../' away) are not done
# (this is done by normpath). # (this is done by normpath).
...@@ -41,7 +72,7 @@ def normcase(s): ...@@ -41,7 +72,7 @@ def normcase(s):
"""Normalize case of pathname. """Normalize case of pathname.
Makes all characters lowercase and all slashes into backslashes.""" Makes all characters lowercase and all slashes into backslashes."""
return s.replace("/", "\\").lower() return s.replace(_get_altsep(s), _get_sep(s)).lower()
# Return whether a path is absolute. # Return whether a path is absolute.
...@@ -53,7 +84,7 @@ def normcase(s): ...@@ -53,7 +84,7 @@ def normcase(s):
def isabs(s): def isabs(s):
"""Test whether a path is absolute""" """Test whether a path is absolute"""
s = splitdrive(s)[1] s = splitdrive(s)[1]
return s != '' and s[:1] in '/\\' return len(s) > 0 and s[:1] in _get_bothseps(s)
# Join two (or more) paths. # Join two (or more) paths.
...@@ -62,10 +93,13 @@ def join(a, *p): ...@@ -62,10 +93,13 @@ def join(a, *p):
"""Join two or more pathname components, inserting "\\" as needed. """Join two or more pathname components, inserting "\\" as needed.
If any component is an absolute path, all previous path components If any component is an absolute path, all previous path components
will be discarded.""" will be discarded."""
sep = _get_sep(a)
seps = _get_bothseps(a)
colon = _get_colon(a)
path = a path = a
for b in p: for b in p:
b_wins = 0 # set to 1 iff b makes path irrelevant b_wins = 0 # set to 1 iff b makes path irrelevant
if path == "": if not path:
b_wins = 1 b_wins = 1
elif isabs(b): elif isabs(b):
...@@ -77,13 +111,13 @@ def join(a, *p): ...@@ -77,13 +111,13 @@ def join(a, *p):
# 3. join('c:/a', '/b') == '/b' # 3. join('c:/a', '/b') == '/b'
# 4. join('c:', 'd:/') = 'd:/' # 4. join('c:', 'd:/') = 'd:/'
# 5. join('c:/', 'd:/') = 'd:/' # 5. join('c:/', 'd:/') = 'd:/'
if path[1:2] != ":" or b[1:2] == ":": if path[1:2] != colon or b[1:2] == colon:
# Path doesn't start with a drive letter, or cases 4 and 5. # Path doesn't start with a drive letter, or cases 4 and 5.
b_wins = 1 b_wins = 1
# Else path has a drive letter, and b doesn't but is absolute. # Else path has a drive letter, and b doesn't but is absolute.
elif len(path) > 3 or (len(path) == 3 and elif len(path) > 3 or (len(path) == 3 and
path[-1] not in "/\\"): path[-1:] not in seps):
# case 3 # case 3
b_wins = 1 b_wins = 1
...@@ -92,24 +126,24 @@ def join(a, *p): ...@@ -92,24 +126,24 @@ def join(a, *p):
else: else:
# Join, and ensure there's a separator. # Join, and ensure there's a separator.
assert len(path) > 0 assert len(path) > 0
if path[-1] in "/\\": if path[-1:] in seps:
if b and b[0] in "/\\": if b and b[:1] in seps:
path += b[1:] path += b[1:]
else: else:
path += b path += b
elif path[-1] == ":": elif path[-1:] == colon:
path += b path += b
elif b: elif b:
if b[0] in "/\\": if b[:1] in seps:
path += b path += b
else: else:
path += "\\" + b path += sep + b
else: else:
# path is not empty and does not end with a backslash, # path is not empty and does not end with a backslash,
# but b is empty; since, e.g., split('a/') produces # but b is empty; since, e.g., split('a/') produces
# ('a', ''), it's best if join() adds a backslash in # ('a', ''), it's best if join() adds a backslash in
# this case. # this case.
path += '\\' path += sep
return path return path
...@@ -120,9 +154,9 @@ def join(a, *p): ...@@ -120,9 +154,9 @@ def join(a, *p):
def splitdrive(p): def splitdrive(p):
"""Split a pathname into drive and path specifiers. Returns a 2-tuple """Split a pathname into drive and path specifiers. Returns a 2-tuple
"(drive,path)"; either part may be empty""" "(drive,path)"; either part may be empty"""
if p[1:2] == ':': if p[1:2] == _get_colon(p):
return p[0:2], p[2:] return p[0:2], p[2:]
return '', p return p[:0], p
# Parse UNC paths # Parse UNC paths
...@@ -134,24 +168,25 @@ def splitunc(p): ...@@ -134,24 +168,25 @@ def splitunc(p):
using backslashes). unc+rest is always the input path. using backslashes). unc+rest is always the input path.
Paths containing drive letters never have an UNC part. Paths containing drive letters never have an UNC part.
""" """
if p[1:2] == ':': sep = _get_sep(p)
return '', p # Drive letter present if not p[1:2]:
return p[:0], p # Drive letter present
firstTwo = p[0:2] firstTwo = p[0:2]
if firstTwo == '//' or firstTwo == '\\\\': if normcase(firstTwo) == sep + sep:
# is a UNC path: # is a UNC path:
# vvvvvvvvvvvvvvvvvvvv equivalent to drive letter # vvvvvvvvvvvvvvvvvvvv equivalent to drive letter
# \\machine\mountpoint\directories... # \\machine\mountpoint\directories...
# directory ^^^^^^^^^^^^^^^ # directory ^^^^^^^^^^^^^^^
normp = normcase(p) normp = normcase(p)
index = normp.find('\\', 2) index = normp.find(sep, 2)
if index == -1: if index == -1:
##raise RuntimeError, 'illegal UNC path: "' + p + '"' ##raise RuntimeError, 'illegal UNC path: "' + p + '"'
return ("", p) return (p[:0], p)
index = normp.find('\\', index + 1) index = normp.find(sep, index + 1)
if index == -1: if index == -1:
index = len(p) index = len(p)
return p[:index], p[index:] return p[:index], p[index:]
return '', p return p[:0], p
# Split a path in head (everything up to the last '/') and tail (the # Split a path in head (everything up to the last '/') and tail (the
...@@ -165,15 +200,16 @@ def split(p): ...@@ -165,15 +200,16 @@ def split(p):
Return tuple (head, tail) where tail is everything after the final slash. Return tuple (head, tail) where tail is everything after the final slash.
Either part may be empty.""" Either part may be empty."""
seps = _get_bothseps(p)
d, p = splitdrive(p) d, p = splitdrive(p)
# set i to index beyond p's last slash # set i to index beyond p's last slash
i = len(p) i = len(p)
while i and p[i-1] not in '/\\': while i and p[i-1] not in seps:
i = i - 1 i = i - 1
head, tail = p[:i], p[i:] # now tail has no slashes head, tail = p[:i], p[i:] # now tail has no slashes
# remove trailing slashes from head, unless it's all slashes # remove trailing slashes from head, unless it's all slashes
head2 = head head2 = head
while head2 and head2[-1] in '/\\': while head2 and head2[-1:] in seps:
head2 = head2[:-1] head2 = head2[:-1]
head = head2 or head head = head2 or head
return d + head, tail return d + head, tail
...@@ -185,7 +221,8 @@ def split(p): ...@@ -185,7 +221,8 @@ def split(p):
# It is always true that root + ext == p. # It is always true that root + ext == p.
def splitext(p): def splitext(p):
return genericpath._splitext(p, sep, altsep, extsep) return genericpath._splitext(p, _get_sep(p), _get_altsep(p),
_get_dot(p))
splitext.__doc__ = genericpath._splitext.__doc__ splitext.__doc__ = genericpath._splitext.__doc__
...@@ -220,10 +257,11 @@ lexists = exists ...@@ -220,10 +257,11 @@ lexists = exists
def ismount(path): def ismount(path):
"""Test whether a path is a mount point (defined as root of drive)""" """Test whether a path is a mount point (defined as root of drive)"""
unc, rest = splitunc(path) unc, rest = splitunc(path)
seps = _get_bothseps(p)
if unc: if unc:
return rest in ("", "/", "\\") return rest in p[:0] + seps
p = splitdrive(path)[1] p = splitdrive(path)[1]
return len(p) == 1 and p[0] in '/\\' return len(p) == 1 and p[0] in seps
# Expand paths beginning with '~' or '~user'. # Expand paths beginning with '~' or '~user'.
...@@ -239,10 +277,14 @@ def expanduser(path): ...@@ -239,10 +277,14 @@ def expanduser(path):
"""Expand ~ and ~user constructs. """Expand ~ and ~user constructs.
If user or $HOME is unknown, do nothing.""" If user or $HOME is unknown, do nothing."""
if path[:1] != '~': if isinstance(path, bytes):
tilde = b'~'
else:
tilde = '~'
if not path.startswith(tilde):
return path return path
i, n = 1, len(path) i, n = 1, len(path)
while i < n and path[i] not in '/\\': while i < n and path[i] not in _get_bothseps(path):
i = i + 1 i = i + 1
if 'HOME' in os.environ: if 'HOME' in os.environ:
...@@ -258,6 +300,9 @@ def expanduser(path): ...@@ -258,6 +300,9 @@ def expanduser(path):
drive = '' drive = ''
userhome = join(drive, os.environ['HOMEPATH']) userhome = join(drive, os.environ['HOMEPATH'])
if isinstance(path, bytes):
userhome = userhome.encode(sys.getfilesystemencoding())
if i != 1: #~user if i != 1: #~user
userhome = join(dirname(userhome), path[1:i]) userhome = join(dirname(userhome), path[1:i])
...@@ -281,72 +326,104 @@ def expandvars(path): ...@@ -281,72 +326,104 @@ def expandvars(path):
"""Expand shell variables of the forms $var, ${var} and %var%. """Expand shell variables of the forms $var, ${var} and %var%.
Unknown variables are left unchanged.""" Unknown variables are left unchanged."""
if isinstance(path, bytes):
if ord('$') not in path and ord('%') not in path:
return path
import string
varchars = bytes(string.ascii_letters + string.digits + '_-', 'ascii')
else:
if '$' not in path and '%' not in path: if '$' not in path and '%' not in path:
return path return path
import string import string
varchars = string.ascii_letters + string.digits + '_-' varchars = string.ascii_letters + string.digits + '_-'
res = '' res = path[:0]
index = 0 index = 0
pathlen = len(path) pathlen = len(path)
while index < pathlen: while index < pathlen:
c = path[index] c = path[index:index+1]
if c == '\'': # no expansion within single quotes if c in ('\'', b'\''): # no expansion within single quotes
path = path[index + 1:] path = path[index + 1:]
pathlen = len(path) pathlen = len(path)
try: try:
index = path.index('\'') index = path.index(c)
res = res + '\'' + path[:index + 1] res = res + c + path[:index + 1]
except ValueError: except ValueError:
res = res + path res = res + path
index = pathlen - 1 index = pathlen - 1
elif c == '%': # variable or '%' elif c in ('%', b'%'): # variable or '%'
if path[index + 1:index + 2] == '%': if isinstance(path, bytes):
percent = b'%'
else:
percent = '%'
if path[index + 1:index + 2] == percent:
res = res + c res = res + c
index = index + 1 index = index + 1
else: else:
path = path[index+1:] path = path[index+1:]
pathlen = len(path) pathlen = len(path)
try: try:
index = path.index('%') index = path.index(percent)
except ValueError: except ValueError:
res = res + '%' + path res = res + percent + path
index = pathlen - 1 index = pathlen - 1
else: else:
var = path[:index] var = path[:index]
if isinstance(path, bytes):
var = var.decode('ascii')
if var in os.environ: if var in os.environ:
res = res + os.environ[var] value = os.environ[var]
else: else:
res = res + '%' + var + '%' value = '%' + var + '%'
elif c == '$': # variable or '$$' if isinstance(path, bytes):
value = value.encode('ascii')
res = res + value
elif c in ('$', b'$'): # variable or '$$'
if path[index + 1:index + 2] == '$': if path[index + 1:index + 2] == '$':
res = res + c res = res + c
index = index + 1 index = index + 1
elif path[index + 1:index + 2] == '{': elif path[index + 1:index + 2] in ('{', b'{'):
path = path[index+2:] path = path[index+2:]
pathlen = len(path) pathlen = len(path)
try: try:
if isinstance(path, bytes):
index = path.index(b'}')
else:
index = path.index('}') index = path.index('}')
var = path[:index] var = path[:index]
if isinstance(path, bytes):
var = var.decode('ascii')
if var in os.environ: if var in os.environ:
res = res + os.environ[var] value = os.environ[var]
else: else:
res = res + '${' + var + '}' value = '${' + var + '}'
if isinstance(path, bytes):
value = value.encode('ascii')
res = res + value
except ValueError: except ValueError:
if isinstance(path, bytes):
res = res + b'${' + path
else:
res = res + '${' + path res = res + '${' + path
index = pathlen - 1 index = pathlen - 1
else: else:
var = '' var = ''
index = index + 1 index = index + 1
c = path[index:index + 1] c = path[index:index + 1]
while c != '' and c in varchars: while c and c in varchars:
if isinstance(path, bytes):
var = var + c.decode('ascii')
else:
var = var + c var = var + c
index = index + 1 index = index + 1
c = path[index:index + 1] c = path[index:index + 1]
if var in os.environ: if var in os.environ:
res = res + os.environ[var] value = os.environ[var]
else: else:
res = res + '$' + var value = '$' + var
if c != '': if isinstance(path, bytes):
value = value.encode('ascii')
res = res + value
if c:
index = index - 1 index = index - 1
else: else:
res = res + c res = res + c
...@@ -360,7 +437,8 @@ def expandvars(path): ...@@ -360,7 +437,8 @@ def expandvars(path):
def normpath(path): def normpath(path):
"""Normalize path, eliminating double slashes, etc.""" """Normalize path, eliminating double slashes, etc."""
path = path.replace("/", "\\") sep = _get_sep(path)
path = path.replace(_get_altsep(path), sep)
prefix, path = splitdrive(path) prefix, path = splitdrive(path)
# We need to be careful here. If the prefix is empty, and the path starts # We need to be careful here. If the prefix is empty, and the path starts
# with a backslash, it could either be an absolute path on the current # with a backslash, it could either be an absolute path on the current
...@@ -371,20 +449,20 @@ def normpath(path): ...@@ -371,20 +449,20 @@ def normpath(path):
# letter. This means that the invalid filename \\\a\b is preserved # letter. This means that the invalid filename \\\a\b is preserved
# unchanged, where a\\\b is normalised to a\b. It's not clear that there # unchanged, where a\\\b is normalised to a\b. It's not clear that there
# is any better behaviour for such edge cases. # is any better behaviour for such edge cases.
if prefix == '': if not prefix:
# No drive letter - preserve initial backslashes # No drive letter - preserve initial backslashes
while path[:1] == "\\": while path[:1] == sep:
prefix = prefix + "\\" prefix = prefix + sep
path = path[1:] path = path[1:]
else: else:
# We have a drive letter - collapse initial backslashes # We have a drive letter - collapse initial backslashes
if path.startswith("\\"): if path.startswith(sep):
prefix = prefix + "\\" prefix = prefix + sep
path = path.lstrip("\\") path = path.lstrip(sep)
comps = path.split("\\") comps = path.split(sep)
i = 0 i = 0
while i < len(comps): while i < len(comps):
if comps[i] in ('.', ''): if comps[i] in ('.', '', b'.', b''):
del comps[i] del comps[i]
elif comps[i] == '..': elif comps[i] == '..':
if i > 0 and comps[i-1] != '..': if i > 0 and comps[i-1] != '..':
...@@ -394,12 +472,20 @@ def normpath(path): ...@@ -394,12 +472,20 @@ def normpath(path):
del comps[i] del comps[i]
else: else:
i += 1 i += 1
elif comps[i] == b'..':
if i > 0 and comps[i-1] != b'..':
del comps[i-1:i+1]
i -= 1
elif i == 0 and prefix.endswith(b"\\"):
del comps[i]
else:
i += 1
else: else:
i += 1 i += 1
# If the path is now empty, substitute '.' # If the path is now empty, substitute '.'
if not prefix and not comps: if not prefix and not comps:
comps.append('.') comps.append(_get_dot(path))
return prefix + "\\".join(comps) return prefix + sep.join(comps)
# Return an absolute path. # Return an absolute path.
...@@ -410,7 +496,11 @@ except ImportError: # not running on Windows - mock up something sensible ...@@ -410,7 +496,11 @@ except ImportError: # not running on Windows - mock up something sensible
def abspath(path): def abspath(path):
"""Return the absolute version of a path.""" """Return the absolute version of a path."""
if not isabs(path): if not isabs(path):
path = join(os.getcwd(), path) if isinstance(path, bytes):
cwd = os.getcwdb()
else:
cwd = os.getcwd()
path = join(cwd, path)
return normpath(path) return normpath(path)
else: # use native Windows method on Windows else: # use native Windows method on Windows
...@@ -434,6 +524,10 @@ supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and ...@@ -434,6 +524,10 @@ supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and
def relpath(path, start=curdir): def relpath(path, start=curdir):
"""Return a relative version of a path""" """Return a relative version of a path"""
sep = _get_sep(path)
if start is curdir:
start = _get_dot(path)
if not path: if not path:
raise ValueError("no path specified") raise ValueError("no path specified")
...@@ -455,7 +549,11 @@ def relpath(path, start=curdir): ...@@ -455,7 +549,11 @@ def relpath(path, start=curdir):
else: else:
i += 1 i += 1
if isinstance(path, bytes):
pardir = b'..'
else:
pardir = '..'
rel_list = [pardir] * (len(start_list)-i) + path_list[i:] rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
if not rel_list: if not rel_list:
return curdir return _get_dot(path)
return join(*rel_list) return join(*rel_list)
...@@ -12,6 +12,23 @@ def tester(fn, wantResult): ...@@ -12,6 +12,23 @@ def tester(fn, wantResult):
raise TestFailed("%s should return: %s but returned: %s" \ raise TestFailed("%s should return: %s but returned: %s" \
%(str(fn), str(wantResult), str(gotResult))) %(str(fn), str(wantResult), str(gotResult)))
# then with bytes
fn = fn.replace("('", "(b'")
fn = fn.replace('("', '(b"')
fn = fn.replace("['", "[b'")
fn = fn.replace('["', '[b"')
fn = fn.replace(", '", ", b'")
fn = fn.replace(', "', ', b"')
gotResult = eval(fn)
if isinstance(wantResult, str):
wantResult = wantResult.encode('ascii')
elif isinstance(wantResult, tuple):
wantResult = tuple(r.encode('ascii') for r in wantResult)
gotResult = eval(fn)
if wantResult != gotResult:
raise TestFailed("%s should return: %s but returned: %s" \
%(str(fn), str(wantResult), repr(gotResult)))
class TestNtpath(unittest.TestCase): class TestNtpath(unittest.TestCase):
def test_splitext(self): def test_splitext(self):
......
...@@ -1651,7 +1651,7 @@ static PyObject * ...@@ -1651,7 +1651,7 @@ static PyObject *
posix_chdir(PyObject *self, PyObject *args) posix_chdir(PyObject *self, PyObject *args)
{ {
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
return win32_1str(args, "chdir", "s:chdir", win32_chdir, "U:chdir", win32_wchdir); return win32_1str(args, "chdir", "y:chdir", win32_chdir, "U:chdir", win32_wchdir);
#elif defined(PYOS_OS2) && defined(PYCC_GCC) #elif defined(PYOS_OS2) && defined(PYCC_GCC)
return posix_1str(args, "et:chdir", _chdir2); return posix_1str(args, "et:chdir", _chdir2);
#elif defined(__VMS) #elif defined(__VMS)
...@@ -2586,7 +2586,7 @@ static PyObject * ...@@ -2586,7 +2586,7 @@ static PyObject *
posix_rmdir(PyObject *self, PyObject *args) posix_rmdir(PyObject *self, PyObject *args)
{ {
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
return win32_1str(args, "rmdir", "s:rmdir", RemoveDirectoryA, "U:rmdir", RemoveDirectoryW); return win32_1str(args, "rmdir", "y:rmdir", RemoveDirectoryA, "U:rmdir", RemoveDirectoryW);
#else #else
return posix_1str(args, "et:rmdir", rmdir); return posix_1str(args, "et:rmdir", rmdir);
#endif #endif
...@@ -2667,7 +2667,7 @@ static PyObject * ...@@ -2667,7 +2667,7 @@ static PyObject *
posix_unlink(PyObject *self, PyObject *args) posix_unlink(PyObject *self, PyObject *args)
{ {
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
return win32_1str(args, "remove", "s:remove", DeleteFileA, "U:remove", DeleteFileW); return win32_1str(args, "remove", "y:remove", DeleteFileA, "U:remove", DeleteFileW);
#else #else
return posix_1str(args, "et:remove", unlink); return posix_1str(args, "et:remove", unlink);
#endif #endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment