Commit f03d098e authored by Jason R. Coombs's avatar Jason R. Coombs

Merge 4 out of 5 commits from Philip Thiem's unicode_try_2. Ref #193

parents 337ce0de cade48d7
"""setuptools.command.egg_info """setuptools.command.egg_info
Create a distribution's .egg-info directory and contents""" Create a distribution's .egg-info directory and contents"""
from __future__ import with_statement
import os import os
import re import re
...@@ -10,13 +11,14 @@ from setuptools import Command ...@@ -10,13 +11,14 @@ from setuptools import Command
import distutils.errors import distutils.errors
from distutils import log from distutils import log
from setuptools.command.sdist import sdist from setuptools.command.sdist import sdist
from setuptools.compat import basestring, PY3 from setuptools.compat import basestring, PY3, unicode
from setuptools import svn_utils from setuptools import svn_utils
from distutils.util import convert_path from distutils.util import convert_path
from distutils.filelist import FileList as _FileList from distutils.filelist import FileList as _FileList
from pkg_resources import (parse_requirements, safe_name, parse_version, from pkg_resources import (parse_requirements, safe_name, parse_version,
safe_version, yield_lines, EntryPoint, iter_entry_points, to_filename) safe_version, yield_lines, EntryPoint, iter_entry_points, to_filename)
from setuptools.command.sdist import walk_revctrl from setuptools.command.sdist import walk_revctrl
import setuptools.unicode_utils as unicode_utils
class egg_info(Command): class egg_info(Command):
...@@ -226,15 +228,28 @@ class FileList(_FileList): ...@@ -226,15 +228,28 @@ class FileList(_FileList):
self.files = list(filter(self._safe_path, self.files)) self.files = list(filter(self._safe_path, self.files))
def _safe_path(self, path): def _safe_path(self, path):
if not PY3: enc_warn = "'%s' not %s encodable -- skipping"
return os.path.exists(path)
#To avoid accidental transcoding errors, first decode to unicode
u_path = unicode_utils.filesys_decode(path)
if u_path is None:
log.warn("'%s' in unexpected encoding -- skipping" % path)
return False
#Must ensure utf-8 encodability
utf8_path = unicode_utils.try_encode(u_path, "utf-8")
if utf8_path is None:
log.warn(enc_warn, path, 'utf-8')
return False
try: try:
if os.path.exists(path) or os.path.exists(path.encode('utf-8')): #accept if either way checks out
if os.path.exists(u_path) or os.path.exists(utf8_path):
return True return True
#this will catch any errors encoding u_path to the filesystem encoding
except UnicodeEncodeError: except UnicodeEncodeError:
log.warn("'%s' not %s encodable -- skipping", path, log.warn(enc_warn, path, sys.getfilesystemencoding())
sys.getfilesystemencoding())
class manifest_maker(sdist): class manifest_maker(sdist):
...@@ -262,6 +277,10 @@ class manifest_maker(sdist): ...@@ -262,6 +277,10 @@ class manifest_maker(sdist):
self.filelist.remove_duplicates() self.filelist.remove_duplicates()
self.write_manifest() self.write_manifest()
def _manifest_normalize(self, path):
path = unicode_utils.filesys_decode(path)
return path.replace(os.sep, '/')
def write_manifest(self): def write_manifest(self):
""" """
Write the file list in 'self.filelist' to the manifest file Write the file list in 'self.filelist' to the manifest file
...@@ -269,7 +288,8 @@ class manifest_maker(sdist): ...@@ -269,7 +288,8 @@ class manifest_maker(sdist):
""" """
self.filelist._repair() self.filelist._repair()
files = [f.replace(os.sep, '/') for f in self.filelist.files] #_repair now ensures encodability, but does not convert to unicode
files = [self._manifest_normalize(f) for f in self.filelist.files]
msg = "writing manifest file '%s'" % self.manifest msg = "writing manifest file '%s'" % self.manifest
self.execute(write_file, (self.manifest, files), msg) self.execute(write_file, (self.manifest, files), msg)
...@@ -303,10 +323,13 @@ def write_file(filename, contents): ...@@ -303,10 +323,13 @@ def write_file(filename, contents):
sequence of strings without line terminators) to it. sequence of strings without line terminators) to it.
""" """
contents = "\n".join(contents) contents = "\n".join(contents)
#assuming the contents have been vetted for utf-8 encoding
contents = contents.encode("utf-8") contents = contents.encode("utf-8")
f = open(filename, "wb") # always write POSIX-style manifest
f.write(contents) with open(filename, "wb") as f: # always write POSIX-style manifest
f.close() f.write(contents)
def write_pkg_info(cmd, basename, filename): def write_pkg_info(cmd, basename, filename):
log.info("writing %s", filename) log.info("writing %s", filename)
......
...@@ -414,7 +414,6 @@ class TestSdistTest(unittest.TestCase): ...@@ -414,7 +414,6 @@ class TestSdistTest(unittest.TestCase):
except UnicodeDecodeError: except UnicodeDecodeError:
self.assertFalse(filename in cmd.filelist.files) self.assertFalse(filename in cmd.filelist.files)
class TestDummyOutput(environment.ZippedEnvironment): class TestDummyOutput(environment.ZippedEnvironment):
def setUp(self): def setUp(self):
......
import unicodedata
import sys
from setuptools.compat import unicode as decoded_string
# HFS Plus uses decomposed UTF-8
def decompose(path):
    """Return *path* with its characters in NFD (decomposed) form.

    Text input is normalized directly. Any other input is treated as
    UTF-8 encoded bytes: it is decoded, normalized and re-encoded as
    UTF-8. If that round trip raises ``UnicodeError`` the input is
    returned unchanged.
    """
    if isinstance(path, decoded_string):
        return unicodedata.normalize('NFD', path)
    try:
        path = path.decode('utf-8')
        path = unicodedata.normalize('NFD', path)
        path = path.encode('utf-8')
    except UnicodeError:
        pass  # Not UTF-8
    return path
def filesys_decode(path):
    """
    Ensure that the given path is decoded.

    Text input is returned unchanged. Any other input is decoded with
    the filesystem encoding first and then with UTF-8; the first
    encoding that succeeds wins.

    Returns None when no expected encoding works.
    """
    if isinstance(path, decoded_string):
        return path

    # sys.getfilesystemencoding() may return None on Python 2 when the
    # system default encoding is in use; decode(None) would raise
    # TypeError, so fall back to UTF-8 in that case.
    fs_enc = sys.getfilesystemencoding() or "utf-8"

    for enc in (fs_enc, "utf-8"):
        try:
            return path.decode(enc)
        except UnicodeDecodeError:
            continue

    # Neither candidate encoding could decode the path.
    return None
def try_encode(string, enc):
    """Encode *string* with the encoding *enc* without raising.

    Returns the encoded result, or None when the encode call raises
    ``UnicodeEncodeError``. This lets callers test encodability with a
    plain ``is None`` check instead of a try/except.
    """
    try:
        return string.encode(enc)
    except UnicodeEncodeError:
        return None
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment