Commit f03d098e authored by Jason R. Coombs's avatar Jason R. Coombs

Merge 4 out of 5 commits from Philip Thiem's unicode_try_2. Ref #193

parents 337ce0de cade48d7
"""setuptools.command.egg_info
Create a distribution's .egg-info directory and contents"""
from __future__ import with_statement
import os
import re
......@@ -10,13 +11,14 @@ from setuptools import Command
import distutils.errors
from distutils import log
from setuptools.command.sdist import sdist
from setuptools.compat import basestring, PY3
from setuptools.compat import basestring, PY3, unicode
from setuptools import svn_utils
from distutils.util import convert_path
from distutils.filelist import FileList as _FileList
from pkg_resources import (parse_requirements, safe_name, parse_version,
safe_version, yield_lines, EntryPoint, iter_entry_points, to_filename)
from setuptools.command.sdist import walk_revctrl
import setuptools.unicode_utils as unicode_utils
class egg_info(Command):
......@@ -226,15 +228,28 @@ class FileList(_FileList):
self.files = list(filter(self._safe_path, self.files))
def _safe_path(self, path):
if not PY3:
return os.path.exists(path)
enc_warn = "'%s' not %s encodable -- skipping"
#To avoid accidental trans-codings errors, first to unicode
u_path = unicode_utils.filesys_decode(path)
if u_path is None:
log.warn("'%s' in unexpected encoding -- skipping" % path)
return False
#Must ensure utf-8 encodability
utf8_path = unicode_utils.try_encode(u_path, "utf-8")
if utf8_path is None:
log.warn(enc_warn, path, 'utf-8')
return False
try:
if os.path.exists(path) or os.path.exists(path.encode('utf-8')):
#accept is either way checks out
if os.path.exists(u_path) or os.path.exists(utf8_path):
return True
#this will catch any encode errors decoding u_path
except UnicodeEncodeError:
log.warn("'%s' not %s encodable -- skipping", path,
sys.getfilesystemencoding())
log.warn(enc_warn, path, sys.getfilesystemencoding())
class manifest_maker(sdist):
......@@ -262,6 +277,10 @@ class manifest_maker(sdist):
self.filelist.remove_duplicates()
self.write_manifest()
def _manifest_normalize(self, path):
path = unicode_utils.filesys_decode(path)
return path.replace(os.sep, '/')
def write_manifest(self):
"""
Write the file list in 'self.filelist' to the manifest file
......@@ -269,7 +288,8 @@ class manifest_maker(sdist):
"""
self.filelist._repair()
files = [f.replace(os.sep, '/') for f in self.filelist.files]
#Now _repairs should encodability, but not unicode
files = [self._manifest_normalize(f) for f in self.filelist.files]
msg = "writing manifest file '%s'" % self.manifest
self.execute(write_file, (self.manifest, files), msg)
......@@ -303,10 +323,13 @@ def write_file(filename, contents):
sequence of strings without line terminators) to it.
"""
contents = "\n".join(contents)
#assuming the contents has been vetted for utf-8 encoding
contents = contents.encode("utf-8")
f = open(filename, "wb") # always write POSIX-style manifest
f.write(contents)
f.close()
with open(filename, "wb") as f: # always write POSIX-style manifest
f.write(contents)
def write_pkg_info(cmd, basename, filename):
log.info("writing %s", filename)
......
......@@ -414,7 +414,6 @@ class TestSdistTest(unittest.TestCase):
except UnicodeDecodeError:
self.assertFalse(filename in cmd.filelist.files)
class TestDummyOutput(environment.ZippedEnvironment):
def setUp(self):
......
import unicodedata
import sys
from setuptools.compat import unicode as decoded_string
# HFS Plus uses decomposed UTF-8
def decompose(path):
if isinstance(path, decoded_string):
return unicodedata.normalize('NFD', path)
try:
path = path.decode('utf-8')
path = unicodedata.normalize('NFD', path)
path = path.encode('utf-8')
except UnicodeError:
pass # Not UTF-8
return path
def filesys_decode(path):
"""
Ensure that the given path is decoded,
NONE when no expected encoding works
"""
fs_enc = sys.getfilesystemencoding()
if isinstance(path, decoded_string):
return path
for enc in (fs_enc, "utf-8"):
try:
return path.decode(enc)
except UnicodeDecodeError:
continue
def try_encode(string, enc):
"turn unicode encoding into a functional routine"
try:
return string.encode(enc)
except UnicodeEncodeError:
return None
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment