Commit e46a3692 authored by Reinout van Rees

Merge pull request #250 from buildout/reinout-unicode-filenames-hash-fix

Buildout no longer breaks on packages with non-ascii filenames in them.
parents 3d22f9c5 7a81f1f6
......@@ -36,6 +36,10 @@ Change History
higher for this functionality.
[lrowe]
- Buildout no longer breaks on packages that contain a file with a non-ascii
filename. Fixes #89 and #148.
[reinout]
- Updated buildout's `travis-ci <https://travis-ci.org/buildout/buildout>`_
configuration so that tests run much quicker, which makes buildout easier and
quicker to develop.
......
......@@ -44,6 +44,19 @@ import tempfile
import zc.buildout
import zc.buildout.download
# Python 2/3 compatibility shim: ``text_type`` is the native text type
# (``unicode`` on Python 2, ``str`` on Python 3).
PY3 = sys.version_info[0] == 3
if PY3:
    text_type = str
else:
    text_type = unicode  # noqa: F821 -- only evaluated on Python 2


def fs_to_text(fs_name):
    """Return filesystem name always as unicode(2)/str(3).

    :param fs_name: a file or directory name, either already text or a
        byte string as returned by filesystem APIs.
    :returns: ``fs_name`` unchanged when it already is text, otherwise the
        byte string decoded with the filesystem encoding.
    """
    if isinstance(fs_name, text_type):
        return fs_name
    # sys.getfilesystemencoding() is documented to return None on Python 2
    # when the system default encoding is in use; passing None to decode()
    # raises TypeError, so fall back to the default encoding explicitly.
    encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
    # NOTE(review): the 'surrogateescape' handler is a Python 3 feature; on
    # Python 2 undecodable bytes presumably raise LookupError here -- confirm.
    return fs_name.decode(encoding, 'surrogateescape')
def _print_options(sep=' ', end='\n', file=None):
    """Return the ``sep``, ``end`` and ``file`` arguments as a 3-tuple.

    The defaults are a single space, a newline and ``None``.
    """
    options = (sep, end, file)
    return options
......@@ -1619,18 +1632,23 @@ def _open(base, filename, seen, dl_options, override, downloaded):
# Directory names that _dir_hash() prunes from its os.walk() traversal.
ignore_directories = ('.svn', 'CVS', '__pycache__')

# Hashes that _dir_hash() has already computed, looked up by directory name.
_dir_hashes = dict()
def _dir_hash(dir):
dir = fs_to_text(dir)
# ^^^ fs_to_text ensures unicode, needed for os.walk() on python2 to work
# well with non-ascii filenames.
dir_hash = _dir_hashes.get(dir, None)
if dir_hash is not None:
return dir_hash
hash = md5()
for (dirpath, dirnames, filenames) in os.walk(dir):
dirnames[:] = [fs_to_text(dirname) for dirname in dirnames]
filenames[:] = [fs_to_text(filename) for filename in filenames]
dirnames[:] = sorted(n for n in dirnames if n not in ignore_directories)
filenames[:] = sorted(f for f in filenames
if (not (f.endswith('pyc') or f.endswith('pyo'))
and os.path.exists(os.path.join(dirpath, f)))
)
hash.update(' '.join(dirnames).encode())
hash.update(' '.join(filenames).encode())
)
hash.update(' '.join(dirnames).encode('utf-8'))
hash.update(' '.join(filenames).encode('utf-8'))
for name in filenames:
path = os.path.join(dirpath, name)
if name == 'entry_points.txt':
......
# -*- coding: utf-8 -*-
##############################################################################
#
# Copyright (c) 2004-2009 Zope Foundation and Contributors.
......@@ -1156,6 +1157,20 @@ because of the missing target file.
"""
def unicode_filename_doesnt_break_hash():
    """
    Buildout's _dir_hash() used to break on non-ascii filenames on python 2.

    Regression test: create a directory with a non-ascii name that contains a
    file with a non-ascii name, then hash it. The resulting hash value is not
    checked; the test passes as long as _dir_hash() does not raise.

    >>> mkdir('héhé')
    >>> write('héhé', 'héhé.py',
    ... '''
    ... print('Example filename from pyramid tests')
    ... ''')
    >>> from zc.buildout.buildout import _dir_hash
    >>> dont_care = _dir_hash('héhé')

    """
def o_option_sets_offline():
"""
>>> print_(system(join(sample_buildout, 'bin', 'buildout')+' -vvo'), end='')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment