Commit bb45468d authored by Tim Heap

Much faster implementation of FileList, for big egg_info speedups

parent 7edbffcc
This diff is collapsed.
"""
Filename globbing utility. Mostly a copy of `glob` from Python 3.5.
Changes include:
* `yield from` and PEP3102 `*` removed.
* `bytes` changed to `six.binary_type`.
* Hidden files are not ignored.
"""
import os
import re
import fnmatch
from setuptools.extern.six import binary_type
__all__ = ["glob", "iglob", "escape"]
def glob(pathname, recursive=False):
    """Return a list of paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. Unlike the standard library's `glob`, filenames starting
    with a dot are not special-cased here: they are matched by '*' and
    '?' patterns like any other name.

    If recursive is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    return list(iglob(pathname, recursive=recursive))
def iglob(pathname, recursive=False):
    """Return an iterator which yields the paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. Unlike the standard library's `glob`, filenames starting
    with a dot are not special-cased here: they are matched by '*' and
    '?' patterns like any other name.

    If recursive is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    it = _iglob(pathname, recursive)
    if recursive and _isrecursive(pathname):
        # For a bare '**' the recursive helper first yields an empty
        # string standing for the starting directory itself; consume it
        # here so callers only ever see real path names.
        s = next(it)  # skip empty string
        assert not s
    return it
def _iglob(pathname, recursive):
    """Yield every path matching `pathname`, expanding one component at a time.

    The path is split into a directory part and a final component. The
    directory part is expanded first (recursively, if it contains
    wildcards) and the final component is then matched inside each
    resulting directory.
    """
    dirname, basename = os.path.split(pathname)

    # No wildcards anywhere: the path either exists or it does not.
    if not has_magic(pathname):
        if basename:
            present = os.path.lexists(pathname)
        else:
            # Patterns ending with a slash should match only directories
            present = os.path.isdir(dirname)
        if present:
            yield pathname
        return

    wants_recursion = recursive and _isrecursive(basename)

    # Pattern without a directory part: match directly, no joining needed.
    if not dirname:
        lister = glob2 if wants_recursion else glob1
        for match in lister(dirname, basename):
            yield match
        return

    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if dirname != pathname and has_magic(dirname):
        parents = _iglob(dirname, recursive)
    else:
        parents = [dirname]

    # Pick the helper suited to the final component: literal name,
    # recursive '**', or ordinary wildcard pattern.
    if not has_magic(basename):
        lister = glob0
    elif wants_recursion:
        lister = glob2
    else:
        lister = glob1

    for parent in parents:
        for name in lister(parent, basename):
            yield os.path.join(parent, name)
# These 2 helper functions non-recursively glob inside a literal directory.
# They return a list of basenames. `glob1` accepts a pattern while `glob0`
# takes a literal basename (so it only has to check for its existence).
def glob1(dirname, pattern):
    """Return the names in `dirname` that match the wildcard `pattern`.

    An empty `dirname` means the current directory, expressed as bytes
    or text to agree with the type of `pattern`. A directory that cannot
    be listed produces an empty list.
    """
    if dirname:
        target = dirname
    elif isinstance(pattern, binary_type):
        target = os.curdir.encode('ASCII')
    else:
        target = os.curdir

    try:
        entries = os.listdir(target)
    except OSError:
        return []
    return fnmatch.filter(entries, pattern)
def glob0(dirname, basename):
    """Return `[basename]` if that literal entry exists, else an empty list.

    A non-empty `basename` is checked for existence inside `dirname`
    (without following a final symlink). An empty `basename` comes from
    a path ending in a separator and only matches when `dirname` is a
    directory.
    """
    if basename:
        found = os.path.lexists(os.path.join(dirname, basename))
    else:
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator. 'q*x/' should match only directories.
        found = os.path.isdir(dirname)
    return [basename] if found else []
# This helper function recursively yields relative pathnames inside a literal
# directory.
def glob2(dirname, pattern):
    """Yield an empty name, then every relative path found below `dirname`.

    `pattern` must be the recursive wildcard; it is only used to check
    that and to produce an empty value of the matching string type.
    """
    assert _isrecursive(pattern)
    # Empty str or bytes, matching the type of the pattern.
    empty_name = pattern[:0]
    yield empty_name
    for relative_path in _rlistdir(dirname):
        yield relative_path
# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname):
    """Yield, depth-first, the relative path of every entry below `dirname`.

    Each entry is yielded before its own children. An empty `dirname`
    means the current directory, kept as bytes or text to agree with the
    type of `dirname`. Directories that cannot be listed (including
    plain files reached during the descent) contribute nothing.
    """
    if not dirname:
        if isinstance(dirname, binary_type):
            # Encode rather than calling binary_type(text, encoding): where
            # binary_type is `str` that constructor rejects an encoding
            # argument. This also mirrors what glob1 does.
            dirname = os.curdir.encode('ASCII')
        else:
            dirname = os.curdir
    try:
        names = os.listdir(dirname)
    except OSError:
        return
    for name in names:
        yield name
        # dirname is never empty here, so the child can always be joined.
        for sub in _rlistdir(os.path.join(dirname, name)):
            yield os.path.join(name, sub)
# Patterns that find any glob metacharacter ('*', '?' or '[') in text and
# in bytes respectively; the capture group lets `escape` reuse the match.
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')


def has_magic(s):
    """Return True if `s` (str or bytes) contains a glob metacharacter."""
    checker = magic_check_bytes if isinstance(s, binary_type) else magic_check
    return checker.search(s) is not None
def _isrecursive(pattern):
    """Return True if `pattern` is exactly the recursive wildcard '**'."""
    # Compare against a literal of the same string type as the pattern.
    globstar = b'**' if isinstance(pattern, binary_type) else '**'
    return pattern == globstar
def escape(pathname):
    """Escape all special characters.

    Every '*', '?' and '[' outside the drive part is wrapped in square
    brackets so that it is matched literally.
    """
    # Metacharacters do not work in the drive part and shouldn't be escaped,
    # so set the drive aside and only rewrite the remainder.
    drive, tail = os.path.splitdrive(pathname)
    if isinstance(tail, binary_type):
        escaped = magic_check_bytes.sub(br'[\1]', tail)
    else:
        escaped = magic_check.sub(r'[\1]', tail)
    return drive + escaped
......@@ -9,7 +9,7 @@ import tempfile
from distutils import log
from distutils.errors import DistutilsTemplateError
from setuptools.command.egg_info import FileList, egg_info
from setuptools.command.egg_info import FileList, egg_info, translate_pattern
from setuptools.dist import Distribution
from setuptools.extern import six
from setuptools.tests.textwrap import DALS
......@@ -66,6 +66,34 @@ default_files = frozenset(map(make_local_path, [
]))
def get_pattern(glob):
    """Return the regex source that `translate_pattern` builds for `glob`."""
    local_glob = make_local_path(glob)
    compiled = translate_pattern(local_glob)
    return compiled.pattern
def test_translated_pattern_test():
    """Check the regex source produced for a range of glob patterns."""
    local = make_local_path

    # (glob, expected regex source) pairs, checked in order.
    cases = [
        ('foo', r'foo\Z(?ms)'),
        (local('foo/bar'), local(r'foo\/bar\Z(?ms)')),

        # Glob matching
        ('*.txt', local(r'[^\/]*\.txt\Z(?ms)')),
        ('dir/*.txt', local(r'dir\/[^\/]*\.txt\Z(?ms)')),
        ('*/*.py', local(r'[^\/]*\/[^\/]*\.py\Z(?ms)')),
        ('docs/page-?.txt', local(r'docs\/page\-[^\/]\.txt\Z(?ms)')),

        # Globstars change what they mean depending upon where they are
        (local('foo/**/bar'), local(r'foo\/(?:[^\/]+\/)*bar\Z(?ms)')),
        (local('foo/**'), local(r'foo\/.*\Z(?ms)')),
        (local('**'), r'.*\Z(?ms)'),

        # Character classes
        ('pre[one]post', r'pre[one]post\Z(?ms)'),
        ('hello[!one]world', r'hello[^one]world\Z(?ms)'),
        ('[]one].txt', r'[\]one]\.txt\Z(?ms)'),
        ('foo[!]one]bar', r'foo[^\]one]bar\Z(?ms)'),
    ]

    for glob_text, expected in cases:
        assert get_pattern(glob_text) == expected
class TempDirTestCase(object):
def setup_method(self, method):
......@@ -346,23 +374,21 @@ class TestFileListTest(TempDirTestCase):
def test_include_pattern(self):
# return False if no match
file_list = FileList()
file_list.set_allfiles([])
self.make_files([])
assert not file_list.include_pattern('*.py')
# return True if files match
file_list = FileList()
file_list.set_allfiles(['a.py', 'b.txt'])
self.make_files(['a.py', 'b.txt'])
assert file_list.include_pattern('*.py')
# test * matches all files
file_list = FileList()
assert file_list.allfiles is None
file_list.set_allfiles(['a.py', 'b.txt'])
self.make_files(['a.py', 'b.txt'])
file_list.include_pattern('*')
assert file_list.allfiles == ['a.py', 'b.txt']
assert file_list.files == ['a.py', 'b.txt']
def test_process_template(self):
l = make_local_path
def test_process_template_line_invalid(self):
# invalid lines
file_list = FileList()
for action in ('include', 'exclude', 'global-include',
......@@ -377,9 +403,11 @@ class TestFileListTest(TempDirTestCase):
else:
assert False, "Should have thrown an error"
def test_include(self):
l = make_local_path
# include
file_list = FileList()
file_list.set_allfiles(['a.py', 'b.txt', l('d/c.py')])
self.make_files(['a.py', 'b.txt', l('d/c.py')])
file_list.process_template_line('include *.py')
assert file_list.files == ['a.py']
......@@ -389,6 +417,8 @@ class TestFileListTest(TempDirTestCase):
assert file_list.files == ['a.py']
self.assertWarnings()
def test_exclude(self):
l = make_local_path
# exclude
file_list = FileList()
file_list.files = ['a.py', 'b.txt', l('d/c.py')]
......@@ -401,9 +431,11 @@ class TestFileListTest(TempDirTestCase):
assert file_list.files == ['b.txt', l('d/c.py')]
self.assertWarnings()
def test_global_include(self):
l = make_local_path
# global-include
file_list = FileList()
file_list.set_allfiles(['a.py', 'b.txt', l('d/c.py')])
self.make_files(['a.py', 'b.txt', l('d/c.py')])
file_list.process_template_line('global-include *.py')
assert file_list.files == ['a.py', l('d/c.py')]
......@@ -413,6 +445,8 @@ class TestFileListTest(TempDirTestCase):
assert file_list.files == ['a.py', l('d/c.py')]
self.assertWarnings()
def test_global_exclude(self):
l = make_local_path
# global-exclude
file_list = FileList()
file_list.files = ['a.py', 'b.txt', l('d/c.py')]
......@@ -425,10 +459,11 @@ class TestFileListTest(TempDirTestCase):
assert file_list.files == ['b.txt']
self.assertWarnings()
def test_recursive_include(self):
l = make_local_path
# recursive-include
file_list = FileList()
file_list.set_allfiles(['a.py', l('d/b.py'), l('d/c.txt'),
l('d/d/e.py')])
self.make_files(['a.py', l('d/b.py'), l('d/c.txt'), l('d/d/e.py')])
file_list.process_template_line('recursive-include d *.py')
assert file_list.files == [l('d/b.py'), l('d/d/e.py')]
......@@ -438,6 +473,8 @@ class TestFileListTest(TempDirTestCase):
assert file_list.files == [l('d/b.py'), l('d/d/e.py')]
self.assertWarnings()
def test_recursive_exclude(self):
l = make_local_path
# recursive-exclude
file_list = FileList()
file_list.files = ['a.py', l('d/b.py'), l('d/c.txt'), l('d/d/e.py')]
......@@ -450,10 +487,11 @@ class TestFileListTest(TempDirTestCase):
assert file_list.files == ['a.py', l('d/c.txt')]
self.assertWarnings()
def test_graft(self):
l = make_local_path
# graft
file_list = FileList()
file_list.set_allfiles(['a.py', l('d/b.py'), l('d/d/e.py'),
l('f/f.py')])
self.make_files(['a.py', l('d/b.py'), l('d/d/e.py'), l('f/f.py')])
file_list.process_template_line('graft d')
assert file_list.files == [l('d/b.py'), l('d/d/e.py')]
......@@ -463,6 +501,8 @@ class TestFileListTest(TempDirTestCase):
assert file_list.files == [l('d/b.py'), l('d/d/e.py')]
self.assertWarnings()
def test_prune(self):
l = make_local_path
# prune
file_list = FileList()
file_list.files = ['a.py', l('d/b.py'), l('d/d/e.py'), l('f/f.py')]
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment