Commit 8f323d9a authored by Tim Golden

issue9584: Add {} list expansion to glob. Original patch by Mathieu Bridon

parent 10ffea88
...@@ -13,10 +13,10 @@ ...@@ -13,10 +13,10 @@
The :mod:`glob` module finds all the pathnames matching a specified pattern The :mod:`glob` module finds all the pathnames matching a specified pattern
according to the rules used by the Unix shell. No tilde expansion is done, but according to the rules used by the Unix shell. No tilde expansion is done, but
``*``, ``?``, and character ranges expressed with ``[]`` will be correctly ``*``, ``?``, character ranges expressed with ``[]`` and list of options
matched. This is done by using the :func:`os.listdir` and expressed with ``{}`` will be correctly matched. This is done by using the
:func:`fnmatch.fnmatch` functions in concert, and not by actually invoking a :func:`os.listdir` and :func:`fnmatch.fnmatch` functions in concert, and not by
subshell. (For tilde and shell variable expansion, use actually invoking a subshell. (For tilde and shell variable expansion, use
:func:`os.path.expanduser` and :func:`os.path.expandvars`.) :func:`os.path.expanduser` and :func:`os.path.expandvars`.)
...@@ -47,7 +47,8 @@ preserved. :: ...@@ -47,7 +47,8 @@ preserved. ::
['1.gif', 'card.gif'] ['1.gif', 'card.gif']
>>> glob.glob('?.gif') >>> glob.glob('?.gif')
['1.gif'] ['1.gif']
>>> glob.glob('?.{gif,txt}')
['1.gif', '2.txt']
.. seealso:: .. seealso::
......
...@@ -14,6 +14,7 @@ def glob(pathname): ...@@ -14,6 +14,7 @@ def glob(pathname):
""" """
return list(iglob(pathname)) return list(iglob(pathname))
def iglob(pathname): def iglob(pathname):
"""Return an iterator which yields the paths matching a pathname pattern. """Return an iterator which yields the paths matching a pathname pattern.
...@@ -24,21 +25,24 @@ def iglob(pathname): ...@@ -24,21 +25,24 @@ def iglob(pathname):
if os.path.lexists(pathname): if os.path.lexists(pathname):
yield pathname yield pathname
return return
dirname, basename = os.path.split(pathname) pathnames = expand_braces(pathname)
if not dirname: for pathname in pathnames:
yield from glob1(None, basename) dirname, basename = os.path.split(pathname)
return if not dirname:
if has_magic(dirname): yield from glob1(None, basename)
dirs = iglob(dirname) return
else:
dirs = [dirname] if has_magic(dirname):
if has_magic(basename): dirs = iglob(dirname)
glob_in_dir = glob1 else:
else: dirs = [dirname]
glob_in_dir = glob0 if has_magic(basename):
for dirname in dirs: glob_in_dir = glob1
for name in glob_in_dir(dirname, basename): else:
yield os.path.join(dirname, name) glob_in_dir = glob0
for dirname in dirs:
for name in glob_in_dir(dirname, basename):
yield os.path.join(dirname, name)
# These 2 helper functions non-recursively glob inside a literal directory. # These 2 helper functions non-recursively glob inside a literal directory.
# They return a list of basenames. `glob1` accepts a pattern while `glob0` # They return a list of basenames. `glob1` accepts a pattern while `glob0`
...@@ -70,12 +74,37 @@ def glob0(dirname, basename): ...@@ -70,12 +74,37 @@ def glob0(dirname, basename):
return [] return []
magic_check = re.compile('[*?[]') magic_check = re.compile('[*?[{]')
magic_check_bytes = re.compile(b'[*?[]') magic_check_bytes = re.compile(b'[*?[{]')
def has_magic(s): def has_magic(s):
if isinstance(s, bytes): if isinstance(s, bytes):
match = magic_check_bytes.search(s) match = magic_check_bytes.search(s)
else: else:
match = magic_check.search(s) match = magic_check.search(s)
return match is not None return match is not None
# Matches the rightmost '{' that can be paired with a following unescaped
# '}' enclosing at least two characters.  The greedy '.*' pushes the match
# as far right as possible; the lazy '.+?' picks the nearest closing brace
# that is not preceded by a backslash.
brace_matcher = re.compile(r'.*(\{.+?[^\\]\})')
brace_matcher_bytes = re.compile(br'.*(\{.+?[^\\]\})')

# A closing brace that has not already been backslash-escaped.
_unescaped_close = re.compile(r'(?<!\\)\}')
_unescaped_close_bytes = re.compile(br'(?<!\\)\}')


def expand_braces(text):
    """Expand comma-separated ``{}`` alternatives in *text*.

    Find the rightmost, innermost set of braces and, if it contains a
    comma-separated list, substitute each item in turn and expand the
    result recursively (any item may itself contain a braced list).
    A brace group without a comma is kept literally.

    *text* may be ``str`` or ``bytes``; the expanded strings have the same
    type.  A backslash-escaped closing brace (``\\}``) never terminates a
    group and is emitted as a plain ``}``.

    Return the full set of expanded strings.
    """
    if isinstance(text, bytes):
        matcher, closer = brace_matcher_bytes, _unescaped_close_bytes
        comma, escaped_close, plain_close = b',', b'\\}', b'}'
    else:
        matcher, closer = brace_matcher, _unescaped_close
        comma, escaped_close, plain_close = ',', '\\}', '}'

    match = matcher.search(text)
    if match is None:
        # Nothing left to expand: drop the escapes (both the caller's and
        # the ones added below to mark literal groups).
        return {text.replace(escaped_close, plain_close)}

    open_brace, close_brace = match.span(1)
    head, tail = text[:open_brace], text[close_brace:]
    # Slice off exactly the delimiting pair; str.strip('{}') would also eat
    # literal braces sitting at either edge of the content.
    inner = text[open_brace + 1:close_brace - 1]

    res = set()
    if comma in inner:
        for item in inner.split(comma):
            res.update(expand_braces(head + item + tail))
    else:
        # Not a list: mark the group as literal by escaping its closing
        # braces so the matcher skips it on the next pass.  Only braces that
        # are not escaped yet are touched; escaping twice would leave a
        # stray backslash in the final result.
        literal = closer.sub(lambda m: escaped_close, text[open_brace:close_brace])
        res.update(expand_braces(head + literal + tail))
    return res
...@@ -5,7 +5,7 @@ import glob ...@@ -5,7 +5,7 @@ import glob
import os import os
import shutil import shutil
class GlobTests(unittest.TestCase): class GlobTestsBase(unittest.TestCase):
def norm(self, *parts): def norm(self, *parts):
return os.path.normpath(os.path.join(self.tempdir, *parts)) return os.path.normpath(os.path.join(self.tempdir, *parts))
...@@ -45,6 +45,8 @@ class GlobTests(unittest.TestCase): ...@@ -45,6 +45,8 @@ class GlobTests(unittest.TestCase):
def assertSequencesEqual_noorder(self, l1, l2): def assertSequencesEqual_noorder(self, l1, l2):
self.assertEqual(set(l1), set(l2)) self.assertEqual(set(l1), set(l2))
class GlobTests(GlobTestsBase):
def test_glob_literal(self): def test_glob_literal(self):
eq = self.assertSequencesEqual_noorder eq = self.assertSequencesEqual_noorder
eq(self.glob('a'), [self.norm('a')]) eq(self.glob('a'), [self.norm('a')])
...@@ -105,9 +107,67 @@ class GlobTests(unittest.TestCase): ...@@ -105,9 +107,67 @@ class GlobTests(unittest.TestCase):
eq(self.glob('sym1'), [self.norm('sym1')]) eq(self.glob('sym1'), [self.norm('sym1')])
eq(self.glob('sym2'), [self.norm('sym2')]) eq(self.glob('sym2'), [self.norm('sym2')])
class GlobBracesTests(GlobTestsBase):
    """Tests for ``{}`` list expansion in glob patterns.

    NOTE(review): the fixture files referenced below ('aaa/zzzF', 'aab/F',
    'a/D', 'a/bcd/...') are presumably created by the base class setUp --
    confirm against GlobTestsBase.
    """

    def setUp(self):
        super().setUp()
        # Extra files whose names contain literal braces, used by the
        # "should not be expanded" edge cases.
        for literal_name in ('c{}d', 'c{deg', 'c{dfg',
                             'cd{f}g', 'ce{f}g',
                             'cdf}g', 'cef}g'):
            self.mktemp(literal_name)

    def match_pattern_with_results(self, patterns, paths):
        """Glob *patterns* and compare against *paths*, ignoring order.

        *patterns* is a sequence of path components passed to self.glob();
        *paths* is a sequence of component sequences, each joined and
        normalised via self.norm() to form one expected result.
        """
        expected = [self.norm(os.path.join(*parts)) for parts in paths]
        actual = [os.path.normpath(found) for found in self.glob(*patterns)]
        self.assertSequencesEqual_noorder(actual, expected)

    def test_two_terms(self):
        self.match_pattern_with_results(
            ['a{aa,ab}'],
            [["aaa"], ["aab"]])

    def test_missing_first_plus_nested(self):
        self.match_pattern_with_results(
            ['a{,a{a,b}}'],
            [['a'], ['aaa'], ['aab']])

    def test_one_subpath_with_two_file_terms(self):
        self.match_pattern_with_results(
            ['a', '{D,bcd}'],
            [['a', 'D'], ['a', 'bcd']])

    def test_two_subpath_terms_with_two_file_terms(self):
        self.match_pattern_with_results(
            ['{aaa,aab}', '{F,zzzF}'],
            [('aaa', 'zzzF'), ('aab', 'F')])

    def test_two_subpath_terms_with_wildcard_file_term(self):
        self.match_pattern_with_results(
            ['aa{a,b}', '*F'],
            [('aaa', 'zzzF'), ('aab', 'F')])

    def test_wildcard_subpath_with_file_missing_first_term(self):
        self.match_pattern_with_results(
            ['aa?', '{,zzz}F'],
            [('aaa', 'zzzF'), ('aab', 'F')])

    #
    # Edge cases where braces should not be expanded
    #
    def test_empty_braces(self):
        found = self.glob('c{}d')
        self.assertSequencesEqual_noorder(found, [self.norm('c{}d')])

    def test_missing_end_brace(self):
        found = self.glob('c{d{e,f}g')
        wanted = [self.norm(name) for name in ('c{deg', 'c{dfg')]
        self.assertSequencesEqual_noorder(found, wanted)

    def test_second_brace_one_term(self):
        found = self.glob('c{d,e}{f}g')
        wanted = [self.norm(name) for name in ('cd{f}g', 'ce{f}g')]
        self.assertSequencesEqual_noorder(found, wanted)

    def test_outer_term_missing_first_brace(self):
        found = self.glob('c{d,e}f}g')
        wanted = [self.norm(name) for name in ('cdf}g', 'cef}g')]
        self.assertSequencesEqual_noorder(found, wanted)

    #
    # Braces containing folder separators
    #
    def test_embedded_separator1(self):
        self.match_pattern_with_results(
            ['a/{D,bcd/{EF,efg}}'],
            [('a', 'D'), ('a', 'bcd', 'EF'), ('a', 'bcd', 'efg')])

    def test_embedded_separator2(self):
        self.match_pattern_with_results(
            ['aa{a/zzz,b/}F'],
            [('aaa', 'zzzF'), ('aab', 'F')])
def test_main(): def test_main():
run_unittest(GlobTests) run_unittest(GlobTests, GlobBracesTests)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -10,6 +10,9 @@ What's New in Python 3.4.0 Alpha 1? ...@@ -10,6 +10,9 @@ What's New in Python 3.4.0 Alpha 1?
Core and Builtins Core and Builtins
----------------- -----------------
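- Issue #9584: glob.glob() and glob.iglob() now expand comma-separated
alternatives enclosed in braces, so that ``'?.{gif,txt}'`` matches both
``'?.gif'`` and ``'?.txt'``. Original patch by Mathieu Bridon.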
- Issue #8271: the utf-8 decoder now outputs the correct number of U+FFFD - Issue #8271: the utf-8 decoder now outputs the correct number of U+FFFD
characters when used with the 'replace' error handler on invalid utf-8 characters when used with the 'replace' error handler on invalid utf-8
sequences. Patch by Serhiy Storchaka, tests by Ezio Melotti. sequences. Patch by Serhiy Storchaka, tests by Ezio Melotti.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment