Commit be0b62ca authored by Guido van Rossum

Added findall() to RegexObject -- return a list of all matches in a
string.  Added groupdict() to MatchObject -- return the named groups
as a dict.  Added a default argument to groups() to specify what to
return for unmatched groups; groupdict() also has this.
parent 80884075
...@@ -57,6 +57,11 @@ def split(pattern, string, maxsplit=0): ...@@ -57,6 +57,11 @@ def split(pattern, string, maxsplit=0):
pattern = _cachecompile(pattern) pattern = _cachecompile(pattern)
return pattern.split(string, maxsplit) return pattern.split(string, maxsplit)
def findall(pattern, string):
    """Find all matches of pattern in string.

    pattern may be either a string or a pattern object.  A string is
    first turned into a pattern object by _cachecompile(); any other
    value is used as is.  The result is whatever the pattern object's
    findall() method returns for string.
    """
    if type(pattern) != type(''):
        # Not a string: use it directly as the pattern object.
        return pattern.findall(string)
    return _cachecompile(pattern).findall(string)
def escape(pattern): def escape(pattern):
"Escape all non-alphanumeric characters in pattern." "Escape all non-alphanumeric characters in pattern."
result = [] result = []
...@@ -80,6 +85,7 @@ def compile(pattern, flags=0): ...@@ -80,6 +85,7 @@ def compile(pattern, flags=0):
# #
class RegexObject: class RegexObject:
def __init__(self, pattern, flags, code, groupindex): def __init__(self, pattern, flags, code, groupindex):
self.code = code self.code = code
self.flags = flags self.flags = flags
...@@ -171,7 +177,7 @@ class RegexObject: ...@@ -171,7 +177,7 @@ class RegexObject:
return (string.join(results, ''), n) return (string.join(results, ''), n)
def split(self, source, maxsplit=0): def split(self, source, maxsplit=0):
"""Split the \var{source} string by the occurrences of the pattern, """Split the source string by the occurrences of the pattern,
returning a list containing the resulting substrings.""" returning a list containing the resulting substrings."""
if maxsplit < 0: if maxsplit < 0:
...@@ -198,13 +204,38 @@ class RegexObject: ...@@ -198,13 +204,38 @@ class RegexObject:
results.append(source[lastmatch:i]) results.append(source[lastmatch:i])
g = m.groups() g = m.groups()
if g: if g:
if type(g)==type( "" ): g = [g]
results[len(results):] = list(g) results[len(results):] = list(g)
pos = lastmatch = j pos = lastmatch = j
n = n + 1 n = n + 1
results.append(source[lastmatch:]) results.append(source[lastmatch:])
return results return results
def findall(self, string):
    """Return a list of all non-overlapping matches in the string.

    If the pattern has no groups, each item is the whole matched
    substring.  If it has exactly one group, each item is that
    group's value; if it has more than one group, each item is the
    tuple returned by the match object's groups() method.

    Empty matches are included in the result; each one is reported
    exactly once.
    """
    pos = 0
    n = len(string)
    result = []
    # pos == n is still searched so an empty match at the very end
    # of the string is found.
    while pos <= n:
        m = self.search(string, pos)
        if not m:
            break
        gr = m.groups()
        if not gr:
            gr = m.group()
        elif len(gr) == 1:
            gr = gr[0]
        result.append(gr)
        start = m.start()
        end = m.end()
        if end > start:
            pos = end
        else:
            # Empty match: continue one character past it.  Resuming
            # at `end` (as max(end, pos+1) did whenever the empty
            # match lay ahead of pos) would find the same empty match
            # again and report it twice.
            pos = end + 1
    return result
# The following 3 functions were contributed by Mike Fletcher, and # The following 3 functions were contributed by Mike Fletcher, and
# allow pickling and unpickling of RegexObject instances. # allow pickling and unpickling of RegexObject instances.
def __getinitargs__(self): def __getinitargs__(self):
...@@ -221,6 +252,7 @@ class RegexObject: ...@@ -221,6 +252,7 @@ class RegexObject:
self.code = apply(pcre_compile, statetuple) self.code = apply(pcre_compile, statetuple)
class MatchObject: class MatchObject:
def __init__(self, re, string, pos, endpos, regs): def __init__(self, re, string, pos, endpos, regs):
self.re = re self.re = re
self.string = string self.string = string
...@@ -234,7 +266,7 @@ class MatchObject: ...@@ -234,7 +266,7 @@ class MatchObject:
try: try:
g = self.re.groupindex[g] g = self.re.groupindex[g]
except (KeyError, TypeError): except (KeyError, TypeError):
raise IndexError, ('group "' + g + '" is undefined') raise IndexError, 'group %s is undefined' % `g`
return self.regs[g][0] return self.regs[g][0]
def end(self, g = 0): def end(self, g = 0):
...@@ -243,31 +275,31 @@ class MatchObject: ...@@ -243,31 +275,31 @@ class MatchObject:
try: try:
g = self.re.groupindex[g] g = self.re.groupindex[g]
except (KeyError, TypeError): except (KeyError, TypeError):
raise IndexError, ('group "' + g + '" is undefined') raise IndexError, 'group %s is undefined' % `g`
return self.regs[g][1] return self.regs[g][1]
def span(self, g = 0): def span(self, g = 0):
"""Return a tuple containing the start,end of the substring "Return (start, end) of the substring matched by group g"
matched by group g"""
if type(g) == type(''): if type(g) == type(''):
try: try:
g = self.re.groupindex[g] g = self.re.groupindex[g]
except (KeyError, TypeError): except (KeyError, TypeError):
raise IndexError, ('group "' + g + '" is undefined') raise IndexError, 'group %s is undefined' % `g`
return self.regs[g] return self.regs[g]
def groups(self): def groups(self, default=None):
"Return a tuple containing all subgroups of the match object" "Return a tuple containing all subgroups of the match object"
result = [] result = []
for g in range(1, self.re._num_regs): for g in range(1, self.re._num_regs):
if (self.regs[g][0] == -1) or (self.regs[g][1] == -1): a, b = self.regs[g]
result.append(None) if a == -1 or b == -1:
result.append(default)
else: else:
result.append(self.string[self.regs[g][0]:self.regs[g][1]]) result.append(self.string[a:b])
return tuple(result) return tuple(result)
def group(self, *groups): def group(self, *groups):
"Return one or more groups of the match." "Return one or more groups of the match"
if len(groups) == 0: if len(groups) == 0:
groups = (0,) groups = (0,)
result = [] result = []
...@@ -276,15 +308,28 @@ class MatchObject: ...@@ -276,15 +308,28 @@ class MatchObject:
try: try:
g = self.re.groupindex[g] g = self.re.groupindex[g]
except (KeyError, TypeError): except (KeyError, TypeError):
raise IndexError, ('group "' + g + '" is undefined') raise IndexError, 'group %s is undefined' % `g`
if len(self.regs)<=g: raise IndexError, ('group "' + str(g) + '" is undefined') if g >= len(self.regs):
elif (self.regs[g][0] == -1) or (self.regs[g][1] == -1): raise IndexError, 'group %s is undefined' % `g`
a, b = self.regs[g]
if a == -1 or b == -1:
result.append(None) result.append(None)
else: else:
result.append(self.string[self.regs[g][0]:self.regs[g][1]]) result.append(self.string[a:b])
if len(result) > 1: if len(result) > 1:
return tuple(result) return tuple(result)
elif len(result) == 1: elif len(result) == 1:
return result[0] return result[0]
else: else:
return () return ()
def groupdict(self, default=None):
    """Return a dictionary containing all named subgroups of the match.

    The keys are the names in the pattern's groupindex mapping; each
    value is the substring of the searched string spanned by that
    group.  A group whose recorded start or end offset is -1 is
    stored as `default` instead.
    """
    named = {}
    for name, index in self.re.groupindex.items():
        start, stop = self.regs[index]
        if start != -1 and stop != -1:
            named[name] = self.string[start:stop]
        else:
            # An offset of -1 selects the caller-supplied default.
            named[name] = default
    return named
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment