Commit 31626bce authored by Guido van Rossum

re -> regex conversions by Sjoerd.

parent f81e5b9c
...@@ -14,13 +14,12 @@ ...@@ -14,13 +14,12 @@
import re import re
# Compiled regular expression to "decode" a number # Compiled regular expression to "decode" a number
decoder = re.compile( \ decoder = re.compile(r'^([-+]?)0*(\d*)((?:\.\d*)?)(([eE][-+]?\d+)?)$')
'^([-+]?)0*([0-9]*)((\.[0-9]*)?)(([eE][-+]?[0-9]+)?)$')
# \0 the whole thing # \0 the whole thing
# \1 leading sign or empty # \1 leading sign or empty
# \2 digits left of decimal point # \2 digits left of decimal point
# \3 fraction (empty or begins with point) # \3 fraction (empty or begins with point)
# \5 exponent part (empty or begins with 'e' or 'E') # \4 exponent part (empty or begins with 'e' or 'E')
NotANumber = 'fpformat.NotANumber' NotANumber = 'fpformat.NotANumber'
...@@ -30,9 +29,9 @@ NotANumber = 'fpformat.NotANumber' ...@@ -30,9 +29,9 @@ NotANumber = 'fpformat.NotANumber'
# fraction is 0 or more digits # fraction is 0 or more digits
# expo is an integer # expo is an integer
def extract(s): def extract(s):
m = decoder.match(s) res = decoder.match(s)
if not m: raise NotANumber if res is None: raise NotANumber
sign, intpart, fraction, exppart = m.group(1, 2, 3, 5) sign, intpart, fraction, exppart = res.group(1,2,3,4)
if sign == '+': sign = '' if sign == '+': sign = ''
if fraction: fraction = fraction[1:] if fraction: fraction = fraction[1:]
if exppart: expo = eval(exppart[1:]) if exppart: expo = eval(exppart[1:])
...@@ -135,3 +134,4 @@ def test(): ...@@ -135,3 +134,4 @@ def test():
print x, fix(x, digs), sci(x, digs) print x, fix(x, digs), sci(x, digs)
except (EOFError, KeyboardInterrupt): except (EOFError, KeyboardInterrupt):
pass pass
...@@ -27,21 +27,16 @@ CHARSET = 'ISO-8859-1' # default charset for non-US-ASCII mail ...@@ -27,21 +27,16 @@ CHARSET = 'ISO-8859-1' # default charset for non-US-ASCII mail
QUOTE = '> ' # string replies are quoted with QUOTE = '> ' # string replies are quoted with
# End configure # End configure
import regex, regsub, string import re, string
qp = regex.compile('^content-transfer-encoding:[ \t]*quoted-printable', qp = re.compile('^content-transfer-encoding:\\s*quoted-printable', re.I)
regex.casefold) base64_re = re.compile('^content-transfer-encoding:\\s*base64', re.I)
base64_re = regex.compile('^content-transfer-encoding:[ \t]*base64', mp = re.compile('^content-type:.*multipart/.*boundary="?([^;"\n]*)', re.I|re.S)
regex.casefold) chrset = re.compile('^(content-type:.*charset=")(us-ascii|iso-8859-[0-9]+)(".*)', re.I|re.S)
mp = regex.compile('^content-type:[\000-\377]*multipart/[\000-\377]*boundary="?\\([^;"\n]*\\)', he = re.compile('^-*\n')
regex.casefold) mime_code = re.compile('=([0-9a-f][0-9a-f])', re.I)
chrset = regex.compile('^\\(content-type:.*charset="\\)\\(us-ascii\\|iso-8859-[0-9]+\\)\\("[\000-\377]*\\)', mime_head = re.compile('=\\?iso-8859-1\\?q\\?([^? \t\n]+)\\?=', re.I)
regex.casefold) repl = re.compile('^subject:\\s+re: ', re.I)
he = regex.compile('^-*$')
mime_code = regex.compile('=\\([0-9a-f][0-9a-f]\\)', regex.casefold)
mime_head = regex.compile('=\\?iso-8859-1\\?q\\?\\([^? \t\n]+\\)\\?=',
regex.casefold)
repl = regex.compile('^subject:[ \t]+re: ', regex.casefold)
class File: class File:
'''A simple fake file object that knows about limited '''A simple fake file object that knows about limited
...@@ -81,7 +76,7 @@ class HeaderFile: ...@@ -81,7 +76,7 @@ class HeaderFile:
line = self.file.readline() line = self.file.readline()
if not line: if not line:
return line return line
if he.match(line) >= 0: if he.match(line):
return line return line
while 1: while 1:
self.peek = self.file.readline() self.peek = self.file.readline()
...@@ -95,26 +90,26 @@ def mime_decode(line): ...@@ -95,26 +90,26 @@ def mime_decode(line):
'''Decode a single line of quoted-printable text to 8bit.''' '''Decode a single line of quoted-printable text to 8bit.'''
newline = '' newline = ''
while 1: while 1:
i = mime_code.search(line) res = mime_code.search(line)
if i < 0: if res is None:
break break
newline = newline + line[:i] + \ newline = newline + line[:res.start(0)] + \
chr(string.atoi(mime_code.group(1), 16)) chr(string.atoi(res.group(1), 16))
line = line[i+3:] line = line[res.end(0):]
return newline + line return newline + line
def mime_decode_header(line): def mime_decode_header(line):
'''Decode a header line to 8bit.''' '''Decode a header line to 8bit.'''
newline = '' newline = ''
while 1: while 1:
i = mime_head.search(line) res = mime_head.search(line)
if i < 0: if res is None:
break break
match0, match1 = mime_head.group(0, 1) match = res.group(1)
# convert underscores to spaces (before =XX conversion!) # convert underscores to spaces (before =XX conversion!)
match1 = string.join(string.split(match1, '_'), ' ') match = string.join(string.split(match, '_'), ' ')
newline = newline + line[:i] + mime_decode(match1) newline = newline + line[:res.start(0)] + mime_decode(match)
line = line[i + len(match0):] line = line[res.end(0):]
return newline + line return newline + line
def unmimify_part(ifile, ofile, decode_base64 = 0): def unmimify_part(ifile, ofile, decode_base64 = 0):
...@@ -140,19 +135,20 @@ def unmimify_part(ifile, ofile, decode_base64 = 0): ...@@ -140,19 +135,20 @@ def unmimify_part(ifile, ofile, decode_base64 = 0):
else: else:
pref = '' pref = ''
line = mime_decode_header(line) line = mime_decode_header(line)
if qp.match(line) >= 0: if qp.match(line):
quoted_printable = 1 quoted_printable = 1
continue # skip this header continue # skip this header
if decode_base64 and base64_re.match(line) >= 0: if decode_base64 and base64_re.match(line):
is_base64 = 1 is_base64 = 1
continue continue
ofile.write(pref + line) ofile.write(pref + line)
if not prefix and repl.match(line) >= 0: if not prefix and repl.match(line):
# we're dealing with a reply message # we're dealing with a reply message
is_repl = 1 is_repl = 1
if mp.match(line) >= 0: mp_res = mp.match(line)
multipart = '--' + mp.group(1) if mp_res:
if he.match(line) >= 0: multipart = '--' + mp_res.group(1)
if he.match(line):
break break
if is_repl and (quoted_printable or multipart): if is_repl and (quoted_printable or multipart):
is_repl = 0 is_repl = 0
...@@ -162,7 +158,7 @@ def unmimify_part(ifile, ofile, decode_base64 = 0): ...@@ -162,7 +158,7 @@ def unmimify_part(ifile, ofile, decode_base64 = 0):
line = ifile.readline() line = ifile.readline()
if not line: if not line:
return return
line = regsub.gsub(mime_head, '\\1', line) line = re.sub(mime_head, '\\1', line)
if prefix and line[:len(prefix)] == prefix: if prefix and line[:len(prefix)] == prefix:
line = line[len(prefix):] line = line[len(prefix):]
pref = prefix pref = prefix
...@@ -216,8 +212,8 @@ def unmimify(infile, outfile, decode_base64 = 0): ...@@ -216,8 +212,8 @@ def unmimify(infile, outfile, decode_base64 = 0):
unmimify_part(nifile, ofile, decode_base64) unmimify_part(nifile, ofile, decode_base64)
ofile.flush() ofile.flush()
mime_char = regex.compile('[=\240-\377]') # quote these chars in body mime_char = re.compile('[=\240-\377]') # quote these chars in body
mime_header_char = regex.compile('[=?\240-\377]') # quote these in header mime_header_char = re.compile('[=?\240-\377]') # quote these in header
def mime_encode(line, header): def mime_encode(line, header):
'''Code a single line as quoted-printable. '''Code a single line as quoted-printable.
...@@ -232,12 +228,12 @@ def mime_encode(line, header): ...@@ -232,12 +228,12 @@ def mime_encode(line, header):
newline = string.upper('=%02x' % ord('F')) newline = string.upper('=%02x' % ord('F'))
line = line[1:] line = line[1:]
while 1: while 1:
i = reg.search(line) res = reg.search(line)
if i < 0: if res is None:
break break
newline = newline + line[:i] + \ newline = newline + line[:res.start(0)] + \
string.upper('=%02x' % ord(line[i])) string.upper('=%02x' % ord(line[res.group(0)]))
line = line[i+1:] line = line[res.end(0):]
line = newline + line line = newline + line
newline = '' newline = ''
...@@ -250,25 +246,25 @@ def mime_encode(line, header): ...@@ -250,25 +246,25 @@ def mime_encode(line, header):
line = line[i:] line = line[i:]
return newline + line return newline + line
mime_header = regex.compile('\\([ \t(]\\|^\\)\\([-a-zA-Z0-9_+]*[\240-\377][-a-zA-Z0-9_+\240-\377]*\\)\\([ \t)]\\|$\\)') mime_header = re.compile('([ \t(]|^)([-a-zA-Z0-9_+]*[\240-\377][-a-zA-Z0-9_+\240-\377]*)([ \t)]|\n)')
def mime_encode_header(line): def mime_encode_header(line):
'''Code a single header line as quoted-printable.''' '''Code a single header line as quoted-printable.'''
newline = '' newline = ''
while 1: while 1:
i = mime_header.search(line) res = mime_header.search(line)
if i < 0: if res is None:
break break
newline = newline + line[:i] + mime_header.group(1) + \ newline = newline + line[:res.start(0)] + res.group(1) + \
'=?' + CHARSET + '?Q?' + \ '=?' + CHARSET + '?Q?' + \
mime_encode(mime_header.group(2), 1) + \ mime_encode(res.group(2), 1) + \
'?=' + mime_header.group(3) '?=' + res.group(3)
line = line[i+len(mime_header.group(0)):] line = line[res.end(0):]
return newline + line return newline + line
mv = regex.compile('^mime-version:', regex.casefold) mv = re.compile('^mime-version:', re.I)
cte = regex.compile('^content-transfer-encoding:', regex.casefold) cte = re.compile('^content-transfer-encoding:', re.I)
iso_char = regex.compile('[\240-\377]') iso_char = re.compile('[\240-\377]')
def mimify_part(ifile, ofile, is_mime): def mimify_part(ifile, ofile, is_mime):
'''Convert an 8bit part of a MIME mail message to quoted-printable.''' '''Convert an 8bit part of a MIME mail message to quoted-printable.'''
...@@ -286,19 +282,20 @@ def mimify_part(ifile, ofile, is_mime): ...@@ -286,19 +282,20 @@ def mimify_part(ifile, ofile, is_mime):
line = hfile.readline() line = hfile.readline()
if not line: if not line:
break break
if not must_quote_header and iso_char.search(line) >= 0: if not must_quote_header and iso_char.search(line):
must_quote_header = 1 must_quote_header = 1
if mv.match(line) >= 0: if mv.match(line):
is_mime = 1 is_mime = 1
if cte.match(line) >= 0: if cte.match(line):
has_cte = 1 has_cte = 1
if qp.match(line) >= 0: if qp.match(line):
is_qp = 1 is_qp = 1
elif base64_re.match(line) >= 0: elif base64_re.match(line):
is_base64 = 1 is_base64 = 1
if mp.match(line) >= 0: mp_res = mp.match(line)
multipart = '--' + mp.group(1) if mp_res:
if he.match(line) >= 0: multipart = '--' + mp_res.group(1)
if he.match(line):
header_end = line header_end = line
break break
header.append(line) header.append(line)
...@@ -328,7 +325,7 @@ def mimify_part(ifile, ofile, is_mime): ...@@ -328,7 +325,7 @@ def mimify_part(ifile, ofile, is_mime):
line = mime_decode(line) line = mime_decode(line)
message.append(line) message.append(line)
if not has_iso_chars: if not has_iso_chars:
if iso_char.search(line) >= 0: if iso_char.search(line):
has_iso_chars = must_quote_body = 1 has_iso_chars = must_quote_body = 1
if not must_quote_body: if not must_quote_body:
if len(line) > MAXLEN: if len(line) > MAXLEN:
...@@ -338,16 +335,17 @@ def mimify_part(ifile, ofile, is_mime): ...@@ -338,16 +335,17 @@ def mimify_part(ifile, ofile, is_mime):
for line in header: for line in header:
if must_quote_header: if must_quote_header:
line = mime_encode_header(line) line = mime_encode_header(line)
if chrset.match(line) >= 0: chrset_res = chrset.match(line)
if chrset_res:
if has_iso_chars: if has_iso_chars:
# change us-ascii into iso-8859-1 # change us-ascii into iso-8859-1
if string.lower(chrset.group(2)) == 'us-ascii': if string.lower(chrset_res.group(2)) == 'us-ascii':
line = chrset.group(1) + \ line = chrset_res.group(1) + \
CHARSET + chrset.group(3) CHARSET + chrset_res.group(3)
else: else:
# change iso-8859-* into us-ascii # change iso-8859-* into us-ascii
line = chrset.group(1) + 'us-ascii' + chrset.group(3) line = chrset_res.group(1) + 'us-ascii' + chrset_res.group(3)
if has_cte and cte.match(line) >= 0: if has_cte and cte.match(line):
line = 'Content-Transfer-Encoding: ' line = 'Content-Transfer-Encoding: '
if is_base64: if is_base64:
line = line + 'base64\n' line = line + 'base64\n'
...@@ -445,3 +443,4 @@ if __name__ == '__main__' or (len(sys.argv) > 0 and sys.argv[0] == 'mimify'): ...@@ -445,3 +443,4 @@ if __name__ == '__main__' or (len(sys.argv) > 0 and sys.argv[0] == 'mimify'):
if decode_base64: if decode_base64:
encode_args = encode_args + (decode_base64,) encode_args = encode_args + (decode_base64,)
apply(encode, encode_args) apply(encode, encode_args)
...@@ -35,17 +35,17 @@ the hell out of the parser, but it usually works.''' ...@@ -35,17 +35,17 @@ the hell out of the parser, but it usually works.'''
import os import os
import sys import sys
import imp import imp
import regex import re
import string import string
id = '\\(<id>[A-Za-z_][A-Za-z0-9_]*\\)' # match identifier id = '(?P<id>[A-Za-z_][A-Za-z0-9_]*)' # match identifier
blank_line = regex.compile('^[ \t]*\\($\\|#\\)') blank_line = re.compile('^[ \t]*($|#)')
is_class = regex.symcomp('^class[ \t]+'+id+'[ \t]*\\(<sup>([^)]*)\\)?[ \t]*:') is_class = re.compile('^class[ \t]+'+id+'[ \t]*(?P<sup>\([^)]*\))?[ \t]*:')
is_method = regex.symcomp('^[ \t]+def[ \t]+'+id+'[ \t]*(') is_method = re.compile('^[ \t]+def[ \t]+'+id+'[ \t]*\(')
is_import = regex.symcomp('^import[ \t]*\\(<imp>[^#]+\\)') is_import = re.compile('^import[ \t]*(?P<imp>[^#]+)')
is_from = regex.symcomp('^from[ \t]+'+id+'[ \t]+import[ \t]+\\(<imp>[^#]+\\)') is_from = re.compile('^from[ \t]+'+id+'[ \t]+import[ \t]+(?P<imp>[^#]+)')
dedent = regex.compile('^[^ \t]') dedent = re.compile('^[^ \t]')
indent = regex.compile('^[^ \t]*') indent = re.compile('^[^ \t]*')
_modules = {} # cache of modules we've seen _modules = {} # cache of modules we've seen
...@@ -116,14 +116,16 @@ def readmodule(module, path = []): ...@@ -116,14 +116,16 @@ def readmodule(module, path = []):
break break
lineno = lineno + 1 # count lines lineno = lineno + 1 # count lines
line = line[:-1] # remove line feed line = line[:-1] # remove line feed
if blank_line.match(line) >= 0: if blank_line.match(line):
# ignore blank (and comment only) lines # ignore blank (and comment only) lines
continue continue
## if indent.match(line) >= 0: ## res = indent.match(line)
## indentation = len(string.expandtabs(indent.group(0), 8)) ## if res:
if is_import.match(line) >= 0: ## indentation = len(string.expandtabs(res.group(0), 8))
res = is_import.match(line)
if res:
# import module # import module
for n in string.splitfields(is_import.group('imp'), ','): for n in string.splitfields(res.group('imp'), ','):
n = string.strip(n) n = string.strip(n)
try: try:
# recursively read the # recursively read the
...@@ -133,10 +135,11 @@ def readmodule(module, path = []): ...@@ -133,10 +135,11 @@ def readmodule(module, path = []):
print 'module',n,'not found' print 'module',n,'not found'
pass pass
continue continue
if is_from.match(line) >= 0: res = is_from.match(line)
if res:
# from module import stuff # from module import stuff
mod = is_from.group('id') mod = res.group('id')
names = string.splitfields(is_from.group('imp'), ',') names = string.splitfields(res.group('imp'), ',')
try: try:
# recursively read the imported module # recursively read the imported module
d = readmodule(mod, path) d = readmodule(mod, path)
...@@ -161,10 +164,11 @@ def readmodule(module, path = []): ...@@ -161,10 +164,11 @@ def readmodule(module, path = []):
not dict.has_key(n): not dict.has_key(n):
dict[n] = d[n] dict[n] = d[n]
continue continue
if is_class.match(line) >= 0: res = is_class.match(line)
if res:
# we found a class definition # we found a class definition
class_name = is_class.group('id') class_name = res.group('id')
inherit = is_class.group('sup') inherit = res.group('sup')
if inherit: if inherit:
# the class inherits from other classes # the class inherits from other classes
inherit = string.strip(inherit[1:-1]) inherit = string.strip(inherit[1:-1])
...@@ -194,15 +198,17 @@ def readmodule(module, path = []): ...@@ -194,15 +198,17 @@ def readmodule(module, path = []):
cur_class = Class(module, class_name, inherit, file, lineno) cur_class = Class(module, class_name, inherit, file, lineno)
dict[class_name] = cur_class dict[class_name] = cur_class
continue continue
if is_method.match(line) >= 0: res = is_method.match(line)
if res:
# found a method definition # found a method definition
if cur_class: if cur_class:
# and we know the class it belongs to # and we know the class it belongs to
meth_name = is_method.group('id') meth_name = res.group('id')
cur_class._addmethod(meth_name, lineno) cur_class._addmethod(meth_name, lineno)
continue continue
if dedent.match(line) >= 0: if dedent.match(line):
# end of class definition # end of class definition
cur_class = None cur_class = None
f.close() f.close()
return dict return dict
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment