Commit 7cc767a5 authored by Guido van Rossum

Entirely rewritten parseaddr() function by Sjoerd Mullender.

(Includes a patch he sent me a few days later.)
parent b0dac34b
...@@ -307,90 +307,129 @@ def unquote(str): ...@@ -307,90 +307,129 @@ def unquote(str):
# Parse an address into (name, address) tuple # Parse an address into (name, address) tuple
# (By Sjoerd Mullender)
# String exception raised by parseaddr() (with the message 'syntax error')
# when '<' or '>' appears in an unexpected position.
error = 'parseaddr.error'
# Matches any character that requires a previously quoted token to be
# quoted again on output: square brackets, parentheses, angle brackets,
# , . ; : @ backslash, double quote, space, the control characters
# \000-\037, and the characters \177-\377.
specials = regex.compile('[][()<>,.;:@\\" \000-\037\177-\377]')
# Return str enclosed in double quotes, with every backslash doubled and
# every embedded double quote preceded by a backslash.
# The inner split/join pair doubles the backslashes first, so the
# backslashes that the outer pair puts in front of double quotes are not
# doubled again.
def quote(str):
return '"%s"' % string.join(
string.split(
string.join(
string.split(str, '\\'),
'\\\\'),
'"'),
'\\"')
def parseaddr(address): def parseaddr(address):
import string token = [] # the current token
str = '' tokens = [] # the list of tokens
email = ''
comment = ''
backslash = 0 backslash = 0
dquote = 0 dquote = 0
was_quoted = 0
space = 0 space = 0
paren = 0 paren = 0
bracket = 0
seen_bracket = 0
for c in address: for c in address:
if backslash: if backslash:
str = str + c token.append(c)
backslash = 0 backslash = 0
continue
if c == '\\': if c == '\\':
backslash = 1 backslash = 1
was_quoted = 1
continue continue
if dquote: if dquote:
if c == '"': if c == '"':
dquote = 0 dquote = 0
else: else:
str = str + c token.append(c)
continue continue
if c == '"': if c == '"':
dquote = 1 dquote = 1
was_quoted = 1
continue continue
if c in string.whitespace:
space = 1
continue
if space:
str = str + ' '
space = 0
if paren: if paren:
if c == '(': if c == '(':
paren = paren + 1 paren = paren + 1
str = str + c elif c == ')':
continue
if c == ')':
paren = paren - 1 paren = paren - 1
if paren == 0: if paren == 0:
comment = comment + str token = string.join(token, '')
str = '' tokens.append((2, token))
token = []
continue continue
token.append(c)
continue
if c == '(': if c == '(':
paren = paren + 1 paren = 1
if bracket: token = string.join(token, '')
email = email + str tokens.append((was_quoted, token))
str = '' was_quoted = 0
elif not seen_bracket: token = []
email = email + str
str = ''
continue continue
if bracket: if c in string.whitespace:
if c == '>': space = 1
bracket = 0 continue
email = email + str if c in '<>@,;:.[]':
str = '' token = string.join(token, '')
continue tokens.append((was_quoted, token))
if c == '<': was_quoted = 0
bracket = 1 token = []
seen_bracket = 1 tokens.append((0, c))
comment = comment + str space = 0
str = ''
email = ''
continue continue
if c == '#' and not bracket and not paren: if space:
# rest is comment token = string.join(token, '')
break tokens.append((was_quoted, token))
str = str + c was_quoted = 0
if str: token = []
if seen_bracket: space = 0
if bracket: token.append(c)
email = str token = string.join(token, '')
tokens.append((was_quoted, token))
if (0, '<') in tokens:
name = []
addr = []
cur = name
for token in tokens:
if token[1] == '':
continue
if token == (0, '<'):
if addr:
raise error, 'syntax error'
cur = addr
elif token == (0, '>'):
if cur is not addr:
raise error, 'syntax error'
cur = name
elif token[0] == 2:
if cur is name:
name.append('(' + token[1] + ')')
else:
name.append(token[1])
elif token[0] == 1 and cur is addr:
if specials.search(token[1]) >= 0:
cur.append(quote(token[1]))
else:
cur.append(token[1])
else: else:
comment = comment + str cur.append(token[1])
else: else:
if paren: name = []
comment = comment + str addr = []
for token in tokens:
if token[1] == '':
continue
if token[0] == 2:
name.append(token[1])
elif token[0] == 1:
if specials.search(token[1]) >= 0:
addr.append(quote(token[1]))
else:
addr.append(token[1])
else: else:
email = email + str addr.append(token[1])
return string.strip(comment), string.strip(email) return string.join(name, ' '), string.join(addr, '')
# Parse a date field # Parse a date field
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment