The parseaddr() function has been entirely rewritten by Sjoerd Mullender.

(Includes a patch he sent me a few days later.)

The parseaddr() function has been entirely rewritten by Sjoerd Mullender.
(Includes a patch he sent me a few days later.)
7cc767a5 · Guido van Rossum · b0dac34b · 7cc767a5
Commit 7cc767a5 authored Sep 15, 1997 by Guido van Rossum
Hide whitespace changes
Inline Side-by-side

Showing with 92 additions and 53 deletions

Lib/rfc822.py Lib/rfc822.py +92 -53

No files found.
--- a/Lib/rfc822.py
+++ b/Lib/rfc822.py
@@ -307,90 +307,129 @@ def unquote(str):
 # Parse an address into (name, address) tuple
+# (By Sjoerd Mullender)
+error = 'parseaddr.error'
+specials = regex.compile('[][()<>,.;:@\\" \000-\037\177-\377]')
+def quote(str):
+	return '"%s"' % string.join(
+	    string.split(
+		string.join(
+		    string.split(str, '\\'),
+		    '\\\\'),
+		'"'),
+	    '\\"')
 def parseaddr(address):
-	import string
+	token = []			# the current token
-	str = ''
+	tokens = []			# the list of tokens
-	email = ''
-	comment = ''
 	backslash = 0
 	dquote = 0
+	was_quoted = 0
 	space = 0
 	paren = 0
-	bracket = 0
-	seen_bracket = 0
 	for c in address:
 		if backslash:
-			str = str + c
+			token.append(c)
 			backslash = 0
-			continue
 		if c == '\\':
 			backslash = 1
+			was_quoted = 1
 			continue
 		if dquote:
 			if c == '"':
 				dquote = 0
 			else:
-				str = str + c
+				token.append(c)
 			continue
 		if c == '"':
 			dquote = 1
+			was_quoted = 1
 			continue
-		if c in string.whitespace:
-			space = 1
-			continue
-		if space:
-			str = str + ' '
-			space = 0
 		if paren:
 			if c == '(':
 				paren = paren + 1
-				str = str + c
+			elif c == ')':
-				continue
-			if c == ')':
 				paren = paren - 1
 				if paren == 0:
-					comment = comment + str
+					token = string.join(token, '')
-					str = ''
+					tokens.append((2, token))
+					token = []
 					continue
+			token.append(c)
+			continue
 		if c == '(':
-			paren = paren + 1
+			paren = 1
-			if bracket:
+			token = string.join(token, '')
-				email = email + str
+			tokens.append((was_quoted, token))
-				str = ''
+			was_quoted = 0
-			elif not seen_bracket:
+			token = []
-				email = email + str
-				str = ''
 			continue
-		if bracket:
+		if c in string.whitespace:
-			if c == '>':
+			space = 1
-				bracket = 0
+			continue
-				email = email + str
+		if c in '<>@,;:.[]':
-				str = ''
+			token = string.join(token, '')
-				continue
+			tokens.append((was_quoted, token))
-		if c == '<':
+			was_quoted = 0
-			bracket = 1
+			token = []
-			seen_bracket = 1
+			tokens.append((0, c))
-			comment = comment + str
+			space = 0
-			str = ''
-			email = ''
 			continue
-		if c == '#' and not bracket and not paren:
+		if space:
-			# rest is comment
+			token = string.join(token, '')
-			break
+			tokens.append((was_quoted, token))
-		str = str + c
+			was_quoted = 0
-	if str:
+			token = []
-		if seen_bracket:
+			space = 0
-			if bracket:
+		token.append(c)
-				email = str
+	token = string.join(token, '')
+	tokens.append((was_quoted, token))
+	if (0, '<') in tokens:
+		name = []
+		addr = []
+		cur = name
+		for token in tokens:
+			if token[1] == '':
+				continue
+			if token == (0, '<'):
+				if addr:
+					raise error, 'syntax error'
+				cur = addr
+			elif token == (0, '>'):
+				if cur is not addr:
+					raise error, 'syntax error'
+				cur = name
+			elif token[0] == 2:
+				if cur is name:
+					name.append('(' + token[1] + ')')
+				else:
+					name.append(token[1])
+			elif token[0] == 1 and cur is addr:
+				if specials.search(token[1]) >= 0:
+					cur.append(quote(token[1]))
+				else:
+					cur.append(token[1])
 			else:
-				comment = comment + str
+				cur.append(token[1])
-		else:
+	else:
-			if paren:
+		name = []
-				comment = comment + str
+		addr = []
+		for token in tokens:
+			if token[1] == '':
+				continue
+			if token[0] == 2:
+				name.append(token[1])
+			elif token[0] == 1:
+				if specials.search(token[1]) >= 0:
+					addr.append(quote(token[1]))
+				else:
+					addr.append(token[1])
 			else:
-				email = email + str
+				addr.append(token[1])
-	return string.strip(comment), string.strip(email)
+	return string.join(name, ' '), string.join(addr, '')
 # Parse a date field