Commit 63563cdf authored by R. David Murray

#9286: Fix the rfc822 parser to preserve whitespace in address local part.

Such addresses are not RFC compliant except under the 'obsolete syntax'
rules, but before this fix the whitespace was dropped from the input,
concatenating the pieces.  That breaks one of the principles of the
email package, that of preserving the input as much as possible.
It also denies the application program the opportunity to apply its
own heuristics to the interpretation of such non-compliant addresses.

It is possible users of the email package were depending on the local
part always being a single token, so this fix will not be backported.
parent 2b37ce7f
...@@ -199,14 +199,18 @@ class AddrlistClass: ...@@ -199,14 +199,18 @@ class AddrlistClass:
self.commentlist = [] self.commentlist = []
def gotonext(self): def gotonext(self):
"""Parse up to the start of the next address.""" """Skip white space and extract comments."""
wslist = []
while self.pos < len(self.field): while self.pos < len(self.field):
if self.field[self.pos] in self.LWS + '\n\r': if self.field[self.pos] in self.LWS + '\n\r':
if self.field[self.pos] not in '\n\r':
wslist.append(self.field[self.pos])
self.pos += 1 self.pos += 1
elif self.field[self.pos] == '(': elif self.field[self.pos] == '(':
self.commentlist.append(self.getcomment()) self.commentlist.append(self.getcomment())
else: else:
break break
return EMPTYSTRING.join(wslist)
def getaddrlist(self): def getaddrlist(self):
"""Parse all addresses. """Parse all addresses.
...@@ -319,16 +323,24 @@ class AddrlistClass: ...@@ -319,16 +323,24 @@ class AddrlistClass:
self.gotonext() self.gotonext()
while self.pos < len(self.field): while self.pos < len(self.field):
preserve_ws = True
if self.field[self.pos] == '.': if self.field[self.pos] == '.':
if aslist and not aslist[-1].strip():
aslist.pop()
aslist.append('.') aslist.append('.')
self.pos += 1 self.pos += 1
preserve_ws = False
elif self.field[self.pos] == '"': elif self.field[self.pos] == '"':
aslist.append('"%s"' % quote(self.getquote())) aslist.append('"%s"' % quote(self.getquote()))
elif self.field[self.pos] in self.atomends: elif self.field[self.pos] in self.atomends:
if aslist and not aslist[-1].strip():
aslist.pop()
break break
else: else:
aslist.append(self.getatom()) aslist.append(self.getatom())
self.gotonext() ws = self.gotonext()
if preserve_ws and ws:
aslist.append(ws)
if self.pos >= len(self.field) or self.field[self.pos] != '@': if self.pos >= len(self.field) or self.field[self.pos] != '@':
return EMPTYSTRING.join(aslist) return EMPTYSTRING.join(aslist)
......
...@@ -2342,6 +2342,24 @@ class TestMiscellaneous(TestEmailBase): ...@@ -2342,6 +2342,24 @@ class TestMiscellaneous(TestEmailBase):
eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'), eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
('', '"\\\\"example\\\\" example"@example.com')) ('', '"\\\\"example\\\\" example"@example.com'))
def test_parseaddr_preserves_spaces_in_local_part(self):
# issue 9286. A normal RFC5322 local part should not contain any
# folding white space, but legacy local parts can (they are a sequence
# of atoms, not dotatoms). On the other hand we strip whitespace from
# before the @ and around dots, on the assumption that the whitespace
# around the punctuation is a mistake in what would otherwise be
# an RFC5322 local part. Leading whitespace is, as usual, stripped as well.
self.assertEqual(('', "merwok wok@xample.com"),
utils.parseaddr("merwok wok@xample.com"))
self.assertEqual(('', "merwok wok@xample.com"),
utils.parseaddr("merwok wok@xample.com"))
self.assertEqual(('', "merwok wok@xample.com"),
utils.parseaddr(" merwok wok @xample.com"))
self.assertEqual(('', 'merwok"wok" wok@xample.com'),
utils.parseaddr('merwok"wok" wok@xample.com'))
self.assertEqual(('', 'merwok.wok.wok@xample.com'),
utils.parseaddr('merwok. wok . wok@xample.com'))
def test_multiline_from_comment(self): def test_multiline_from_comment(self):
x = """\ x = """\
Foo Foo
......
...@@ -23,6 +23,9 @@ Core and Builtins ...@@ -23,6 +23,9 @@ Core and Builtins
Library Library
------- -------
- Issue #9286: email.utils.parseaddr no longer concatenates blank-separated
words in the local part of email addresses, thereby preserving the input.
- Issue #6791: Limit header line length (to 65535 bytes) in http.client - Issue #6791: Limit header line length (to 65535 bytes) in http.client
and http.server, to avoid denial of services from the other party. and http.server, to avoid denial of services from the other party.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment