Commit 0400d339 authored by R David Murray

#16983: Apply Postel's law to encoded words inside quoted strings.

This applies only to the new parser.  The old parser decodes encoded words
inside quoted strings already, although it gets the whitespace wrong
when it does so.

This version of the patch only handles the most common case (a single encoded
word surrounded by quotes), but I haven't seen any other variations of this in
the wild yet, so it's good enough for now.
parent 905c8c3d
...@@ -1559,6 +1559,13 @@ def get_bare_quoted_string(value): ...@@ -1559,6 +1559,13 @@ def get_bare_quoted_string(value):
while value and value[0] != '"': while value and value[0] != '"':
if value[0] in WSP: if value[0] in WSP:
token, value = get_fws(value) token, value = get_fws(value)
elif value[:2] == '=?':
try:
token, value = get_encoded_word(value)
bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
"encoded word inside quoted string"))
except errors.HeaderParseError:
token, value = get_qcontent(value)
else: else:
token, value = get_qcontent(value) token, value = get_qcontent(value)
bare_quoted_string.append(token) bare_quoted_string.append(token)
......
...@@ -540,6 +540,15 @@ class TestParser(TestParserMixin, TestEmailBase): ...@@ -540,6 +540,15 @@ class TestParser(TestParserMixin, TestEmailBase):
self._test_get_x(parser.get_bare_quoted_string, self._test_get_x(parser.get_bare_quoted_string,
'""', '""', '', [], '') '""', '""', '', [], '')
# Issue 16983: apply Postel's law to some bad encoding.
def test_encoded_word_inside_quotes(self):
self._test_get_x(parser.get_bare_quoted_string,
'"=?utf-8?Q?not_really_valid?="',
'"not really valid"',
'not really valid',
[errors.InvalidHeaderDefect],
'')
# get_comment # get_comment
def test_get_comment_only(self): def test_get_comment_only(self):
......
...@@ -1143,6 +1143,16 @@ class TestAddressHeader(TestHeaderBase): ...@@ -1143,6 +1143,16 @@ class TestAddressHeader(TestHeaderBase):
'example.com', 'example.com',
None), None),
'rfc2047_atom_in_quoted_string_is_decoded':
('"=?utf-8?q?=C3=89ric?=" <foo@example.com>',
[errors.InvalidHeaderDefect],
'Éric <foo@example.com>',
'Éric',
'foo@example.com',
'foo',
'example.com',
None),
} }
# XXX: Need many more examples, and in particular some with names in # XXX: Need many more examples, and in particular some with names in
......
...@@ -48,6 +48,9 @@ Core and Builtins ...@@ -48,6 +48,9 @@ Core and Builtins
Library Library
------- -------
- Issue #16983: the new email header parsing code will now decode encoded words
that are (incorrectly) surrounded by quotes, and register a defect.
- Issue #19772: email.generator no longer mutates the message object when - Issue #19772: email.generator no longer mutates the message object when
doing a down-transform from 8bit to 7bit CTEs. doing a down-transform from 8bit to 7bit CTEs.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment