Commit c5b242f8, authored by Ashwin Ramaswami and committed by Miss Islington (bot)

bpo-37764: Fix infinite loop when parsing unstructured email headers. (GH-15239)



Fixes a hang (infinite loop) in email._header_value_parser.get_unstructured when it parses certain invalid headers. This covers headers that contain either:
- an encoded word that is not followed by whitespace
- an invalid encoded word

https://bugs.python.org/issue37764

This fix should also be backported to 3.7 and 3.8


https://bugs.python.org/issue37764
parent daa82d01
...@@ -935,6 +935,10 @@ class EWWhiteSpaceTerminal(WhiteSpaceTerminal): ...@@ -935,6 +935,10 @@ class EWWhiteSpaceTerminal(WhiteSpaceTerminal):
return '' return ''
class _InvalidEwError(errors.HeaderParseError):
    """Invalid encoded word found while parsing headers.

    Raised by get_encoded_word() when the candidate encoded word cannot be
    decoded (the decoder raised ValueError).  get_unstructured() catches
    this type specifically and then skips the "split at the embedded
    encoded word" step, because retrying that split on an undecodable
    word is what made the parser loop forever.

    It subclasses errors.HeaderParseError, so handlers written for the
    base class still catch it.
    """
# XXX these need to become classes and used as instances so # XXX these need to become classes and used as instances so
# that a program can't change them in a parse tree and screw # that a program can't change them in a parse tree and screw
# up other parse trees. Maybe should have tests for that, too. # up other parse trees. Maybe should have tests for that, too.
...@@ -1039,7 +1043,10 @@ def get_encoded_word(value): ...@@ -1039,7 +1043,10 @@ def get_encoded_word(value):
raise errors.HeaderParseError( raise errors.HeaderParseError(
"expected encoded word but found {}".format(value)) "expected encoded word but found {}".format(value))
remstr = ''.join(remainder) remstr = ''.join(remainder)
if len(remstr) > 1 and remstr[0] in hexdigits and remstr[1] in hexdigits: if (len(remstr) > 1 and
remstr[0] in hexdigits and
remstr[1] in hexdigits and
tok.count('?') < 2):
# The ? after the CTE was followed by an encoded word escape (=XX). # The ? after the CTE was followed by an encoded word escape (=XX).
rest, *remainder = remstr.split('?=', 1) rest, *remainder = remstr.split('?=', 1)
tok = tok + '?=' + rest tok = tok + '?=' + rest
...@@ -1051,7 +1058,7 @@ def get_encoded_word(value): ...@@ -1051,7 +1058,7 @@ def get_encoded_word(value):
try: try:
text, charset, lang, defects = _ew.decode('=?' + tok + '?=') text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
except ValueError: except ValueError:
raise errors.HeaderParseError( raise _InvalidEwError(
"encoded word format invalid: '{}'".format(ew.cte)) "encoded word format invalid: '{}'".format(ew.cte))
ew.charset = charset ew.charset = charset
ew.lang = lang ew.lang = lang
...@@ -1101,9 +1108,12 @@ def get_unstructured(value): ...@@ -1101,9 +1108,12 @@ def get_unstructured(value):
token, value = get_fws(value) token, value = get_fws(value)
unstructured.append(token) unstructured.append(token)
continue continue
valid_ew = True
if value.startswith('=?'): if value.startswith('=?'):
try: try:
token, value = get_encoded_word(value) token, value = get_encoded_word(value)
except _InvalidEwError:
valid_ew = False
except errors.HeaderParseError: except errors.HeaderParseError:
# XXX: Need to figure out how to register defects when # XXX: Need to figure out how to register defects when
# appropriate here. # appropriate here.
...@@ -1125,7 +1135,10 @@ def get_unstructured(value): ...@@ -1125,7 +1135,10 @@ def get_unstructured(value):
# Split in the middle of an atom if there is a rfc2047 encoded word # Split in the middle of an atom if there is a rfc2047 encoded word
# which does not have WSP on both sides. The defect will be registered # which does not have WSP on both sides. The defect will be registered
# the next time through the loop. # the next time through the loop.
if rfc2047_matcher.search(tok): # This needs to only be performed when the encoded word is valid;
# otherwise, performing it on an invalid encoded word can cause
# the parser to go in an infinite loop.
if valid_ew and rfc2047_matcher.search(tok):
tok, *remainder = value.partition('=?') tok, *remainder = value.partition('=?')
vtext = ValueTerminal(tok, 'vtext') vtext = ValueTerminal(tok, 'vtext')
_validate_xtext(vtext) _validate_xtext(vtext)
......
...@@ -383,6 +383,22 @@ class TestParser(TestParserMixin, TestEmailBase): ...@@ -383,6 +383,22 @@ class TestParser(TestParserMixin, TestEmailBase):
[errors.InvalidHeaderDefect], [errors.InvalidHeaderDefect],
'') '')
def test_get_unstructured_without_trailing_whitespace_hang_case(self):
    """An encoded word directly followed by text must not hang the parser.

    The input ends with 'aa' glued onto the encoded word, with no
    whitespace in between.  The expectation passed to the helper is the
    decoded text 'somevalueaa' together with one InvalidHeaderDefect.

    NOTE(review): the positional arguments of _test_get_x are presumably
    (parser, source, expected str, expected value, expected defects,
    expected remainder) -- confirm against the test mixin.
    """
    self._test_get_x(
        self._get_unst,
        '=?utf-8?q?somevalue?=aa',
        'somevalueaa',
        'somevalueaa',
        [errors.InvalidHeaderDefect],
        '',
    )
def test_get_unstructured_invalid_ew(self):
    """An undecodable encoded word is passed through as literal text.

    '=somevalue' is not a valid quoted-printable escape, so the word
    cannot be decoded.  The expectation passed to the helper is the
    input string unchanged, with an empty defect list.

    NOTE(review): the positional arguments of _test_get_x are presumably
    (parser, source, expected str, expected value, expected defects,
    expected remainder) -- confirm against the test mixin.
    """
    self._test_get_x(
        self._get_unst,
        '=?utf-8?q?=somevalue?=',
        '=?utf-8?q?=somevalue?=',
        '=?utf-8?q?=somevalue?=',
        [],
        '',
    )
# get_qp_ctext # get_qp_ctext
def test_get_qp_ctext_only(self): def test_get_qp_ctext_only(self):
......
...@@ -5381,6 +5381,27 @@ Content-Type: application/x-foo; ...@@ -5381,6 +5381,27 @@ Content-Type: application/x-foo;
eq(language, 'en-us') eq(language, 'en-us')
eq(s, 'My Document For You') eq(s, 'My Document For You')
def test_should_not_hang_on_invalid_ew_messages(self):
    """Parsing messages that carry malformed encoded words must terminate.

    Each sample message is parsed in its own subTest.  There is nothing to
    assert on the result: the regression being guarded against is an
    infinite loop, so simply returning from the parse call is the pass
    condition.
    """
    # NOTE(review): the fixture text below is reproduced exactly as found.
    # It looks like whitespace was lost somewhere upstream (header
    # continuation lines normally start with a space, and a blank line
    # normally separates headers from the body), and the second message
    # contains U+FFFD replacement characters where non-ASCII text
    # presumably used to be.  Verify against the upstream test file before
    # relying on these strings.
    messages = ["""From: user@host.com
To: user@host.com
Bad-Header:
=?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?=
=?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?=
=?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?=
Hello!
""", """From: ����� �������� <xxx@xxx>
To: "xxx" <xxx@xxx>
Subject: ��� ���������� ����� ����� � ��������� �� ����
MIME-Version: 1.0
Content-Type: text/plain; charset="windows-1251";
Content-Transfer-Encoding: 8bit
�� ����� � ���� ������ ��� ��������
"""]
    for m in messages:
        with self.subTest(m=m):
            # The return value is deliberately discarded; completing the
            # call without hanging is what this test checks.
            email.message_from_string(m)
# Tests to ensure that signed parts of an email are completely preserved, as # Tests to ensure that signed parts of an email are completely preserved, as
......
...@@ -1336,6 +1336,7 @@ Burton Radons ...@@ -1336,6 +1336,7 @@ Burton Radons
Abhilash Raj Abhilash Raj
Shorya Raj Shorya Raj
Dhushyanth Ramasamy Dhushyanth Ramasamy
Ashwin Ramaswami
Jeff Ramnani Jeff Ramnani
Bayard Randel Bayard Randel
Varpu Rantala Varpu Rantala
......
Fixes email._header_value_parser.get_unstructured going into an infinite loop for a specific case in which the email header does not have trailing whitespace, and the case in which it contains an invalid encoded word. Patch by Ashwin Ramaswami.
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment