Commit adbdcdbd authored by R David Murray

#14925: email now registers a defect for missing header/body separator.

This patch also deprecates the MalformedHeaderDefect.  My best guess is that
this defect was rendered obsolete by a refactoring of the parser, and the
corresponding defect for the new parser (which this patch introduces) was
overlooked.
parent 2c172d04
...@@ -79,9 +79,18 @@ this class is *not* an exception! ...@@ -79,9 +79,18 @@ this class is *not* an exception!
* :class:`MisplacedEnvelopeHeaderDefect` - A "Unix From" header was found in the * :class:`MisplacedEnvelopeHeaderDefect` - A "Unix From" header was found in the
middle of a header block. middle of a header block.
* :class:`MissingHeaderBodySeparatorDefect` - A line was found while parsing
headers that had no leading white space but contained no ':'. Parsing
continues assuming that the line represents the first line of the body.
.. versionadded:: 3.3
* :class:`MalformedHeaderDefect` -- A header was found that was missing a colon, * :class:`MalformedHeaderDefect` -- A header was found that was missing a colon,
or was otherwise malformed. or was otherwise malformed.
.. deprecated:: 3.3
This defect has not been used for several Python versions.
* :class:`MultipartInvariantViolationDefect` -- A message claimed to be a * :class:`MultipartInvariantViolationDefect` -- A message claimed to be a
:mimetype:`multipart`, but no subparts were found. Note that when a message has :mimetype:`multipart`, but no subparts were found. Note that when a message has
this defect, its :meth:`is_multipart` method may return false even though its this defect, its :meth:`is_multipart` method may return false even though its
......
...@@ -48,8 +48,10 @@ class FirstHeaderLineIsContinuationDefect(MessageDefect): ...@@ -48,8 +48,10 @@ class FirstHeaderLineIsContinuationDefect(MessageDefect):
class MisplacedEnvelopeHeaderDefect(MessageDefect): class MisplacedEnvelopeHeaderDefect(MessageDefect):
"""A 'Unix-from' header was found in the middle of a header block.""" """A 'Unix-from' header was found in the middle of a header block."""
class MalformedHeaderDefect(MessageDefect): class MissingHeaderBodySeparatorDefect(MessageDefect):
"""Found a header that was missing a colon, or was otherwise malformed.""" """Found line with no leading whitespace and no colon before blank line."""
# XXX: backward compatibility, just in case (it was never emitted).
MalformedHeaderDefect = MissingHeaderBodySeparatorDefect
class MultipartInvariantViolationDefect(MessageDefect): class MultipartInvariantViolationDefect(MessageDefect):
"""A message claimed to be a multipart but no subparts were found.""" """A message claimed to be a multipart but no subparts were found."""
......
...@@ -219,6 +219,8 @@ class FeedParser: ...@@ -219,6 +219,8 @@ class FeedParser:
# (i.e. newline), just throw it away. Otherwise the line is # (i.e. newline), just throw it away. Otherwise the line is
# part of the body so push it back. # part of the body so push it back.
if not NLCRE.match(line): if not NLCRE.match(line):
defect = errors.MissingHeaderBodySeparatorDefect()
self.policy.handle_defect(self._cur, defect)
self._input.unreadline(line) self._input.unreadline(line)
break break
headers.append(line) headers.append(line)
...@@ -488,12 +490,10 @@ class FeedParser: ...@@ -488,12 +490,10 @@ class FeedParser:
self._cur.defects.append(defect) self._cur.defects.append(defect)
continue continue
# Split the line on the colon separating field name from value. # Split the line on the colon separating field name from value.
# There will always be a colon, because if there wasn't the part of
# the parser that calls us would have started parsing the body.
i = line.find(':') i = line.find(':')
if i < 0: assert i>0, "_parse_headers fed line with no : and no leading WS"
defect = errors.MalformedHeaderDefect(line)
# XXX: fixme (defect not going through policy)
self._cur.defects.append(defect)
continue
lastheader = line[:i] lastheader = line[:i]
lastvalue = [line] lastvalue = [line]
# Done with all the lines, so handle the last header. # Done with all the lines, so handle the last header.
......
...@@ -1960,15 +1960,27 @@ counter to RFC 2822, there's no separating newline here ...@@ -1960,15 +1960,27 @@ counter to RFC 2822, there's no separating newline here
# test_parser.TestMessageDefectDetectionBase # test_parser.TestMessageDefectDetectionBase
def test_first_line_is_continuation_header(self): def test_first_line_is_continuation_header(self):
eq = self.assertEqual eq = self.assertEqual
m = ' Line 1\nLine 2\nLine 3' m = ' Line 1\nSubject: test\n\nbody'
msg = email.message_from_string(m) msg = email.message_from_string(m)
eq(msg.keys(), []) eq(msg.keys(), ['Subject'])
eq(msg.get_payload(), 'Line 2\nLine 3') eq(msg.get_payload(), 'body')
eq(len(msg.defects), 1) eq(len(msg.defects), 1)
self.assertTrue(isinstance(msg.defects[0], self.assertDefectsEqual(msg.defects,
errors.FirstHeaderLineIsContinuationDefect)) [errors.FirstHeaderLineIsContinuationDefect])
eq(msg.defects[0].line, ' Line 1\n') eq(msg.defects[0].line, ' Line 1\n')
# test_parser.TestMessageDefectDetectionBase
def test_missing_header_body_separator(self):
# Our heuristic if we see a line that doesn't look like a header (no
# leading whitespace but no ':') is to assume that the blank line that
# separates the header from the body is missing, and to stop parsing
# headers and start parsing the body.
msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
self.assertEqual(msg.keys(), ['Subject'])
self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
self.assertDefectsEqual(msg.defects,
[errors.MissingHeaderBodySeparatorDefect])
# Test RFC 2047 header encoding and decoding # Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase): class TestRFC2047(TestEmailBase):
......
...@@ -237,17 +237,33 @@ class TestMessageDefectDetectionBase: ...@@ -237,17 +237,33 @@ class TestMessageDefectDetectionBase:
policy=self.policy.clone(raise_on_defect=True)) policy=self.policy.clone(raise_on_defect=True))
def test_first_line_is_continuation_header(self): def test_first_line_is_continuation_header(self):
msg = self._str_msg(' Line 1\nLine 2\nLine 3') msg = self._str_msg(' Line 1\nSubject: test\n\nbody')
self.assertEqual(msg.keys(), []) self.assertEqual(msg.keys(), ['Subject'])
self.assertEqual(msg.get_payload(), 'Line 2\nLine 3') self.assertEqual(msg.get_payload(), 'body')
self.assertEqual(len(self.get_defects(msg)), 1) self.assertEqual(len(self.get_defects(msg)), 1)
self.assertTrue(isinstance(self.get_defects(msg)[0], self.assertDefectsEqual(self.get_defects(msg),
errors.FirstHeaderLineIsContinuationDefect)) [errors.FirstHeaderLineIsContinuationDefect])
self.assertEqual(self.get_defects(msg)[0].line, ' Line 1\n') self.assertEqual(self.get_defects(msg)[0].line, ' Line 1\n')
def test_first_line_is_continuation_header_raise_on_defect(self): def test_first_line_is_continuation_header_raise_on_defect(self):
with self.assertRaises(errors.FirstHeaderLineIsContinuationDefect): with self.assertRaises(errors.FirstHeaderLineIsContinuationDefect):
self._str_msg(' Line 1\nLine 2\nLine 3', self._str_msg(' Line 1\nSubject: test\n\nbody\n',
policy=self.policy.clone(raise_on_defect=True))
def test_missing_header_body_separator(self):
# Our heuristic if we see a line that doesn't look like a header (no
# leading whitespace but no ':') is to assume that the blank line that
# separates the header from the body is missing, and to stop parsing
# headers and start parsing the body.
msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
self.assertEqual(msg.keys(), ['Subject'])
self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
self.assertDefectsEqual(self.get_defects(msg),
[errors.MissingHeaderBodySeparatorDefect])
def test_missing_header_body_separator_raise_on_defect(self):
with self.assertRaises(errors.MissingHeaderBodySeparatorDefect):
self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n',
policy=self.policy.clone(raise_on_defect=True)) policy=self.policy.clone(raise_on_defect=True))
......
...@@ -49,6 +49,10 @@ Core and Builtins ...@@ -49,6 +49,10 @@ Core and Builtins
Library Library
------- -------
- Issue #14925: email now registers a defect when the parser decides that there
is a missing header/body separator line. MalformedHeaderDefect, which the
existing code would never actually generate, is deprecated.
- Issue #10365: File open dialog now works instead of crashing - Issue #10365: File open dialog now works instead of crashing
even when parent window is closed. Patch by Roger Serwy. even when parent window is closed. Patch by Roger Serwy.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment