Commit 7aeac918 authored by Barry Warsaw

Anthony Baxter's cleanup patch. Python project SF patch # 583190,

quoting:

  In non-strict mode, messages that are missing the end terminator no
  longer require a blank line at the end; a single newline is now
  sufficient.

  Handle trailing whitespace at the end of a boundary. Had to switch
  from using string.split() to re.split()

  Handle whitespace on the end of a parameter list for Content-type.

  Handle whitespace on the end of a plain content-type header.

Specifically,

get_type(): Strip the content type string.

_get_params_preserve(): Strip the parameter names and values on both
sides.

_parsebody(): Lots of changes as described above, with some stylistic
changes by Barry (who hopefully didn't screw things up ;).
parent e21262ca
......@@ -373,7 +373,7 @@ class Message:
value = self.get('content-type', missing)
if value is missing:
return failobj
return paramre.split(value)[0].lower()
return paramre.split(value)[0].lower().strip()
def get_main_type(self, failobj=None):
"""Return the message's main content type if present."""
......@@ -428,11 +428,11 @@ class Message:
for p in paramre.split(value):
try:
name, val = p.split('=', 1)
name = name.rstrip()
val = val.lstrip()
name = name.strip()
val = val.strip()
except ValueError:
# Must have been a bare attribute
name = p
name = p.strip()
val = ''
params.append((name, val))
params = Utils.decode_params(params)
......
......@@ -124,19 +124,25 @@ class Parser:
if boundary:
preamble = epilogue = None
# Split into subparts. The first boundary we're looking for won't
# have the leading newline since we're at the start of the body
# text.
# always have a leading newline since we're at the start of the
# body text, and there's not always a preamble before the first
# boundary.
separator = '--' + boundary
payload = fp.read()
start = payload.find(separator)
if start < 0:
# We use an RE here because boundaries can have trailing
# whitespace.
mo = re.search(
r'(?P<sep>' + re.escape(separator) + r')(?P<ws>[ \t]*)',
payload)
if not mo:
raise Errors.BoundaryError(
"Couldn't find starting boundary: %s" % boundary)
start = mo.start()
if start > 0:
# there's some pre-MIME boundary preamble
preamble = payload[0:start]
# Find out what kind of line endings we're using
start += len(separator)
start += len(mo.group('sep')) + len(mo.group('ws'))
cre = re.compile('\r\n|\r|\n')
mo = cre.search(payload, start)
if mo:
......@@ -151,31 +157,32 @@ class Parser:
terminator = mo.start()
linesep = mo.group('sep')
if mo.end() < len(payload):
# there's some post-MIME boundary epilogue
# There's some post-MIME boundary epilogue
epilogue = payload[mo.end():]
elif self._strict:
raise Errors.BoundaryError(
"Couldn't find terminating boundary: %s" % boundary)
else:
# handle the case of no trailing boundary. I hate mail clients.
# check that it ends in a blank line
endre = re.compile('(?P<sep>\r\n|\r|\n){2}$')
mo = endre.search(payload)
# Handle the case of no trailing boundary. Check that it ends
# in a blank line. Some cases (spamspamspam) don't even have
# that!
mo = re.search('(?P<sep>\r\n|\r|\n){2}$', payload)
if not mo:
raise Errors.BoundaryError(
"Couldn't find terminating boundary, and no "+
"trailing empty line")
else:
linesep = mo.group('sep')
terminator = len(payload)
mo = re.search('(?P<sep>\r\n|\r|\n)$', payload)
if not mo:
raise Errors.BoundaryError(
'No terminating boundary and no trailing empty line')
linesep = mo.group('sep')
terminator = len(payload)
# We split the textual payload on the boundary separator, which
# includes the trailing newline. If the container is a
# multipart/digest then the subparts are by default message/rfc822
# instead of text/plain. In that case, they'll have a optional
# block of MIME headers, then an empty line followed by the
# message headers.
separator += linesep
parts = payload[start:terminator].split(linesep + separator)
parts = re.split(
linesep + re.escape(separator) + r'[ \t]*' + linesep,
payload[start:terminator])
for part in parts:
if isdigest:
if part[0] == linesep:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment