Commit 7d0325d6 authored by R David Murray

#23745: handle duplicate MIME parameter names in new parser.

This mimics get_param's error handling for the most part.  It is slightly
better in some regards as get_param can produce some really weird results for
duplicate *0* parts.  It departs from get_param slightly in that if we have a
mix of non-extended and extended pieces for the same parameter name, the new
parser assumes they were all supposed to be extended and concatenates all the
values, whereas get_param always picks the non-extended parameter value.  All
of this error recovery is pretty much arbitrary decisions...
parent a3a100b5
......@@ -71,6 +71,7 @@ import re
import urllib # For urllib.parse.unquote
from string import hexdigits
from collections import OrderedDict
from operator import itemgetter
from email import _encoded_words as _ew
from email import errors
from email import utils
......@@ -1098,15 +1099,34 @@ class MimeParameters(TokenList):
params[name] = []
params[name].append((token.section_number, token))
for name, parts in params.items():
parts = sorted(parts)
# XXX: there might be more recovery we could do here if, for
# example, this is really a case of a duplicate attribute name.
parts = sorted(parts, key=itemgetter(0))
first_param = parts[0][1]
charset = first_param.charset
# Our arbitrary error recovery is to ignore duplicate parameters,
# to use appearance order if there are duplicate rfc 2231 parts,
# and to ignore gaps. This mimics the error recovery of get_param.
if not first_param.extended and len(parts) > 1:
if parts[1][0] == 0:
parts[1][1].defects.append(errors.InvalidHeaderDefect(
'duplicate parameter name; duplicate(s) ignored'))
parts = parts[:1]
# Else assume the *0* was missing...note that this is different
# from get_param, but we registered a defect for this earlier.
value_parts = []
charset = parts[0][1].charset
for i, (section_number, param) in enumerate(parts):
i = 0
for section_number, param in parts:
if section_number != i:
# We could get fancier here and look for a complete
# duplicate extended parameter and ignore the second one
# seen. But we're not doing that. The old code didn't.
if not param.extended:
param.defects.append(errors.InvalidHeaderDefect(
"inconsistent multipart parameter numbering"))
'duplicate parameter name; duplicate ignored'))
continue
else:
param.defects.append(errors.InvalidHeaderDefect(
"inconsistent RFC2231 parameter numbering"))
i += 1
value = param.param_value
if param.extended:
try:
......
......@@ -2456,6 +2456,115 @@ class TestParser(TestParserMixin, TestEmailBase):
";foo", ";foo", ";foo", [errors.InvalidHeaderDefect]*3
)
@parameterize
class Test_parse_mime_parameters(TestParserMixin, TestEmailBase):
    """Table-driven checks of ``parser.parse_mime_parameters``.

    Each entry of ``mime_parameters_params`` is a tuple laid out in the
    positional order accepted by ``mime_parameters_as_value``:
    ``(value, tl_str, tl_value, params, defects)``.  The entries are
    presumably expanded into individual test methods by ``@parameterize``
    (defined outside this block).
    """

    def mime_parameters_as_value(self, value, tl_str, tl_value, params,
                                 defects):
        """Parse *value* and verify the resulting mime-parameters token.

        *value*, *tl_str*, *tl_value* and *defects* are handed unchanged to
        ``self._test_parse_x`` together with the parser under test; *params*
        is the expected list of ``(name, value)`` pairs produced by the
        token's ``params`` iterable.
        """
        parsed = self._test_parse_x(
            parser.parse_mime_parameters, value, tl_str, tl_value, defects)
        # Check the token type first, then the decoded parameter pairs.
        self.assertEqual(parsed.token_type, 'mime-parameters')
        self.assertEqual(list(parsed.params), params)

    mime_parameters_params = {

        'simple': (
            'filename="abc.py"',
            ' filename="abc.py"',
            'filename=abc.py',
            [('filename', 'abc.py')],
            []),

        'multiple_keys': (
            'filename="abc.py"; xyz=abc',
            ' filename="abc.py"; xyz="abc"',
            'filename=abc.py; xyz=abc',
            [('filename', 'abc.py'), ('xyz', 'abc')],
            []),

        'split_value': (
            "filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66",
            ' filename="201.tif"',
            "filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66",
            [('filename', '201.tif')],
            []),

        # Error recovery is undefined for duplicate parameter names and for
        # duplicate parts of a split (RFC 2231) parameter.  The choice made
        # here: every duplicate parameter after the first is ignored, and
        # duplicate or missing RFC 2231 parts are taken in appearance order.
        # That is backward compatible with get_param's behavior, but the
        # decisions themselves are arbitrary.

        'duplicate_key': (
            'filename=abc.gif; filename=def.tiff',
            ' filename="abc.gif"',
            "filename=abc.gif; filename=def.tiff",
            [('filename', 'abc.gif')],
            [errors.InvalidHeaderDefect]),

        'duplicate_key_with_split_value': (
            "filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;"
                " filename=abc.gif",
            ' filename="201.tif"',
            "filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;"
                " filename=abc.gif",
            [('filename', '201.tif')],
            [errors.InvalidHeaderDefect]),

        'duplicate_key_with_split_value_other_order': (
            "filename=abc.gif; "
                " filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66",
            ' filename="abc.gif"',
            "filename=abc.gif;"
                " filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66",
            [('filename', 'abc.gif')],
            [errors.InvalidHeaderDefect]),

        'duplicate_in_split_value': (
            "filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;"
                " filename*1*=abc.gif",
            ' filename="201.tifabc.gif"',
            "filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;"
                " filename*1*=abc.gif",
            [('filename', '201.tifabc.gif')],
            [errors.InvalidHeaderDefect]),

        'missing_split_value': (
            "filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;",
            ' filename="201.tif"',
            "filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;",
            [('filename', '201.tif')],
            [errors.InvalidHeaderDefect]),

        'duplicate_and_missing_split_value': (
            "filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;"
                " filename*3*=abc.gif",
            ' filename="201.tifabc.gif"',
            "filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;"
                " filename*3*=abc.gif",
            [('filename', '201.tifabc.gif')],
            [errors.InvalidHeaderDefect]*2),

        # This case departs from get_param: the *0* part is assumed to be
        # missing, so all pieces are concatenated.
        'duplicate_with_broken_split_value': (
            "filename=abc.gif; "
                " filename*2*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66",
            ' filename="abc.gif201.tif"',
            "filename=abc.gif;"
                " filename*2*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66",
            [('filename', 'abc.gif201.tif')],
            # Expected defects: the apparent missing *0*, plus two
            # 'out of sequence' parts.
            [errors.InvalidHeaderDefect]*3),

    }
@parameterize
class Test_parse_mime_version(TestParserMixin, TestEmailBase):
......
......@@ -21,6 +21,9 @@ Core and Builtins
Library
-------
- Issue #23745: The new email header parser now handles duplicate MIME
parameter names without error, similar to how get_param behaves.
- Issue #23792: Ignore KeyboardInterrupt when the pydoc pager is active.
This mimics the behavior of the standard unix pagers, and prevents
pipepager from shutting down while the pager itself is still running.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment