Commit a215023b authored by R David Murray

#11243: tests and fixes for handling of 'dirty data' in additional methods

parent 4e432682
...@@ -48,9 +48,9 @@ def _sanitize_header(name, value): ...@@ -48,9 +48,9 @@ def _sanitize_header(name, value):
def _splitparam(param): def _splitparam(param):
# Split header parameters. BAW: this may be too simple. It isn't # Split header parameters. BAW: this may be too simple. It isn't
# strictly RFC 2045 (section 5.1) compliant, but it catches most headers # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
# found in the wild. We may eventually need a full fledged parser # found in the wild. We may eventually need a full fledged parser.
# eventually. # RDM: we might have a Header here; for now just stringify it.
a, sep, b = param.partition(';') a, sep, b = str(param).partition(';')
if not sep: if not sep:
return a.strip(), None return a.strip(), None
return a.strip(), b.strip() return a.strip(), b.strip()
...@@ -90,6 +90,8 @@ def _formatparam(param, value=None, quote=True): ...@@ -90,6 +90,8 @@ def _formatparam(param, value=None, quote=True):
return param return param
def _parseparam(s): def _parseparam(s):
# RDM This might be a Header, so for now stringify it.
s = ';' + str(s)
plist = [] plist = []
while s[:1] == ';': while s[:1] == ';':
s = s[1:] s = s[1:]
...@@ -240,7 +242,8 @@ class Message: ...@@ -240,7 +242,8 @@ class Message:
if i is not None and not isinstance(self._payload, list): if i is not None and not isinstance(self._payload, list):
raise TypeError('Expected list, got %s' % type(self._payload)) raise TypeError('Expected list, got %s' % type(self._payload))
payload = self._payload payload = self._payload
cte = self.get('content-transfer-encoding', '').lower() # cte might be a Header, so for now stringify it.
cte = str(self.get('content-transfer-encoding', '')).lower()
# payload may be bytes here. # payload may be bytes here.
if isinstance(payload, str): if isinstance(payload, str):
if _has_surrogates(payload): if _has_surrogates(payload):
...@@ -561,7 +564,7 @@ class Message: ...@@ -561,7 +564,7 @@ class Message:
if value is missing: if value is missing:
return failobj return failobj
params = [] params = []
for p in _parseparam(';' + value): for p in _parseparam(value):
try: try:
name, val = p.split('=', 1) name, val = p.split('=', 1)
name = name.strip() name = name.strip()
......
...@@ -2995,6 +2995,58 @@ class Test8BitBytesHandling(unittest.TestCase): ...@@ -2995,6 +2995,58 @@ class Test8BitBytesHandling(unittest.TestCase):
['foo@bar.com', ['foo@bar.com',
'g\uFFFD\uFFFDst']) 'g\uFFFD\uFFFDst'])
def test_get_content_type_with_8bit(self):
msg = email.message_from_bytes(textwrap.dedent("""\
Content-Type: text/pl\xA7in; charset=utf-8
""").encode('latin-1'))
self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
self.assertEqual(msg.get_content_maintype(), "text")
self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
def test_get_params_with_8bit(self):
msg = email.message_from_bytes(
'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
self.assertEqual(msg.get_params(header='x-header'),
[('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
# XXX: someday you might be able to get 'b\xa7r', for now you can't.
self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
def test_get_rfc2231_params_with_8bit(self):
msg = email.message_from_bytes(textwrap.dedent("""\
Content-Type: text/plain; charset=us-ascii;
title*=us-ascii'en'This%20is%20not%20f\xa7n"""
).encode('latin-1'))
self.assertEqual(msg.get_param('title'),
('us-ascii', 'en', 'This is not f\uFFFDn'))
def test_set_rfc2231_params_with_8bit(self):
msg = email.message_from_bytes(textwrap.dedent("""\
Content-Type: text/plain; charset=us-ascii;
title*=us-ascii'en'This%20is%20not%20f\xa7n"""
).encode('latin-1'))
msg.set_param('title', 'test')
self.assertEqual(msg.get_param('title'), 'test')
def test_del_rfc2231_params_with_8bit(self):
msg = email.message_from_bytes(textwrap.dedent("""\
Content-Type: text/plain; charset=us-ascii;
title*=us-ascii'en'This%20is%20not%20f\xa7n"""
).encode('latin-1'))
msg.del_param('title')
self.assertEqual(msg.get_param('title'), None)
self.assertEqual(msg.get_content_maintype(), 'text')
def test_get_payload_with_8bit_cte_header(self):
msg = email.message_from_bytes(textwrap.dedent("""\
Content-Transfer-Encoding: b\xa7se64
Content-Type: text/plain; charset=latin-1
payload
""").encode('latin-1'))
self.assertEqual(msg.get_payload(), 'payload\n')
self.assertEqual(msg.get_payload(decode=True), b'payload\n')
non_latin_bin_msg = textwrap.dedent("""\ non_latin_bin_msg = textwrap.dedent("""\
From: foo@bar.com From: foo@bar.com
To: báz To: báz
......
...@@ -40,6 +40,9 @@ Core and Builtins ...@@ -40,6 +40,9 @@ Core and Builtins
Library Library
------- -------
- Issue #11243: fix the parameter querying methods of Message to work if
the headers contain un-encoded non-ASCII data.
- Issue #11401: fix handling of headers with no value; this fixes a regression - Issue #11401: fix handling of headers with no value; this fixes a regression
relative to Python2 and the result is now the same as it was in Python2. relative to Python2 and the result is now the same as it was in Python2.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment