Commit e2922835 authored by R David Murray

Merge #14291: if a header has non-ascii unicode, default to CTE using utf-8

In Python 2, if a unicode string was assigned as the value of a header,
email would automatically CTE-encode it using the utf-8 charset.
This capability was lost in the Python 3 translation, and this patch
restores it.

Patch by Ali Ikinci, assisted by R. David Murray.

I also fixed the mailbox test that depended on non-ASCII input causing
message_from_string to raise an error (the test's own comment noted that
relying on this was a bad idea).  It now uses support.patch to induce an
error during message serialization.
parents b20a019d 7441a7ae
......@@ -283,7 +283,12 @@ class Header:
# character set, otherwise an early error is thrown.
output_charset = charset.output_codec or 'us-ascii'
if output_charset != _charset.UNKNOWN8BIT:
try:
s.encode(output_charset, errors)
except UnicodeEncodeError:
if output_charset!='us-ascii':
raise
charset = UTF8
self._chunks.append((s, charset))
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
......
......@@ -604,6 +604,19 @@ class TestMessageAPI(TestEmailBase):
msg['Dummy'] = 'dummy\nX-Injected-Header: test'
self.assertRaises(errors.HeaderParseError, msg.as_string)
def test_unicode_header_defaults_to_utf8_encoding(self):
# Issue 14291
m = MIMEText('abc\n')
m['Subject'] = 'É test'
self.assertEqual(str(m),textwrap.dedent("""\
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: =?utf-8?q?=C3=89_test?=
abc
"""))
# Test the email.encoders module
class TestEncoders(unittest.TestCase):
......@@ -1045,9 +1058,13 @@ Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-W
'f\xfcr Offshore-Windkraftprojekte '
'<a-very-long-address@example.com>')
msg['Reply-To'] = header_string
self.assertRaises(UnicodeEncodeError, msg.as_string)
eq(msg.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
=?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
""")
msg = Message()
msg['Reply-To'] = Header(header_string, 'utf-8',
msg['Reply-To'] = Header(header_string,
header_name='Reply-To')
eq(msg.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
......
......@@ -111,10 +111,10 @@ class TestMailbox(TestBase):
self.assertMailboxEmpty()
def test_add_that_raises_leaves_mailbox_empty(self):
# XXX This test will start failing when Message learns to handle
# non-ASCII string headers, and a different internal failure will
# need to be found or manufactured.
with self.assertRaises(ValueError):
def raiser(*args, **kw):
raise Exception("a fake error")
support.patch(self, email.generator.BytesGenerator, 'flatten', raiser)
with self.assertRaises(Exception):
self._box.add(email.message_from_string("From: Alphöso"))
self.assertEqual(len(self._box), 0)
self._box.close()
......
......@@ -470,6 +470,7 @@ Gerhard Häring
Fredrik Håård
Catalin Iacob
Mihai Ibanescu
Ali Ikinci
Lars Immisch
Bobby Impollonia
Meador Inge
......
......@@ -24,6 +24,9 @@ Core and Builtins
Library
-------
- Issue #14291: Email now defaults to utf-8 for non-ASCII unicode headers
instead of raising an error. This fixes a regression relative to 2.7.
- Issue #989712: Support using Tk without a mainloop.
- Issue #5219: Prevent event handler cascade in IDLE.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment