Commit e5197be1 authored by R David Murray

#20531: Apply the 3.3 version of the #19063 fix.

So passing unicode to set_payload works again (but still doesn't
do what you want when the message is serialized).
parent 0e7fc387
...@@ -386,7 +386,8 @@ class Charset: ...@@ -386,7 +386,8 @@ class Charset:
string using the ascii codec produces the correct string version string using the ascii codec produces the correct string version
of the content. of the content.
""" """
# 7bit/8bit encodings return the string unchanged (module conversions) if not string:
return string
if self.body_encoding is BASE64: if self.body_encoding is BASE64:
if isinstance(string, str): if isinstance(string, str):
string = string.encode(self.output_charset) string = string.encode(self.output_charset)
...@@ -398,13 +399,9 @@ class Charset: ...@@ -398,13 +399,9 @@ class Charset:
# character set, then, we must turn it into pseudo bytes via the # character set, then, we must turn it into pseudo bytes via the
# latin1 charset, which will encode any byte as a single code point # latin1 charset, which will encode any byte as a single code point
# between 0 and 255, which is what body_encode is expecting. # between 0 and 255, which is what body_encode is expecting.
#
# Note that this clause doesn't handle the case of a _payload that
# is already bytes. It never did, and the semantics of _payload
# being bytes has never been nailed down, so fixing that is a
# longer term TODO.
if isinstance(string, str): if isinstance(string, str):
string = string.encode(self.output_charset).decode('latin1') string = string.encode(self.output_charset)
string = string.decode('latin1')
return email.quoprimime.body_encode(string) return email.quoprimime.body_encode(string)
else: else:
if isinstance(string, str): if isinstance(string, str):
......
...@@ -301,9 +301,19 @@ class Message: ...@@ -301,9 +301,19 @@ class Message:
Optional charset sets the message's default character set. See Optional charset sets the message's default character set. See
set_charset() for details. set_charset() for details.
""" """
if isinstance(payload, bytes): if hasattr(payload, 'encode'):
payload = payload.decode('ascii', 'surrogateescape') if charset is None:
self._payload = payload # We should check for ASCII-only here, but we can't do that
# for backward compatibility reasons. Fixed in 3.4.
self._payload = payload
return
if not isinstance(charset, Charset):
charset = Charset(charset)
payload = payload.encode(charset.output_charset)
if hasattr(payload, 'decode'):
self._payload = payload.decode('ascii', 'surrogateescape')
else:
self._payload = payload
if charset is not None: if charset is not None:
self.set_charset(charset) self.set_charset(charset)
...@@ -342,7 +352,15 @@ class Message: ...@@ -342,7 +352,15 @@ class Message:
try: try:
cte(self) cte(self)
except TypeError: except TypeError:
self._payload = charset.body_encode(self._payload) # This if is for backward compatibility and will be removed
# in 3.4 when the ascii check is added to set_payload.
payload = self._payload
if payload:
try:
payload = payload.encode('ascii', 'surrogateescape')
except UnicodeError:
payload = payload.encode(charset.output_charset)
self._payload = charset.body_encode(payload)
self.add_header('Content-Transfer-Encoding', cte) self.add_header('Content-Transfer-Encoding', cte)
def get_charset(self): def get_charset(self):
......
...@@ -92,6 +92,38 @@ class TestMessageAPI(TestEmailBase): ...@@ -92,6 +92,38 @@ class TestMessageAPI(TestEmailBase):
msg.set_payload('This is a string payload', charset) msg.set_payload('This is a string payload', charset)
self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1') self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
def test_set_payload_with_8bit_data_and_charset(self):
# A bytes payload containing non-ASCII (8-bit) bytes, passed together with
# a utf-8 Charset, is expected to be stored base64-encoded and to
# round-trip losslessly through get_payload(decode=True).
# The six bytes below are the UTF-8 encoding of three Cyrillic letters.
data = b'\xd0\x90\xd0\x91\xd0\x92'
charset = Charset('utf-8')
msg = Message()
msg.set_payload(data, charset)
# The Content-Transfer-Encoding header must announce base64 ...
self.assertEqual(msg['content-transfer-encoding'], 'base64')
# ... decoding the payload must give back the original bytes ...
self.assertEqual(msg.get_payload(decode=True), data)
# ... and the undecoded payload is the base64 text of those bytes.
self.assertEqual(msg.get_payload(), '0JDQkdCS\n')
def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
# A str payload with non-ASCII characters, passed with a utf-8 Charset
# whose body_encoding has been cleared: the test expects an 8bit
# Content-Transfer-Encoding and the UTF-8 bytes back from decode=True.
data = b'\xd0\x90\xd0\x91\xd0\x92'
charset = Charset('utf-8')
charset.body_encoding = None # Disable base64 encoding
msg = Message()
# The payload is handed over as text (str), not as bytes.
msg.set_payload(data.decode('utf-8'), charset)
self.assertEqual(msg['content-transfer-encoding'], '8bit')
# Decoding must reproduce the UTF-8 bytes the text was built from.
self.assertEqual(msg.get_payload(decode=True), data)
def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
# A bytes payload with non-ASCII (8-bit) bytes, passed with a utf-8
# Charset whose body_encoding has been cleared: the test expects an 8bit
# Content-Transfer-Encoding and the same bytes back from decode=True.
data = b'\xd0\x90\xd0\x91\xd0\x92'
charset = Charset('utf-8')
charset.body_encoding = None # Disable base64 encoding
msg = Message()
# The payload is handed over as raw bytes this time.
msg.set_payload(data, charset)
self.assertEqual(msg['content-transfer-encoding'], '8bit')
# Decoding must give back exactly the bytes that were set.
self.assertEqual(msg.get_payload(decode=True), data)
def test_set_payload_to_list(self):
# A list payload set without a charset must still be accepted:
# get_payload() is expected to return a list equal to the one passed in.
msg = Message()
msg.set_payload([])
self.assertEqual(msg.get_payload(), [])
def test_get_charsets(self): def test_get_charsets(self):
eq = self.assertEqual eq = self.assertEqual
......
...@@ -24,6 +24,10 @@ Core and Builtins ...@@ -24,6 +24,10 @@ Core and Builtins
Library Library
------- -------
- Issue #20531: Revert 3.4 version of fix for #19063, and apply the 3.3
version. That is, do *not* raise an error if unicode is passed to
email.message.Message.set_payload.
- Issue #20476: If a non-compat32 policy is used with any of the email parsers, - Issue #20476: If a non-compat32 policy is used with any of the email parsers,
EmailMessage is now used as the factory class. The factory class should EmailMessage is now used as the factory class. The factory class should
really come from the policy; that will get fixed in 3.5. really come from the policy; that will get fixed in 3.5.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment