Commit 27e9de66 authored by R David Murray

#20531: Revert e20f98a8ed71, the 3.4 version of the #19063 fix.

parent 790202d6
...@@ -196,13 +196,7 @@ Here are the methods of the :class:`Message` class: ...@@ -196,13 +196,7 @@ Here are the methods of the :class:`Message` class:
Set the entire message object's payload to *payload*. It is the client's Set the entire message object's payload to *payload*. It is the client's
responsibility to ensure the payload invariants. Optional *charset* sets responsibility to ensure the payload invariants. Optional *charset* sets
the message's character set; see :meth:`set_charset` for details. If the message's default character set; see :meth:`set_charset` for details.
*payload* is a string containing non-ASCII characters, *charset* is
required.
.. versionchanged:: 3.4
Previous to 3.4 *charset* was not required when *payload* was a
non-ASCII string, but omitting it produced nonsense results.
.. method:: set_charset(charset) .. method:: set_charset(charset)
......
...@@ -378,19 +378,18 @@ class Charset: ...@@ -378,19 +378,18 @@ class Charset:
return None return None
def body_encode(self, string): def body_encode(self, string):
"""Body-encode a string, converting it first to bytes if needed. """Body-encode a string by converting it first to bytes.
The type of encoding (base64 or quoted-printable) will be based on The type of encoding (base64 or quoted-printable) will be based on
self.body_encoding. If body_encoding is None, we perform no CTE self.body_encoding. If body_encoding is None, we assume the
encoding (the CTE will be either 7bit or 8bit), we just encode the output charset is a 7bit encoding, so re-encoding the decoded
binary representation to ascii using the surrogateescape error handler, string using the ascii codec produces the correct string version
which will enable the Generators to produce the correct output. of the content.
""" """
if not string: # 7bit/8bit encodings return the string unchanged (module conversions)
return string if self.body_encoding is BASE64:
if isinstance(string, str): if isinstance(string, str):
string = string.encode(self.output_charset) string = string.encode(self.output_charset)
if self.body_encoding is BASE64:
return email.base64mime.body_encode(string) return email.base64mime.body_encode(string)
elif self.body_encoding is QP: elif self.body_encoding is QP:
# quopromime.body_encode takes a string, but operates on it as if # quopromime.body_encode takes a string, but operates on it as if
...@@ -399,7 +398,15 @@ class Charset: ...@@ -399,7 +398,15 @@ class Charset:
# character set, then, we must turn it into pseudo bytes via the # character set, then, we must turn it into pseudo bytes via the
# latin1 charset, which will encode any byte as a single code point # latin1 charset, which will encode any byte as a single code point
# between 0 and 255, which is what body_encode is expecting. # between 0 and 255, which is what body_encode is expecting.
string = string.decode('latin1') #
# Note that this clause doesn't handle the case of a _payload that
# is already bytes. It never did, and the semantics of _payload
# being bytes has never been nailed down, so fixing that is a
# longer term TODO.
if isinstance(string, str):
string = string.encode(self.output_charset).decode('latin1')
return email.quoprimime.body_encode(string) return email.quoprimime.body_encode(string)
else: else:
return string.decode('ascii', 'surrogateescape') if isinstance(string, str):
string = string.encode(self.output_charset).decode('ascii')
return string
...@@ -301,22 +301,8 @@ class Message: ...@@ -301,22 +301,8 @@ class Message:
Optional charset sets the message's default character set. See Optional charset sets the message's default character set. See
set_charset() for details. set_charset() for details.
""" """
if hasattr(payload, 'encode'): if isinstance(payload, bytes):
if charset is None: payload = payload.decode('ascii', 'surrogateescape')
try:
payload.encode('ascii', 'surrogateescape')
except UnicodeError:
raise TypeError("charset argument must be specified"
" when non-ASCII characters are used in the"
" payload") from None
self._payload = payload
return
if not isinstance(charset, Charset):
charset = Charset(charset)
payload = payload.encode(charset.output_charset)
if hasattr(payload, 'decode'):
self._payload = payload.decode('ascii', 'surrogateescape')
else:
self._payload = payload self._payload = payload
if charset is not None: if charset is not None:
self.set_charset(charset) self.set_charset(charset)
...@@ -356,7 +342,7 @@ class Message: ...@@ -356,7 +342,7 @@ class Message:
try: try:
cte(self) cte(self)
except TypeError: except TypeError:
self._payload = charset.body_encode(self.get_payload(decode=True)) self._payload = charset.body_encode(self._payload)
self.add_header('Content-Transfer-Encoding', cte) self.add_header('Content-Transfer-Encoding', cte)
def get_charset(self): def get_charset(self):
......
...@@ -208,11 +208,12 @@ class TestRawDataManager(TestEmailBase): ...@@ -208,11 +208,12 @@ class TestRawDataManager(TestEmailBase):
"Basìc tëxt.\n") "Basìc tëxt.\n")
def test_get_text_plain_utf8_base64_recoverable_bad_CTE_data(self): def test_get_text_plain_utf8_base64_recoverable_bad_CTE_data(self):
m = self._bytes_msg(textwrap.dedent("""\ m = self._str_msg(textwrap.dedent("""\
Content-Type: text/plain; charset="utf8" Content-Type: text/plain; charset="utf8"
Content-Transfer-Encoding: base64 Content-Transfer-Encoding: base64
QmFzw6xjIHTDq3h0Lgo""").encode('ascii') + b'\xFF=\n') QmFzw6xjIHTDq3h0Lgo\xFF=
"""))
self.assertEqual(raw_data_manager.get_content(m, errors='ignore'), self.assertEqual(raw_data_manager.get_content(m, errors='ignore'),
"Basìc tëxt.\n") "Basìc tëxt.\n")
......
...@@ -92,44 +92,6 @@ class TestMessageAPI(TestEmailBase): ...@@ -92,44 +92,6 @@ class TestMessageAPI(TestEmailBase):
msg.set_payload('This is a string payload', charset) msg.set_payload('This is a string payload', charset)
self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1') self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
def test_set_payload_with_8bit_data_and_charset(self):
data = b'\xd0\x90\xd0\x91\xd0\x92'
charset = Charset('utf-8')
msg = Message()
msg.set_payload(data, charset)
self.assertEqual(msg['content-transfer-encoding'], 'base64')
self.assertEqual(msg.get_payload(decode=True), data)
self.assertEqual(msg.get_payload(), '0JDQkdCS\n')
def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
data = b'\xd0\x90\xd0\x91\xd0\x92'
charset = Charset('utf-8')
charset.body_encoding = None # Disable base64 encoding
msg = Message()
msg.set_payload(data.decode('utf-8'), charset)
self.assertEqual(msg['content-transfer-encoding'], '8bit')
self.assertEqual(msg.get_payload(decode=True), data)
def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
data = b'\xd0\x90\xd0\x91\xd0\x92'
charset = Charset('utf-8')
charset.body_encoding = None # Disable base64 encoding
msg = Message()
msg.set_payload(data, charset)
self.assertEqual(msg['content-transfer-encoding'], '8bit')
self.assertEqual(msg.get_payload(decode=True), data)
def test_set_payload_to_list(self):
msg = Message()
msg.set_payload([])
self.assertEqual(msg.get_payload(), [])
def test_set_payload_with_non_ascii_and_no_charset_raises(self):
data = b'\xd0\x90\xd0\x91\xd0\x92'.decode('utf-8')
msg = Message()
with self.assertRaises(TypeError):
msg.set_payload(data)
def test_get_charsets(self): def test_get_charsets(self):
eq = self.assertEqual eq = self.assertEqual
...@@ -596,10 +558,20 @@ class TestMessageAPI(TestEmailBase): ...@@ -596,10 +558,20 @@ class TestMessageAPI(TestEmailBase):
self.assertIsInstance(msg.defects[0], self.assertIsInstance(msg.defects[0],
errors.InvalidBase64CharactersDefect) errors.InvalidBase64CharactersDefect)
def test_broken_unicode_payload(self):
# This test improves coverage but is not a compliance test.
# The behavior in this situation is currently undefined by the API.
x = 'this is a br\xf6ken thing to do'
msg = Message()
msg['content-type'] = 'text/plain'
msg['content-transfer-encoding'] = '8bit'
msg.set_payload(x)
self.assertEqual(msg.get_payload(decode=True),
bytes(x, 'raw-unicode-escape'))
def test_questionable_bytes_payload(self): def test_questionable_bytes_payload(self):
# This test improves coverage but is not a compliance test, # This test improves coverage but is not a compliance test,
# since it involves poking inside the black box in a way # since it involves poking inside the black box.
# that actually breaks the model invariants.
x = 'this is a quéstionable thing to do'.encode('utf-8') x = 'this is a quéstionable thing to do'.encode('utf-8')
msg = Message() msg = Message()
msg['content-type'] = 'text/plain; charset="utf-8"' msg['content-type'] = 'text/plain; charset="utf-8"'
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment