Commit ccb9d05b authored by R. David Murray's avatar R. David Murray

Merged revisions 87217 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r87217 | r.david.murray | 2010-12-13 18:51:19 -0500 (Mon, 13 Dec 2010) | 5 lines

  #1078919: make add_header automatically do RFC2231 encoding when needed.

  Also document the use of three-tuples if control of the charset
  and language is desired.
........
parent fa66d583
...@@ -257,7 +257,15 @@ Here are the methods of the :class:`Message` class: ...@@ -257,7 +257,15 @@ Here are the methods of the :class:`Message` class:
taken as the parameter name, with underscores converted to dashes (since taken as the parameter name, with underscores converted to dashes (since
dashes are illegal in Python identifiers). Normally, the parameter will dashes are illegal in Python identifiers). Normally, the parameter will
be added as ``key="value"`` unless the value is ``None``, in which case be added as ``key="value"`` unless the value is ``None``, in which case
only the key will be added. only the key will be added. If the value contains non-ASCII characters,
it can be specified as a three tuple in the format
``(CHARSET, LANGUAGE, VALUE)``, where ``CHARSET`` is a string naming the
charset to be used to encode the value, ``LANGUAGE`` can usually be set
to ``None`` or the empty string (see :RFC:`2231` for other possibilities),
and ``VALUE`` is the string value containing non-ASCII code points. If
a three tuple is not passed and the value contains non-ASCII characters,
it is automatically encoded in :RFC:`2231` format using a ``CHARSET``
of ``utf-8`` and a ``LANGUAGE`` of ``None``.
Here's an example:: Here's an example::
...@@ -267,6 +275,15 @@ Here are the methods of the :class:`Message` class: ...@@ -267,6 +275,15 @@ Here are the methods of the :class:`Message` class:
Content-Disposition: attachment; filename="bud.gif" Content-Disposition: attachment; filename="bud.gif"
An example with non-ASCII characters::
msg.add_header('Content-Disposition', 'attachment',
filename=('iso-8859-1', '', 'Fußballer.ppt'))
Which produces ::
Content-Disposition: attachment; filename*="iso-8859-1''Fu%DFballer.ppt"
.. method:: replace_header(_name, _value) .. method:: replace_header(_name, _value)
...@@ -356,7 +373,7 @@ Here are the methods of the :class:`Message` class: ...@@ -356,7 +373,7 @@ Here are the methods of the :class:`Message` class:
:rfc:`2231`, you can collapse the parameter value by calling :rfc:`2231`, you can collapse the parameter value by calling
:func:`email.utils.collapse_rfc2231_value`, passing in the return value :func:`email.utils.collapse_rfc2231_value`, passing in the return value
from :meth:`get_param`. This will return a suitably decoded Unicode from :meth:`get_param`. This will return a suitably decoded Unicode
string whn the value is a tuple, or the original string unquoted if it string when the value is a tuple, or the original string unquoted if it
isn't. For example:: isn't. For example::
rawparam = msg.get_param('foo') rawparam = msg.get_param('foo')
......
...@@ -39,7 +39,11 @@ def _splitparam(param): ...@@ -39,7 +39,11 @@ def _splitparam(param):
def _formatparam(param, value=None, quote=True): def _formatparam(param, value=None, quote=True):
"""Convenience function to format and return a key=value pair. """Convenience function to format and return a key=value pair.
This will quote the value if needed or if quote is true. This will quote the value if needed or if quote is true. If value is a
three tuple (charset, language, value), it will be encoded according
to RFC2231 rules. If it contains non-ascii characters it will likewise
be encoded according to RFC2231 rules, using the utf-8 charset and
a null language.
""" """
if value is not None and len(value) > 0: if value is not None and len(value) > 0:
# A tuple is used for RFC 2231 encoded parameter values where items # A tuple is used for RFC 2231 encoded parameter values where items
...@@ -49,6 +53,12 @@ def _formatparam(param, value=None, quote=True): ...@@ -49,6 +53,12 @@ def _formatparam(param, value=None, quote=True):
# Encode as per RFC 2231 # Encode as per RFC 2231
param += '*' param += '*'
value = utils.encode_rfc2231(value[2], value[0], value[1]) value = utils.encode_rfc2231(value[2], value[0], value[1])
else:
try:
value.encode('ascii')
except UnicodeEncodeError:
param += '*'
value = utils.encode_rfc2231(value, 'utf-8', '')
# BAW: Please check this. I think that if quote is set it should # BAW: Please check this. I think that if quote is set it should
# force quoting even if not necessary. # force quoting even if not necessary.
if quote or tspecials.search(value): if quote or tspecials.search(value):
...@@ -391,11 +401,19 @@ class Message: ...@@ -391,11 +401,19 @@ class Message:
name is the header field to add. keyword arguments can be used to set name is the header field to add. keyword arguments can be used to set
additional parameters for the header field, with underscores converted additional parameters for the header field, with underscores converted
to dashes. Normally the parameter will be added as key="value" unless to dashes. Normally the parameter will be added as key="value" unless
value is None, in which case only the key will be added. value is None, in which case only the key will be added. If a
parameter value contains non-ASCII characters it can be specified as a
three-tuple of (charset, language, value), in which case it will be
encoded according to RFC2231 rules. Otherwise it will be encoded using
the utf-8 charset and a language of ''.
Example: Examples:
msg.add_header('content-disposition', 'attachment', filename='bud.gif') msg.add_header('content-disposition', 'attachment', filename='bud.gif')
msg.add_header('content-disposition', 'attachment',
filename=('utf-8', '', 'Fußballer.ppt'))
msg.add_header('content-disposition', 'attachment',
filename='Fußballer.ppt')
""" """
parts = [] parts = []
for k, v in _params.items(): for k, v in _params.items():
......
...@@ -504,6 +504,29 @@ class TestMessageAPI(TestEmailBase): ...@@ -504,6 +504,29 @@ class TestMessageAPI(TestEmailBase):
self.assertEqual(msg.get_payload(decode=True), self.assertEqual(msg.get_payload(decode=True),
bytes(x, 'raw-unicode-escape')) bytes(x, 'raw-unicode-escape'))
# Issue 1078919
def test_ascii_add_header(self):
msg = Message()
msg.add_header('Content-Disposition', 'attachment',
filename='bud.gif')
self.assertEqual('attachment; filename="bud.gif"',
msg['Content-Disposition'])
def test_noascii_add_header(self):
msg = Message()
msg.add_header('Content-Disposition', 'attachment',
filename="Fußballer.ppt")
self.assertEqual(
'attachment; filename*="utf-8\'\'Fu%C3%9Fballer.ppt"',
msg['Content-Disposition'])
def test_nonascii_add_header_via_triple(self):
msg = Message()
msg.add_header('Content-Disposition', 'attachment',
filename=('iso-8859-1', '', 'Fußballer.ppt'))
self.assertEqual(
'attachment; filename*="iso-8859-1\'\'Fu%DFballer.ppt"',
msg['Content-Disposition'])
# Test the email.encoders module # Test the email.encoders module
......
...@@ -17,6 +17,9 @@ Core and Builtins ...@@ -17,6 +17,9 @@ Core and Builtins
Library Library
------- -------
- Issue #1078919: add_header now automatically RFC2231 encodes parameters
that contain non-ascii values.
- Issue #10107: Warn about unsaved files in IDLE on OSX. - Issue #10107: Warn about unsaved files in IDLE on OSX.
- Issue #7904: Changes to urllib.parse.urlsplit to handle schemes as defined by - Issue #7904: Changes to urllib.parse.urlsplit to handle schemes as defined by
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment