Commit f4fdff71 authored by Barry Warsaw

Header.__init__(), .append(): Add an optional argument `errors' which
is passed straight through to the unicode() and ustr.encode() calls.
I think it's the best we can do to address the UnicodeErrors in badly
encoded headers such as those described in SF bug #648119.
parent 72261c9d
...@@ -127,7 +127,7 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None, ...@@ -127,7 +127,7 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None,
class Header: class Header:
def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None, def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None,
continuation_ws=' '): continuation_ws=' ', errors='strict'):
"""Create a MIME-compliant header that can contain many character sets. """Create a MIME-compliant header that can contain many character sets.
Optional s is the initial header value. If None, the initial header Optional s is the initial header value. If None, the initial header
...@@ -150,6 +150,8 @@ class Header: ...@@ -150,6 +150,8 @@ class Header:
continuation_ws must be RFC 2822 compliant folding whitespace (usually continuation_ws must be RFC 2822 compliant folding whitespace (usually
either a space or a hard tab) which will be prepended to continuation either a space or a hard tab) which will be prepended to continuation
lines. lines.
errors is passed through to the .append() call.
""" """
if charset is None: if charset is None:
charset = USASCII charset = USASCII
...@@ -161,7 +163,7 @@ class Header: ...@@ -161,7 +163,7 @@ class Header:
# BAW: I believe `chunks' and `maxlinelen' should be non-public. # BAW: I believe `chunks' and `maxlinelen' should be non-public.
self._chunks = [] self._chunks = []
if s is not None: if s is not None:
self.append(s, charset) self.append(s, charset, errors)
if maxlinelen is None: if maxlinelen is None:
maxlinelen = MAXLINELEN maxlinelen = MAXLINELEN
if header_name is None: if header_name is None:
...@@ -196,7 +198,7 @@ class Header: ...@@ -196,7 +198,7 @@ class Header:
def __ne__(self, other): def __ne__(self, other):
return not self == other return not self == other
def append(self, s, charset=None): def append(self, s, charset=None, errors='strict'):
"""Append a string to the MIME header. """Append a string to the MIME header.
Optional charset, if given, should be a Charset instance or the name Optional charset, if given, should be a Charset instance or the name
...@@ -213,6 +215,9 @@ class Header: ...@@ -213,6 +215,9 @@ class Header:
using RFC 2047 rules, the Unicode string will be encoded using the using RFC 2047 rules, the Unicode string will be encoded using the
following charsets in order: us-ascii, the charset hint, utf-8. The following charsets in order: us-ascii, the charset hint, utf-8. The
first character set not to provoke a UnicodeError is used. first character set not to provoke a UnicodeError is used.
Optional `errors' is passed as the third argument to any unicode() or
ustr.encode() call.
""" """
if charset is None: if charset is None:
charset = self._charset charset = self._charset
...@@ -227,12 +232,12 @@ class Header: ...@@ -227,12 +232,12 @@ class Header:
# Possibly raise UnicodeError if the byte string can't be # Possibly raise UnicodeError if the byte string can't be
# converted to a unicode with the input codec of the charset. # converted to a unicode with the input codec of the charset.
incodec = charset.input_codec or 'us-ascii' incodec = charset.input_codec or 'us-ascii'
ustr = unicode(s, incodec) ustr = unicode(s, incodec, errors)
# Now make sure that the unicode could be converted back to a # Now make sure that the unicode could be converted back to a
# byte string with the output codec, which may be different # byte string with the output codec, which may be different
# than the input codec. Still, use the original byte string. # than the input codec. Still, use the original byte string.
outcodec = charset.output_codec or 'us-ascii' outcodec = charset.output_codec or 'us-ascii'
ustr.encode(outcodec) ustr.encode(outcodec, errors)
elif isinstance(s, UnicodeType): elif isinstance(s, UnicodeType):
# Now we have to be sure the unicode string can be converted # Now we have to be sure the unicode string can be converted
# to a byte string with a reasonable output codec. We want to # to a byte string with a reasonable output codec. We want to
...@@ -240,7 +245,7 @@ class Header: ...@@ -240,7 +245,7 @@ class Header:
for charset in USASCII, charset, UTF8: for charset in USASCII, charset, UTF8:
try: try:
outcodec = charset.output_codec or 'us-ascii' outcodec = charset.output_codec or 'us-ascii'
s = s.encode(outcodec) s = s.encode(outcodec, errors)
break break
except UnicodeError: except UnicodeError:
pass pass
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment