Commit 67f8f2fe authored by Barry Warsaw

append(): Fixing the test for convertibility after consultation with
Ben.  If s is a byte string, make sure it can be converted to unicode
with the input codec, and from unicode with the output codec, or raise
a UnicodeError exception early.  Skip this test (and the unicode->byte
string conversion) when the charset is our faux 8bit raw charset.
parent 816aebdf
...@@ -218,20 +218,34 @@ class Header: ...@@ -218,20 +218,34 @@ class Header:
charset = self._charset charset = self._charset
elif not isinstance(charset, Charset): elif not isinstance(charset, Charset):
charset = Charset(charset) charset = Charset(charset)
# Normalize and check the string # If the charset is our faux 8bit charset, leave the string unchanged
if isinstance(s, StringType): if charset <> '8bit':
# Possibly raise UnicodeError if it can't be encoded # We need to test that the string can be converted to unicode and
unicode(s, charset.get_output_charset()) # back to a byte string, given the input and output codecs of the
elif isinstance(s, UnicodeType): # charset.
# Convert Unicode to byte string for later concatenation if isinstance(s, StringType):
for charset in USASCII, charset, UTF8: # Possibly raise UnicodeError if the byte string can't be
try: # converted to a unicode with the input codec of the charset.
s = s.encode(charset.get_output_charset()) incodec = charset.input_codec or 'us-ascii'
break ustr = unicode(s, incodec)
except UnicodeError: # Now make sure that the unicode could be converted back to a
pass # byte string with the output codec, which may be different
else: # than the input codec. Still, use the original byte string.
assert False, 'Could not encode to utf-8' outcodec = charset.output_codec or 'us-ascii'
ustr.encode(outcodec)
elif isinstance(s, UnicodeType):
# Now we have to be sure the unicode string can be converted
# to a byte string with a reasonable output codec. We want to
# use the byte string in the chunk.
for charset in USASCII, charset, UTF8:
try:
outcodec = charset.output_codec or 'us-ascii'
s = s.encode(outcodec)
break
except UnicodeError:
pass
else:
assert False, 'utf-8 conversion failed'
self._chunks.append((s, charset)) self._chunks.append((s, charset))
def _split(self, s, charset, firstline=False): def _split(self, s, charset, firstline=False):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment