Commit 48488053 authored by Barry Warsaw

__unicode__(): When converting to a unicode string, we need to
preserve spaces at the boundaries between encoded and unencoded words.
RFC 2047 is ambiguous here, but most people expect the space to be
preserved. Really closes SF bug #640110.
parent e05dcce6
......@@ -28,8 +28,10 @@ CRLFSPACE = '\r\n '
# String fragments used as named constants.
CRLF = '\r\n'
NL = '\n'
SPACE = ' '
# Unicode counterpart of SPACE; Header.__unicode__() inserts it at
# encoded/unencoded chunk boundaries.
USPACE = u' '
# A run of eight spaces.
SPACE8 = ' ' * 8
EMPTYSTRING = ''
# Unicode counterpart of EMPTYSTRING; Header.__unicode__() joins its decoded
# chunks with it.
UEMPTYSTRING = u''
# NOTE(review): presumably the maximum length of a header line -- confirm
# against the code that uses it.
MAXLINELEN = 76
......@@ -204,9 +206,24 @@ class Header:
def __unicode__(self):
    """Helper for the built-in unicode function.

    Decodes each (string, charset) pair in self._chunks to unicode and
    concatenates the results.  A single space is inserted wherever two
    adjacent chunks differ in kind, i.e. one is plain (charset is None or
    us-ascii) and the other is in some other charset.  RFC 2047 is
    ambiguous about that whitespace, but readers expect it to survive.
    """
    uchunks = []
    # Whether the previously emitted chunk was in a real (non-ASCII)
    # charset.  Tracking the normalized kind rather than the raw charset
    # means a leading chunk with an explicit us-ascii charset is treated
    # exactly like one with no charset at all.
    last_encoded = False
    for s, charset in self._chunks:
        encoded = charset not in (None, 'us-ascii')
        # We must preserve spaces between encoded and non-encoded word
        # boundaries, which means for us we need to add a space when we go
        # from a charset to None/us-ascii, or from None/us-ascii to a
        # charset.  Only do this for the second and subsequent chunks.
        if uchunks and encoded != last_encoded:
            uchunks.append(USPACE)
        last_encoded = encoded
        # charset item is a Charset instance so we need to stringify it.
        uchunks.append(unicode(s, str(charset)))
    return UEMPTYSTRING.join(uchunks)
# Rich comparison operators for equality only. BAW: does it make sense to
# have or explicitly disable <, <=, >, >= operators?
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment