__unicode__(): When converting to a unicode string, we need to

preserve spaces at the boundaries between encoded and unencoded words. RFC 2047 is ambiguous here, but most people expect the space to be preserved. Really closes SF bug #640110.

__unicode__(): When converting to a unicode string, we need to
preserve spaces at the boundaries between encoded and unencoded words. RFC 2047 is ambiguous here, but most people expect the space to be preserved. Really closes SF bug #640110.
48488053 · Barry Warsaw · e05dcce6 · 48488053
Commit 48488053 authored Mar 06, 2003 by Barry Warsaw
Hide whitespace changes
Inline Side-by-side

Showing with 20 additions and 3 deletions

Lib/email/Header.py Lib/email/Header.py +20 -3

No files found.
--- a/Lib/email/Header.py
+++ b/Lib/email/Header.py
@@ -28,8 +28,10 @@ CRLFSPACE = '\r\n '
 CRLF = '\r\n'
 NL = '\n'
 SPACE = ' '
+USPACE = u' '
 SPACE8 = ' ' * 8
 EMPTYSTRING = ''
+UEMPTYSTRING = u''

 MAXLINELEN = 76

@@ -204,9 +206,24 @@ class Header:

    def __unicode__(self):
        """Helper for the built-in unicode function."""
-        # charset item is a Charset instance so we need to stringify it.
-        uchunks = [unicode(s, str(charset)) for s, charset in self._chunks]
-        return u''.join(uchunks)
+        uchunks = []
+        lastcs = None
+        for s, charset in self._chunks:
+            # We must preserve spaces between encoded and non-encoded word
+            # boundaries, which means for us we need to add a space when we go
+            # from a charset to None/us-ascii, or from None/us-ascii to a
+            # charset.  Only do this for the second and subsequent chunks.
+            nextcs = charset
+            if uchunks:
+                if lastcs is not None:
+                    if nextcs is None or nextcs == 'us-ascii':
+                        uchunks.append(USPACE)
+                        nextcs = None
+                elif nextcs is not None and nextcs <> 'us-ascii':
+                    uchunks.append(USPACE)
+            lastcs = nextcs
+            uchunks.append(unicode(s, str(charset)))
+        return UEMPTYSTRING.join(uchunks)

    # Rich comparison operators for equality only.  BAW: does it make sense to
    # have or explicitly disable <, <=, >, >= operators?