Sync'ing with standalone email package 2.0.1. This adds support for non-us-ascii character sets in headers and bodies. Some API changes (with DeprecationWarnings for the old APIs). Better RFC-compliant implementations of base64 and quoted-printable. Updated test cases. Documentation updates to follow (after I finish writing them ;).

Sync'ing with standalone email package 2.0.1. This adds support for
non-us-ascii character sets in headers and bodies. Some API changes (with DeprecationWarnings for the old APIs). Better RFC-compliant implementations of base64 and quoted-printable. Updated test cases. Documentation updates to follow (after I finish writing them ;).
409a4c08 · Barry Warsaw · 68e69338 · 409a4c08 · 409a4c08 · 409a4c08
Commit 409a4c08 authored Apr 10, 2002 by Barry Warsaw
20 changed files
--- a/Lib/email/Charset.py
+++ b/Lib/email/Charset.py
--- a/Lib/email/Encoders.py
+++ b/Lib/email/Encoders.py
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)

 """Module containing encoding functions for Image.Image and Text.Text.
@@ -11,7 +11,9 @@ from quopri import encodestring as _encodestring

 # Helpers
 def _qencode(s):
-    return _encodestring(s, quotetabs=1)
+    enc = _encodestring(s, quotetabs=1)
+    # Must encode spaces, which quopri.encodestring() doesn't do
+    return enc.replace(' ', '=20')


 def _bencode(s):
@@ -54,6 +56,10 @@ def encode_quopri(msg):
 def encode_7or8bit(msg):
    """Set the Content-Transfer-Encoding: header to 7bit or 8bit."""
    orig = msg.get_payload()
+    if orig is None:
+        # There's no payload.  For backwards compatibility we use 7bit
+        msg['Content-Transfer-Encoding'] = '7bit'
+        return
    # We play a trick to make this go fast.  If encoding to ASCII succeeds, we
    # know the data must be 7bit, otherwise treat it as 8bit.
    try:

--- a/Lib/email/Errors.py
+++ b/Lib/email/Errors.py
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)

 """email package exception classes.

--- a/Lib/email/Generator.py
+++ b/Lib/email/Generator.py
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)

 """Classes to generate plain text from a message object tree.
@@ -166,30 +166,33 @@ class Generator:
            return text
        rtn = []
        for line in text.split('\n'):
+            splitline = []
            # Short lines can remain unchanged
            if len(line.replace('\t', SPACE8)) <= maxheaderlen:
-                rtn.append(line)
-                SEMINLTAB.join(rtn)
+                splitline.append(line)
+                rtn.append(SEMINLTAB.join(splitline))
            else:
-                oldlen = len(text)
+                oldlen = len(line)
                # Try to break the line on semicolons, but if that doesn't
                # work, try to split on folding whitespace.
-                while len(text) > maxheaderlen:
-                    i = text.rfind(';', 0, maxheaderlen)
+                while len(line) > maxheaderlen:
+                    i = line.rfind(';', 0, maxheaderlen)
                    if i < 0:
                        break
-                    rtn.append(text[:i])
-                    text = text[i+1:].lstrip()
-                if len(text) <> oldlen:
+                    splitline.append(line[:i])
+                    line = line[i+1:].lstrip()
+                if len(line) <> oldlen:
                    # Splitting on semis worked
-                    rtn.append(text)
-                    return SEMINLTAB.join(rtn)
+                    splitline.append(line)
+                    rtn.append(SEMINLTAB.join(splitline))
+                    continue
                # Splitting on semis didn't help, so try to split on
                # whitespace.
-                parts = re.split(r'(\s+)', text)
+                parts = re.split(r'(\s+)', line)
                # Watch out though for "Header: longnonsplittableline"
                if parts[0].endswith(':') and len(parts) == 3:
-                    return text
+                    rtn.append(line)
+                    continue
                first = parts.pop(0)
                sublines = [first]
                acc = len(first)
@@ -203,13 +206,14 @@ class Generator:
                    else:
                        # Split it here, but don't forget to ignore the
                        # next whitespace-only part
-                        rtn.append(EMPTYSTRING.join(sublines))
+                        splitline.append(EMPTYSTRING.join(sublines))
                        del parts[0]
                        first = parts.pop(0)
                        sublines = [first]
                        acc = len(first)
-                rtn.append(EMPTYSTRING.join(sublines))
-                return NLTAB.join(rtn)
+                splitline.append(EMPTYSTRING.join(sublines))
+                rtn.append(NLTAB.join(splitline))
+        return NL.join(rtn)

    #
    # Handlers for writing types and subtypes
@@ -219,6 +223,9 @@ class Generator:
        payload = msg.get_payload()
        if payload is None:
            return
+        cset = msg.get_charset()
+        if cset is not None:
+            payload = cset.body_encode(payload)
        if not isinstance(payload, StringType):
            raise TypeError, 'string payload expected: %s' % type(payload)
        if self._mangle_from_:
@@ -233,7 +240,18 @@ class Generator:
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
-        for part in msg.get_payload():
+        subparts = msg.get_payload()
+        if subparts is None:
+            # Nothing has ever been attached
+            boundary = msg.get_boundary(failobj=_make_boundary())
+            print >> self._fp, '--' + boundary
+            print >> self._fp, '\n'
+            print >> self._fp, '--' + boundary + '--'
+            return
+        elif not isinstance(subparts, ListType):
+            # Scalar payload
+            subparts = [subparts]
+        for part in subparts:
            s = StringIO()
            g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
            g(part, unixfrom=0)
@@ -365,7 +383,7 @@ class DecodedGenerator(Generator):


 # Helper
-def _make_boundary(self, text=None):
+def _make_boundary(text=None):
    # Craft a random boundary.  If text is given, ensure that the chosen
    # boundary doesn't appear in the text.
    boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='

--- a/Lib/email/Header.py
+++ b/Lib/email/Header.py
+# Copyright (C) 2002 Python Software Foundation
+# Author: che@debian.org (Ben Gertzfield)
+
+"""Header encoding and decoding functionality."""
+
+import re
+import email.quopriMIME
+import email.base64MIME
+from email.Charset import Charset
+
+# Line-ending strings.  NLSPACE is the separator used to join the encoded
+# chunks in Header.encode_chunks(); CRLFSPACE and CRLF are not referenced in
+# the visible part of this module.
+CRLFSPACE = '\r\n '
+CRLF = '\r\n'
+NLSPACE = '\n '
+
+# Default maximum header line length (the default for Header's maxlinelen
+# argument, and the base value used by Header.guess_maxlinelen()).
+MAXLINELEN = 76
+
+# NOTE(review): ENCODE and DECODE are not referenced in the visible part of
+# this module -- confirm whether they are still needed.
+ENCODE = 1
+DECODE = 2
+
+# Match encoded-word strings in the form =?charset?q?Hello_World?=
+# The pattern has three named groups (charset, encoding, encoded), so
+# ecre.split() yields the surrounding plain text interleaved with those three
+# captured values for every match.
+ecre = re.compile(r'''
+  =\?                   # literal =?
+  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
+  \?                    # literal ?
+  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
+  \?                    # literal ?
+  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
+  \?=                   # literal ?=
+  ''', re.VERBOSE | re.IGNORECASE)
+
+
+
+# Helpers
+# Alias for the quopriMIME helper; Header.encode_chunks() calls it as
+# _max_append(list, string, maxlen, separator).
+_max_append = email.quopriMIME._max_append
+
+
+
+def decode_header(header):
+    """Decode a message header value without converting charset.
+
+    Returns a list of (decoded_string, charset) pairs containing each of the
+    decoded parts of the header.  Charset is None for non-encoded parts of the
+    header, otherwise a lower-case string containing the name of the character
+    set specified in the encoded string.
+
+    The header is processed one physical line at a time.  Adjacent parts that
+    share the same charset are merged into a single pair; adjacent non-encoded
+    parts (which can only come from consecutive lines) are merged with a
+    single space between them.
+    """
+    # If no encoding, just return the header
+    header = str(header)
+    if not ecre.search(header):
+        return [(header, None)]
+
+    decoded = []
+    for line in header.splitlines():
+        # This line might not have an encoding in it
+        if not ecre.search(line):
+            decoded.append((line, None))
+            continue
+
+        # ecre has three groups, so split() returns
+        # [text, charset, encoding, encoded, text, charset, ...]
+        parts = ecre.split(line)
+        while parts:
+            unenc = parts.pop(0).strip()
+            if unenc:
+                # Should we continue a long line?  If the previous chunk was
+                # also non-encoded text, extend it with this text rather than
+                # starting a new pair.  (This used to append the last decoded
+                # encoded-word payload instead of the text itself, which
+                # silently dropped the text.)
+                if decoded and decoded[-1][1] is None:
+                    decoded[-1] = (decoded[-1][0] + ' ' + unenc, None)
+                else:
+                    decoded.append((unenc, None))
+            if parts:
+                charset, encoding = [s.lower() for s in parts[0:2]]
+                encoded = parts[2]
+                if encoding == 'q':
+                    dec = email.quopriMIME.header_decode(encoded)
+                elif encoding == 'b':
+                    dec = email.base64MIME.decode(encoded)
+                else:
+                    # ecre only matches "q" or "b", so this is a safety net
+                    dec = encoded
+
+                # Merge consecutive encoded words that use the same charset
+                if decoded and decoded[-1][1] == charset:
+                    decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1])
+                else:
+                    decoded.append((dec, charset))
+            # Drop the charset/encoding/encoded triple just consumed
+            del parts[0:3]
+    return decoded
+
+
+
+class Header:
+    """A header value assembled from (string, charset) chunks.
+
+    Chunks are accumulated with append() and rendered into one encoded,
+    line-wrapped string by encode(), which str() also uses.
+    """
+    def __init__(self, s, charset=None, maxlinelen=MAXLINELEN,
+                 header_name=None):
+        """Create a MIME-compliant header that can contain many languages.
+
+        Specify the initial header value in s.  Specify its character set as a
+        Charset object in the charset argument.  If none, a default Charset
+        instance will be used.
+
+        You can later append to the header with append(s, charset) below;
+        charset does not have to be the same as the one initially specified
+        here.  In fact, it's optional, and if not given, defaults to the
+        charset specified in the constructor.
+
+        The maximum line length can either be specified by maxlinelen, or you
+        can pass in the name of the header field (e.g. "Subject") to let this
+        class guess the best line length to use to prevent wrapping.  The
+        default maxlinelen is 76.  If header_name is given, the guessed
+        length replaces whatever was passed as maxlinelen.
+        """
+        if charset is None:
+            charset = Charset()
+        self._charset = charset
+        # BAW: I believe `chunks' and `maxlinelen' should be non-public.
+        self._chunks = []
+        self.append(s, charset)
+        self._maxlinelen = maxlinelen
+        if header_name is not None:
+            # Overrides the maxlinelen stored on the previous line
+            self.guess_maxlinelen(header_name)
+
+    def __str__(self):
+        """A synonym for self.encode()."""
+        return self.encode()
+
+    def guess_maxlinelen(self, s=None):
+        """Guess the maximum length to make each header line.
+
+        Given a header name (e.g. "Subject"), set this header's maximum line
+        length to an appropriate length to avoid line wrapping.  If s is not
+        given, return the previous maximum line length and don't set it.
+
+        Returns the new maximum line length.
+        """
+        # BAW: is this semantic necessary?
+        if s is not None:
+            # Computed from the module-level MAXLINELEN, not from the
+            # maxlinelen given to the constructor.  The extra 2 presumably
+            # accounts for the ": " after the header name -- TODO confirm.
+            self._maxlinelen = MAXLINELEN - len(s) - 2
+        return self._maxlinelen
+
+    def append(self, s, charset=None):
+        """Append string s with Charset charset to the MIME header.
+
+        charset defaults to the one given in the class constructor.
+        """
+        if charset is None:
+            charset = self._charset
+        self._chunks.append((s, charset))
+
+    def _split(self, s, charset):
+        # Split up a header safely for use with encode_chunks.  BAW: this
+        # appears to be a private convenience method.
+        #
+        # Returns a list of (string, charset) pairs.  The string is
+        # round-tripped through the charset's to_splittable() and
+        # from_splittable(); if its encoded_header_len() is below
+        # self._maxlinelen it is returned as a single pair, otherwise it is
+        # halved and each half is split recursively.
+        # NOTE(review): the Charset methods used here are defined in
+        # email.Charset, which is not visible from this module.
+        splittable = charset.to_splittable(s)
+        encoded = charset.from_splittable(splittable)
+
+        if charset.encoded_header_len(encoded) < self._maxlinelen:
+            return [(encoded, charset)]
+        else:
+            # Divide and conquer.  The // operator keeps halfway an integer
+            # so it can be used as a slice index.
+            halfway = len(splittable) // 2
+            first = charset.from_splittable(splittable[:halfway], 0)
+            last = charset.from_splittable(splittable[halfway:], 0)
+            return self._split(first, charset) + self._split(last, charset)
+
+    def encode(self):
+        """Encode a message header, possibly converting charset and encoding.
+
+        There are many issues involved in converting a given string for use in
+        an email header.  Only certain character sets are readable in most
+        email clients, and as header strings can only contain a subset of
+        7-bit ASCII, care must be taken to properly convert and encode (with
+        Base64 or quoted-printable) header strings.  In addition, there is a
+        75-character length limit on any given encoded header field, so
+        line-wrapping must be performed, even with double-byte character sets.
+
+        This method will do its best to convert the string to the correct
+        character set used in email, and encode and line wrap it safely with
+        the appropriate scheme for that character set.
+
+        If the given charset is not known or an error occurs during
+        conversion, this function will return the header untouched.
+        """
+        newchunks = []
+        for s, charset in self._chunks:
+            newchunks += self._split(s, charset)
+        # Side effect: the stored chunks are replaced by their split form,
+        # so a later encode() call starts from the already-split chunks.
+        self._chunks = newchunks
+        return self.encode_chunks()
+
+    def encode_chunks(self):
+        """MIME-encode a header with many different charsets and/or encodings.
+
+        Given a list of pairs (string, charset), return a MIME-encoded string
+        suitable for use in a header field.  Each pair may have different
+        charsets and/or encodings, and the resulting header will accurately
+        reflect each setting.  The pairs are read from self._chunks; this
+        method takes no arguments.
+
+        Each encoding can be email.Utils.QP (quoted-printable, for ASCII-like
+        character sets like iso-8859-1), email.Utils.BASE64 (Base64, for
+        non-ASCII like character sets like KOI8-R and iso-2022-jp), or None
+        (no encoding).
+
+        Each pair will be represented on a separate line; the resulting string
+        will be in the format:
+
+        "=?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n
+          =?charset2?b?SvxyZ2VuIEL2aW5n?="
+        """
+        chunks = []
+        for header, charset in self._chunks:
+            if charset is None:
+                # NOTE(review): append() and __init__ always substitute a
+                # Charset for None, so this branch looks unreachable unless
+                # _chunks is modified directly -- confirm.
+                _max_append(chunks, header, self._maxlinelen, ' ')
+            else:
+                _max_append(chunks, charset.header_encode(header, 0),
+                            self._maxlinelen, ' ')
+        # The accumulated lines are joined with newline + space
+        return NLSPACE.join(chunks)
--- a/Lib/email/Iterators.py
+++ b/Lib/email/Iterators.py
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)

 """Various types of useful iterators and generators.

--- a/Lib/email/MIMEBase.py
+++ b/Lib/email/MIMEBase.py
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)

 """Base class for MIME specializations.

--- a/Lib/email/MIMEImage.py
+++ b/Lib/email/MIMEImage.py
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)

 """Class representing image/* type MIME documents.

--- a/Lib/email/MIMEMessage.py
+++ b/Lib/email/MIMEMessage.py
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)

 """Class representing message/* MIME documents.

--- a/Lib/email/MIMEText.py
+++ b/Lib/email/MIMEText.py
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)

 """Class representing text/* type MIME documents.
 """

+import warnings
 import MIMEBase
 from Encoders import encode_7or8bit

@@ -13,7 +14,7 @@ class MIMEText(MIMEBase.MIMEBase):
    """Class for generating text/* type MIME documents."""

    def __init__(self, _text, _subtype='plain', _charset='us-ascii',
-                 _encoder=encode_7or8bit):
+                 _encoder=None):
        """Create a text/* type MIME document.

        _text is the string for this message object.  If the text does not end
@@ -22,20 +23,26 @@ class MIMEText(MIMEBase.MIMEBase):
        _subtype is the MIME sub content type, defaulting to "plain".

        _charset is the character set parameter added to the Content-Type:
-        header.  This defaults to "us-ascii".
-
-        _encoder is a function which will perform the actual encoding for
-        transport of the text data.  It takes one argument, which is this
-        Text instance.  It should use get_payload() and set_payload() to
-        change the payload to the encoded form.  It should also add any
-        Content-Transfer-Encoding: or other headers to the message as
-        necessary.  The default encoding doesn't actually modify the payload,
-        but it does set Content-Transfer-Encoding: to either `7bit' or `8bit'
-        as appropriate.
+        header.  This defaults to "us-ascii".  Note that as a side-effect, the
+        Content-Transfer-Encoding: header will also be set.
+
+        The use of the _encoder is deprecated.  The encoding of the payload,
+        and the setting of the character set parameter now happens implicitly
+        based on the _charset argument.  If _encoder is supplied, then a
+        DeprecationWarning is used, and the _encoder functionality may
+        override any header settings indicated by _charset.  This is probably
+        not what you want.
        """
        MIMEBase.MIMEBase.__init__(self, 'text', _subtype,
                                   **{'charset': _charset})
        if _text and _text[-1] <> '\n':
            _text += '\n'
-        self.set_payload(_text)
+        self.set_payload(_text, _charset)
+        if _encoder is not None:
+            warnings.warn('_encoder argument is obsolete.',
+                          DeprecationWarning, 2)
+            # Because set_payload() with a _charset will set its own
+            # Content-Transfer-Encoding: header, we need to delete the
+            # existing one or will end up with two of them. :(
+            del self['content-transfer-encoding']
            _encoder(self)
--- a/Lib/email/Message.py
+++ b/Lib/email/Message.py
--- a/Lib/email/Parser.py
+++ b/Lib/email/Parser.py
@@ -51,9 +51,16 @@ class Parser:
        lastvalue = []
        lineno = 0
        while 1:
-            line = fp.readline()[:-1]
-            if not line or not line.strip():
+            # Don't strip the line before we test for the end condition,
+            # because whitespace-only header lines are RFC compliant
+            # continuation lines.
+            line = fp.readline()
+            if not line:
                break
+            line = line.splitlines()[0]
+            if not line:
+                break
+            # Ignore the trailing newline
            lineno += 1
            # Check for initial Unix From_ line
            if line.startswith('From '):
@@ -63,7 +70,6 @@ class Parser:
                else:
                    raise Errors.HeaderParseError(
                        'Unix-from in headers after first rfc822 header')
-            #
            # Header continuation line
            if line[0] in ' \t':
                if not lastheader:
@@ -134,11 +140,11 @@ class Parser:
                msgobj = self.parsestr(part)
                container.preamble = preamble
                container.epilogue = epilogue
-                # Ensure that the container's payload is a list
-                if not isinstance(container.get_payload(), ListType):
-                    container.set_payload([msgobj])
-                else:
-                    container.add_payload(msgobj)
+                container.attach(msgobj)
+        elif container.get_main_type() == 'multipart':
+            # Very bad.  A message is a multipart with no boundary!
+            raise Errors.BoundaryError(
+                'multipart message with no defined boundary')
        elif container.get_type() == 'message/delivery-status':
            # This special kind of type contains blocks of headers separated
            # by a blank line.  We'll represent each header block as a
@@ -160,9 +166,9 @@ class Parser:
            except Errors.HeaderParseError:
                msg = self._class()
                self._parsebody(msg, fp)
-            container.add_payload(msg)
+            container.set_payload(msg)
        else:
-            container.add_payload(fp.read())
+            container.set_payload(fp.read())




--- a/Lib/email/Utils.py
+++ b/Lib/email/Utils.py
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)

 """Miscellaneous utilities.
 """

 import time
+import socket
 import re
+import random
+import os
+import warnings
+from cStringIO import StringIO
+from types import ListType

-from rfc822 import unquote, quote, parseaddr
-from rfc822 import dump_address_pair
+from rfc822 import unquote, quote
 from rfc822 import AddrlistClass as _AddrlistClass
-from rfc822 import parsedate_tz, parsedate, mktime_tz
+from rfc822 import mktime_tz
+
+# We need wormarounds for bugs in these methods in older Pythons (see below)
+from rfc822 import parsedate as _parsedate
+from rfc822 import parsedate_tz as _parsedate_tz
+from rfc822 import parseaddr as _parseaddr

 from quopri import decodestring as _qdecode
 import base64
@@ -20,6 +30,10 @@ from Encoders import _bencode, _qencode

 COMMASPACE = ', '
 UEMPTYSTRING = u''
+CRLF = '\r\n'
+
+specialsre = re.compile(r'[][\()<>@,:;".]')
+escapesre = re.compile(r'[][\()"]')



@@ -43,6 +57,41 @@ def _bdecode(s):
    return value


+
+def fix_eols(s):
+    """Replace all line-ending characters with \r\n."""
+    # Fix newlines with no preceding carriage return
+    s = re.sub(r'(?<!\r)\n', CRLF, s)
+    # Fix carriage returns with no following newline
+    s = re.sub(r'\r(?!\n)', CRLF, s)
+    return s
+
+
+
+def formataddr(pair):
+    """The inverse of parseaddr(), this takes a 2-tuple of the form
+    (realname, email_address) and returns the string value suitable
+    for an RFC 2822 From:, To: or Cc:.
+    
+    If the first element of pair is false, then the second element is
+    returned unmodified.
+    """
+    name, address = pair
+    if name:
+        quotes = ''
+        if specialsre.search(name):
+            quotes = '"'
+        name = escapesre.sub(r'\\\g<0>', name)
+        return '%s%s%s <%s>' % (quotes, name, quotes, address)
+    return address
+
+# For backwards compatibility
+def dump_address_pair(pair):
+    warnings.warn('Use email.Utils.formataddr() instead',
+                  DeprecationWarning, 2)
+    return formataddr(pair)
+
+

 def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
@@ -64,30 +113,26 @@ ecre = re.compile(r'''


 def decode(s):
-    """Return a decoded string according to RFC 2047, as a unicode string."""
+    """Return a decoded string according to RFC 2047, as a unicode string.
+
+    NOTE: This function is deprecated.  Use Header.decode_header() instead.
+    """
+    warnings.warn('Use Header.decode_header() instead.', DeprecationWarning, 2)
+    # Intra-package import here to avoid circular import problems.
+    from Header import decode_header
+    L = decode_header(s)
+    if not isinstance(L, ListType):
+        # s wasn't decoded
+        return s
+
    rtn = []
-    parts = ecre.split(s, 1)
-    while parts:
-        # If there are less than 4 parts, it can't be encoded and we're done
-        if len(parts) < 5:
-            rtn.extend(parts)
-            break
-        # The first element is any non-encoded leading text
-        rtn.append(parts[0])
-        charset = parts[1]
-        encoding = parts[2].lower()
-        atom = parts[3]
-        # The next chunk to decode should be in parts[4]
-        parts = ecre.split(parts[4])
-        # The encoding must be either `q' or `b', case-insensitive
-        if encoding == 'q':
-            func = _qdecode
-        elif encoding == 'b':
-            func = _bdecode
+    for atom, charset in L:
+        if charset is None:
+            rtn.append(atom)
        else:
-            func = _identity
-        # Decode and get the unicode in the charset
-        rtn.append(unicode(func(atom), charset))
+            # Convert the string to Unicode using the given encoding.  Leave
+            # Unicode conversion errors to strict.
+            rtn.append(unicode(atom, charset))
    # Now that we've decoded everything, we just need to join all the parts
    # together into the final string.
    return UEMPTYSTRING.join(rtn)
@@ -96,6 +141,7 @@ def decode(s):

 def encode(s, charset='iso-8859-1', encoding='q'):
    """Encode a string according to RFC 2047."""
+    warnings.warn('Use Header.Header.encode() instead.', DeprecationWarning, 2)
    encoding = encoding.lower()
    if encoding == 'q':
        estr = _qencode(s)
@@ -150,3 +196,48 @@ def formatdate(timeval=None, localtime=0):
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
        now[0], now[3], now[4], now[5],
        zone)
+
+
+
+def make_msgid(idstring=None):
+    """Returns a string suitable for RFC 2822 compliant Message-ID:, e.g:
+
+    <20020201195627.33539.96671@nightshade.la.mastaler.com>
+
+    Optional idstring if given is a string used to strengthen the
+    uniqueness of the Message-ID, otherwise an empty string is used.
+    """
+    timeval = time.time()
+    utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
+    pid = os.getpid()
+    randint = random.randrange(100000)
+    if idstring is None:
+        idstring = ''
+    else:
+        idstring = '.' + idstring
+    idhost = socket.getfqdn()
+    msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, idhost)
+    return msgid
+
+
+
+# These functions are in the standalone mimelib version only because they've
+# subsequently been fixed in the latest Python versions.  We use this to worm
+# around broken older Pythons.
+def parsedate(data):
+    if not data:
+        return None
+    return _parsedate(data)
+
+
+def parsedate_tz(data):
+    if not data:
+        return None
+    return _parsedate_tz(data)
+
+
+def parseaddr(addr):
+    realname, emailaddr = _parseaddr(addr)
+    if realname == '' and emailaddr is None:
+        return '', ''
+    return realname, emailaddr
--- a/Lib/email/__init__.py
+++ b/Lib/email/__init__.py
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)

 """A package for parsing, handling, and generating email messages.
 """

-__version__ = '1.0'
+__version__ = '2.0'

-__all__ = ['Encoders',
+__all__ = ['Charset',
+           'Encoders',
           'Errors',
           'Generator',
+           'Header',
           'Iterators',
           'MIMEAudio',
           'MIMEBase',
@@ -18,6 +20,8 @@ __all__ = ['Encoders',
           'Message',
           'Parser',
           'Utils',
+           'base64MIME',
+           'quopriMIME',
           'message_from_string',
           'message_from_file',
           ]

--- a/Lib/email/base64MIME.py
+++ b/Lib/email/base64MIME.py
+# Copyright (C) 2002 Python Software Foundation
+# Author: che@debian.org (Ben Gertzfield)
+
+"""Base64 content transfer encoding per RFCs 2045-2047.
+
+This module handles the content transfer encoding method defined in RFC 2045
+to encode arbitrary 8-bit data by representing each group of three 8-bit
+bytes as four 7-bit characters, an encoding known as Base64.
+
+It is used in the MIME standards for email to attach images, audio, and text
+in some 8-bit character sets to messages.
+
+This module provides an interface to encode and decode both headers and bodies
+with Base64 encoding.
+
+RFC 2047 defines a method for including character set information in an
+`encoded-word' in a header.  This method is commonly used for 8-bit real names
+in To:, From:, Cc:, etc. fields, as well as Subject: lines.
+
+This module does not do the line wrapping or end-of-line character conversion
+necessary for proper internationalized headers; it only does dumb encoding and
+decoding.  To deal with the various line wrapping issues, use the email.Header
+module.
+"""
+
+import re
+from binascii import b2a_base64, a2b_base64
+from email.Utils import fix_eols
+
+CRLF = '\r\n'
+NL = '\n'
+EMPTYSTRING = ''
+
+# See also Charset.py
+MISC_LEN = 7
+
+
+
+# Helpers
+def base64_len(s):
+    """Return the length of s when it is encoded with base64.
+
+    Every 3 input bytes become 4 output characters, and a trailing group of 1
+    or 2 bytes is padded out to a full 4 characters as well.
+    """
+    groups_of_3, leftover = divmod(len(s), 3)
+    # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
+    # Thanks, Tim!
+    n = groups_of_3 * 4
+    if leftover:
+        n += 4
+    return n
+
+
+
+def header_encode(header, charset='iso-8859-1', keep_eols=0, maxlinelen=76,
+                  eol=NL):
+    """Encode a single header line with Base64 encoding in a given charset.
+
+    Defined in RFC 2047, this Base64 header encoding is identical to normal
+    Base64 encoding, except that each line must be intelligently wrapped
+    (respecting the Base64 encoding), and subsequent lines must start with a
+    space.
+
+    charset names the character set to use to encode the header.  It defaults
+    to iso-8859-1.
+
+    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
+    to the canonical email line separator \\r\\n unless the keep_eols
+    parameter is set to true (the default is false).
+
+    The encoded lines are joined with the value of eol followed by a single
+    space; the last line is not terminated.  eol defaults to "\\n".  Set this
+    to "\\r\\n" if you are using the result of this function directly in
+    email.
+
+    The resulting string will be in the form:
+
+    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
+      =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="
+
+    with the chunk size chosen so that each line is about maxlinelen
+    characters long (defaults to 76 characters).
+
+    An empty (false) header is returned unchanged.
+    """
+    # Return empty headers unchanged
+    if not header:
+        return header
+
+    if not keep_eols:
+        header = fix_eols(header)
+
+    # Base64 encode each line, in encoded chunks no greater than maxlinelen in
+    # length, after the RFC chrome is added in.
+    base64ed = []
+    # Room left for base64 text once the charset name and the MISC_LEN
+    # characters of "=?", "?b?" and "?=" chrome are accounted for.
+    max_encoded = maxlinelen - len(charset) - MISC_LEN
+    # Base64 turns 3 input characters into 4 output characters.  Both operands
+    # are ints, so "/" floors here (classic division); range() below needs an
+    # integer step.
+    max_unencoded = max_encoded * 3 / 4
+
+    # BAW: Ben's original code used a step of max_unencoded, but I think it
+    # ought to be max_encoded.  Otherwise, where's max_encoded used?
+    # NOTE(review): max_encoded is only used above to derive max_unencoded;
+    # the loop slices the raw header, so it steps in unencoded characters.
+    # Each slice is padded to a multiple of 4 encoded characters, so when
+    # max_unencoded is not a multiple of 3 a chunk can encode to a character
+    # or two more than max_encoded -- confirm whether that matters.
+    for i in range(0, len(header), max_unencoded):
+        base64ed.append(b2a_base64(header[i:i+max_unencoded]))
+
+    # Now add the RFC chrome to each encoded chunk
+    lines = []
+    for line in base64ed:
+        # Ignore the last character of each line if it is a newline
+        if line[-1] == NL:
+            line = line[:-1]
+        # Add the chrome
+        lines.append('=?%s?b?%s?=' % (charset, line))
+    # Glue the lines together and return it.  BAW: should we be able to
+    # specify the leading whitespace in the joiner?
+    joiner = eol + ' '
+    return joiner.join(lines)
+
+
+
+def encode(s, binary=1, maxlinelen=76, eol=NL):
+    """Encode a string with base64.
+
+    Each line will be wrapped at, at most, maxlinelen characters (defaults to
+    76 characters).
+
+    If binary is false, end-of-line characters will be converted to the
+    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
+    verbatim (this is the default).
+
+    Each line of encoded text will end with eol, which defaults to "\\n".  Set
+    this to "\\r\\n" if you will be using the result of this function directly
+    in an email.
+
+    An empty (false) s is returned unchanged.
+    """
+    if not s:
+        return s
+
+    if not binary:
+        s = fix_eols(s)
+
+    encvec = []
+    # Number of input characters per output line: base64 emits 4 characters
+    # for every 3 it consumes.  Both operands are ints, so "/" floors here.
+    # NOTE(review): every slice is encoded on its own, so unless
+    # max_unencoded is a multiple of 3 (true for the default, 76 -> 57) each
+    # line ends in "=" padding, not just the last one, and may be longer than
+    # maxlinelen -- confirm that callers only pass multiples of 4.
+    max_unencoded = maxlinelen * 3 / 4
+    for i in range(0, len(s), max_unencoded):
+        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
+        # adding a newline to the encoded string?
+        enc = b2a_base64(s[i:i + max_unencoded])
+        # Swap the trailing newline for the requested eol when they differ
+        if enc[-1] == NL and eol <> NL:
+            enc = enc[:-1] + eol
+        encvec.append(enc)
+    return EMPTYSTRING.join(encvec)
+
+
+# For convenience and backwards compatibility w/ standard base64 module
+body_encode = encode
+encodestring = encode
+
+
+
+def decode(s, convert_eols=None):
+    """Decode a raw base64 string.
+
+    If convert_eols is set to a string value, all canonical email linefeeds,
+    i.e. "\\r\\n", in the decoded text will be converted to the value of
+    convert_eols.  os.linesep is a good choice for convert_eols if you are
+    decoding a text attachment.
+
+    This function does not parse a full MIME header value encoded with
+    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
+    level email.Header class for that functionality.
+
+    An empty (false) s is returned unchanged.
+    """
+    if not s:
+        return s
+
+    dec = a2b_base64(s)
+    # A false convert_eols (None or '') leaves the decoded data untouched
+    if convert_eols:
+        return dec.replace(CRLF, convert_eols)
+    return dec
+
+
+# For convenience and backwards compatibility w/ standard base64 module
+body_decode = decode
+decodestring = decode
--- a/Lib/email/quopriMIME.py
+++ b/Lib/email/quopriMIME.py
--- a/Lib/test/data/msg_24.txt
+++ b/Lib/test/data/msg_24.txt
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+MIME-Version: 1.0
+Subject: A subject
+To: aperson@dom.ain
+From: bperson@dom.ain
+
+--BOUNDARY
+
+
+--BOUNDARY--
--- a/Lib/test/data/msg_25.txt
+++ b/Lib/test/data/msg_25.txt
+From MAILER-DAEMON Fri Apr 06 16:46:09 2001
+Received: from [204.245.199.98] (helo=zinfandel.lacita.com)
+	by www.linux.org.uk with esmtp (Exim 3.13 #1)
+	id 14lYR6-0008Iv-00
+	for linuxuser-admin@www.linux.org.uk; Fri, 06 Apr 2001 16:46:09 +0100
+Received: from localhost (localhost) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with internal id JAB03225; Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
+Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
+From: Mail Delivery Subsystem <MAILER-DAEMON@zinfandel.lacita.com>
+Subject: Returned mail: Too many hops 19 (17 max): from <linuxuser-admin@www.linux.org.uk> via [199.164.235.226], to <scoffman@wellpartner.com>
+Message-Id: <200104061723.JAB03225@zinfandel.lacita.com>
+To: <linuxuser-admin@www.linux.org.uk>
+To: postmaster@zinfandel.lacita.com
+MIME-Version: 1.0
+Content-Type: multipart/report; report-type=delivery-status;
+	bo
+Auto-Submitted: auto-generated (failure)
+
+This is a MIME-encapsulated message
+
+--JAB03225.986577786/zinfandel.lacita.com
+
+The original message was received at Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800)
+from [199.164.235.226]
+
+   ----- The following addresses have delivery notifications -----
+<scoffman@wellpartner.com>  (unrecoverable error)
+
+   ----- Transcript of session follows -----
+554 Too many hops 19 (17 max): from <linuxuser-admin@www.linux.org.uk> via [199.164.235.226], to <scoffman@wellpartner.com>
+
+--JAB03225.986577786/zinfandel.lacita.com
+Content-Type: message/delivery-status
+
+Reporting-MTA: dns; zinfandel.lacita.com
+Received-From-MTA: dns; [199.164.235.226]
+Arrival-Date: Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800)
+
+Final-Recipient: rfc822; scoffman@wellpartner.com
+Action: failed
+Status: 5.4.6
+Last-Attempt-Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
+
+--JAB03225.986577786/zinfandel.lacita.com
+Content-Type: text/rfc822-headers
+
+Return-Path: linuxuser-admin@www.linux.org.uk
+Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03225 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([204.245.199.98])
+	by
+	fo
+Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03221 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:22:18 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([204.245.199.98])
+	by
+	fo
+Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03217 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:21:37 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([204.245.199.98])
+	by
+	fo
+Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03213 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:20:56 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([204.245.199.98])
+	by
+	fo
+Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03209 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:20:15 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([204.245.199.98])
+	by
+	fo
+Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03205 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:19:33 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([204.245.199.98])
+	by
+	fo
+Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03201 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:18:52 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([204.245.199.98])
+	by
+	fo
+Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03197 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:17:54 -0800 (GMT-0800)
+Received: from www.linux.org.uk (parcelfarce.linux.theplanet.co.uk [195.92.249.252])
+	by
+	fo
+Received: from localhost.localdomain
+	([
+	by
+	id
+Received: from [212.1.130.11] (helo=s1.uklinux.net ident=root)
+	by
+	id
+	fo
+Received: from server (ppp-2-22.cvx4.telinco.net [212.1.149.22])
+	by
+	fo
+From: Daniel James <daniel@linuxuser.co.uk>
+Organization: LinuxUser
+To: linuxuser@www.linux.org.uk
+X-Mailer: KMail [version 1.1.99]
+Content-Type: text/plain;
+  c
+MIME-Version: 1.0
+Message-Id: <01040616033903.00962@server>
+Content-Transfer-Encoding: 8bit
+Subject: [LinuxUser] bulletin no. 45
+Sender: linuxuser-admin@www.linux.org.uk
+Errors-To: linuxuser-admin@www.linux.org.uk
+X-BeenThere: linuxuser@www.linux.org.uk
+X-Mailman-Version: 2.0.3
+Precedence: bulk
+List-Help: <mailto:linuxuser-request@www.linux.org.uk?subject=help>
+List-Post: <mailto:linuxuser@www.linux.org.uk>
+List-Subscribe: <http://www.linux.org.uk/mailman/listinfo/linuxuser>,
+	<m
+List-Id: bulletins from LinuxUser magazine <linuxuser.www.linux.org.uk>
+List-Unsubscribe: <http://www.linux.org.uk/mailman/listinfo/linuxuser>,
+	<m
+List-Archive: <http://www.linux.org.uk/pipermail/linuxuser/>
+Date: Fri, 6 Apr 2001 16:03:39 +0100
+
+--JAB03225.986577786/zinfandel.lacita.com--
+
+
--- a/Lib/test/test_email.py
+++ b/Lib/test/test_email.py
--- a/Lib/test/test_email_codecs.py
+++ b/Lib/test/test_email_codecs.py
+# Copyright (C) 2002 Python Software Foundation
+# email package unit tests for (optional) Asian codecs
+
+import unittest
+from test_support import TestSkipped
+
+from email.Charset import Charset
+from email.Header import Header, decode_header
+
+
+# See if we have the Japanese codecs package installed
+try:
+    unicode('foo', 'japanese.iso-2022-jp')
+except LookupError:
+    raise TestSkipped, 'Optional Japanese codecs not installed'
+
+
+
+class TestEmailAsianCodecs(unittest.TestCase):
+    """Header encoding/decoding tests that need the optional Japanese codecs."""
+
+    def test_japanese_codecs(self):
+        """Check Header.encode() and decode_header() with euc-jp input.
+
+        The expected values below show the euc-jp text coming out as base64
+        ("b") encoded-words labelled iso-2022-jp, and the iso-8859-1 text as
+        a quoted-printable ("q") encoded-word.
+        """
+        eq = self.assertEqual
+        j = Charset("euc-jp")
+        g = Charset("iso-8859-1")
+        h = Header("Hello World!")
+        jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa'
+        ghello = 'Gr\xfc\xdf Gott!'
+        # Build one header out of three differently encoded chunks
+        h.append(jhello, j)
+        h.append(ghello, g)
+        eq(h.encode(), 'Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=\n =?iso-8859-1?q?Gr=FC=DF_Gott!?=')
+        # Decoding the encoded form must yield the three chunks back, each
+        # paired with its output charset (None for the plain ASCII part)
+        eq(decode_header(h.encode()),
+           [('Hello World!', None),
+            ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
+            ('Gr\xfc\xdf Gott!', 'iso-8859-1')])
+        # NOTE(review): the local name `long` shadows the builtin of the same
+        # name for the rest of this method.
+        long = 'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9'
+        h = Header(long, j, header_name="Subject")
+        # test a very long header
+        enc = h.encode()
+        # The long subject is expected to be split into three encoded-words,
+        # one per line, with continuation lines starting with a space
+        eq(enc, '=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYRsoQg==?=\n =?iso-2022-jp?b?GyRCITwlayRPO0oycTxUJE4+NRsoQg==?=\n =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=')
+        eq(decode_header(enc), [("test-ja \x1b$B$XEj9F$5$l$?%a\x1b(B\x1b$B!<%k$O;J2q<T$N>5\x1b(B\x1b$BG'$rBT$C$F$$$^$9\x1b(B", 'iso-2022-jp')])
+
+
+
+def suite():
+    """Return a unittest.TestSuite built from TestEmailAsianCodecs."""
+    suite = unittest.TestSuite()
+    suite.addTest(unittest.makeSuite(TestEmailAsianCodecs))
+    return suite
+
+
+
+if __name__ == '__main__':
+    unittest.main(defaultTest='suite')