Commit f8bea7d1 authored by Barry Warsaw

Sync'ing with standalone email package 2.0.1. This adds support for

non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).
parent a2d68403
# Copyright (C) 2001,2002 Python Software Foundation
# Author: che@debian.org (Ben Gertzfield)
from types import UnicodeType
from email.Encoders import encode_7or8bit
import email.base64MIME
import email.quopriMIME
# Flags for types of header encodings.  These are the values stored in the
# header-encoding and body-encoding slots of the CHARSETS tuples below.
QP = 1 # Quoted-Printable
BASE64 = 2 # Base64
# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7
MISC_LEN = 7
# Character set assumed by Charset() when none is given.
DEFAULT_CHARSET = 'us-ascii'
# Defaults.  Each value is a 3-tuple of (header encoding, body encoding,
# output charset); None means "no encoding" or "no conversion" respectively.
CHARSETS = {
    # input        header enc  body enc  output conv
    'iso-8859-1': (QP, QP, None),
    'iso-8859-2': (QP, QP, None),
    'us-ascii': (None, None, None),
    'big5': (BASE64, BASE64, None),
    'gb2312': (BASE64, BASE64, None),
    'euc-jp': (BASE64, None, 'iso-2022-jp'),
    'shift_jis': (BASE64, None, 'iso-2022-jp'),
    'iso-2022-jp': (BASE64, None, None),
    'koi8-r': (BASE64, BASE64, None),
    'utf-8': (BASE64, BASE64, 'utf-8'),
}
# Aliases for other commonly-used names for character sets.  Map
# them to the real ones used in email.
ALIASES = {
    'latin_1': 'iso-8859-1',
    'latin-1': 'iso-8859-1',
    'ascii': 'us-ascii',
}
# Map charsets to their Unicode codec strings.  Note that the Japanese
# examples included below do not (yet) come with Python!  They are available
# from http://pseudo.grad.sccs.chukyo-u.ac.jp/~kajiyama/python/
# The Chinese and Korean codecs are available from SourceForge:
#
# http://sourceforge.net/projects/python-codecs/
#
# although you'll need to check them out of cvs since they haven't been file
# released yet.  You might also try to use
#
# http://www.freshports.org/port-description.php3?port=6702
#
# if you can get logged in.  AFAICT, both the Chinese and Korean codecs are
# fairly experimental at this point.
#
# Keys must be the canonical charset names used in CHARSETS, because
# Charset.__init__() looks codecs up by the canonical input/output charset.
CODEC_MAP = {
    'euc-jp': 'japanese.euc-jp',
    'iso-2022-jp': 'japanese.iso-2022-jp',
    'shift_jis': 'japanese.shift_jis',
    # Was misspelled 'gb2132', which never matched the 'gb2312' charset.
    'gb2312': 'eucgb2312_cn',
    'big5': 'big5_tw',
    'utf-8': 'utf-8',
    # Hack: We don't want *any* conversion for stuff marked us-ascii, as all
    # sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
    # Let that stuff pass through without conversion to/from Unicode.
    'us-ascii': None,
}
# Convenience functions for extending the above mappings
def add_charset(charset, header_enc=None, body_enc=None, output_charset=None):
    """Register the email properties of a character set in CHARSETS.

    charset is the input character set; it must be the canonical name of
    the character set.

    header_enc and body_enc are optional.  Each is Charset.QP for
    quoted-printable, Charset.BASE64 for base64, or None (the default) for
    no encoding.  They say how headers and bodies written in the input
    charset are to be encoded.

    output_charset is optional and names the character set the output
    should be in.  When Charset.convert() is called, text goes from the
    input charset to Unicode and then to the output charset.  By default the
    output stays in the input character set.

    Both the input and the output charset need a Unicode codec entry in this
    module's charset-to-codec mapping; call add_codec(charset, codecname)
    for codecs the module does not know about.  See the codec module's
    documentation for more information.
    """
    properties = (header_enc, body_enc, output_charset)
    CHARSETS[charset] = properties
def add_alias(alias, canonical):
    """Register an alternate name for a character set in ALIASES.

    alias is the alternate name, e.g. latin-1
    canonical is the canonical name it stands for, e.g. iso-8859-1
    """
    ALIASES.update({alias: canonical})
def add_codec(charset, codecname):
    """Register the codec that maps the given charset to/from Unicode.

    charset is the canonical name of a character set.  codecname is the
    name of a Python codec, suitable as the second argument to the unicode()
    built-in or as the argument to the .encode() method of a Unicode string.
    """
    CODEC_MAP.update({charset: codecname})
class Charset:
    """Map character sets to their email properties.

    This class provides information about the requirements imposed on email
    for a specific character set.  It also provides convenience routines for
    converting between character sets, given the availability of the
    applicable codecs.  Given a character set, it will do its best to
    provide information on how to use that character set in an email.

    Certain character sets must be encoded with quoted-printable or base64
    when used in email headers or bodies.  Certain character sets must be
    converted outright, and are not allowed in email.  Instances of this
    class expose the following information about a character set:

    input_charset: The initial character set specified.  Common aliases
                   are converted to their `official' email names (e.g.
                   latin_1 is converted to iso-8859-1).  Defaults to
                   7-bit us-ascii.

    header_encoding: If the character set must be encoded before it can be
                     used in an email header, this attribute will be set to
                     Charset.QP (for quoted-printable) or Charset.BASE64
                     (for base64 encoding).  Otherwise, it will be None.

    body_encoding: Same as header_encoding, but describes the encoding for
                   the mail message's body, which indeed may be different
                   than the header encoding.

    output_charset: Some character sets must be converted before they can
                    be used in email headers or bodies.  If the
                    input_charset is one of them, this attribute will
                    contain the name of the charset output will be
                    converted to.  Otherwise, it will be None.

    input_codec: The name of the Python codec used to convert the
                 input_charset to Unicode.  If no conversion codec is
                 necessary, this attribute will be None.

    output_codec: The name of the Python codec used to convert Unicode
                  to the output_charset.  If no conversion codec is
                  necessary, this attribute will have the same value as
                  the input_codec.
    """
    def __init__(self, input_charset=DEFAULT_CHARSET):
        # Set the input charset after filtering through the aliases
        self.input_charset = ALIASES.get(input_charset, input_charset)
        # Look up the encodings and the conversion in the CHARSETS table.
        # Charsets that are not registered there are base64 encoded in both
        # headers and bodies, with no output conversion.
        henc, benc, conv = CHARSETS.get(self.input_charset,
                                        (BASE64, BASE64, None))
        self.header_encoding = henc
        self.body_encoding = benc
        self.output_charset = ALIASES.get(conv, conv)
        # Now set the codecs.  If one isn't defined for input_charset,
        # guess and try a Unicode codec with the same name as input_charset.
        self.input_codec = CODEC_MAP.get(self.input_charset,
                                         self.input_charset)
        # When there is no output charset (or no codec registered for it),
        # the output codec is the input codec, i.e. no conversion happens.
        self.output_codec = CODEC_MAP.get(self.output_charset,
                                          self.input_codec)

    def __str__(self):
        return self.input_charset.lower()

    def __eq__(self, other):
        # Compare case-insensitively against another Charset or a string.
        return str(self) == str(other).lower()

    def __ne__(self, other):
        return not self.__eq__(other)

    def get_body_encoding(self):
        """Return the content-transfer-encoding used for body encoding.

        This is either the string `quoted-printable' or `base64' depending
        on the encoding used, or it is a function in which case you should
        call the function with a single argument, the Message object being
        encoded.  The function should then set the
        Content-Transfer-Encoding: header itself to whatever is appropriate.

        Returns "quoted-printable" if self.body_encoding is QP.
        Returns "base64" if self.body_encoding is BASE64.
        Returns the encode_7or8bit function otherwise.
        """
        if self.body_encoding == QP:
            return 'quoted-printable'
        elif self.body_encoding == BASE64:
            return 'base64'
        else:
            return encode_7or8bit

    def convert(self, s):
        """Convert a string from the input_codec to the output_codec."""
        if self.input_codec != self.output_codec:
            return unicode(s, self.input_codec).encode(self.output_codec)
        else:
            return s

    def to_splittable(self, s):
        """Convert a possibly multibyte string to a safely splittable format.

        Uses the input_codec to try and convert the string to Unicode, so it
        can be safely split on character boundaries (even for double-byte
        characters).

        Returns the string untouched if we don't know how to convert it to
        Unicode with the input_charset.

        Characters that could not be converted to Unicode will be replaced
        with the Unicode replacement character U+FFFD.
        """
        if isinstance(s, UnicodeType) or self.input_codec is None:
            return s
        try:
            return unicode(s, self.input_codec, 'replace')
        except LookupError:
            # Input codec not installed on system, so return the original
            # string unchanged.
            return s

    def from_splittable(self, ustr, to_output=1):
        """Convert a splittable string back into an encoded string.

        Uses the proper codec to try and convert the string from
        Unicode back into an encoded format.  Return the string as-is
        if it is not Unicode, or if it could not be encoded from
        Unicode.

        Characters that could not be converted from Unicode will be replaced
        with an appropriate character (usually '?').

        If to_output is true, uses output_codec to convert to an encoded
        format.  If to_output is false, uses input_codec.  to_output
        defaults to 1.
        """
        if to_output:
            codec = self.output_codec
        else:
            codec = self.input_codec
        if not isinstance(ustr, UnicodeType) or codec is None:
            return ustr
        try:
            return ustr.encode(codec, 'replace')
        except LookupError:
            # Output codec not installed
            return ustr

    def get_output_charset(self):
        """Return the output character set.

        This is self.output_charset if that is set, otherwise it is
        self.input_charset.
        """
        return self.output_charset or self.input_charset

    def encoded_header_len(self, s):
        """Return the length of the encoded header string."""
        cset = self.get_output_charset()
        # The len(s) of a 7bit encoding is len(s)
        if self.header_encoding == BASE64:
            return email.base64MIME.base64_len(s) + len(cset) + MISC_LEN
        elif self.header_encoding == QP:
            return email.quopriMIME.header_quopri_len(s) + len(cset) + MISC_LEN
        else:
            return len(s)

    def header_encode(self, s, convert=0):
        """Header-encode a string, optionally converting it to output_charset.

        If convert is true, the string will be converted from the input
        charset to the output charset automatically.  This is not useful for
        multibyte character sets, which have line length issues (multibyte
        characters must be split on a character, not a byte boundary); use
        the high-level Header class to deal with these issues.  convert
        defaults to 0.

        The type of encoding (base64 or quoted-printable) will be based on
        self.header_encoding.
        """
        cset = self.get_output_charset()
        if convert:
            s = self.convert(s)
        # 7bit/8bit encodings return the string unchanged (modulo conversions)
        if self.header_encoding == BASE64:
            return email.base64MIME.header_encode(s, cset)
        elif self.header_encoding == QP:
            return email.quopriMIME.header_encode(s, cset)
        else:
            return s

    def body_encode(self, s, convert=1):
        """Body-encode a string and convert it to output_charset.

        If convert is true (the default), the string will be converted from
        the input charset to output charset automatically.  Unlike
        header_encode(), there are no issues with byte boundaries and
        multibyte charsets in email bodies, so this is usually pretty safe.

        The type of encoding (base64 or quoted-printable) will be based on
        self.body_encoding.
        """
        if convert:
            s = self.convert(s)
        # 7bit/8bit encodings return the string unchanged (modulo conversions)
        if self.body_encoding == BASE64:
            return email.base64MIME.body_encode(s)
        # The body encoding decides here; this branch used to test
        # header_encoding, which mis-encoded charsets whose header and body
        # encodings differ (e.g. euc-jp: base64 headers, unencoded body).
        elif self.body_encoding == QP:
            return email.quopriMIME.body_encode(s)
        else:
            return s
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""Module containing encoding functions for Image.Image and Text.Text.
......@@ -11,7 +11,9 @@ from quopri import encodestring as _encodestring
# Helpers
def _qencode(s):
return _encodestring(s, quotetabs=1)
enc = _encodestring(s, quotetabs=1)
# Must encode spaces, which quopri.encodestring() doesn't do
return enc.replace(' ', '=20')
def _bencode(s):
......@@ -54,6 +56,10 @@ def encode_quopri(msg):
def encode_7or8bit(msg):
"""Set the Content-Transfer-Encoding: header to 7bit or 8bit."""
orig = msg.get_payload()
if orig is None:
# There's no payload. For backwards compatibility we use 7bit
msg['Content-Transfer-Encoding'] = '7bit'
return
# We play a trick to make this go fast. If encoding to ASCII succeeds, we
# know the data must be 7bit, otherwise treat it as 8bit.
try:
......
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""email package exception classes.
......
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""Classes to generate plain text from a message object tree.
......@@ -166,30 +166,33 @@ class Generator:
return text
rtn = []
for line in text.split('\n'):
splitline = []
# Short lines can remain unchanged
if len(line.replace('\t', SPACE8)) <= maxheaderlen:
rtn.append(line)
SEMINLTAB.join(rtn)
splitline.append(line)
rtn.append(SEMINLTAB.join(splitline))
else:
oldlen = len(text)
oldlen = len(line)
# Try to break the line on semicolons, but if that doesn't
# work, try to split on folding whitespace.
while len(text) > maxheaderlen:
i = text.rfind(';', 0, maxheaderlen)
while len(line) > maxheaderlen:
i = line.rfind(';', 0, maxheaderlen)
if i < 0:
break
rtn.append(text[:i])
text = text[i+1:].lstrip()
if len(text) <> oldlen:
splitline.append(line[:i])
line = line[i+1:].lstrip()
if len(line) <> oldlen:
# Splitting on semis worked
rtn.append(text)
return SEMINLTAB.join(rtn)
splitline.append(line)
rtn.append(SEMINLTAB.join(splitline))
continue
# Splitting on semis didn't help, so try to split on
# whitespace.
parts = re.split(r'(\s+)', text)
parts = re.split(r'(\s+)', line)
# Watch out though for "Header: longnonsplittableline"
if parts[0].endswith(':') and len(parts) == 3:
return text
rtn.append(line)
continue
first = parts.pop(0)
sublines = [first]
acc = len(first)
......@@ -203,13 +206,14 @@ class Generator:
else:
# Split it here, but don't forget to ignore the
# next whitespace-only part
rtn.append(EMPTYSTRING.join(sublines))
splitline.append(EMPTYSTRING.join(sublines))
del parts[0]
first = parts.pop(0)
sublines = [first]
acc = len(first)
rtn.append(EMPTYSTRING.join(sublines))
return NLTAB.join(rtn)
splitline.append(EMPTYSTRING.join(sublines))
rtn.append(NLTAB.join(splitline))
return NL.join(rtn)
#
# Handlers for writing types and subtypes
......@@ -219,6 +223,9 @@ class Generator:
payload = msg.get_payload()
if payload is None:
return
cset = msg.get_charset()
if cset is not None:
payload = cset.body_encode(payload)
if not isinstance(payload, StringType):
raise TypeError, 'string payload expected: %s' % type(payload)
if self._mangle_from_:
......@@ -233,7 +240,18 @@ class Generator:
# together, and then make sure that the boundary we've chosen isn't
# present in the payload.
msgtexts = []
for part in msg.get_payload():
subparts = msg.get_payload()
if subparts is None:
# Nothing has ever been attached
boundary = msg.get_boundary(failobj=_make_boundary())
print >> self._fp, '--' + boundary
print >> self._fp, '\n'
print >> self._fp, '--' + boundary + '--'
return
elif not isinstance(subparts, ListType):
# Scalar payload
subparts = [subparts]
for part in subparts:
s = StringIO()
g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
g(part, unixfrom=0)
......@@ -365,7 +383,7 @@ class DecodedGenerator(Generator):
# Helper
def _make_boundary(self, text=None):
def _make_boundary(text=None):
# Craft a random boundary. If text is given, ensure that the chosen
# boundary doesn't appear in the text.
boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='
......
# Copyright (C) 2002 Python Software Foundation
# Author: che@debian.org (Ben Gertzfield)
"""Header encoding and decoding functionality."""
import re
import email.quopriMIME
import email.base64MIME
from email.Charset import Charset
# Line separators.  NLSPACE joins the encoded chunks in
# Header.encode_chunks(); CRLFSPACE and CRLF are not referenced by the code
# visible in this module.
CRLFSPACE = '\r\n '
CRLF = '\r\n'
NLSPACE = '\n '
# Default maximum header line length (the default for Header's maxlinelen).
MAXLINELEN = 76
# NOTE(review): ENCODE and DECODE are not referenced by the code visible in
# this module -- confirm whether anything else still uses them.
ENCODE = 1
DECODE = 2
# Match encoded-word strings in the form =?charset?q?Hello_World?=
# The pattern has three groups (charset, encoding, encoded), so
# ecre.split() yields the unencoded text followed by those three values for
# each encoded word; decode_header() relies on that layout.
ecre = re.compile(r'''
=\? # literal =?
(?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
\? # literal ?
(?P<encoding>[qb]) # either a "q" or a "b", case insensitive
\? # literal ?
(?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string
\?= # literal ?=
''', re.VERBOSE | re.IGNORECASE)
# Helpers
# Borrowed from quopriMIME; used by Header.encode_chunks() to accumulate
# chunks subject to the maximum line length.
_max_append = email.quopriMIME._max_append
def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (decoded_string, charset) pairs containing each of the
    decoded parts of the header.  Charset is None for non-encoded parts of
    the header, otherwise a lower-case string containing the name of the
    character set specified in the encoded string.

    Adjacent encoded words in the same charset are merged into one pair, and
    unencoded text that follows an unencoded pair is appended to that pair,
    separated by a single space.
    """
    # If no encoding, just return the header
    header = str(header)
    if not ecre.search(header):
        return [(header, None)]
    decoded = []
    for line in header.splitlines():
        # This line might not have an encoding in it
        if not ecre.search(line):
            decoded.append((line, None))
            continue
        # ecre has three groups, so the split result is laid out as
        # [unencoded, charset, encoding, encoded, unencoded, ...]
        parts = ecre.split(line)
        while parts:
            unenc = parts.pop(0).strip()
            if unenc:
                # Should we continue a long line?
                if decoded and decoded[-1][1] is None:
                    # Append the unencoded text itself.  (This used to
                    # append `dec', the previously decoded word, which
                    # dropped the unencoded text.)  The whitespace that
                    # separated the two pieces was stripped above, so put a
                    # single space back.
                    decoded[-1] = (decoded[-1][0] + ' ' + unenc, None)
                else:
                    decoded.append((unenc, None))
            if parts:
                charset, encoding = [s.lower() for s in parts[0:2]]
                encoded = parts[2]
                if encoding == 'q':
                    dec = email.quopriMIME.header_decode(encoded)
                elif encoding == 'b':
                    dec = email.base64MIME.decode(encoded)
                else:
                    # ecre only matches q and b; kept as a safety net
                    dec = encoded
                if decoded and decoded[-1][1] == charset:
                    decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1])
                else:
                    decoded.append((dec, charset))
            # Drop the charset/encoding/encoded triple just consumed; this is
            # a no-op when parts is already empty.
            del parts[0:3]
    return decoded
class Header:
    """A header value made of (string, Charset) chunks, MIME-encoded on
    demand by encode()."""

    def __init__(self, s, charset=None, maxlinelen=MAXLINELEN,
                 header_name=None):
        """Create a MIME-compliant header that can contain many languages.

        Specify the initial header value in s.  Specify its character set as
        a Charset object in the charset argument.  If none, a default
        Charset instance will be used.

        You can later append to the header with append(s, charset) below;
        charset does not have to be the same as the one initially specified
        here.  In fact, it's optional, and if not given, defaults to the
        charset specified in the constructor.

        The maximum line length can either be specified by maxlinelen, or
        you can pass in the name of the header field (e.g. "Subject") to let
        this class guess the best line length to use to prevent wrapping.
        The default maxlinelen is 76.
        """
        if charset is None:
            charset = Charset()
        self._charset = charset
        # BAW: I believe `chunks' and `maxlinelen' should be non-public.
        self._chunks = []
        self.append(s, charset)
        self._maxlinelen = maxlinelen
        if header_name is not None:
            self.guess_maxlinelen(header_name)

    def __str__(self):
        """A synonym for self.encode()."""
        return self.encode()

    def guess_maxlinelen(self, s=None):
        """Guess the maximum length to make each header line.

        Given a header name (e.g. "Subject"), set this header's maximum line
        length to an appropriate length to avoid line wrapping.  If s is not
        given, return the previous maximum line length and don't set it.

        Returns the new maximum line length.
        """
        # BAW: is this semantic necessary?
        if s is not None:
            # Leave room for the header name plus the ": " that follows it
            self._maxlinelen = MAXLINELEN - len(s) - 2
        return self._maxlinelen

    def append(self, s, charset=None):
        """Append string s with Charset charset to the MIME header.

        charset defaults to the one given in the class constructor.
        """
        if charset is None:
            charset = self._charset
        self._chunks.append((s, charset))

    def _split(self, s, charset):
        # Split up a header safely for use with encode_chunks.  BAW: this
        # appears to be a private convenience method.
        #
        # Returns a list of (encoded_string, charset) pairs, each of which
        # is short enough once header-encoded, where that is possible.
        splittable = charset.to_splittable(s)
        encoded = charset.from_splittable(splittable)
        if charset.encoded_header_len(encoded) < self._maxlinelen:
            return [(encoded, charset)]
        if len(splittable) <= 1:
            # Nothing left to halve.  Without this guard a piece that can
            # never fit (very small maxlinelen) would recurse forever.
            return [(encoded, charset)]
        # Divide and conquer.  The halves are re-encoded with the *input*
        # codec because the recursive call decodes them again with it.
        halfway = len(splittable) // 2
        first = charset.from_splittable(splittable[:halfway], 0)
        last = charset.from_splittable(splittable[halfway:], 0)
        return self._split(first, charset) + self._split(last, charset)

    def encode(self):
        """Encode a message header, possibly converting charset and encoding.

        There are many issues involved in converting a given string for use
        in an email header.  Only certain character sets are readable in
        most email clients, and as header strings can only contain a subset
        of 7-bit ASCII, care must be taken to properly convert and encode
        (with Base64 or quoted-printable) header strings.  In addition,
        there is a 75-character length limit on any given encoded header
        field, so line-wrapping must be performed, even with double-byte
        character sets.

        This method will do its best to convert the string to the correct
        character set used in email, and encode and line wrap it safely with
        the appropriate scheme for that character set.

        If the given charset is not known or an error occurs during
        conversion, this function will return the header untouched.

        The stored chunks are left unmodified, so calling encode() (or
        str()) repeatedly gives the same result.
        """
        newchunks = []
        for s, charset in self._chunks:
            newchunks += self._split(s, charset)
        # Do not store newchunks back on self: they are already converted to
        # the output charset, and running them through _split() again would
        # decode them with the input codec.
        return self.encode_chunks(newchunks)

    def encode_chunks(self, chunks=None):
        """MIME-encode a header with many different charsets and/or encodings.

        Given a list of pairs (string, charset), return a MIME-encoded
        string suitable for use in a header field.  Each pair may have
        different charsets and/or encodings, and the resulting header will
        accurately reflect each setting.

        chunks is the list of pairs to encode; when omitted, the chunks
        stored on this Header are used as they are (i.e. not split).

        Each charset's header encoding can be Charset.QP (quoted-printable,
        for ASCII-like character sets like iso-8859-1), Charset.BASE64
        (Base64, for non-ASCII like character sets like KOI8-R and
        iso-2022-jp), or None (no encoding).

        Each pair will be represented on a separate line; the resulting
        string will be in the format:

        "=?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\\n
          =?charset2?b?SvxyZ2VuIEL2aW5n?="
        """
        if chunks is None:
            chunks = self._chunks
        lines = []
        for header, charset in chunks:
            if charset is None:
                _max_append(lines, header, self._maxlinelen, ' ')
            else:
                _max_append(lines, charset.header_encode(header, 0),
                            self._maxlinelen, ' ')
        return NLSPACE.join(lines)
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""Various types of useful iterators and generators.
......
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""Base class for MIME specializations.
......
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""Class representing image/* type MIME documents.
......
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""Class representing message/* MIME documents.
......
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""Class representing text/* type MIME documents.
"""
import warnings
import MIMEBase
from Encoders import encode_7or8bit
......@@ -13,7 +14,7 @@ class MIMEText(MIMEBase.MIMEBase):
"""Class for generating text/* type MIME documents."""
def __init__(self, _text, _subtype='plain', _charset='us-ascii',
_encoder=encode_7or8bit):
_encoder=None):
"""Create a text/* type MIME document.
_text is the string for this message object. If the text does not end
......@@ -22,20 +23,26 @@ class MIMEText(MIMEBase.MIMEBase):
_subtype is the MIME sub content type, defaulting to "plain".
_charset is the character set parameter added to the Content-Type:
header. This defaults to "us-ascii".
_encoder is a function which will perform the actual encoding for
transport of the text data. It takes one argument, which is this
Text instance. It should use get_payload() and set_payload() to
change the payload to the encoded form. It should also add any
Content-Transfer-Encoding: or other headers to the message as
necessary. The default encoding doesn't actually modify the payload,
but it does set Content-Transfer-Encoding: to either `7bit' or `8bit'
as appropriate.
header. This defaults to "us-ascii". Note that as a side-effect, the
Content-Transfer-Encoding: header will also be set.
The use of the _encoder is deprecated. The encoding of the payload,
and the setting of the character set parameter now happens implicitly
based on the _charset argument. If _encoder is supplied, then a
DeprecationWarning is used, and the _encoder functionality may
override any header settings indicated by _charset. This is probably
not what you want.
"""
MIMEBase.MIMEBase.__init__(self, 'text', _subtype,
**{'charset': _charset})
if _text and _text[-1] <> '\n':
_text += '\n'
self.set_payload(_text)
self.set_payload(_text, _charset)
if _encoder is not None:
warnings.warn('_encoder argument is obsolete.',
DeprecationWarning, 2)
# Because set_payload() with a _charset will set its own
# Content-Transfer-Encoding: header, we need to delete the
# existing one or will end up with two of them. :(
del self['content-transfer-encoding']
_encoder(self)
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""Basic message object for the email package object model.
"""
from __future__ import generators
import re
import base64
import quopri
import warnings
from cStringIO import StringIO
from types import ListType
from types import ListType, StringType
# Intrapackage imports
import Errors
import Utils
import Charset
# Separator used when joining a header value with its parameters.
SEMISPACE = '; '
# Regular expression used to split header parameters.  BAW: this may be too
# simple.  It isn't strictly RFC 2045 (section 5.1) compliant, but it catches
# most headers found in the wild.  We may eventually need a full-fledged
# parser.
paramre = re.compile(r'\s*;\s*')
# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
# Helper function
def _formatparam(param, value=None, quote=1):
    """Format a header parameter as a key=value pair and return it.

    A missing or empty value yields just the bare parameter name.  Otherwise
    the value is quoted when quote is true or when it contains one of the
    tspecials characters.
    """
    if value is None or len(value) <= 0:
        return param
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    needs_quoting = quote or tspecials.search(value)
    if not needs_quoting:
        return '%s=%s' % (param, value)
    return '%s="%s"' % (param, Utils.quote(value))
......@@ -39,6 +63,7 @@ class Message:
self._headers = []
self._unixfrom = None
self._payload = None
self._charset = None
# Defaults for multipart messages
self.preamble = self.epilogue = None
......@@ -83,6 +108,8 @@ class Message:
If the current payload is empty, then the current payload will be made
a scalar, set to the given value.
"""
warnings.warn('add_payload() is deprecated, use attach() instead.',
DeprecationWarning, 2)
if self._payload is None:
self._payload = payload
elif type(self._payload) is ListType:
......@@ -93,8 +120,18 @@ class Message:
else:
self._payload = [self._payload, payload]
# A useful synonym
attach = add_payload
def attach(self, payload):
    """Append the given payload to the current payload list.

    When there is no payload yet, the payload becomes a one-element list
    holding the given object; otherwise the object is appended to the
    existing payload.  To set the payload to a scalar object instead (e.g.
    because you're attaching a message/rfc822 subpart), use set_payload().
    """
    if self._payload is not None:
        self._payload.append(payload)
        return
    self._payload = [payload]
def get_payload(self, i=None, decode=0):
"""Return the current payload exactly as is.
......@@ -128,9 +165,57 @@ class Message:
return payload
def set_payload(self, payload):
"""Set the payload to the given value."""
def set_payload(self, payload, charset=None):
    """Replace the payload with the given value.

    If charset is given (i.e. is not None), it is also handed to
    set_charset() after the payload has been stored.
    """
    self._payload = payload
    if charset is None:
        return
    self.set_charset(charset)
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a string or a Charset object.  If it is a string, it
    will be converted to a Charset object by calling Charset's
    constructor.  If charset is None, the charset parameter will be
    removed from the Content-Type: field.  Anything else will generate a
    TypeError.

    The message will be assumed to be a text message encoded with
    charset.input_charset.  It will be converted to charset.output_charset
    and encoded properly, if needed, when generating the plain text
    representation of the message.  MIME headers (MIME-Version,
    Content-Type, Content-Transfer-Encoding) will be added as needed;
    headers that are already present are kept, except that the charset
    parameter of an existing Content-Type: is updated.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if isinstance(charset, StringType):
        charset = Charset.Charset(charset)
    if not isinstance(charset, Charset.Charset):
        raise TypeError(charset)
    self._charset = charset
    if not self.has_key('MIME-Version'):
        self.add_header('MIME-Version', '1.0')
    if not self.has_key('Content-Type'):
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    else:
        self.set_param('charset', charset.get_output_charset())
    if not self.has_key('Content-Transfer-Encoding'):
        cte = charset.get_body_encoding()
        if callable(cte):
            # The body encoding is a function (e.g. encode_7or8bit) that
            # sets the Content-Transfer-Encoding: header itself.
            cte(self)
        else:
            self.add_header('Content-Transfer-Encoding', cte)
def get_charset(self):
    """Return the Charset object recorded for this message's payload.

    This is whatever set_charset() last stored in self._charset.
    """
    current = self._charset
    return current
#
# MAPPING INTERFACE (partial)
......@@ -257,7 +342,7 @@ class Message:
if v is None:
parts.append(k.replace('_', '-'))
else:
parts.append('%s="%s"' % (k.replace('_', '-'), v))
parts.append(_formatparam(k.replace('_', '-'), v))
if _value is not None:
parts.insert(0, _value)
self._headers.append((_name, SEMISPACE.join(parts)))
......@@ -308,6 +393,8 @@ class Message:
for p in paramre.split(value):
try:
name, val = p.split('=', 1)
name = name.rstrip()
val = val.lstrip()
except ValueError:
# Must have been a bare attribute
name = p
......@@ -315,26 +402,29 @@ class Message:
params.append((name, val))
return params
def get_params(self, failobj=None, header='content-type'):
def get_params(self, failobj=None, header='content-type', unquote=1):
"""Return the message's Content-Type: parameters, as a list.
The elements of the returned list are 2-tuples of key/value pairs, as
split on the `=' sign. The left hand side of the `=' is the key,
while the right hand side is the value. If there is no `=' sign in
the parameter the value is the empty string. The value is always
unquoted.
unquoted, unless unquote is set to a false value.
Optional failobj is the object to return if there is no Content-Type:
header. Optional header is the header to search instead of
Content-Type:
Content-Type:.
"""
missing = []
params = self._get_params_preserve(missing, header)
if params is missing:
return failobj
if unquote:
return [(k, Utils.unquote(v)) for k, v in params]
else:
return params
def get_param(self, param, failobj=None, header='content-type'):
def get_param(self, param, failobj=None, header='content-type', unquote=1):
"""Return the parameter value if found in the Content-Type: header.
Optional failobj is the object to return if there is no Content-Type:
......@@ -342,15 +432,112 @@ class Message:
Content-Type:
Parameter keys are always compared case insensitively. Values are
always unquoted.
always unquoted, unless unquote is set to a false value.
"""
if not self.has_key(header):
return failobj
for k, v in self._get_params_preserve(failobj, header):
if k.lower() == param.lower():
if unquote:
return Utils.unquote(v)
else:
return v
return failobj
def set_param(self, param, value, header='Content-Type', requote=1):
    """Set a parameter in the Content-Type: header.

    If the parameter is already present in the header, its value is
    replaced with the new value; otherwise the parameter is appended.

    If header is Content-Type: and it has not yet been defined in this
    message, it is set to "text/plain" and the new parameter and value are
    appended, as per RFC 2045.

    An alternate header can be specified in the header argument, and all
    parameters will be quoted as appropriate unless requote is set to a
    false value.
    """
    if header.lower() == 'content-type' and not self.has_key(header):
        new_value = 'text/plain'
    else:
        new_value = self.get(header)
    if self.get_param(param, header=header):
        # The parameter is already there: rebuild the whole header value,
        # substituting the new value where the names match
        # (case-insensitively).
        wanted = param.lower()
        new_value = ''
        for name, current in self.get_params(header=header,
                                             unquote=requote):
            if name.lower() == wanted:
                item = _formatparam(param, value, requote)
            else:
                item = _formatparam(name, current, requote)
            if new_value:
                new_value = SEMISPACE.join([new_value, item])
            else:
                new_value = item
    elif new_value:
        # New parameter on an existing (or defaulted) header value.
        new_value = SEMISPACE.join(
            [new_value, _formatparam(param, value, requote)])
    else:
        # No header value at all: the parameter becomes the whole value.
        new_value = _formatparam(param, value, requote)
    # Only touch the header when the value actually changed.
    if new_value != self.get(header):
        del self[header]
        self[header] = new_value
def del_param(self, param, header='content-type', requote=1):
    """Remove the given parameter completely from the Content-Type header.

    The header will be re-written in place without param or its value.
    Parameter names are compared case-insensitively.  All values will be
    quoted as appropriate unless requote is set to a false value.

    An alternate header can be specified in the header argument.  Nothing
    happens if that header is not present in the message.
    """
    if not self.has_key(header):
        return
    new_ctype = ''
    # header must be passed by keyword: the first positional parameter of
    # get_params() is failobj, so a positional header would be ignored and
    # the Content-Type: parameters would always be used instead.
    for p, v in self.get_params(header=header, unquote=requote):
        if p.lower() != param.lower():
            if not new_ctype:
                new_ctype = _formatparam(p, v, requote)
            else:
                new_ctype = SEMISPACE.join([new_ctype,
                                            _formatparam(p, v, requote)])
    # Only rewrite the header when something was actually removed.
    if new_ctype != self.get(header):
        del self[header]
        self[header] = new_ctype
def set_type(self, type, header='Content-Type', requote=1):
    """Set the main type and subtype for the Content-Type: header.

    type must be a string in the form "maintype/subtype", otherwise a
    ValueError is raised.

    This method replaces the Content-Type: header, keeping all the
    parameters in place.  If requote is false, this leaves the existing
    header's quoting as is.  Otherwise, the parameters will be quoted (the
    default).

    An alternate header can be specified in the header argument.  When the
    Content-Type: header is set, we'll always also add a MIME-Version:
    header.
    """
    # BAW: should we be strict?
    if not type.count('/') == 1:
        raise ValueError
    # Set the Content-Type: you get a MIME-Version:
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if not self.has_key(header):
        self[header] = type
        return
    # header must be passed by keyword: the first positional parameter of
    # get_params() is failobj, so a positional header would be ignored and
    # the Content-Type: parameters would always be used instead.
    params = self.get_params(header=header, unquote=requote)
    del self[header]
    self[header] = type
    # Skip the first param; it's the old type.
    for p, v in params[1:]:
        self.set_param(p, v, header, requote)
def get_filename(self, failobj=None):
"""Return the filename associated with the payload if present.
......
......@@ -51,9 +51,16 @@ class Parser:
lastvalue = []
lineno = 0
while 1:
line = fp.readline()[:-1]
if not line or not line.strip():
# Don't strip the line before we test for the end condition,
# because whitespace-only header lines are RFC compliant
# continuation lines.
line = fp.readline()
if not line:
break
line = line.splitlines()[0]
if not line:
break
# Ignore the trailing newline
lineno += 1
# Check for initial Unix From_ line
if line.startswith('From '):
......@@ -63,7 +70,6 @@ class Parser:
else:
raise Errors.HeaderParseError(
'Unix-from in headers after first rfc822 header')
#
# Header continuation line
if line[0] in ' \t':
if not lastheader:
......@@ -134,11 +140,11 @@ class Parser:
msgobj = self.parsestr(part)
container.preamble = preamble
container.epilogue = epilogue
# Ensure that the container's payload is a list
if not isinstance(container.get_payload(), ListType):
container.set_payload([msgobj])
else:
container.add_payload(msgobj)
container.attach(msgobj)
elif container.get_main_type() == 'multipart':
# Very bad. A message is a multipart with no boundary!
raise Errors.BoundaryError(
'multipart message with no defined boundary')
elif container.get_type() == 'message/delivery-status':
# This special kind of type contains blocks of headers separated
# by a blank line. We'll represent each header block as a
......@@ -160,9 +166,9 @@ class Parser:
except Errors.HeaderParseError:
msg = self._class()
self._parsebody(msg, fp)
container.add_payload(msg)
container.set_payload(msg)
else:
container.add_payload(fp.read())
container.set_payload(fp.read())
......
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""Miscellaneous utilities.
"""
import time
import socket
import re
import random
import os
import warnings
from cStringIO import StringIO
from types import ListType
from rfc822 import unquote, quote, parseaddr
from rfc822 import dump_address_pair
from rfc822 import unquote, quote
from rfc822 import AddrlistClass as _AddrlistClass
from rfc822 import parsedate_tz, parsedate, mktime_tz
from rfc822 import mktime_tz
# We need wormarounds for bugs in these methods in older Pythons (see below)
from rfc822 import parsedate as _parsedate
from rfc822 import parsedate_tz as _parsedate_tz
from rfc822 import parseaddr as _parseaddr
from quopri import decodestring as _qdecode
import base64
......@@ -20,6 +30,10 @@ from Encoders import _bencode, _qencode
COMMASPACE = ', '
UEMPTYSTRING = u''
CRLF = '\r\n'
specialsre = re.compile(r'[][\()<>@,:;".]')
escapesre = re.compile(r'[][\()"]')
......@@ -43,6 +57,41 @@ def _bdecode(s):
return value
def fix_eols(s):
    r"""Replace all line-ending characters with \r\n."""
    # One pass: an existing \r\n pair is matched first and rewritten to
    # itself, while any lone \r or lone \n becomes the canonical pair.
    return re.sub(r'\r\n|\r|\n', CRLF, s)
def formataddr(pair):
    """The inverse of parseaddr(), this takes a 2-tuple of the form
    (realname, email_address) and returns the string value suitable
    for an RFC 2822 From:, To: or Cc:.

    If the first element of pair is false, then the second element is
    returned unmodified.
    """
    realname, addr = pair
    if not realname:
        return addr
    # Names containing RFC 2822 specials must be wrapped in double quotes.
    if specialsre.search(realname):
        wrap = '"'
    else:
        wrap = ''
    # Backslash-escape the characters matched by escapesre.
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (wrap, escaped, wrap, addr)
# For backwards compatibility
def dump_address_pair(pair):
    """Deprecated alias for formataddr(); issues a DeprecationWarning."""
    message = 'Use email.Utils.formataddr() instead'
    # stacklevel 2 attributes the warning to our caller.
    warnings.warn(message, DeprecationWarning, 2)
    return formataddr(pair)
def getaddresses(fieldvalues):
"""Return a list of (REALNAME, EMAIL) for each fieldvalue."""
......@@ -64,30 +113,26 @@ ecre = re.compile(r'''
def decode(s):
"""Return a decoded string according to RFC 2047, as a unicode string."""
"""Return a decoded string according to RFC 2047, as a unicode string.
NOTE: This function is deprecated. Use Header.decode_header() instead.
"""
warnings.warn('Use Header.decode_header() instead.', DeprecationWarning, 2)
# Intra-package import here to avoid circular import problems.
from Header import decode_header
L = decode_header(s)
if not isinstance(L, ListType):
# s wasn't decoded
return s
rtn = []
parts = ecre.split(s, 1)
while parts:
# If there are less than 4 parts, it can't be encoded and we're done
if len(parts) < 5:
rtn.extend(parts)
break
# The first element is any non-encoded leading text
rtn.append(parts[0])
charset = parts[1]
encoding = parts[2].lower()
atom = parts[3]
# The next chunk to decode should be in parts[4]
parts = ecre.split(parts[4])
# The encoding must be either `q' or `b', case-insensitive
if encoding == 'q':
func = _qdecode
elif encoding == 'b':
func = _bdecode
for atom, charset in L:
if charset is None:
rtn.append(atom)
else:
func = _identity
# Decode and get the unicode in the charset
rtn.append(unicode(func(atom), charset))
# Convert the string to Unicode using the given encoding. Leave
# Unicode conversion errors to strict.
rtn.append(unicode(atom, charset))
# Now that we've decoded everything, we just need to join all the parts
# together into the final string.
return UEMPTYSTRING.join(rtn)
......@@ -96,6 +141,7 @@ def decode(s):
def encode(s, charset='iso-8859-1', encoding='q'):
"""Encode a string according to RFC 2047."""
warnings.warn('Use Header.Header.encode() instead.', DeprecationWarning, 2)
encoding = encoding.lower()
if encoding == 'q':
estr = _qencode(s)
......@@ -150,3 +196,48 @@ def formatdate(timeval=None, localtime=0):
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
now[0], now[3], now[4], now[5],
zone)
def make_msgid(idstring=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID:, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    Optional idstring, if given, is a string used to strengthen the
    uniqueness of the Message-ID; it is inserted, preceded by a dot, just
    before the "@".  Otherwise nothing is inserted.
    """
    # UTC timestamp, process id and a random number below 100000 make up
    # the left-hand side of the id.
    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    pid = os.getpid()
    number = random.randrange(100000)
    suffix = ''
    if idstring is not None:
        suffix = '.' + idstring
    host = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (stamp, pid, number, suffix, host)
# These functions are in the standalone mimelib version only because they've
# subsequently been fixed in the latest Python versions. We use this to worm
# around broken older Pythons.
def parsedate(data):
    """Parse a date string with the underlying parsedate() helper.

    Returns None straight away for a false value (empty string, None)
    instead of handing it to the underlying parser.
    """
    if data:
        return _parsedate(data)
    return None
def parsedate_tz(data):
    """Parse a date string with the underlying parsedate_tz() helper.

    Returns None straight away for a false value (empty string, None)
    instead of handing it to the underlying parser.
    """
    if data:
        return _parsedate_tz(data)
    return None
def parseaddr(addr):
    """Split an address into a (realname, email_address) 2-tuple.

    Delegates to the underlying parseaddr() helper, but normalizes its
    ('', None) result to ('', '') so both elements are always strings in
    that case.
    """
    name, mailbox = _parseaddr(addr)
    if mailbox is None and name == '':
        return '', ''
    return name, mailbox
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""A package for parsing, handling, and generating email messages.
"""
__version__ = '1.0'
__version__ = '2.0'
__all__ = ['Encoders',
__all__ = ['Charset',
'Encoders',
'Errors',
'Generator',
'Header',
'Iterators',
'MIMEAudio',
'MIMEBase',
......@@ -18,6 +20,8 @@ __all__ = ['Encoders',
'Message',
'Parser',
'Utils',
'base64MIME',
'quopriMIME',
'message_from_string',
'message_from_file',
]
......
# Copyright (C) 2002 Python Software Foundation
# Author: che@debian.org (Ben Gertzfield)
"""Base64 content transfer encoding per RFCs 2045-2047.
This module handles the content transfer encoding method defined in RFC 2045
to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
characters encoding known as Base64.
It is used in the MIME standards for email to attach images, audio, and text
using some 8-bit character sets to messages.
This module provides an interface to encode and decode both headers and bodies
with Base64 encoding.
RFC 2047 defines a method for including character set information in an
`encoded-word' in a header. This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.
This module does not do the line wrapping or end-of-line character conversion
necessary for proper internationalized headers; it only does dumb encoding and
decoding. To deal with the various line wrapping issues, use the email.Header
module.
"""
import re
from binascii import b2a_base64, a2b_base64
from email.Utils import fix_eols
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''
# See also Charset.py
MISC_LEN = 7
# Helpers
def base64_len(s):
    """Return the length of s when it is encoded with base64."""
    # Every group of 3 input bytes -- including a final partial group --
    # becomes exactly 4 output characters.
    groups, partial = divmod(len(s), 3)
    if partial:
        groups += 1
    return groups * 4
def header_encode(header, charset='iso-8859-1', keep_eols=0, maxlinelen=76,
                  eol=NL):
    """Encode a single header line with Base64 encoding in a given charset.

    Defined in RFC 2047, this `B' encoding is identical to normal Base64
    encoding, except that each line must be intelligently wrapped (respecting
    the Base64 encoding), and subsequent lines must start with a space.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is set to true (the default is false).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.

    The resulting string will be in the form:

    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
      =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="

    with each line wrapped at approximately maxlinelen characters (defaults
    to 76 characters).  An empty header is returned unchanged.
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = fix_eols(header)

    # Room left for base64 text once the "=?charset?b?...?=" chrome has been
    # accounted for, and the number of raw bytes that roughly fills it.
    # Floor division keeps the step an integer regardless of the division
    # semantics in effect.
    max_encoded = maxlinelen - len(charset) - MISC_LEN
    max_unencoded = max_encoded * 3 // 4
    # A maxlinelen too small for the chrome used to give a zero step (range()
    # raises ValueError) or a negative one (the header was silently dropped).
    # Fall back to one base64 group of 3 bytes per line instead.
    if max_unencoded < 1:
        max_unencoded = 3
    # NOTE(review): base64 output is rounded up to a multiple of four
    # characters, so an encoded chunk can be slightly longer than
    # max_encoded.  The chunking is left as it was because existing callers
    # may depend on the exact wrapping -- confirm before tightening.

    # Base64 encode each chunk and add the RFC chrome to it.
    lines = []
    for i in range(0, len(header), max_unencoded):
        chunk = b2a_base64(header[i:i+max_unencoded])
        # Ignore the last character of each chunk if it is a newline
        if chunk[-1] == NL:
            chunk = chunk[:-1]
        lines.append('=?%s?b?%s?=' % (charset, chunk))
    # Glue the lines together and return it.  BAW: should we be able to
    # specify the leading whitespace in the joiner?
    joiner = eol + ' '
    return joiner.join(lines)
def encode(s, binary=1, maxlinelen=76, eol=NL):
    """Encode a string with base64.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).

    If binary is false, end-of-line characters will be converted to the
    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
    verbatim (this is the default).

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    An empty string is returned unchanged.
    """
    if not s:
        return s

    if not binary:
        s = fix_eols(s)

    # Each line carries whole groups of 3 input bytes (4 output characters).
    # A chunk size that is not a multiple of 3 would put "=" padding in the
    # middle of the output and could push a line past maxlinelen.  For any
    # maxlinelen that is a multiple of 4 this equals maxlinelen * 3 / 4.
    max_unencoded = maxlinelen // 4 * 3
    # Always make progress, even for a degenerate maxlinelen.
    if max_unencoded < 3:
        max_unencoded = 3

    encvec = []
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[i:i + max_unencoded])
        # Swap b2a_base64()'s trailing newline for the requested eol.
        if enc[-1] == NL and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)


# For convenience and backwards compatibility w/ standard base64 module
body_encode = encode
encodestring = encode
def decode(s, convert_eols=None):
    """Decode a raw base64 string.

    If convert_eols is set to a string value, all canonical email linefeeds,
    e.g. "\\r\\n", in the decoded text will be converted to the value of
    convert_eols.  os.linesep is a good choice for convert_eols if you are
    decoding a text attachment.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.Header class for that functionality.

    An empty string is returned unchanged.
    """
    if not s:
        return s

    decoded = a2b_base64(s)
    if not convert_eols:
        return decoded
    return decoded.replace(CRLF, convert_eols)


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode
# Copyright (C) 2001,2002 Python Software Foundation
# Author: che@debian.org (Ben Gertzfield)
"""Quoted-printable content transfer encoding per RFCs 2045-2047.
This module handles the content transfer encoding method defined in RFC 2045
to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to
safely encode text that is in a character set similar to the 7-bit US ASCII
character set, but that includes some 8-bit characters that are normally not
allowed in email bodies or headers.
Quoted-printable is very space-inefficient for encoding binary files; use the
email.base64MIME module for that instead.
This module provides an interface to encode and decode both headers and bodies
with quoted-printable encoding.
RFC 2047 defines a method for including character set information in an
`encoded-word' in a header. This method is commonly used for 8-bit real names
in To:/From:/Cc: etc. fields, as well as Subject: lines.
This module does not do the line wrapping or end-of-line character
conversion necessary for proper internationalized headers; it only
does dumb encoding and decoding. To deal with the various line
wrapping issues, use the email.Header module.
"""
import re
from string import hexdigits
from email.Utils import fix_eols
CRLF = '\r\n'
NL = '\n'
# See also Charset.py
MISC_LEN = 7
hqre = re.compile(r'[^-a-zA-Z0-9!*+/ ]')
bqre = re.compile(r'[^ !-<>-~\t]')
# Helpers
def header_quopri_check(c):
    """Return true if the character should be escaped with header quopri."""
    # hqre matches every character outside the header-safe set.
    if hqre.match(c):
        return 1
    return None
def body_quopri_check(c):
    """Return true if the character should be escaped with body quopri."""
    # bqre matches every character outside the body-safe set.
    if bqre.match(c):
        return 1
    return None
def header_quopri_len(s):
    """Return the length of s when it is encoded with header quopri."""
    escaped = [c for c in s if hqre.match(c)]
    # Every escaped character grows from one character to three ("=XX").
    return len(s) + 2 * len(escaped)
def body_quopri_len(str):
    """Return the length of str when it is encoded with body quopri."""
    escaped = [c for c in str if bqre.match(c)]
    # Every escaped character grows from one character to three ("=XX").
    return len(str) + 2 * len(escaped)
def _max_append(L, s, maxlen, extra=''):
    """Add s to the list of chunks L, in place.

    s is glued (preceded by extra) onto the last chunk when the last chunk
    plus s stays strictly shorter than maxlen; otherwise, or when L is
    empty, s starts a new chunk.  The length of extra is not counted.
    """
    if L and len(L[-1]) + len(s) < maxlen:
        L[-1] += extra + s
    else:
        L.append(s)
def unquote(s):
    """Turn a string in the form =AB to the ASCII character with value 0xab"""
    # s[0] is the "="; the two characters after it are the hex digits.
    hex_pair = s[1:3]
    return chr(int(hex_pair, 16))
def quote(c):
    """Turn the character c into its =XX form (two uppercase hex digits)."""
    code = ord(c)
    return "=%02X" % code
def header_encode(header, charset="iso-8859-1", keep_eols=0, maxlinelen=76,
                  eol=NL):
    """Encode a single header line with quoted-printable (like) encoding.

    Defined in RFC 2047, this `Q' encoding is similar to quoted-printable, but
    used specifically for email header fields to allow charsets with mostly 7
    bit characters (and some 8 bit) to remain more or less readable in
    mail clients that do not understand the encoding.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    The resulting string will be in the form:

    "=?charset?q?I_f=E2rt_in_your_g=E8n=E8ral_dire=E7tion?=\\n
      =?charset?q?Silly_=C8nglish_Kn=EEghts?="

    with each line wrapped safely at, at most, maxlinelen characters (defaults
    to 76 characters).

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is set to true (the default is false).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = fix_eols(header)

    # Quopri encode the header character by character, packing the results
    # into chunks that fit in maxlinelen once the RFC chrome is added.
    budget = maxlinelen - len(charset) - MISC_LEN
    chunks = []
    for ch in header:
        if ch == ' ':
            # Space may be represented as _ instead of =20 for readability
            token = '_'
        elif hqre.match(ch):
            # Unsafe character: replace with its hex value, like =E2
            token = "=%02X" % ord(ch)
        else:
            # These characters can be included verbatim
            token = ch
        _max_append(chunks, token, budget)

    # Now add the RFC chrome to each encoded chunk and glue the chunks
    # together.  BAW: should we be able to specify the leading whitespace in
    # the joiner?
    words = []
    for chunk in chunks:
        words.append('=?%s?q?%s?=' % (charset, chunk))
    return (eol + ' ').join(words)
def encode(body, binary=0, maxlinelen=76, eol=NL):
    """Encode with quoted-printable, wrapping at maxlinelen characters.

    If binary is false (the default), end-of-line characters will be converted
    to the canonical email end-of-line sequence \\r\\n.  Otherwise they will
    be left verbatim.

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).  Long lines will have the `soft linefeed' quoted-printable
    character "=" appended to them, so the decoded text will be identical to
    the original text.

    A space or tab at the end of a line is followed by a soft line break; at
    the very end of the body it is written in its =XX form instead.  An empty
    body is returned unchanged.
    """
    if not body:
        return body

    if not binary:
        body = fix_eols(body)

    # BAW: We're accumulating the body text by string concatenation.  That
    # can't be very efficient, but I don't have time now to rewrite it.  It
    # just feels like this algorithm could be more efficient.
    encoded_body = ''
    # Index of the line being processed; bumped at the top of each iteration.
    lineno = -1
    # Preserve line endings here so we can check later whether an eol needs
    # to be added to the output.
    lines = body.splitlines(1)
    for line in lines:
        # But strip off line-endings for processing this line.
        if line.endswith(CRLF):
            line = line[:-2]
        elif line[-1] in CRLF:
            line = line[:-1]

        lineno += 1
        encoded_line = ''
        # prev stays None for an empty line; otherwise it ends up holding the
        # last raw character of the line.
        prev = None
        linelen = len(line)
        # Now we need to examine every character to see if it needs to be
        # quopri encoded.  BAW: again, string concatenation is inefficient.
        for j in range(linelen):
            c = line[j]
            prev = c
            if bqre.match(c):
                c = quote(c)
            elif j+1 == linelen:
                # Check for whitespace at end of line; special case.  A
                # trailing space or tab is not emitted here; it is handled
                # after the loop through prev.
                if c not in ' \t':
                    encoded_line += c
                prev = c
                continue
            # Check to see if the line has reached its maximum length; if so,
            # flush it with a soft line break.
            if len(encoded_line) + len(c) >= maxlinelen:
                encoded_body += encoded_line + '=' + eol
                encoded_line = ''
            encoded_line += c
        # Now at end of line..
        if prev and prev in ' \t':
            # Special case for whitespace at end of file
            if lineno+1 == len(lines):
                prev = quote(prev)
                if len(encoded_line) + len(prev) > maxlinelen:
                    encoded_body += encoded_line + '=' + eol + prev
                else:
                    encoded_body += encoded_line + prev
            # Just normal whitespace at end of line
            else:
                encoded_body += encoded_line + prev + '=' + eol
            encoded_line = ''
        # Now look at the line we just finished: if it had a line ending, we
        # need to add eol to the end of the encoded line.
        if lines[lineno].endswith(CRLF) or lines[lineno][-1] in CRLF:
            encoded_body += encoded_line + eol
        else:
            encoded_body += encoded_line
        encoded_line = ''
    return encoded_body


# For convenience and backwards compatibility w/ standard base64 module
body_encode = encode
encodestring = encode
# BAW: I'm not sure if the intent was for the signature of this function to be
# the same as base64MIME.decode() or not...
def decode(encoded, eol=NL):
    """Decode a quoted-printable string.

    Lines are separated with eol, which defaults to \\n.

    Trailing whitespace on each encoded line is discarded, a trailing "="
    is treated as a soft line break (no eol is emitted for it), and an "="
    that is not followed by two hex digits is passed through literally.  An
    empty string is returned unchanged.
    """
    if not encoded:
        return encoded
    # Collect the decoded pieces in a list and join them once at the end,
    # instead of growing a string one character at a time.
    pieces = []

    for line in encoded.splitlines():
        line = line.rstrip()
        if not line:
            pieces.append(eol)
            continue

        i = 0
        n = len(line)
        while i < n:
            c = line[i]
            if c != '=':
                pieces.append(c)
                i += 1
            # Otherwise, c == "=".  Are we at the end of the line?  If so,
            # this is a soft line break: swallow it, and skip the eol below.
            elif i+1 == n:
                i += 1
                continue
            # Decode if in form =AB
            elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits:
                pieces.append(unquote(line[i:i+3]))
                i += 3
            # Otherwise, not in form =AB, pass literally
            else:
                pieces.append(c)
                i += 1

            # The line was consumed without a soft line break
            if i == n:
                pieces.append(eol)
    decoded = ''.join(pieces)
    # Special case if original string did not end with eol.  endswith() is
    # safe when nothing was decoded at all (e.g. the input was just "=") and
    # also works for a multi-character eol such as "\r\n"; slicing by
    # len(eol) removes the whole separator.
    if eol and not encoded.endswith(eol) and decoded.endswith(eol):
        decoded = decoded[:-len(eol)]
    return decoded


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode
def _unquote_match(match):
    """Turn a match in the form =AB to the ASCII character with value 0xab"""
    # Same conversion as unquote(): skip the "=" and read two hex digits.
    return chr(int(match.group(0)[1:3], 16))


# Header decoding is done a bit differently
def header_decode(s):
    """Decode a string encoded with RFC 2047 MIME header `Q' encoding.

    Underscores become spaces and every "=" followed by two hex digits
    becomes the character with that value.  An "=" that is not followed by
    two hex digits is left in place.

    This function does not parse a full MIME header value encoded with
    quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use
    the high level email.Header class for that functionality.
    """
    s = s.replace('_', ' ')
    # Match hex digits only: \w would also accept pairs such as "=zz", which
    # cannot be converted and would raise ValueError.
    return re.sub(r'=[0-9a-fA-F]{2}', _unquote_match, s)
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain
--BOUNDARY
--BOUNDARY--
From MAILER-DAEMON Fri Apr 06 16:46:09 2001
Received: from [204.245.199.98] (helo=zinfandel.lacita.com)
by www.linux.org.uk with esmtp (Exim 3.13 #1)
id 14lYR6-0008Iv-00
for linuxuser-admin@www.linux.org.uk; Fri, 06 Apr 2001 16:46:09 +0100
Received: from localhost (localhost) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with internal id JAB03225; Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
From: Mail Delivery Subsystem <MAILER-DAEMON@zinfandel.lacita.com>
Subject: Returned mail: Too many hops 19 (17 max): from <linuxuser-admin@www.linux.org.uk> via [199.164.235.226], to <scoffman@wellpartner.com>
Message-Id: <200104061723.JAB03225@zinfandel.lacita.com>
To: <linuxuser-admin@www.linux.org.uk>
To: postmaster@zinfandel.lacita.com
MIME-Version: 1.0
Content-Type: multipart/report; report-type=delivery-status;
bo
Auto-Submitted: auto-generated (failure)
This is a MIME-encapsulated message
--JAB03225.986577786/zinfandel.lacita.com
The original message was received at Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800)
from [199.164.235.226]
----- The following addresses have delivery notifications -----
<scoffman@wellpartner.com> (unrecoverable error)
----- Transcript of session follows -----
554 Too many hops 19 (17 max): from <linuxuser-admin@www.linux.org.uk> via [199.164.235.226], to <scoffman@wellpartner.com>
--JAB03225.986577786/zinfandel.lacita.com
Content-Type: message/delivery-status
Reporting-MTA: dns; zinfandel.lacita.com
Received-From-MTA: dns; [199.164.235.226]
Arrival-Date: Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800)
Final-Recipient: rfc822; scoffman@wellpartner.com
Action: failed
Status: 5.4.6
Last-Attempt-Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
--JAB03225.986577786/zinfandel.lacita.com
Content-Type: text/rfc822-headers
Return-Path: linuxuser-admin@www.linux.org.uk
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03225 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800)
Received: from zinfandel.lacita.com ([204.245.199.98])
by
fo
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03221 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:22:18 -0800 (GMT-0800)
Received: from zinfandel.lacita.com ([204.245.199.98])
by
fo
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03217 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:21:37 -0800 (GMT-0800)
Received: from zinfandel.lacita.com ([204.245.199.98])
by
fo
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03213 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:20:56 -0800 (GMT-0800)
Received: from zinfandel.lacita.com ([204.245.199.98])
by
fo
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03209 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:20:15 -0800 (GMT-0800)
Received: from zinfandel.lacita.com ([204.245.199.98])
by
fo
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03205 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:19:33 -0800 (GMT-0800)
Received: from zinfandel.lacita.com ([204.245.199.98])
by
fo
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03201 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:18:52 -0800 (GMT-0800)
Received: from zinfandel.lacita.com ([204.245.199.98])
by
fo
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03197 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:17:54 -0800 (GMT-0800)
Received: from www.linux.org.uk (parcelfarce.linux.theplanet.co.uk [195.92.249.252])
by
fo
Received: from localhost.localdomain
([
by
id
Received: from [212.1.130.11] (helo=s1.uklinux.net ident=root)
by
id
fo
Received: from server (ppp-2-22.cvx4.telinco.net [212.1.149.22])
by
fo
From: Daniel James <daniel@linuxuser.co.uk>
Organization: LinuxUser
To: linuxuser@www.linux.org.uk
X-Mailer: KMail [version 1.1.99]
Content-Type: text/plain;
c
MIME-Version: 1.0
Message-Id: <01040616033903.00962@server>
Content-Transfer-Encoding: 8bit
Subject: [LinuxUser] bulletin no. 45
Sender: linuxuser-admin@www.linux.org.uk
Errors-To: linuxuser-admin@www.linux.org.uk
X-BeenThere: linuxuser@www.linux.org.uk
X-Mailman-Version: 2.0.3
Precedence: bulk
List-Help: <mailto:linuxuser-request@www.linux.org.uk?subject=help>
List-Post: <mailto:linuxuser@www.linux.org.uk>
List-Subscribe: <http://www.linux.org.uk/mailman/listinfo/linuxuser>,
<m
List-Id: bulletins from LinuxUser magazine <linuxuser.www.linux.org.uk>
List-Unsubscribe: <http://www.linux.org.uk/mailman/listinfo/linuxuser>,
<m
List-Archive: <http://www.linux.org.uk/pipermail/linuxuser/>
Date: Fri, 6 Apr 2001 16:03:39 +0100
--JAB03225.986577786/zinfandel.lacita.com--
# Copyright (C) 2001,2002 Python Software Foundation
# email package unit tests
import sys
import os
import time
import unittest
import base64
from cStringIO import StringIO
from types import StringType
import warnings
import email
from email.Charset import Charset
from email.Header import Header, decode_header
from email.Parser import Parser, HeaderParser
from email.Generator import Generator, DecodedGenerator
from email.Message import Message
......@@ -22,14 +26,18 @@ from email import Utils
from email import Errors
from email import Encoders
from email import Iterators
from email import base64MIME
from email import quopriMIME
from test_support import findfile, __file__ as test_support_file
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
# We don't care about DeprecationWarnings
warnings.filterwarnings('ignore', '', DeprecationWarning, __name__)
def openfile(filename):
......@@ -41,7 +49,7 @@ def openfile(filename):
# Base test class
class TestEmailBase(unittest.TestCase):
def _msgobj(self, filename):
fp = openfile(filename)
fp = openfile(findfile(filename))
try:
msg = email.message_from_file(fp)
finally:
......@@ -58,6 +66,45 @@ class TestMessageAPI(TestEmailBase):
eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
eq(msg.get_all('xx', 'n/a'), 'n/a')
def test_getset_charset(self):
    """Check set_charset()/get_charset() and the MIME headers they touch."""
    eq = self.assertEqual
    msg = Message()
    # A fresh message has no charset.
    eq(msg.get_charset(), None)
    charset = Charset('iso-8859-1')
    msg.set_charset(charset)
    # Setting a charset on a bare message is expected to fill in these
    # MIME headers.
    eq(msg['mime-version'], '1.0')
    eq(msg.get_type(), 'text/plain')
    eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
    eq(msg.get_param('charset'), 'iso-8859-1')
    eq(msg['content-transfer-encoding'], 'quoted-printable')
    eq(msg.get_charset().input_charset, 'iso-8859-1')
    # Remove the charset
    msg.set_charset(None)
    eq(msg.get_charset(), None)
    eq(msg['content-type'], 'text/plain')
    # Try adding a charset when there's already MIME headers present; the
    # existing header values are expected to survive, with only the charset
    # parameter added to Content-Type:.
    msg = Message()
    msg['MIME-Version'] = '2.0'
    msg['Content-Type'] = 'text/x-weird'
    msg['Content-Transfer-Encoding'] = 'quinted-puntable'
    msg.set_charset(charset)
    eq(msg['mime-version'], '2.0')
    eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
    eq(msg['content-transfer-encoding'], 'quinted-puntable')
def test_set_charset_from_string(self):
    """Check that set_charset() accepts a charset name given as a string."""
    eq = self.assertEqual
    msg = Message()
    msg.set_charset('us-ascii')
    # get_charset() is expected to hand back an object with input_charset.
    eq(msg.get_charset().input_charset, 'us-ascii')
    eq(msg['content-type'], 'text/plain; charset="us-ascii"')
def test_set_payload_with_charset(self):
    """Check that set_payload() with a charset argument sets the charset."""
    msg = Message()
    charset = Charset('iso-8859-1')
    msg.set_payload('This is a string payload', charset)
    self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
def test_get_charsets(self):
eq = self.assertEqual
......@@ -204,6 +251,11 @@ class TestMessageAPI(TestEmailBase):
eq(msg.get_params(header='x-header'),
[('foo', ''), ('bar', 'one'), ('baz', 'two')])
def test_get_param_liberal(self):
    # The header value below repeats the field name, uses mixed case and
    # puts spaces around '='; get_param() is still expected to find the
    # boundary.
    sloppy = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
    msg = Message()
    msg['Content-Type'] = sloppy
    boundary = msg.get_param('boundary')
    self.assertEqual(boundary, 'CPIMSSMTPC06p5f3tG')
def test_get_param(self):
eq = self.assertEqual
msg = email.message_from_string(
......@@ -216,6 +268,10 @@ class TestMessageAPI(TestEmailBase):
eq(msg.get_param('foo', header='x-header'), '')
eq(msg.get_param('bar', header='x-header'), 'one')
eq(msg.get_param('baz', header='x-header'), 'two')
# XXX: We are not RFC-2045 compliant! We cannot parse:
# msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
# msg.get_param("weird")
# yet.
def test_get_param_funky_continuation_lines(self):
msg = self._msgobj('msg_22.txt')
......@@ -228,6 +284,51 @@ class TestMessageAPI(TestEmailBase):
self.failUnless(msg.has_key('HEADER'))
self.failIf(msg.has_key('headeri'))
def test_set_param(self):
    # Sets parameters on a fresh Message and reads them back both unquoted
    # (the default) and raw (unquote=0), then does the same on a custom header.
    check = self.assertEqual
    msg = Message()
    msg.set_param('charset', 'iso-2022-jp')
    check(msg.get_param('charset'), 'iso-2022-jp')
    msg.set_param('importance', 'high value')
    check(msg.get_param('importance'), 'high value')
    check(msg.get_param('importance', unquote=0), '"high value"')
    unquoted = [('text/plain', ''),
                ('charset', 'iso-2022-jp'),
                ('importance', 'high value')]
    check(msg.get_params(), unquoted)
    raw = [('text/plain', ''),
           ('charset', '"iso-2022-jp"'),
           ('importance', '"high value"')]
    check(msg.get_params(unquote=0), raw)
    # Same parameter name, but stored on a header other than Content-Type.
    msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
    check(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')
def test_del_param(self):
    # Deletes the report-type parameter from the msg_05.txt fixture and
    # then puts it back; the re-added parameter is expected at the end.
    check = self.assertEqual
    main_type = ('multipart/report', '')
    boundary = ('boundary', 'D1690A7AC1.996856090/mail.example.com')
    msg = self._msgobj('msg_05.txt')
    check(msg.get_params(),
          [main_type, ('report-type', 'delivery-status'), boundary])
    saved = msg.get_param("report-type")
    msg.del_param("report-type")
    check(msg.get_params(), [main_type, boundary])
    msg.set_param("report-type", saved)
    check(msg.get_params(), [main_type, boundary, ('report-type', saved)])
def test_set_type(self):
    # set_type() must reject a value without a subtype, and changing the
    # type afterwards is expected to leave existing parameters in place.
    msg = Message()
    self.assertRaises(ValueError, msg.set_type, 'text')
    msg.set_type('text/plain')
    self.assertEqual(msg['content-type'], 'text/plain')
    msg.set_param('charset', 'us-ascii')
    self.assertEqual(msg['content-type'], 'text/plain; charset="us-ascii"')
    msg.set_type('text/html')
    self.assertEqual(msg['content-type'], 'text/html; charset="us-ascii"')
# Test the email.Encoders module
......@@ -236,7 +337,6 @@ class TestEncoders(unittest.TestCase):
eq = self.assertEqual
msg = MIMEText('hello world', _encoder=Encoders.encode_noop)
eq(msg.get_payload(), 'hello world\n')
eq(msg['content-transfer-encoding'], None)
def test_encode_7bit(self):
eq = self.assertEqual
......@@ -253,6 +353,12 @@ class TestEncoders(unittest.TestCase):
eq(msg.get_payload(), 'hello \x80 world\n')
eq(msg['content-transfer-encoding'], '8bit')
def test_encode_empty_payload(self):
    # A Message that never had a payload set is given the us-ascii
    # charset; the transfer encoding is expected to come out as 7bit.
    empty = Message()
    empty.set_charset('us-ascii')
    cte = empty['content-transfer-encoding']
    self.assertEqual(cte, '7bit')
def test_encode_base64(self):
eq = self.assertEqual
msg = MIMEText('hello world', _encoder=Encoders.encode_base64)
......@@ -265,6 +371,23 @@ class TestEncoders(unittest.TestCase):
eq(msg.get_payload(), 'hello=20world\n')
eq(msg['content-transfer-encoding'], 'quoted-printable')
def test_default_cte(self):
    # Checks the Content-Transfer-Encoding header MIMEText is expected to
    # produce for 7-bit text, for 8-bit text without a charset, and for
    # 8-bit text with an explicit charset.
    eq = self.assertEqual
    # With no explicit _charset it's us-ascii, and all are 7-bit
    msg = MIMEText('hello world')
    eq(msg['content-transfer-encoding'], '7bit')
    # Similar, but with 8-bit data
    msg = MIMEText('hello \xf8 world')
    eq(msg['content-transfer-encoding'], '8bit')
    # And now with a different charset
    msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
    eq(msg['content-transfer-encoding'], 'quoted-printable')
# Test long header wrapping
......@@ -279,7 +402,14 @@ class TestLongHeaders(unittest.TestCase):
sfp = StringIO()
g = Generator(sfp)
g(msg)
self.assertEqual(sfp.getvalue(), openfile('msg_18.txt').read())
self.assertEqual(sfp.getvalue(), '''\
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
''')
def test_no_semis_header_splitter(self):
msg = Message()
......@@ -314,6 +444,30 @@ References: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
Test""")
def test_splitting_multiple_long_lines(self):
    # A Received: value made of three long lines goes in; as_string() is
    # expected to emit it with each line split after its semicolons.
    received = """\
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
"""
    expected = """\
Received: from babylon.socal-raves.org (localhost [127.0.0.1]);
by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
for <mailman-admin@babylon.socal-raves.org>;
Sat, 2 Feb 2002 17:00:06 -0800 (PST)
from babylon.socal-raves.org (localhost [127.0.0.1]);
by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
for <mailman-admin@babylon.socal-raves.org>;
Sat, 2 Feb 2002 17:00:06 -0800 (PST)
from babylon.socal-raves.org (localhost [127.0.0.1]);
by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
for <mailman-admin@babylon.socal-raves.org>;
Sat, 2 Feb 2002 17:00:06 -0800 (PST)
"""
    msg = Message()
    msg['Received'] = received
    self.assertEqual(msg.as_string(), expected)
# Test mangling of "From " lines in the body of a message
......@@ -476,6 +630,12 @@ class TestMIMEText(unittest.TestCase):
self.assertEqual(self._msg.get_payload(), 'hello there\n')
self.failUnless(not self._msg.is_multipart())
def test_charset(self):
    # A MIMEText built with _charset should report that charset and
    # advertise it in its Content-Type header.
    text = MIMEText('hello there', _charset='us-ascii')
    self.assertEqual(text.get_charset().input_charset, 'us-ascii')
    self.assertEqual(text['content-type'], 'text/plain; charset="us-ascii"')
# Test a more complicated multipart/mixed type message
......@@ -539,6 +699,82 @@ This is the dingus fish.
unless(not m0.is_multipart())
unless(not m1.is_multipart())
def test_no_parts_in_a_multipart(self):
    # Flattens a multipart/mixed container to which nothing has been
    # attached and compares the result with the expected text.
    outer = MIMEBase('multipart', 'mixed')
    outer['Subject'] = 'A subject'
    outer['To'] = 'aperson@dom.ain'
    outer['From'] = 'bperson@dom.ain'
    outer.preamble = ''
    outer.epilogue = ''
    outer.set_boundary('BOUNDARY')
    # Deliberately no attach() call here: the container stays empty.
    self.assertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain
--BOUNDARY
--BOUNDARY--
''')
def test_one_part_in_a_multipart(self):
    # One text part is attached after the boundary has been set; the
    # flattened container is compared with the expected text.
    expected = '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
hello world
--BOUNDARY--
'''
    container = MIMEBase('multipart', 'mixed')
    for name, value in [('Subject', 'A subject'),
                        ('To', 'aperson@dom.ain'),
                        ('From', 'bperson@dom.ain')]:
        container[name] = value
    container.preamble = ''
    container.epilogue = ''
    container.set_boundary('BOUNDARY')
    part = MIMEText('hello world')
    container.attach(part)
    self.assertEqual(container.as_string(), expected)
def test_seq_parts_in_a_multipart(self):
    # Same as the one-part case, except that the part is attached before
    # the boundary is set.
    expected = '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
hello world
--BOUNDARY--
'''
    container = MIMEBase('multipart', 'mixed')
    for name, value in [('Subject', 'A subject'),
                        ('To', 'aperson@dom.ain'),
                        ('From', 'bperson@dom.ain')]:
        container[name] = value
    container.preamble = ''
    container.epilogue = ''
    part = MIMEText('hello world')
    container.attach(part)
    container.set_boundary('BOUNDARY')
    self.assertEqual(container.as_string(), expected)
# Test some badly formatted messages
......@@ -551,7 +787,7 @@ class TestNonConformant(TestEmailBase):
self.failUnless(msg.get_subtype() is None)
def test_bogus_boundary(self):
fp = openfile('msg_15.txt')
fp = openfile(findfile('msg_15.txt'))
try:
data = fp.read()
finally:
......@@ -561,6 +797,10 @@ class TestNonConformant(TestEmailBase):
# message into the intended message tree.
self.assertRaises(Errors.BoundaryError, p.parsestr, data)
def test_multipart_no_boundary(self):
    # Parsing the msg_25.txt fixture is expected to raise
    # Errors.BoundaryError.
    fp = openfile(findfile('msg_25.txt'))
    # Close the fixture whether or not the assertion passes, so the file
    # handle is not left open for the rest of the test run.
    try:
        self.assertRaises(Errors.BoundaryError,
                          email.message_from_file, fp)
    finally:
        fp.close()
# Test RFC 2047 header encoding and decoding
......@@ -570,7 +810,7 @@ class TestRFC2047(unittest.TestCase):
s = '=?iso-8859-1?q?this=20is=20some=20text?='
eq(Utils.decode(s), 'this is some text')
s = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?='
eq(Utils.decode(s), u'Keld_J\xf8rn_Simonsen')
eq(Utils.decode(s), u'Keld J\xf8rn Simonsen')
s = '=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=' \
'=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?='
eq(Utils.decode(s), 'If you can read this you understand the example.')
......@@ -578,6 +818,8 @@ class TestRFC2047(unittest.TestCase):
eq(Utils.decode(s),
u'\u05dd\u05d5\u05dc\u05e9 \u05df\u05d1 \u05d9\u05dc\u05d8\u05e4\u05e0')
s = '=?iso-8859-1?q?this=20is?= =?iso-8859-1?q?some=20text?='
eq(Utils.decode(s), u'this issome text')
s = '=?iso-8859-1?q?this=20is_?= =?iso-8859-1?q?some=20text?='
eq(Utils.decode(s), u'this is some text')
def test_encode_header(self):
......@@ -794,6 +1036,10 @@ class TestIdempotent(unittest.TestCase):
msg, text = self._msgobj('msg_23.txt')
self._idempotent(msg, text)
def test_multipart_no_parts(self):
    # Runs the msg_24.txt fixture through this class's _idempotent() check.
    parsed, source_text = self._msgobj('msg_24.txt')
    self._idempotent(parsed, source_text)
def test_content_type(self):
eq = self.assertEquals
# Get a message object and reset the seek pointer for other tests
......@@ -835,7 +1081,6 @@ class TestIdempotent(unittest.TestCase):
self.failUnless(isinstance(msg1.get_payload(), StringType))
eq(msg1.get_payload(), '\n')
# Test various other bits of the package's functionality
class TestMiscellaneous(unittest.TestCase):
......@@ -916,49 +1161,77 @@ class TestMiscellaneous(unittest.TestCase):
module = __import__('email')
all = module.__all__
all.sort()
self.assertEqual(all, ['Encoders', 'Errors', 'Generator', 'Iterators',
'MIMEAudio', 'MIMEBase', 'MIMEImage',
'MIMEMessage', 'MIMEText', 'Message', 'Parser',
'Utils',
'message_from_file', 'message_from_string'])
self.assertEqual(all, ['Charset', 'Encoders', 'Errors', 'Generator',
'Header', 'Iterators', 'MIMEAudio',
'MIMEBase', 'MIMEImage', 'MIMEMessage',
'MIMEText', 'Message', 'Parser',
'Utils', 'base64MIME',
'message_from_file', 'message_from_string',
'quopriMIME'])
def test_formatdate(self):
    # Two checks: a fixed timestamp against the string expected for this
    # platform's epoch, then a formatdate()/parsedate() round trip of the
    # current time compared with time.gmtime().
    fixed = 1005327232.109884
    gm_start = time.gmtime(0)[0:3]
    local_start = time.localtime(0)[0:3]
    # When does the epoch start?
    expected = "I don't understand your epoch"
    if gm_start == (1970, 1, 1):
        # traditional Unix epoch
        expected = 'Fri, 09 Nov 2001 17:33:52 -0000'
    elif local_start == (1904, 1, 1):
        # Mac epoch
        expected = 'Sat, 09 Nov 1935 16:33:52 -0000'
    self.assertEqual(Utils.formatdate(fixed), expected)
    current = time.time()
    round_tripped = Utils.parsedate(Utils.formatdate(current))[:6]
    self.assertEqual(round_tripped, time.gmtime(current)[:6])
def test_formatdate_localtime(self):
    # Two checks: the zone field of a local-time date string against the
    # time module's offset, then a local-time round trip of the current time.
    stamp = 1005327232.109884
    zone = Utils.formatdate(stamp, localtime=1).split()[5]
    hours = int(zone[1:3])
    minutes = int(zone[-2:])
    seconds = hours * 3600 + minutes * 60
    # Remember the time module's offset is in seconds west of UTC, but the
    # zone string counts east of UTC, so the signs differ.
    if zone[0] == '+':
        seconds = -seconds
    in_dst = time.daylight and time.localtime(stamp)[-1]
    if in_dst:
        expected = time.altzone
    else:
        expected = time.timezone
    self.assertEqual(seconds, expected)
    current = time.time()
    round_tripped = Utils.parsedate(
        Utils.formatdate(current, localtime=1))[:6]
    self.assertEqual(round_tripped, time.localtime(current)[:6])
def test_parsedate_none(self):
    # An empty date string is expected to parse to None.
    parsed = Utils.parsedate('')
    self.assertEqual(parsed, None)
def test_parseaddr_empty(self):
    # '<>' should parse to a pair of empty strings, and formatting that
    # pair again (with either helper) should give an empty string.
    check = self.assertEqual
    check(Utils.parseaddr('<>'), ('', ''))
    check(Utils.dump_address_pair(Utils.parseaddr('<>')), '')
    check(Utils.formataddr(Utils.parseaddr('<>')), '')
def test_noquote_dump(self):
    # A real name made only of letters and spaces is expected to be
    # emitted without quotes.
    formatted = Utils.formataddr(('A Silly Person', 'person@dom.ain'))
    self.assertEqual(formatted, 'A Silly Person <person@dom.ain>')
def test_escape_dump(self):
    # Parentheses in the real name are expected to be backslash-escaped
    # inside a quoted string, and an already-escaped name should survive
    # a formataddr()/parseaddr() round trip.
    formatted = Utils.formataddr(('A (Very) Silly Person', 'person@dom.ain'))
    self.assertEqual(formatted,
                     r'"A \(Very\) Silly Person" <person@dom.ain>')
    pair = (r'A \(Special\) Person', 'person@dom.ain')
    self.assertEqual(Utils.parseaddr(Utils.formataddr(pair)), pair)
def test_quote_dump(self):
    # A semicolon in the real name is expected to force quoting.
    formatted = Utils.formataddr(('A Silly; Person', 'person@dom.ain'))
    self.assertEqual(formatted, r'"A Silly; Person" <person@dom.ain>')
def test_fix_eols(self):
    # Each pair is (input, expected output of Utils.fix_eols).
    cases = [
        ('hello', 'hello'),
        ('hello\n', 'hello\r\n'),
        ('hello\r', 'hello\r\n'),
        ('hello\r\n', 'hello\r\n'),
        ('hello\n\r', 'hello\r\n\r\n'),
        ]
    for raw, fixed in cases:
        self.assertEqual(Utils.fix_eols(raw), fixed)
def test_charset_richcomparisons(self):
    # A default Charset should compare equal to 'us-ascii' in any letter
    # case, from either side of the comparison, unequal to the
    # hyphen-less spelling, and equal to another default Charset.
    same = self.assertEqual
    differ = self.failIfEqual
    cset1 = Charset()
    cset2 = Charset()
    matching = ['us-ascii', 'US-ASCII', 'Us-AsCiI']
    mismatching = ['usascii', 'USASCII', 'UsAsCiI']
    for name in matching:
        same(cset1, name)
    for name in matching:
        same(name, cset1)
    for name in mismatching:
        differ(cset1, name)
    for name in mismatching:
        differ(name, cset1)
    same(cset1, cset2)
    same(cset2, cset1)
......@@ -983,8 +1256,12 @@ class TestIterators(TestEmailBase):
eq = self.assertEqual
msg = self._msgobj('msg_04.txt')
it = Iterators.typed_subpart_iterator(msg, 'text')
lines = [subpart.get_payload() for subpart in it]
eq(len(lines), 2)
lines = []
subparts = 0
for subpart in it:
subparts += 1
lines.append(subpart.get_payload())
eq(subparts, 2)
eq(EMPTYSTRING.join(lines), """\
a simple kind of mirror
to reflect upon our own
......@@ -1011,6 +1288,7 @@ Do you like this message?
-Me
""")
class TestParsers(unittest.TestCase):
def test_header_parser(self):
......@@ -1025,6 +1303,274 @@ class TestParsers(unittest.TestCase):
eq(msg.is_multipart(), 0)
self.failUnless(isinstance(msg.get_payload(), StringType))
def test_whitespace_continuaton(self):
    # This message contains a line after the Subject: header that has only
    # whitespace, but it is not empty!
    source = """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Here's the message body
"""
    msg = email.message_from_string(source)
    self.assertEqual(msg['subject'], 'the next line has a space on it\n ')
    self.assertEqual(msg['message-id'], 'spam')
    self.assertEqual(msg.get_payload(), "Here's the message body\n")
class TestBase64(unittest.TestCase):
    """Tests for the base64MIME length, decode, encode and header helpers."""

    def test_len(self):
        check = self.assertEqual
        predicted = base64MIME.base64_len('hello')
        actual = len(base64MIME.encode('hello', eol=''))
        check(predicted, actual)
        for nbytes in range(15):
            # Every started group of three input bytes is expected to
            # cost four output characters.
            expected = ((nbytes + 2) // 3) * 4
            check(base64MIME.base64_len('x' * nbytes), expected)

    def test_decode(self):
        decode = base64MIME.decode
        self.assertEqual(decode(''), '')
        self.assertEqual(decode('aGVsbG8='), 'hello')
        # The second argument replaces line endings found in the decoded text.
        self.assertEqual(decode('aGVsbG8=', 'X'), 'hello')
        self.assertEqual(decode('aGVsbG8NCndvcmxk\n', 'X'), 'helloXworld')

    def test_encode(self):
        encode = base64MIME.encode
        text = 'xxxx ' * 20
        self.assertEqual(encode(''), '')
        self.assertEqual(encode('hello'), 'aGVsbG8=\n')
        # Test the binary flag
        self.assertEqual(encode('hello\n'), 'aGVsbG8K\n')
        self.assertEqual(encode('hello\n', 0), 'aGVsbG8NCg==\n')
        # Test the maxlinelen arg
        self.assertEqual(encode(text, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        self.assertEqual(encode(text, maxlinelen=40, eol='\r\n'), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        encode_header = base64MIME.header_encode
        text = 'xxxx ' * 20
        self.assertEqual(encode_header('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        self.assertEqual(encode_header('hello\nworld'),
                         '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        # Test the charset option
        self.assertEqual(encode_header('hello', charset='iso-8859-2'),
                         '=?iso-8859-2?b?aGVsbG8=?=')
        # Test the keep_eols flag
        self.assertEqual(encode_header('hello\nworld', keep_eols=1),
                         '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the maxlinelen argument
        self.assertEqual(encode_header(text, maxlinelen=40), """\
=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHggeHg=?=
=?iso-8859-1?b?eHggeHh4eCB4eHh4IHh4eHg=?=
=?iso-8859-1?b?IHh4eHggeHh4eCB4eHh4IHg=?=
=?iso-8859-1?b?eHh4IHh4eHggeHh4eCB4eHg=?=
=?iso-8859-1?b?eCB4eHh4IHh4eHggeHh4eCA=?=
=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHgg?=""")
        # Test the eol argument
        self.assertEqual(encode_header(text, maxlinelen=40, eol='\r\n'), """\
=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHggeHg=?=\r
=?iso-8859-1?b?eHggeHh4eCB4eHh4IHh4eHg=?=\r
=?iso-8859-1?b?IHh4eHggeHh4eCB4eHh4IHg=?=\r
=?iso-8859-1?b?eHh4IHh4eHggeHh4eCB4eHg=?=\r
=?iso-8859-1?b?eCB4eHh4IHh4eHggeHh4eCA=?=\r
=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHgg?=""")
class TestQuopri(unittest.TestCase):
    """Tests for the quoted-printable helpers in quopriMIME."""

    def setUp(self):
        # hlit holds the characters the header tests expect to need no
        # quoting; hnon is every other 8-bit character.
        self.hlit = [chr(x) for x in range(ord('a'), ord('z')+1)] + \
                    [chr(x) for x in range(ord('A'), ord('Z')+1)] + \
                    [chr(x) for x in range(ord('0'), ord('9')+1)] + \
                    ['!', '*', '+', '-', '/', ' ']
        self.hnon = [chr(x) for x in range(256) if chr(x) not in self.hlit]
        assert len(self.hlit) + len(self.hnon) == 256
        # blit/bnon are the same split for bodies: space through '~' plus
        # tab, with '=' moved to the must-quote side.
        self.blit = [chr(x) for x in range(ord(' '), ord('~')+1)] + ['\t']
        self.blit.remove('=')
        self.bnon = [chr(x) for x in range(256) if chr(x) not in self.blit]
        assert len(self.blit) + len(self.bnon) == 256

    def test_header_quopri_check(self):
        # The check is expected to be false for literal characters and
        # true for the rest.
        for c in self.hlit:
            self.failIf(quopriMIME.header_quopri_check(c))
        for c in self.hnon:
            self.failUnless(quopriMIME.header_quopri_check(c))

    def test_body_quopri_check(self):
        for c in self.blit:
            self.failIf(quopriMIME.body_quopri_check(c))
        for c in self.bnon:
            self.failUnless(quopriMIME.body_quopri_check(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        hql = quopriMIME.header_quopri_len
        enc = quopriMIME.header_encode
        for s in ('hello', 'h@e@l@l@o@'):
            # Empty charset and no line-endings.  7 == RFC chrome
            eq(hql(s), len(enc(s, charset='', eol=''))-7)
        # One character for a literal, three for a quoted one.
        for c in self.hlit:
            eq(hql(c), 1)
        for c in self.hnon:
            eq(hql(c), 3)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        bql = quopriMIME.body_quopri_len
        for c in self.blit:
            eq(bql(c), 1)
        for c in self.bnon:
            eq(bql(c), 3)

    def test_quote_unquote_idempotent(self):
        # quote() followed by unquote() must give back every byte value.
        for x in range(256):
            c = chr(x)
            self.assertEqual(quopriMIME.unquote(quopriMIME.quote(c)), c)

    def test_header_encode(self):
        eq = self.assertEqual
        he = quopriMIME.header_encode
        eq(he('hello'), '=?iso-8859-1?q?hello?=')
        eq(he('hello\nworld'), '=?iso-8859-1?q?hello=0D=0Aworld?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
        # Test the keep_eols flag
        eq(he('hello\nworld', keep_eols=1), '=?iso-8859-1?q?hello=0Aworld?=')
        # Test a non-ASCII character.  The input has to contain the 0xC7
        # byte for the expected "=C7" escape to appear in the output.
        eq(he('hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')
        # Test the maxlinelen argument
        eq(he('xxxx ' * 20, maxlinelen=40), """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
=?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
=?iso-8859-1?q?_xxxx_xxxx_xxxx_xxxx_x?=
=?iso-8859-1?q?xxx_xxxx_xxxx_xxxx_xxx?=
=?iso-8859-1?q?x_xxxx_xxxx_?=""")
        # Test the eol argument
        eq(he('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=\r
=?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=\r
=?iso-8859-1?q?_xxxx_xxxx_xxxx_xxxx_x?=\r
=?iso-8859-1?q?xxx_xxxx_xxxx_xxxx_xxx?=\r
=?iso-8859-1?q?x_xxxx_xxxx_?=""")

    def test_decode(self):
        eq = self.assertEqual
        eq(quopriMIME.decode(''), '')
        eq(quopriMIME.decode('hello'), 'hello')
        # The second argument replaces line endings in the decoded text.
        eq(quopriMIME.decode('hello', 'X'), 'hello')
        eq(quopriMIME.decode('hello\nworld', 'X'), 'helloXworld')

    def test_encode(self):
        eq = self.assertEqual
        eq(quopriMIME.encode(''), '')
        eq(quopriMIME.encode('hello'), 'hello')
        # Test the binary flag
        eq(quopriMIME.encode('hello\r\nworld'), 'hello\nworld')
        eq(quopriMIME.encode('hello\r\nworld', 0), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quopriMIME.encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quopriMIME.encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        # Plain multi-line text is expected to pass through unchanged.
        eq(quopriMIME.encode("""\
one line
two line"""), """\
one line
two line""")
# Test the Charset class
class TestCharset(unittest.TestCase):
    """Tests for the Charset class."""

    def test_idempotent(self):
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        samples = [
            'Hello World!',
            # test 8-bit idempotency with us-ascii
            '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa',
            ]
        for original in samples:
            splittable = ascii_charset.to_splittable(original)
            restored = ascii_charset.from_splittable(splittable)
            self.assertEqual(original, restored)
# Test multilingual MIME headers.
class TestHeader(unittest.TestCase):
    """Tests for Header objects and decode_header()."""

    def test_simple(self):
        header = Header('Hello World!')
        self.assertEqual(header.encode(), 'Hello World!')
        # An appended chunk is expected to be joined on with a space.
        header.append('Goodbye World!')
        self.assertEqual(header.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        plain = 'no decoding needed'
        decoded = decode_header(plain)
        self.assertEqual(decoded, [(plain, None)])

    def test_long(self):
        text = "I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse."
        header = Header(text, maxlinelen=76)
        # Split the encoded header on newline-plus-space and check that no
        # piece is longer than the requested maximum.
        for piece in header.encode().split('\n '):
            self.failUnless(len(piece) <= 76)

    def test_multilingual(self):
        # Builds one header from chunks in three charsets, checks the
        # encoded form, then checks that decode_header() returns the chunks.
        latin1 = Charset("iso-8859-1")
        latin2 = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        german = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
        czech = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. "
        japanese = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
        header = Header(german, latin1)
        header.append(czech, latin2)
        header.append(japanese, utf8)
        encoded = header.encode()
        self.assertEqual(encoded, """=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_eine?=
=?iso-8859-1?q?m_Foerderband_komfortabel_den_Korridor_ent?=
=?iso-8859-1?q?lang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei?=
=?iso-8859-1?q?=2C_gegen_die_rotierenden_Klingen_bef=F6rdert=2E_?=
=?iso-8859-2?q?Finan=E8ni_metropole_se_hroutil?=
=?iso-8859-2?q?y_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
=?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv?=
=?utf-8?b?44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
=?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM?=
=?utf-8?b?44CB44GC44Go44Gv44Gn44Gf44KJ44KB44Gn?=
=?utf-8?b?44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGE=?=
=?utf-8?b?cyBOdW5zdHVjayBnaXQgdW5k?=
=?utf-8?b?IFNsb3Rlcm1leWVyPyBKYSEgQmVpaGVyaHVuZCBkYXMgT2Rl?=
=?utf-8?b?ciBkaWUgRmxpcHBlcndhbGR0?=
=?utf-8?b?IGdlcnNwdXQu44CN44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
        self.assertEqual(decode_header(encoded),
                         [(german, "iso-8859-1"),
                          (czech, "iso-8859-2"),
                          (japanese, "utf-8")])
def suite():
......@@ -1044,13 +1590,13 @@ def suite():
suite.addTest(unittest.makeSuite(TestMiscellaneous))
suite.addTest(unittest.makeSuite(TestIterators))
suite.addTest(unittest.makeSuite(TestParsers))
suite.addTest(unittest.makeSuite(TestBase64))
suite.addTest(unittest.makeSuite(TestQuopri))
suite.addTest(unittest.makeSuite(TestHeader))
suite.addTest(unittest.makeSuite(TestCharset))
return suite
def test_main():
    # Build this module's suite and hand it to test_support.run_suite().
    import test_support
    test_support.run_suite(suite())
if __name__ == '__main__':
test_main()
unittest.main(defaultTest='suite')
# Copyright (C) 2002 Python Software Foundation
# email package unit tests for (optional) Asian codecs
import unittest
from test_support import TestSkipped
from email.Charset import Charset
from email.Header import Header, decode_header
# See if we have the Japanese codecs package installed.  The probe decodes
# a throwaway string; a LookupError means the codec is not registered, in
# which case the whole module is reported as skipped.
try:
    unicode('foo', 'japanese.iso-2022-jp')
except LookupError:
    raise TestSkipped('Optional Japanese codecs not installed')
class TestEmailAsianCodecs(unittest.TestCase):
    """Header encoding/decoding tests that need the optional Japanese codecs."""

    def test_japanese_codecs(self):
        check = self.assertEqual
        euc_jp = Charset("euc-jp")
        latin1 = Charset("iso-8859-1")
        mixed = Header("Hello World!")
        jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa'
        ghello = 'Gr\xfc\xdf Gott!'
        mixed.append(jhello, euc_jp)
        mixed.append(ghello, latin1)
        # The euc-jp chunk is expected to be emitted as iso-2022-jp.
        check(mixed.encode(), 'Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=\n =?iso-8859-1?q?Gr=FC=DF_Gott!?=')
        check(decode_header(mixed.encode()),
              [('Hello World!', None),
               ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
               ('Gr\xfc\xdf Gott!', 'iso-8859-1')])
        subject = 'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9'
        long_header = Header(subject, euc_jp, header_name="Subject")
        # test a very long header
        encoded = long_header.encode()
        check(encoded, '=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYRsoQg==?=\n =?iso-2022-jp?b?GyRCITwlayRPO0oycTxUJE4+NRsoQg==?=\n =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=')
        check(decode_header(encoded), [("test-ja \x1b$B$XEj9F$5$l$?%a\x1b(B\x1b$B!<%k$O;J2q<T$N>5\x1b(B\x1b$BG'$rBT$C$F$$$^$9\x1b(B", 'iso-2022-jp')])
def suite():
    # Collect the TestEmailAsianCodecs tests into a single TestSuite.
    tests = unittest.TestSuite()
    tests.addTest(unittest.makeSuite(TestEmailAsianCodecs))
    return tests
# When run as a script, let unittest build and run the tests returned by the
# module-level suite() function.
if __name__ == '__main__':
unittest.main(defaultTest='suite')
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment