Commit 409a4c08 authored by Barry Warsaw

Sync'ing with standalone email package 2.0.1. This adds support for

non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).
parent 68e69338
This diff is collapsed.
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""Module containing encoding functions for Image.Image and Text.Text.
......@@ -11,7 +11,9 @@ from quopri import encodestring as _encodestring
# Helpers
def _qencode(s):
return _encodestring(s, quotetabs=1)
enc = _encodestring(s, quotetabs=1)
# Must encode spaces, which quopri.encodestring() doesn't do
return enc.replace(' ', '=20')
def _bencode(s):
......@@ -54,6 +56,10 @@ def encode_quopri(msg):
def encode_7or8bit(msg):
"""Set the Content-Transfer-Encoding: header to 7bit or 8bit."""
orig = msg.get_payload()
if orig is None:
# There's no payload. For backwards compatibility we use 7bit
msg['Content-Transfer-Encoding'] = '7bit'
return
# We play a trick to make this go fast. If encoding to ASCII succeeds, we
# know the data must be 7bit, otherwise treat it as 8bit.
try:
......
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""email package exception classes.
......
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""Classes to generate plain text from a message object tree.
......@@ -166,30 +166,33 @@ class Generator:
return text
rtn = []
for line in text.split('\n'):
splitline = []
# Short lines can remain unchanged
if len(line.replace('\t', SPACE8)) <= maxheaderlen:
rtn.append(line)
SEMINLTAB.join(rtn)
splitline.append(line)
rtn.append(SEMINLTAB.join(splitline))
else:
oldlen = len(text)
oldlen = len(line)
# Try to break the line on semicolons, but if that doesn't
# work, try to split on folding whitespace.
while len(text) > maxheaderlen:
i = text.rfind(';', 0, maxheaderlen)
while len(line) > maxheaderlen:
i = line.rfind(';', 0, maxheaderlen)
if i < 0:
break
rtn.append(text[:i])
text = text[i+1:].lstrip()
if len(text) <> oldlen:
splitline.append(line[:i])
line = line[i+1:].lstrip()
if len(line) <> oldlen:
# Splitting on semis worked
rtn.append(text)
return SEMINLTAB.join(rtn)
splitline.append(line)
rtn.append(SEMINLTAB.join(splitline))
continue
# Splitting on semis didn't help, so try to split on
# whitespace.
parts = re.split(r'(\s+)', text)
parts = re.split(r'(\s+)', line)
# Watch out though for "Header: longnonsplittableline"
if parts[0].endswith(':') and len(parts) == 3:
return text
rtn.append(line)
continue
first = parts.pop(0)
sublines = [first]
acc = len(first)
......@@ -203,13 +206,14 @@ class Generator:
else:
# Split it here, but don't forget to ignore the
# next whitespace-only part
rtn.append(EMPTYSTRING.join(sublines))
splitline.append(EMPTYSTRING.join(sublines))
del parts[0]
first = parts.pop(0)
sublines = [first]
acc = len(first)
rtn.append(EMPTYSTRING.join(sublines))
return NLTAB.join(rtn)
splitline.append(EMPTYSTRING.join(sublines))
rtn.append(NLTAB.join(splitline))
return NL.join(rtn)
#
# Handlers for writing types and subtypes
......@@ -219,6 +223,9 @@ class Generator:
payload = msg.get_payload()
if payload is None:
return
cset = msg.get_charset()
if cset is not None:
payload = cset.body_encode(payload)
if not isinstance(payload, StringType):
raise TypeError, 'string payload expected: %s' % type(payload)
if self._mangle_from_:
......@@ -233,7 +240,18 @@ class Generator:
# together, and then make sure that the boundary we've chosen isn't
# present in the payload.
msgtexts = []
for part in msg.get_payload():
subparts = msg.get_payload()
if subparts is None:
# Nothing has ever been attached
boundary = msg.get_boundary(failobj=_make_boundary())
print >> self._fp, '--' + boundary
print >> self._fp, '\n'
print >> self._fp, '--' + boundary + '--'
return
elif not isinstance(subparts, ListType):
# Scalar payload
subparts = [subparts]
for part in subparts:
s = StringIO()
g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
g(part, unixfrom=0)
......@@ -365,7 +383,7 @@ class DecodedGenerator(Generator):
# Helper
def _make_boundary(self, text=None):
def _make_boundary(text=None):
# Craft a random boundary. If text is given, ensure that the chosen
# boundary doesn't appear in the text.
boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='
......
# Copyright (C) 2002 Python Software Foundation
# Author: che@debian.org (Ben Gertzfield)
"""Header encoding and decoding functionality."""
import re
import email.quopriMIME
import email.base64MIME
from email.Charset import Charset
CRLFSPACE = '\r\n '
CRLF = '\r\n'
NLSPACE = '\n '
MAXLINELEN = 76
ENCODE = 1
DECODE = 2
# Match encoded-word strings in the form =?charset?q?Hello_World?=
ecre = re.compile(r'''
=\? # literal =?
(?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
\? # literal ?
(?P<encoding>[qb]) # either a "q" or a "b", case insensitive
\? # literal ?
(?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string
\?= # literal ?=
''', re.VERBOSE | re.IGNORECASE)
# Helpers
_max_append = email.quopriMIME._max_append
def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (decoded_string, charset) pairs containing each of the
    decoded parts of the header.  Charset is None for non-encoded parts of the
    header, otherwise a lower-case string containing the name of the character
    set specified in the encoded string.
    """
    # If no encoding, just return the header
    header = str(header)
    if not ecre.search(header):
        return [(header, None)]
    decoded = []
    for line in header.splitlines():
        # This line might not have an encoding in it
        if not ecre.search(line):
            decoded.append((line, None))
            continue
        # ecre has three groups, so split() yields a flat list of the form
        # [unencoded, charset, encoding, encoded, unencoded, ...].
        parts = ecre.split(line)
        while parts:
            unenc = parts.pop(0).strip()
            if unenc:
                # Should we continue a long line?  If the previous chunk was
                # also unencoded text, extend it with *this* unencoded text
                # (not with the previously decoded word, which would drop
                # the text and duplicate stale data).
                if decoded and decoded[-1][1] is None:
                    decoded[-1] = (decoded[-1][0] + ' ' + unenc, None)
                else:
                    decoded.append((unenc, None))
            if parts:
                charset, encoding = [s.lower() for s in parts[0:2]]
                encoded = parts[2]
                if encoding == 'q':
                    dec = email.quopriMIME.header_decode(encoded)
                elif encoding == 'b':
                    dec = email.base64MIME.decode(encoded)
                else:
                    dec = encoded
                # Adjacent encoded words in the same charset are merged into
                # a single (string, charset) pair.
                if decoded and decoded[-1][1] == charset:
                    decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1])
                else:
                    decoded.append((dec, charset))
                del parts[0:3]
    return decoded
class Header:
    """A header value assembled from (string, Charset) chunks."""

    def __init__(self, s, charset=None, maxlinelen=MAXLINELEN,
                 header_name=None):
        """Create a MIME-compliant header that can contain many languages.

        Specify the initial header value in s.  Specify its character set as a
        Charset object in the charset argument.  If none, a default Charset
        instance will be used.

        You can later append to the header with append(s, charset) below;
        charset does not have to be the same as the one initially specified
        here.  In fact, it's optional, and if not given, defaults to the
        charset specified in the constructor.

        The maximum line length can either be specified by maxlinelen, or you
        can pass in the name of the header field (e.g. "Subject") to let this
        class guess the best line length to use to prevent wrapping.  The
        default maxlinelen is 76.
        """
        if charset is None:
            charset = Charset()
        self._charset = charset
        # BAW: I believe `chunks' and `maxlinelen' should be non-public.
        self._chunks = []
        self.append(s, charset)
        self._maxlinelen = maxlinelen
        # A header name, when given, overrides the explicit maxlinelen.
        if header_name is not None:
            self.guess_maxlinelen(header_name)

    def __str__(self):
        """A synonym for self.encode()."""
        return self.encode()

    def guess_maxlinelen(self, s=None):
        """Guess the maximum length to make each header line.

        Given a header name (e.g. "Subject"), set this header's maximum line
        length to an appropriate length to avoid line wrapping.  If s is not
        given, return the previous maximum line length and don't set it.

        Returns the new maximum line length.
        """
        # BAW: is this semantic necessary?
        if s is not None:
            # Leave room for the header name plus the ": " separator.
            self._maxlinelen = MAXLINELEN - len(s) - 2
        return self._maxlinelen

    def append(self, s, charset=None):
        """Append string s with Charset charset to the MIME header.

        charset defaults to the one given in the class constructor.
        """
        if charset is None:
            charset = self._charset
        self._chunks.append((s, charset))

    def _split(self, s, charset):
        # Split up a header safely for use with encode_chunks.  BAW: this
        # appears to be a private convenience method.
        #
        # Returns a list of (string, charset) pairs, each of which is
        # expected to encode to less than self._maxlinelen characters.
        splittable = charset.to_splittable(s)
        encoded = charset.from_splittable(splittable)
        if charset.encoded_header_len(encoded) < self._maxlinelen:
            return [(encoded, charset)]
        if len(splittable) < 2:
            # A single element cannot be halved: halfway would be 0 and the
            # second half would be the whole input again, recursing forever.
            # Emit it as-is even though it exceeds the line length.
            return [(encoded, charset)]
        # Divide and conquer, using explicit floor division for the midpoint.
        halfway = len(splittable) // 2
        first = charset.from_splittable(splittable[:halfway], 0)
        last = charset.from_splittable(splittable[halfway:], 0)
        return self._split(first, charset) + self._split(last, charset)

    def encode(self):
        """Encode a message header, possibly converting charset and encoding.

        There are many issues involved in converting a given string for use in
        an email header.  Only certain character sets are readable in most
        email clients, and as header strings can only contain a subset of
        7-bit ASCII, care must be taken to properly convert and encode (with
        Base64 or quoted-printable) header strings.  In addition, there is a
        75-character length limit on any given encoded header field, so
        line-wrapping must be performed, even with double-byte character sets.

        This method will do its best to convert the string to the correct
        character set used in email, and encode and line wrap it safely with
        the appropriate scheme for that character set.

        If the given charset is not known or an error occurs during
        conversion, this function will return the header untouched.
        """
        newchunks = []
        for s, charset in self._chunks:
            newchunks += self._split(s, charset)
        # NOTE: the stored chunks are replaced by their split form, so
        # encode() (and therefore str()) has a lasting effect on the header.
        self._chunks = newchunks
        return self.encode_chunks()

    def encode_chunks(self):
        """MIME-encode a header with many different charsets and/or encodings.

        Given a list of pairs (string, charset), return a MIME-encoded string
        suitable for use in a header field.  Each pair may have different
        charsets and/or encodings, and the resulting header will accurately
        reflect each setting.

        Each encoding can be email.Utils.QP (quoted-printable, for ASCII-like
        character sets like iso-8859-1), email.Utils.BASE64 (Base64, for
        non-ASCII like character sets like KOI8-R and iso-2022-jp), or None
        (no encoding).

        Each pair will be represented on a separate line; the resulting string
        will be in the format:

        "=?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\\n
          =?charset2?b?SvxyZ2VuIEL2aW5n?="
        """
        chunks = []
        for header, charset in self._chunks:
            if charset is None:
                _max_append(chunks, header, self._maxlinelen, ' ')
            else:
                _max_append(chunks, charset.header_encode(header, 0),
                            self._maxlinelen, ' ')
        return NLSPACE.join(chunks)
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""Various types of useful iterators and generators.
......
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""Base class for MIME specializations.
......
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""Class representing image/* type MIME documents.
......
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""Class representing message/* MIME documents.
......
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""Class representing text/* type MIME documents.
"""
import warnings
import MIMEBase
from Encoders import encode_7or8bit
......@@ -13,7 +14,7 @@ class MIMEText(MIMEBase.MIMEBase):
"""Class for generating text/* type MIME documents."""
def __init__(self, _text, _subtype='plain', _charset='us-ascii',
_encoder=encode_7or8bit):
_encoder=None):
"""Create a text/* type MIME document.
_text is the string for this message object. If the text does not end
......@@ -22,20 +23,26 @@ class MIMEText(MIMEBase.MIMEBase):
_subtype is the MIME sub content type, defaulting to "plain".
_charset is the character set parameter added to the Content-Type:
header. This defaults to "us-ascii".
_encoder is a function which will perform the actual encoding for
transport of the text data. It takes one argument, which is this
Text instance. It should use get_payload() and set_payload() to
change the payload to the encoded form. It should also add any
Content-Transfer-Encoding: or other headers to the message as
necessary. The default encoding doesn't actually modify the payload,
but it does set Content-Transfer-Encoding: to either `7bit' or `8bit'
as appropriate.
header. This defaults to "us-ascii". Note that as a side-effect, the
Content-Transfer-Encoding: header will also be set.
The use of the _encoder is deprecated. The encoding of the payload,
and the setting of the character set parameter now happens implicitly
based on the _charset argument. If _encoder is supplied, then a
DeprecationWarning is used, and the _encoder functionality may
override any header settings indicated by _charset. This is probably
not what you want.
"""
MIMEBase.MIMEBase.__init__(self, 'text', _subtype,
**{'charset': _charset})
if _text and _text[-1] <> '\n':
_text += '\n'
self.set_payload(_text)
self.set_payload(_text, _charset)
if _encoder is not None:
warnings.warn('_encoder argument is obsolete.',
DeprecationWarning, 2)
# Because set_payload() with a _charset will set its own
# Content-Transfer-Encoding: header, we need to delete the
# existing one or will end up with two of them. :(
del self['content-transfer-encoding']
_encoder(self)
This diff is collapsed.
......@@ -51,9 +51,16 @@ class Parser:
lastvalue = []
lineno = 0
while 1:
line = fp.readline()[:-1]
if not line or not line.strip():
# Don't strip the line before we test for the end condition,
# because whitespace-only header lines are RFC compliant
# continuation lines.
line = fp.readline()
if not line:
break
line = line.splitlines()[0]
if not line:
break
# Ignore the trailing newline
lineno += 1
# Check for initial Unix From_ line
if line.startswith('From '):
......@@ -63,7 +70,6 @@ class Parser:
else:
raise Errors.HeaderParseError(
'Unix-from in headers after first rfc822 header')
#
# Header continuation line
if line[0] in ' \t':
if not lastheader:
......@@ -134,11 +140,11 @@ class Parser:
msgobj = self.parsestr(part)
container.preamble = preamble
container.epilogue = epilogue
# Ensure that the container's payload is a list
if not isinstance(container.get_payload(), ListType):
container.set_payload([msgobj])
else:
container.add_payload(msgobj)
container.attach(msgobj)
elif container.get_main_type() == 'multipart':
# Very bad. A message is a multipart with no boundary!
raise Errors.BoundaryError(
'multipart message with no defined boundary')
elif container.get_type() == 'message/delivery-status':
# This special kind of type contains blocks of headers separated
# by a blank line. We'll represent each header block as a
......@@ -160,9 +166,9 @@ class Parser:
except Errors.HeaderParseError:
msg = self._class()
self._parsebody(msg, fp)
container.add_payload(msg)
container.set_payload(msg)
else:
container.add_payload(fp.read())
container.set_payload(fp.read())
......
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""Miscellaneous utilities.
"""
import time
import socket
import re
import random
import os
import warnings
from cStringIO import StringIO
from types import ListType
from rfc822 import unquote, quote, parseaddr
from rfc822 import dump_address_pair
from rfc822 import unquote, quote
from rfc822 import AddrlistClass as _AddrlistClass
from rfc822 import parsedate_tz, parsedate, mktime_tz
from rfc822 import mktime_tz
# We need wormarounds for bugs in these methods in older Pythons (see below)
from rfc822 import parsedate as _parsedate
from rfc822 import parsedate_tz as _parsedate_tz
from rfc822 import parseaddr as _parseaddr
from quopri import decodestring as _qdecode
import base64
......@@ -20,6 +30,10 @@ from Encoders import _bencode, _qencode
COMMASPACE = ', '
UEMPTYSTRING = u''
CRLF = '\r\n'
specialsre = re.compile(r'[][\()<>@,:;".]')
escapesre = re.compile(r'[][\()"]')
......@@ -43,6 +57,41 @@ def _bdecode(s):
return value
def fix_eols(s):
    """Replace all line-ending characters with \\r\\n."""
    # One pass: an existing CRLF pair is matched first (and rewritten to
    # itself), otherwise a lone CR or a lone LF is expanded to CRLF.
    return re.sub(r'\r\n|\r|\n', CRLF, s)
def formataddr(pair):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(): the result is suitable for an
    RFC 2822 From:, To: or Cc: header.

    If the first element of pair is false, then the second element is
    returned unmodified.
    """
    realname, addr = pair
    if not realname:
        return addr
    # Names containing RFC "specials" have to be wrapped in double quotes.
    wrap = ''
    if specialsre.search(realname):
        wrap = '"'
    # Backslash-escape the characters matched by escapesre.
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (wrap, escaped, wrap, addr)
# Kept only so that code written against the old name keeps working.
def dump_address_pair(pair):
    """Deprecated spelling of formataddr(); warns and then delegates."""
    warnings.warn('Use email.Utils.formataddr() instead',
                  category=DeprecationWarning, stacklevel=2)
    return formataddr(pair)
def getaddresses(fieldvalues):
"""Return a list of (REALNAME, EMAIL) for each fieldvalue."""
......@@ -64,30 +113,26 @@ ecre = re.compile(r'''
def decode(s):
"""Return a decoded string according to RFC 2047, as a unicode string."""
"""Return a decoded string according to RFC 2047, as a unicode string.
NOTE: This function is deprecated. Use Header.decode_header() instead.
"""
warnings.warn('Use Header.decode_header() instead.', DeprecationWarning, 2)
# Intra-package import here to avoid circular import problems.
from Header import decode_header
L = decode_header(s)
if not isinstance(L, ListType):
# s wasn't decoded
return s
rtn = []
parts = ecre.split(s, 1)
while parts:
# If there are less than 4 parts, it can't be encoded and we're done
if len(parts) < 5:
rtn.extend(parts)
break
# The first element is any non-encoded leading text
rtn.append(parts[0])
charset = parts[1]
encoding = parts[2].lower()
atom = parts[3]
# The next chunk to decode should be in parts[4]
parts = ecre.split(parts[4])
# The encoding must be either `q' or `b', case-insensitive
if encoding == 'q':
func = _qdecode
elif encoding == 'b':
func = _bdecode
for atom, charset in L:
if charset is None:
rtn.append(atom)
else:
func = _identity
# Decode and get the unicode in the charset
rtn.append(unicode(func(atom), charset))
# Convert the string to Unicode using the given encoding. Leave
# Unicode conversion errors to strict.
rtn.append(unicode(atom, charset))
# Now that we've decoded everything, we just need to join all the parts
# together into the final string.
return UEMPTYSTRING.join(rtn)
......@@ -96,6 +141,7 @@ def decode(s):
def encode(s, charset='iso-8859-1', encoding='q'):
"""Encode a string according to RFC 2047."""
warnings.warn('Use Header.Header.encode() instead.', DeprecationWarning, 2)
encoding = encoding.lower()
if encoding == 'q':
estr = _qencode(s)
......@@ -150,3 +196,48 @@ def formatdate(timeval=None, localtime=0):
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
now[0], now[3], now[4], now[5],
zone)
def make_msgid(idstring=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID:, e.g.

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    Optional idstring, if given, is a string used to strengthen the
    uniqueness of the Message-ID; otherwise an empty string is used.
    """
    # UTC timestamp, down to the second
    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    process_id = os.getpid()
    nonce = random.randrange(100000)
    # The caller's token, when present, becomes an extra dotted component.
    suffix = ''
    if idstring is not None:
        suffix = '.' + idstring
    host = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (stamp, process_id, nonce, suffix, host)
# These functions are in the standalone mimelib version only because they've
# subsequently been fixed in the latest Python versions. We use this to worm
# around broken older Pythons.
def parsedate(data):
    """Wrap rfc822.parsedate(), returning None for empty or missing input."""
    if data:
        return _parsedate(data)
    return None
def parsedate_tz(data):
    """Wrap rfc822.parsedate_tz(), returning None for empty or missing input."""
    if data:
        return _parsedate_tz(data)
    return None
def parseaddr(addr):
    """Wrap rfc822.parseaddr(), normalizing a ('', None) result to ('', '')."""
    name, address = _parseaddr(addr)
    unparsed = name == '' and address is None
    if unparsed:
        return '', ''
    return name, address
# Copyright (C) 2001 Python Software Foundation
# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""A package for parsing, handling, and generating email messages.
"""
__version__ = '1.0'
__version__ = '2.0'
__all__ = ['Encoders',
__all__ = ['Charset',
'Encoders',
'Errors',
'Generator',
'Header',
'Iterators',
'MIMEAudio',
'MIMEBase',
......@@ -18,6 +20,8 @@ __all__ = ['Encoders',
'Message',
'Parser',
'Utils',
'base64MIME',
'quopriMIME',
'message_from_string',
'message_from_file',
]
......
# Copyright (C) 2002 Python Software Foundation
# Author: che@debian.org (Ben Gertzfield)
"""Base64 content transfer encoding per RFCs 2045-2047.
This module handles the content transfer encoding method defined in RFC 2045
to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
characters encoding known as Base64.
It is used in the MIME standards for email to attach images, audio, and text
using some 8-bit character sets to messages.
This module provides an interface to encode and decode both headers and bodies
with Base64 encoding.
RFC 2045 defines a method for including character set information in an
`encoded-word' in a header. This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.
This module does not do the line wrapping or end-of-line character conversion
necessary for proper internationalized headers; it only does dumb encoding and
decoding. To deal with the various line wrapping issues, use the email.Header
module.
"""
import re
from binascii import b2a_base64, a2b_base64
from email.Utils import fix_eols
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''
# See also Charset.py
MISC_LEN = 7
# Helpers
def base64_len(s):
    """Return the length of s when it is encoded with base64."""
    # Each group of 3 input bytes -- including a trailing partial group --
    # becomes 4 output characters, i.e. 4 * ceil(len(s) / 3).
    return 4 * ((len(s) + 2) // 3)
def header_encode(header, charset='iso-8859-1', keep_eols=0, maxlinelen=76,
                  eol=NL):
    """Encode a single header line with Base64 encoding in a given charset.

    Defined in RFC 2045, this Base64 encoding is identical to normal Base64
    encoding, except that each line must be intelligently wrapped (respecting
    the Base64 encoding), and subsequent lines must start with a space.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is set to true (the default is false).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.

    The resulting string will be in the form:

    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
      =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="

    with each line wrapped at, at most, maxlinelen characters (defaults to 76
    characters).
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = fix_eols(header)

    # Base64 encode each line, in encoded chunks no greater than maxlinelen in
    # length, after the RFC chrome is added in.
    #
    # max_encoded is the room left for base64 text on a line once the
    # "=?charset?b?...?=" chrome is accounted for.  The input is sliced in
    # steps of max_unencoded bytes; that step is a whole number of 3-byte
    # groups so each chunk encodes to at most max_encoded characters.  (A
    # step that is not a multiple of 3 rounds *up* when encoded and can push
    # the line past maxlinelen.)  At least one group is always taken so that
    # a tiny maxlinelen cannot produce a zero or negative step.
    max_encoded = maxlinelen - len(charset) - MISC_LEN
    max_unencoded = max(3, (max_encoded // 4) * 3)

    lines = []
    for i in range(0, len(header), max_unencoded):
        chunk = b2a_base64(header[i:i + max_unencoded])
        # Ignore the last character of each chunk if it is a newline
        if chunk[-1] == NL:
            chunk = chunk[:-1]
        # Add the RFC chrome
        lines.append('=?%s?b?%s?=' % (charset, chunk))
    # Glue the lines together and return it.  BAW: should we be able to
    # specify the leading whitespace in the joiner?
    joiner = eol + ' '
    return joiner.join(lines)
def encode(s, binary=1, maxlinelen=76, eol=NL):
    """Encode a string with base64.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).

    If binary is false, end-of-line characters will be converted to the
    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
    verbatim (this is the default).

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.
    """
    if not s:
        return s

    if not binary:
        s = fix_eols(s)

    # Slice the input in whole 3-byte groups.  A step that is not a multiple
    # of 3 would make b2a_base64() emit '=' padding in the middle of the
    # body and produce lines longer than maxlinelen.  At least one group is
    # always taken so the step can never be zero.
    max_unencoded = max(3, (maxlinelen // 4) * 3)

    encvec = []
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[i:i + max_unencoded])
        if enc[-1] == NL and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)


# For convenience and backwards compatibility w/ standard base64 module
body_encode = encode
encodestring = encode
def decode(s, convert_eols=None):
    """Decode a raw base64 string.

    When convert_eols is a non-empty string, every canonical email line
    ending ("\\r\\n") in the decoded text is replaced by it.  os.linesep is
    a good choice when decoding a text attachment.

    A false s (e.g. the empty string) is returned unchanged.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.Header class for that functionality.
    """
    if s:
        raw = a2b_base64(s)
        if not convert_eols:
            return raw
        return raw.replace(CRLF, convert_eols)
    return s


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode
This diff is collapsed.
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain
--BOUNDARY
--BOUNDARY--
From MAILER-DAEMON Fri Apr 06 16:46:09 2001
Received: from [204.245.199.98] (helo=zinfandel.lacita.com)
by www.linux.org.uk with esmtp (Exim 3.13 #1)
id 14lYR6-0008Iv-00
for linuxuser-admin@www.linux.org.uk; Fri, 06 Apr 2001 16:46:09 +0100
Received: from localhost (localhost) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with internal id JAB03225; Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
From: Mail Delivery Subsystem <MAILER-DAEMON@zinfandel.lacita.com>
Subject: Returned mail: Too many hops 19 (17 max): from <linuxuser-admin@www.linux.org.uk> via [199.164.235.226], to <scoffman@wellpartner.com>
Message-Id: <200104061723.JAB03225@zinfandel.lacita.com>
To: <linuxuser-admin@www.linux.org.uk>
To: postmaster@zinfandel.lacita.com
MIME-Version: 1.0
Content-Type: multipart/report; report-type=delivery-status;
bo
Auto-Submitted: auto-generated (failure)
This is a MIME-encapsulated message
--JAB03225.986577786/zinfandel.lacita.com
The original message was received at Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800)
from [199.164.235.226]
----- The following addresses have delivery notifications -----
<scoffman@wellpartner.com> (unrecoverable error)
----- Transcript of session follows -----
554 Too many hops 19 (17 max): from <linuxuser-admin@www.linux.org.uk> via [199.164.235.226], to <scoffman@wellpartner.com>
--JAB03225.986577786/zinfandel.lacita.com
Content-Type: message/delivery-status
Reporting-MTA: dns; zinfandel.lacita.com
Received-From-MTA: dns; [199.164.235.226]
Arrival-Date: Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800)
Final-Recipient: rfc822; scoffman@wellpartner.com
Action: failed
Status: 5.4.6
Last-Attempt-Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
--JAB03225.986577786/zinfandel.lacita.com
Content-Type: text/rfc822-headers
Return-Path: linuxuser-admin@www.linux.org.uk
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03225 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800)
Received: from zinfandel.lacita.com ([204.245.199.98])
by
fo
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03221 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:22:18 -0800 (GMT-0800)
Received: from zinfandel.lacita.com ([204.245.199.98])
by
fo
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03217 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:21:37 -0800 (GMT-0800)
Received: from zinfandel.lacita.com ([204.245.199.98])
by
fo
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03213 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:20:56 -0800 (GMT-0800)
Received: from zinfandel.lacita.com ([204.245.199.98])
by
fo
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03209 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:20:15 -0800 (GMT-0800)
Received: from zinfandel.lacita.com ([204.245.199.98])
by
fo
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03205 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:19:33 -0800 (GMT-0800)
Received: from zinfandel.lacita.com ([204.245.199.98])
by
fo
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03201 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:18:52 -0800 (GMT-0800)
Received: from zinfandel.lacita.com ([204.245.199.98])
by
fo
Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03197 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:17:54 -0800 (GMT-0800)
Received: from www.linux.org.uk (parcelfarce.linux.theplanet.co.uk [195.92.249.252])
by
fo
Received: from localhost.localdomain
([
by
id
Received: from [212.1.130.11] (helo=s1.uklinux.net ident=root)
by
id
fo
Received: from server (ppp-2-22.cvx4.telinco.net [212.1.149.22])
by
fo
From: Daniel James <daniel@linuxuser.co.uk>
Organization: LinuxUser
To: linuxuser@www.linux.org.uk
X-Mailer: KMail [version 1.1.99]
Content-Type: text/plain;
c
MIME-Version: 1.0
Message-Id: <01040616033903.00962@server>
Content-Transfer-Encoding: 8bit
Subject: [LinuxUser] bulletin no. 45
Sender: linuxuser-admin@www.linux.org.uk
Errors-To: linuxuser-admin@www.linux.org.uk
X-BeenThere: linuxuser@www.linux.org.uk
X-Mailman-Version: 2.0.3
Precedence: bulk
List-Help: <mailto:linuxuser-request@www.linux.org.uk?subject=help>
List-Post: <mailto:linuxuser@www.linux.org.uk>
List-Subscribe: <http://www.linux.org.uk/mailman/listinfo/linuxuser>,
<m
List-Id: bulletins from LinuxUser magazine <linuxuser.www.linux.org.uk>
List-Unsubscribe: <http://www.linux.org.uk/mailman/listinfo/linuxuser>,
<m
List-Archive: <http://www.linux.org.uk/pipermail/linuxuser/>
Date: Fri, 6 Apr 2001 16:03:39 +0100
--JAB03225.986577786/zinfandel.lacita.com--
This diff is collapsed.
# Copyright (C) 2002 Python Software Foundation
# email package unit tests for (optional) Asian codecs
import unittest
from test_support import TestSkipped
from email.Charset import Charset
from email.Header import Header, decode_header
# Probe for the optional Japanese codecs package; without it none of the
# tests in this module can run, so skip the whole module.
try:
    unicode('foo', 'japanese.iso-2022-jp')
except LookupError:
    raise TestSkipped('Optional Japanese codecs not installed')
class TestEmailAsianCodecs(unittest.TestCase):
    """Header encoding/decoding tests that need the Japanese codecs."""

    def test_japanese_codecs(self):
        eq = self.assertEqual
        j = Charset("euc-jp")
        g = Charset("iso-8859-1")
        h = Header("Hello World!")
        jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa'
        ghello = 'Gr\xfc\xdf Gott!'
        h.append(jhello, j)
        h.append(ghello, g)
        # The expected values below show the euc-jp chunk labelled as
        # iso-2022-jp in the encoded header.
        eq(h.encode(), 'Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=\n =?iso-8859-1?q?Gr=FC=DF_Gott!?=')
        eq(decode_header(h.encode()),
           [('Hello World!', None),
            ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
            ('Gr\xfc\xdf Gott!', 'iso-8859-1')])
        # Named long_text rather than `long' so the builtin is not shadowed.
        long_text = 'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9'
        h = Header(long_text, j, header_name="Subject")
        # test a very long header
        enc = h.encode()
        eq(enc, '=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYRsoQg==?=\n =?iso-2022-jp?b?GyRCITwlayRPO0oycTxUJE4+NRsoQg==?=\n =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=')
        eq(decode_header(enc), [("test-ja \x1b$B$XEj9F$5$l$?%a\x1b(B\x1b$B!<%k$O;J2q<T$N>5\x1b(B\x1b$BG'$rBT$C$F$$$^$9\x1b(B", 'iso-2022-jp')])
def suite():
    """Return a TestSuite holding every test in TestEmailAsianCodecs."""
    tests = unittest.TestSuite()
    tests.addTest(unittest.makeSuite(TestEmailAsianCodecs))
    return tests
if __name__ == '__main__':
unittest.main(defaultTest='suite')
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment