message.py 28.9 KB
Newer Older
1
# Copyright (C) 2001-2006 Python Software Foundation
2 3
# Author: Barry Warsaw
# Contact: email-sig@python.org
4

5
"""Basic message object for the email package object model."""
6

7 8
__all__ = ['Message']

9
import re
10
import uu
11
import binascii
12
import warnings
13 14 15
from cStringIO import StringIO

# Intrapackage imports
16 17 18
import email.charset
from email import utils
from email import errors
19

20
# Separator placed between a header's main value and each of its parameters
# when a header value is assembled from parts.
SEMISPACE = '; '
21 22 23 24 25

# Regular expression used to split header parameters.  BAW: this may be too
# simple.  It isn't strictly RFC 2045 (section 5.1) compliant, but it catches
# most headers found in the wild.  We may eventually need a full-fledged
# parser.
26
paramre = re.compile(r'\s*;\s*')
27 28 29 30 31 32
# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')



33
# Helper functions
34
def _formatparam(param, value=None, quote=True):
35 36
    """Convenience function to format and return a key=value pair.

37
    This will quote the value if needed or if quote is true.
38 39
    """
    if value is not None and len(value) > 0:
40
        # A tuple is used for RFC 2231 encoded parameter values where items
41 42
        # are (charset, language, value).  charset is a string, not a Charset
        # instance.
43
        if isinstance(value, tuple):
44 45
            # Encode as per RFC 2231
            param += '*'
46
            value = utils.encode_rfc2231(value[2], value[0], value[1])
47 48 49
        # BAW: Please check this.  I think that if quote is set it should
        # force quoting even if not necessary.
        if quote or tspecials.search(value):
50
            return '%s="%s"' % (param, utils.quote(value))
51 52 53 54
        else:
            return '%s=%s' % (param, value)
    else:
        return param
55

56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
def _parseparam(s):
    plist = []
    while s[:1] == ';':
        s = s[1:]
        end = s.find(';')
        while end > 0 and s.count('"', 0, end) % 2:
            end = s.find(';', end + 1)
        if end < 0:
            end = len(s)
        f = s[:end]
        if '=' in f:
            i = f.index('=')
            f = f[:i].strip().lower() + '=' + f[i+1:].strip()
        plist.append(f.strip())
        s = s[end:]
    return plist

73

74
def _unquotevalue(value):
75
    # This is different than utils.collapse_rfc2231_value() because it doesn't
76 77 78
    # try to convert the value to a unicode.  Message.get_param() and
    # Message.get_params() are both currently defined to return the tuple in
    # the face of RFC 2231 parameters.
79
    if isinstance(value, tuple):
80
        return value[0], value[1], utils.unquote(value[2])
81
    else:
82
        return utils.unquote(value)
83 84


Barry Warsaw's avatar
Typo  
Barry Warsaw committed
85

86
class Message:
87
    """Basic message object.
88 89

    A message object is defined as something that has a bunch of RFC 2822
90 91 92 93
    headers and a payload.  It may optionally have an envelope header
    (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
    multipart or a message/rfc822), then the payload is a list of Message
    objects, otherwise it is a string.
94

95
    Message objects implement part of the `mapping' interface, which assumes
96
    there is exactly one occurrence of the header per message.  Some headers
97
    do in fact appear multiple times (e.g. Received) and for those headers,
98 99 100 101 102 103 104
    you must use the explicit API to set or get all the headers.  Not all of
    the mapping methods are implemented.
    """
    def __init__(self):
        self._headers = []
        self._unixfrom = None
        self._payload = None
105
        self._charset = None
106 107
        # Defaults for multipart messages
        self.preamble = self.epilogue = None
108
        self.defects = []
109 110
        # Default content type
        self._default_type = 'text/plain'
111 112 113

    def __str__(self):
        """Return the whole message formatted as a string.

        The envelope header, the headers and the body are all included; this
        is as_string() called with unixfrom=True.
        """
        return self.as_string(unixfrom=True)
117

118
    def as_string(self, unixfrom=False):
        """Return the whole message formatted as a string.

        When the optional `unixfrom' is True the Unix From_ envelope header
        is included in the output.

        This is only a convenience: by default lines that begin with
        "From " are mangled, so the result may not be exactly what you
        intend.  Use the flatten() method of a Generator instance when you
        need more control.
        """
        from email.Generator import Generator
        buf = StringIO()
        Generator(buf).flatten(self, unixfrom=unixfrom)
        return buf.getvalue()

    def is_multipart(self):
135
        """Return True if the message consists of multiple parts."""
136
        return isinstance(self._payload, list)
137 138 139 140 141 142 143 144 145 146 147 148 149

    #
    # Unix From_ line
    #
    def set_unixfrom(self, unixfrom):
        self._unixfrom = unixfrom

    def get_unixfrom(self):
        return self._unixfrom

    #
    # Payload manipulation.
    #
150 151 152 153
    def attach(self, payload):
        """Add the given payload to the current payload.

        The current payload will always be a list of objects after this method
154
        is called.  If you want to set the payload to a scalar object, use
155 156 157 158 159 160
        set_payload() instead.
        """
        if self._payload is None:
            self._payload = [payload]
        else:
            self._payload.append(payload)
161

162
    def get_payload(self, i=None, decode=False):
163
        """Return a reference to the payload.
164

165 166 167 168
        The payload will either be a list object or a string.  If you mutate
        the list object, you modify the message's payload in place.  Optional
        i returns that index into the payload.

169 170 171 172 173 174 175 176 177
        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding header
        (default is False).

        When True and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable' or `base64'.  If
        some other encoding is used, or the header is missing, or if the
        payload has bogus data (i.e. bogus base64 or uuencoded data), the
        payload is returned as-is.
178 179 180

        If the message is a multipart and the decode flag is True, then None
        is returned.
181 182 183
        """
        if i is None:
            payload = self._payload
184
        elif not isinstance(self._payload, list):
185
            raise TypeError('Expected list, got %s' % type(self._payload))
186 187 188 189 190
        else:
            payload = self._payload[i]
        if decode:
            if self.is_multipart():
                return None
191 192
            cte = self.get('content-transfer-encoding', '').lower()
            if cte == 'quoted-printable':
193
                return utils._qdecode(payload)
194
            elif cte == 'base64':
195
                try:
196
                    return utils._bdecode(payload)
197 198 199
                except binascii.Error:
                    # Incorrect padding
                    return payload
200 201 202
            elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
                sfp = StringIO()
                try:
203
                    uu.decode(StringIO(payload+'\n'), sfp, quiet=True)
204 205 206 207
                    payload = sfp.getvalue()
                except uu.Error:
                    # Some decoding problem
                    return payload
208 209 210 211
        # Everything else, including encodings with 8bit or 7bit are returned
        # unchanged.
        return payload

212 213 214
    def set_payload(self, payload, charset=None):
        """Set the payload to the given value.

215 216 217
        Optional charset sets the message's default character set.  See
        set_charset() for details.
        """
218
        self._payload = payload
219 220 221 222 223 224
        if charset is not None:
            self.set_charset(charset)

    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

225 226 227 228
        charset can be a Charset instance, a string naming a character set, or
        None.  If it is a string it will be converted to a Charset instance.
        If charset is None, the charset parameter will be removed from the
        Content-Type field.  Anything else will generate a TypeError.
229

230
        The message will be assumed to be of type text/* encoded with
231 232 233 234
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
235

236 237 238 239 240
        """
        if charset is None:
            self.del_param('charset')
            self._charset = None
            return
241
        if isinstance(charset, str):
242 243
            charset = email.charset.Charset(charset)
        if not isinstance(charset, email.charset.Charset):
244
            raise TypeError(charset)
245 246 247 248 249 250 251 252 253 254
        # BAW: should we accept strings that can serve as arguments to the
        # Charset constructor?
        self._charset = charset
        if not self.has_key('MIME-Version'):
            self.add_header('MIME-Version', '1.0')
        if not self.has_key('Content-Type'):
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            self.set_param('charset', charset.get_output_charset())
255 256
        if str(charset) <> charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
257 258
        if not self.has_key('Content-Transfer-Encoding'):
            cte = charset.get_body_encoding()
259
            try:
260
                cte(self)
261
            except TypeError:
262
                self._payload = charset.body_encode(self._payload)
263
                self.add_header('Content-Transfer-Encoding', cte)
264

265
    def get_charset(self):
266 267
        """Return the Charset instance associated with the message's payload.
        """
268
        return self._charset
Tim Peters's avatar
Tim Peters committed
269

270 271 272 273
    #
    # MAPPING INTERFACE (partial)
    #
    def __len__(self):
274
        """Return the total number of headers, including duplicates."""
275 276 277 278 279 280 281 282
        return len(self._headers)

    def __getitem__(self, name):
        """Get a header value.

        Return None if the header is missing instead of raising an exception.

        Note that if the header appeared multiple times, exactly which
283
        occurrance gets returned is undefined.  Use get_all() to get all
284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307
        the values matching a header field name.
        """
        return self.get(name)

    def __setitem__(self, name, val):
        """Set the value of a header.

        Note: this does not overwrite an existing header with the same field
        name.  Use __delitem__() first to delete any existing headers.
        """
        self._headers.append((name, val))

    def __delitem__(self, name):
        """Delete all occurrences of a header, if present.

        Does not raise an exception if the header is missing.
        """
        name = name.lower()
        newheaders = []
        for k, v in self._headers:
            if k.lower() <> name:
                newheaders.append((k, v))
        self._headers = newheaders

308 309
    def __contains__(self, name):
        return name.lower() in [k.lower() for k, v in self._headers]
310 311 312

    def has_key(self, name):
        """Return true if the message contains the header."""
313
        missing = object()
314
        return self.get(name, missing) is not missing
315 316 317 318 319

    def keys(self):
        """Return a list of all the message's header field names.

        These will be sorted in the order they appeared in the original
320 321 322
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
323 324 325 326 327 328 329
        """
        return [k for k, v in self._headers]

    def values(self):
        """Return a list of all the message's header values.

        These will be sorted in the order they appeared in the original
330 331 332
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
333 334 335 336 337 338 339
        """
        return [v for k, v in self._headers]

    def items(self):
        """Get all the message's header fields and values.

        These will be sorted in the order they appeared in the original
340 341 342
        message, or were added to the message, and may contain duplicates.
        Any fields deleted and re-inserted are always appended to the header
        list.
343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
        """
        return self._headers[:]

    def get(self, name, failobj=None):
        """Get a header value.

        Like __getitem__() but return failobj instead of None when the field
        is missing.
        """
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                return v
        return failobj

    #
    # Additional useful stuff
    #

    def get_all(self, name, failobj=None):
        """Return a list of all the values for the named field.

        These will be sorted in the order they appeared in the original
        message, and may contain duplicates.  Any fields deleted and
Greg Ward's avatar
Greg Ward committed
367
        re-inserted are always appended to the header list.
368 369

        If no such fields exist, failobj is returned (defaults to None).
370 371 372 373 374 375
        """
        values = []
        name = name.lower()
        for k, v in self._headers:
            if k.lower() == name:
                values.append(v)
376 377
        if not values:
            return failobj
378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
        return values

    def add_header(self, _name, _value, **_params):
        """Extended header setting.

        _name is the header field to add and _value its main value (left
        out of the header text when it is None).  Keyword arguments become
        additional parameters of the header field, with underscores in their
        names converted to dashes.  A parameter is normally added as
        key="value"; if its value is None only the key is added.

        Example:

        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
        """
        pieces = []
        if _value is not None:
            pieces.append(_value)
        for key, val in _params.items():
            attr = key.replace('_', '-')
            if val is None:
                pieces.append(attr)
            else:
                pieces.append(_formatparam(attr, val))
        self._headers.append((_name, SEMISPACE.join(pieces)))

402 403 404 405 406 407 408 409 410 411 412 413 414
    def replace_header(self, _name, _value):
        """Replace a header.

        Replace the first matching header found in the message, retaining
        header order and case.  If no matching header was found, a KeyError is
        raised.
        """
        _name = _name.lower()
        for i, (k, v) in zip(range(len(self._headers)), self._headers):
            if k.lower() == _name:
                self._headers[i] = (k, _value)
                break
        else:
415
            raise KeyError(_name)
416

417 418 419 420 421
    #
    # Content type accessors: get_content_type(), get_content_maintype() and
    # get_content_subtype().
    #

    def get_content_type(self):
422
        """Return the message's content type.
423

424 425 426 427 428
        The returned string is coerced to lower case of the form
        `maintype/subtype'.  If there was no Content-Type header in the
        message, the default type as given by get_default_type() will be
        returned.  Since according to RFC 2045, messages always have a default
        type this will always return a value.
429

430 431 432
        RFC 2045 defines a message's default type to be text/plain unless it
        appears inside a multipart/digest container, in which case it would be
        message/rfc822.
433
        """
434
        missing = object()
435 436 437 438
        value = self.get('content-type', missing)
        if value is missing:
            # This should have no parameters
            return self.get_default_type()
439 440 441 442 443
        ctype = paramre.split(value)[0].lower().strip()
        # RFC 2045, section 5.2 says if its invalid, use text/plain
        if ctype.count('/') <> 1:
            return 'text/plain'
        return ctype
444 445

    def get_content_maintype(self):
446
        """Return the message's main content type.
447 448

        This is the `maintype' part of the string returned by
449
        get_content_type().
450 451 452 453 454
        """
        ctype = self.get_content_type()
        return ctype.split('/')[0]

    def get_content_subtype(self):
455
        """Returns the message's sub-content type.
456 457

        This is the `subtype' part of the string returned by
458
        get_content_type().
459 460 461
        """
        ctype = self.get_content_type()
        return ctype.split('/')[1]
462

463 464 465 466 467
    def get_default_type(self):
        """Return the `default' content type.

        Most messages have a default content type of text/plain, except for
        messages that are subparts of multipart/digest containers.  Such
468
        subparts have a default content type of message/rfc822.
469 470 471 472 473 474
        """
        return self._default_type

    def set_default_type(self, ctype):
        """Set the `default' content type.

475 476
        ctype should be either "text/plain" or "message/rfc822", although this
        is not enforced.  The default content type is not stored in the
477
        Content-Type header.
478 479 480
        """
        self._default_type = ctype

481 482 483
    def _get_params_preserve(self, failobj, header):
        # Like get_params() but preserves the quoting of values.  BAW:
        # should this be part of the public interface?
484
        missing = object()
485 486 487 488
        value = self.get(header, missing)
        if value is missing:
            return failobj
        params = []
489
        for p in _parseparam(';' + value):
490 491
            try:
                name, val = p.split('=', 1)
492 493
                name = name.strip()
                val = val.strip()
494 495
            except ValueError:
                # Must have been a bare attribute
496
                name = p.strip()
497 498
                val = ''
            params.append((name, val))
499
        params = utils.decode_params(params)
500 501
        return params

502 503
    def get_params(self, failobj=None, header='content-type', unquote=True):
        """Return the message's Content-Type parameters, as a list.
504

505 506 507
        The elements of the returned list are 2-tuples of key/value pairs, as
        split on the `=' sign.  The left hand side of the `=' is the key,
        while the right hand side is the value.  If there is no `=' sign in
508 509
        the parameter the value is the empty string.  The value is as
        described in the get_param() method.
510

511
        Optional failobj is the object to return if there is no Content-Type
512
        header.  Optional header is the header to search instead of
513
        Content-Type.  If unquote is True, the value is unquoted.
514
        """
515
        missing = object()
516 517
        params = self._get_params_preserve(missing, header)
        if params is missing:
518
            return failobj
519
        if unquote:
520
            return [(k, _unquotevalue(v)) for k, v in params]
521 522
        else:
            return params
523

524 525 526
    def get_param(self, param, failobj=None, header='content-type',
                  unquote=True):
        """Return the parameter value if found in the Content-Type header.
527

528
        Optional failobj is the object to return if there is no Content-Type
529
        header, or the Content-Type header has no such parameter.  Optional
530
        header is the header to search instead of Content-Type.
531 532 533 534

        Parameter keys are always compared case insensitively.  The return
        value can either be a string, or a 3-tuple if the parameter was RFC
        2231 encoded.  When it's a 3-tuple, the elements of the value are of
535 536 537 538 539 540
        the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
        LANGUAGE can be None, in which case you should consider VALUE to be
        encoded in the us-ascii charset.  You can usually ignore LANGUAGE.

        Your application should be prepared to deal with 3-tuple return
        values, and can convert the parameter to a Unicode string like so:
541 542 543

            param = msg.get_param('foo')
            if isinstance(param, tuple):
544
                param = unicode(param[2], param[0] or 'us-ascii')
545 546 547

        In any case, the parameter value (either the returned string, or the
        VALUE item in the 3-tuple) is always unquoted, unless unquote is set
548
        to False.
549
        """
550
        if not self.has_key(header):
551
            return failobj
552 553
        for k, v in self._get_params_preserve(failobj, header):
            if k.lower() == param.lower():
554
                if unquote:
555
                    return _unquotevalue(v)
556 557
                else:
                    return v
558 559
        return failobj

560
    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language=''):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.  This includes a parameter that is
        present with an empty value (a bare attribute).

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can be specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to
        RFC 2231.  Optional language specifies the RFC 2231 language,
        defaulting to the empty string.  Both charset and language should be
        strings.

        The header is only rewritten when its text actually changes; when it
        is, it is deleted and re-added, so it ends up last in the header
        list.
        """
        if not isinstance(value, tuple) and charset:
            value = (charset, language, value)

        if not self.has_key(header) and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)
        # Use a sentinel rather than truth-testing the current value: a
        # parameter that exists with an empty value must be replaced, not
        # duplicated by appending a second copy.
        missing = object()
        if self.get_param(param, missing, header) is missing:
            if not ctype:
                ctype = _formatparam(param, value, requote)
            else:
                ctype = SEMISPACE.join(
                    [ctype, _formatparam(param, value, requote)])
        else:
            # Rebuild the header, substituting the new value in place so the
            # order of the parameters is kept.
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                if old_param.lower() == param.lower():
                    append_param = _formatparam(param, value, requote)
                else:
                    append_param = _formatparam(old_param, old_value, requote)
                if not ctype:
                    ctype = append_param
                else:
                    ctype = SEMISPACE.join([ctype, append_param])
        if ctype != self.get(header):
            del self[header]
            self[header] = ctype

608
    def del_param(self, param, header='content-type', requote=True):
609 610
        """Remove the given parameter completely from the Content-Type header.

611 612 613 614
        The header will be re-written in place without the parameter or its
        value. All values will be quoted as necessary unless requote is
        False.  Optional header specifies an alternative to the Content-Type
        header.
615 616 617 618
        """
        if not self.has_key(header):
            return
        new_ctype = ''
619
        for p, v in self.get_params(header=header, unquote=requote):
620 621 622 623 624 625 626 627 628 629
            if p.lower() <> param.lower():
                if not new_ctype:
                    new_ctype = _formatparam(p, v, requote)
                else:
                    new_ctype = SEMISPACE.join([new_ctype,
                                                _formatparam(p, v, requote)])
        if new_ctype <> self.get(header):
            del self[header]
            self[header] = new_ctype

630 631
    def set_type(self, type, header='Content-Type', requote=True):
        """Set the main type and subtype for the Content-Type header.
632 633 634 635

        type must be a string in the form "maintype/subtype", otherwise a
        ValueError is raised.

636 637
        This method replaces the Content-Type header, keeping all the
        parameters in place.  If requote is False, this leaves the existing
638 639 640
        header's quoting as is.  Otherwise, the parameters will be quoted (the
        default).

641 642
        An alternative header can be specified in the header argument.  When
        the Content-Type header is set, we'll always also add a MIME-Version
643 644 645 646 647
        header.
        """
        # BAW: should we be strict?
        if not type.count('/') == 1:
            raise ValueError
648
        # Set the Content-Type, you get a MIME-Version
649 650 651 652 653 654
        if header.lower() == 'content-type':
            del self['mime-version']
            self['MIME-Version'] = '1.0'
        if not self.has_key(header):
            self[header] = type
            return
655
        params = self.get_params(header=header, unquote=requote)
656 657 658 659 660 661
        del self[header]
        self[header] = type
        # Skip the first param; it's the old type.
        for p, v in params[1:]:
            self.set_param(p, v, header, requote)

662 663 664
    def get_filename(self, failobj=None):
        """Return the filename associated with the payload if present.

665
        The filename is extracted from the Content-Disposition header's
666 667 668
        `filename' parameter, and it is unquoted.  If that header is missing
        the `filename' parameter, this method falls back to looking for the
        `name' parameter.
669
        """
670
        missing = object()
671
        filename = self.get_param('filename', missing, 'content-disposition')
672 673
        if filename is missing:
            filename = self.get_param('name', missing, 'content-disposition')
674 675
        if filename is missing:
            return failobj
676
        return utils.collapse_rfc2231_value(filename).strip()
677 678 679 680

    def get_boundary(self, failobj=None):
        """Return the boundary associated with the payload if present.

681
        The boundary is extracted from the Content-Type header's `boundary'
682 683
        parameter, and it is unquoted.
        """
684
        missing = object()
685 686 687
        boundary = self.get_param('boundary', missing)
        if boundary is missing:
            return failobj
688
        # RFC 2046 says that boundaries may begin but not end in w/s
689
        return utils.collapse_rfc2231_value(boundary).rstrip()
690 691

    def set_boundary(self, boundary):
692
        """Set the boundary parameter in Content-Type to 'boundary'.
693

694
        This is subtly different than deleting the Content-Type header and
695 696
        adding a new one with a new boundary parameter via add_header().  The
        main difference is that using the set_boundary() method preserves the
697
        order of the Content-Type header in the original message.
698

699
        HeaderParseError is raised if the message has no Content-Type header.
700
        """
701
        missing = object()
702 703
        params = self._get_params_preserve(missing, 'content-type')
        if params is missing:
704
            # There was no Content-Type header, and we don't know what type
705
            # to set it to, so raise an exception.
706
            raise errors.HeaderParseError('No Content-Type header found')
707
        newparams = []
708
        foundp = False
709 710 711
        for pk, pv in params:
            if pk.lower() == 'boundary':
                newparams.append(('boundary', '"%s"' % boundary))
712
                foundp = True
713
            else:
714
                newparams.append((pk, pv))
715
        if not foundp:
716
            # The original Content-Type header had no boundary attribute.
717
            # Tack one on the end.  BAW: should we raise an exception
718
            # instead???
719
            newparams.append(('boundary', '"%s"' % boundary))
720
        # Replace the existing Content-Type header with the new value
721 722 723
        newheaders = []
        for h, v in self._headers:
            if h.lower() == 'content-type':
724 725 726 727 728 729 730 731
                parts = []
                for k, v in newparams:
                    if v == '':
                        parts.append(k)
                    else:
                        parts.append('%s=%s' % (k, v))
                newheaders.append((h, SEMISPACE.join(parts)))

732 733 734 735
            else:
                newheaders.append((h, v))
        self._headers = newheaders

736 737 738
    def get_content_charset(self, failobj=None):
        """Return the charset parameter of the Content-Type header.

739 740 741
        The returned string is always coerced to lower case.  If there is no
        Content-Type header, or if that header has no charset parameter,
        failobj is returned.
742
        """
743
        missing = object()
744 745 746
        charset = self.get_param('charset', missing)
        if charset is missing:
            return failobj
747
        if isinstance(charset, tuple):
748
            # RFC 2231 encoded, so decode it, and it better end up as ascii.
749 750
            pcharset = charset[0] or 'us-ascii'
            charset = unicode(charset[2], pcharset).encode('us-ascii')
751 752
        # RFC 2046, $4.1.2 says charsets are not case sensitive
        return charset.lower()
753

754 755
    def get_charsets(self, failobj=None):
        """Return a list containing the charset(s) used in this message.

        The list holds the result of get_content_charset(failobj) for every
        part produced by walk(): the Content-Type headers' charset parameter
        for this message and all the subparts in its payload.

        Each item is therefore either a string (the value of the charset
        parameter in the Content-Type header of that part) or the value of
        the 'failobj' parameter (defaults to None) when the part has no
        charset defined.

        The list will contain one item for each part of the message, plus
        one for the container message (i.e. self), so that a non-multipart
        message will still return a list of length 1.
        """
        charsets = []
        for part in self.walk():
            charsets.append(part.get_content_charset(failobj))
        return charsets
771 772 773

    # I.e. def walk(self): ...
    from email.Iterators import walk