Issue #22088: Merge base64 docs from 3.5

150ea1a1 · Martin Panter · 3d2778c8 · ee3074e1 · 150ea1a1 · 150ea1a1
Commit 150ea1a1 authored Feb 24, 2016 by Martin Panter
Show whitespace changes
Inline Side-by-side

Showing with 92 additions and 79 deletions

Doc/library/base64.rst Doc/library/base64.rst +16 -13

Lib/base64.py Lib/base64.py +58 -64

Lib/test/test_base64.py Lib/test/test_base64.py +18 -2

No files found.
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@@ -24,8 +24,8 @@ POST request.  The encoding algorithm is not the same as the
 There are two interfaces provided by this module.  The modern interface
 supports encoding :term:`bytes-like objects <bytes-like object>` to ASCII
 :class:`bytes`, and decoding :term:`bytes-like objects <bytes-like object>` or
-strings containing ASCII to :class:`bytes`.  All three :rfc:`3548` defined
-alphabets (normal, URL-safe, and filesystem-safe) are supported.
+strings containing ASCII to :class:`bytes`.  Both base-64 alphabets
+defined in :rfc:`3548` (normal, and URL- and filesystem-safe) are supported.

 The legacy interface does not support decoding from strings, but it does
 provide functions for encoding and decoding to and from :term:`file objects
@@ -69,9 +69,10 @@ The modern interface provides:
   A :exc:`binascii.Error` exception is raised
   if *s* is incorrectly padded.

-   If *validate* is ``False`` (the default), non-base64-alphabet characters are
+   If *validate* is ``False`` (the default), characters that are neither
+   in the normal base-64 alphabet nor the alternative alphabet are
   discarded prior to the padding check.  If *validate* is ``True``,
-   non-base64-alphabet characters in the input result in a
+   these non-alphabet characters in the input result in a
   :exc:`binascii.Error`.


@@ -89,7 +90,8 @@ The modern interface provides:

 .. function:: urlsafe_b64encode(s)

-   Encode :term:`bytes-like object` *s* using a URL-safe alphabet, which
+   Encode :term:`bytes-like object` *s* using the
+   URL- and filesystem-safe alphabet, which
   substitutes ``-`` instead of ``+`` and ``_`` instead of ``/`` in the
   standard Base64 alphabet, and return the encoded :class:`bytes`.  The result
   can still contain ``=``.
@@ -97,7 +99,8 @@ The modern interface provides:

 .. function:: urlsafe_b64decode(s)

-   Decode :term:`bytes-like object` or ASCII string *s* using a URL-safe
+   Decode :term:`bytes-like object` or ASCII string *s*
+   using the URL- and filesystem-safe
   alphabet, which substitutes ``-`` instead of ``+`` and ``_`` instead of
   ``/`` in the standard Base64 alphabet, and return the decoded
   :class:`bytes`.
@@ -145,14 +148,14 @@ The modern interface provides:
   lowercase alphabet is acceptable as input.  For security purposes, the default
   is ``False``.

-   A :exc:`TypeError` is raised if *s* is
+   A :exc:`binascii.Error` is raised if *s* is
   incorrectly padded or if there are non-alphabet characters present in the
   input.


-.. function:: a85encode(s, *, foldspaces=False, wrapcol=0, pad=False, adobe=False)
+.. function:: a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False)

-   Encode the :term:`bytes-like object` *s* using Ascii85 and return the
+   Encode the :term:`bytes-like object` *b* using Ascii85 and return the
   encoded :class:`bytes`.

   *foldspaces* is an optional flag that uses the special short sequence 'y'
@@ -172,9 +175,9 @@ The modern interface provides:
   .. versionadded:: 3.4


-.. function:: a85decode(s, *, foldspaces=False, adobe=False, ignorechars=b' \\t\\n\\r\\v')
+.. function:: a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \\t\\n\\r\\v')

-   Decode the Ascii85 encoded :term:`bytes-like object` or ASCII string *s* and
+   Decode the Ascii85 encoded :term:`bytes-like object` or ASCII string *b* and
   return the decoded :class:`bytes`.

   *foldspaces* is a flag that specifies whether the 'y' short sequence
@@ -192,9 +195,9 @@ The modern interface provides:
   .. versionadded:: 3.4


-.. function:: b85encode(s, pad=False)
+.. function:: b85encode(b, pad=False)

-   Encode the :term:`bytes-like object` *s* using base85 (as used in e.g.
+   Encode the :term:`bytes-like object` *b* using base85 (as used in e.g.
   git-style binary diffs) and return the encoded :class:`bytes`.

   If *pad* is true, the input is padded with ``b'\0'`` so its length is a

--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -12,7 +12,7 @@ import binascii


 __all__ = [
-    # Legacy interface exports traditional RFC 1521 Base64 encodings
+    # Legacy interface exports traditional RFC 2045 Base64 encodings
    'encode', 'decode', 'encodebytes', 'decodebytes',
    # Generalized interface for other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
@@ -49,14 +49,11 @@ def _bytes_from_decode_data(s):
 # Base64 encoding/decoding uses binascii

 def b64encode(s, altchars=None):
-    """Encode a byte string using Base64.
+    """Encode the bytes-like object s using Base64 and return a bytes object.

-    s is the byte string to encode.  Optional altchars must be a byte
-    string of length 2 which specifies an alternative alphabet for the
-    '+' and '/' characters.  This allows an application to
-    e.g. generate url or filesystem safe Base64 strings.
-
-    The encoded byte string is returned.
+    Optional altchars should be a byte string of length 2 which specifies an
+    alternative alphabet for the '+' and '/' characters.  This allows an
+    application to e.g. generate url or filesystem safe Base64 strings.
    """
    encoded = binascii.b2a_base64(s, newline=False)
    if altchars is not None:
@@ -66,18 +63,19 @@ def b64encode(s, altchars=None):


 def b64decode(s, altchars=None, validate=False):
-    """Decode a Base64 encoded byte string.
+    """Decode the Base64 encoded bytes-like object or ASCII string s.

-    s is the byte string to decode.  Optional altchars must be a
-    string of length 2 which specifies the alternative alphabet used
-    instead of the '+' and '/' characters.
+    Optional altchars must be a bytes-like object or ASCII string of length 2
+    which specifies the alternative alphabet used instead of the '+' and '/'
+    characters.

-    The decoded string is returned.  A binascii.Error is raised if s is
-    incorrectly padded.
+    The result is returned as a bytes object.  A binascii.Error is raised if
+    s is incorrectly padded.

-    If validate is False (the default), non-base64-alphabet characters are
-    discarded prior to the padding check.  If validate is True,
-    non-base64-alphabet characters in the input result in a binascii.Error.
+    If validate is False (the default), characters that are neither in the
+    normal base-64 alphabet nor the alternative alphabet are discarded prior
+    to the padding check.  If validate is True, these non-alphabet characters
+    in the input result in a binascii.Error.
    """
    s = _bytes_from_decode_data(s)
    if altchars is not None:
@@ -90,19 +88,19 @@ def b64decode(s, altchars=None, validate=False):


 def standard_b64encode(s):
-    """Encode a byte string using the standard Base64 alphabet.
+    """Encode bytes-like object s using the standard Base64 alphabet.

-    s is the byte string to encode.  The encoded byte string is returned.
+    The result is returned as a bytes object.
    """
    return b64encode(s)

 def standard_b64decode(s):
-    """Decode a byte string encoded with the standard Base64 alphabet.
+    """Decode bytes encoded with the standard Base64 alphabet.

-    s is the byte string to decode.  The decoded byte string is
-    returned.  binascii.Error is raised if the input is incorrectly
-    padded or if there are non-alphabet characters present in the
-    input.
+    Argument s is a bytes-like object or ASCII string to decode.  The result
+    is returned as a bytes object.  A binascii.Error is raised if the input
+    is incorrectly padded.  Characters that are not in the standard alphabet
+    are discarded prior to the padding check.
    """
    return b64decode(s)

@@ -111,21 +109,22 @@ _urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
 _urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')

 def urlsafe_b64encode(s):
-    """Encode a byte string using a url-safe Base64 alphabet.
+    """Encode bytes using the URL- and filesystem-safe Base64 alphabet.

-    s is the byte string to encode.  The encoded byte string is
-    returned.  The alphabet uses '-' instead of '+' and '_' instead of
+    Argument s is a bytes-like object to encode.  The result is returned as a
+    bytes object.  The alphabet uses '-' instead of '+' and '_' instead of
    '/'.
    """
    return b64encode(s).translate(_urlsafe_encode_translation)

 def urlsafe_b64decode(s):
-    """Decode a byte string encoded with the standard Base64 alphabet.
+    """Decode bytes using the URL- and filesystem-safe Base64 alphabet.

-    s is the byte string to decode.  The decoded byte string is
-    returned.  binascii.Error is raised if the input is incorrectly
-    padded or if there are non-alphabet characters present in the
-    input.
+    Argument s is a bytes-like object or ASCII string to decode.  The result
+    is returned as a bytes object.  A binascii.Error is raised if the input
+    is incorrectly padded.  Characters that are not in the URL-safe base-64
+    alphabet, and are not a plus '+' or slash '/', are discarded prior to the
+    padding check.

    The alphabet uses '-' instead of '+' and '_' instead of '/'.
    """
@@ -141,9 +140,7 @@ _b32tab2 = None
 _b32rev = None

 def b32encode(s):
-    """Encode a byte string using Base32.
-
-    s is the byte string to encode.  The encoded byte string is returned.
+    """Encode the bytes-like object s using Base32 and return a bytes object.
    """
    global _b32tab2
    # Delay the initialization of the table to not waste memory
@@ -181,11 +178,10 @@ def b32encode(s):
    return bytes(encoded)

 def b32decode(s, casefold=False, map01=None):
-    """Decode a Base32 encoded byte string.
+    """Decode the Base32 encoded bytes-like object or ASCII string s.

-    s is the byte string to decode.  Optional casefold is a flag
-    specifying whether a lowercase alphabet is acceptable as input.
-    For security purposes, the default is False.
+    Optional casefold is a flag specifying whether a lowercase alphabet is
+    acceptable as input.  For security purposes, the default is False.

    RFC 3548 allows for optional mapping of the digit 0 (zero) to the
    letter O (oh), and for optional mapping of the digit 1 (one) to
@@ -195,7 +191,7 @@ def b32decode(s, casefold=False, map01=None):
    the letter O).  For security purposes the default is None, so that
    0 and 1 are not allowed in the input.

-    The decoded byte string is returned.  binascii.Error is raised if
+    The result is returned as a bytes object.  A binascii.Error is raised if
    the input is incorrectly padded or if there are non-alphabet
    characters present in the input.
    """
@@ -256,23 +252,20 @@ def b32decode(s, casefold=False, map01=None):
 # lowercase.  The RFC also recommends against accepting input case
 # insensitively.
 def b16encode(s):
-    """Encode a byte string using Base16.
-
-    s is the byte string to encode.  The encoded byte string is returned.
+    """Encode the bytes-like object s using Base16 and return a bytes object.
    """
    return binascii.hexlify(s).upper()


 def b16decode(s, casefold=False):
-    """Decode a Base16 encoded byte string.
+    """Decode the Base16 encoded bytes-like object or ASCII string s.

-    s is the byte string to decode.  Optional casefold is a flag
-    specifying whether a lowercase alphabet is acceptable as input.
-    For security purposes, the default is False.
+    Optional casefold is a flag specifying whether a lowercase alphabet is
+    acceptable as input.  For security purposes, the default is False.

-    The decoded byte string is returned.  binascii.Error is raised if
-    s were incorrectly padded or if there are non-alphabet characters
-    present in the string.
+    The result is returned as a bytes object.  A binascii.Error is raised if
+    s is incorrectly padded or if there are non-alphabet characters present
+    in the input.
    """
    s = _bytes_from_decode_data(s)
    if casefold:
@@ -315,19 +308,17 @@ def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
    return b''.join(chunks)

 def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
-    """Encode a byte string using Ascii85.
-
-    b is the byte string to encode. The encoded byte string is returned.
+    """Encode bytes-like object b using Ascii85 and return a bytes object.

    foldspaces is an optional flag that uses the special short sequence 'y'
    instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This
    feature is not supported by the "standard" Adobe encoding.

-    wrapcol controls whether the output should have newline ('\\n') characters
+    wrapcol controls whether the output should have newline (b'\\n') characters
    added to it. If this is non-zero, each output line will be at most this
    many characters long.

-    pad controls whether the input string is padded to a multiple of 4 before
+    pad controls whether the input is padded to a multiple of 4 before
    encoding. Note that the btoa implementation always pads.

    adobe controls whether the encoded byte sequence is framed with <~ and ~>,
@@ -358,9 +349,7 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
    return result

 def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
-    """Decode an Ascii85 encoded byte string.
-
-    s is the byte string to decode.
+    """Decode the Ascii85 encoded bytes-like object or ASCII string b.

    foldspaces is a flag that specifies whether the 'y' short sequence should be
    accepted as shorthand for 4 consecutive spaces (ASCII 0x20). This feature is
@@ -372,6 +361,8 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
    ignorechars should be a byte string containing characters to ignore from the
    input. This should only contain whitespace characters, and by default
    contains all whitespace characters in ASCII.
+
+    The result is returned as a bytes object.
    """
    b = _bytes_from_decode_data(b)
    if adobe:
@@ -431,10 +422,10 @@ _b85chars2 = None
 _b85dec = None

 def b85encode(b, pad=False):
-    """Encode an ASCII-encoded byte array in base85 format.
+    """Encode bytes-like object b in base85 format and return a bytes object.

-    If pad is true, the input is padded with "\\0" so its length is a multiple of
-    4 characters before encoding.
+    If pad is true, the input is padded with b'\\0' so its length is a multiple of
+    4 bytes before encoding.
    """
    global _b85chars, _b85chars2
    # Delay the initialization of tables to not waste memory
@@ -445,7 +436,10 @@ def b85encode(b, pad=False):
    return _85encode(b, _b85chars, _b85chars2, pad)

 def b85decode(b):
-    """Decode base85-encoded byte array"""
+    """Decode the base85-encoded bytes-like object or ASCII string b
+
+    The result is returned as a bytes object.
+    """
    global _b85dec
    # Delay the initialization of tables to not waste memory
    # if the function is never called
@@ -530,7 +524,7 @@ def _input_type_check(s):


 def encodebytes(s):
-    """Encode a bytestring into a bytestring containing multiple lines
+    """Encode a bytestring into a bytes object containing multiple lines
    of base-64 data."""
    _input_type_check(s)
    pieces = []
@@ -548,7 +542,7 @@ def encodestring(s):


 def decodebytes(s):
-    """Decode a bytestring of base-64 data into a bytestring."""
+    """Decode a bytestring of base-64 data into a bytes object."""
    _input_type_check(s)
    return binascii.a2b_base64(s)


--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -243,14 +243,26 @@ class BaseXYTestCase(unittest.TestCase):
                 (b'@@', b''),
                 (b'!', b''),
                 (b'YWJj\nYWI=', b'abcab'))
+        funcs = (
+            base64.b64decode,
+            base64.standard_b64decode,
+            base64.urlsafe_b64decode,
+        )
        for bstr, res in tests:
-            self.assertEqual(base64.b64decode(bstr), res)
-            self.assertEqual(base64.b64decode(bstr.decode('ascii')), res)
+            for func in funcs:
+                with self.subTest(bstr=bstr, func=func):
+                    self.assertEqual(func(bstr), res)
+                    self.assertEqual(func(bstr.decode('ascii')), res)
            with self.assertRaises(binascii.Error):
                base64.b64decode(bstr, validate=True)
            with self.assertRaises(binascii.Error):
                base64.b64decode(bstr.decode('ascii'), validate=True)

+        # Normal alphabet characters not discarded when alternative given
+        res = b'\xFB\xEF\xBE\xFF\xFF\xFF'
+        self.assertEqual(base64.b64decode(b'++[[//]]', b'[]'), res)
+        self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res)
+
    def test_b32encode(self):
        eq = self.assertEqual
        eq(base64.b32encode(b''), b'')
@@ -360,6 +372,10 @@ class BaseXYTestCase(unittest.TestCase):
           b'\x01\x02\xab\xcd\xef')
        eq(base64.b16decode(array('B', b"0102abcdef"), True),
           b'\x01\x02\xab\xcd\xef')
+        # Non-alphabet characters
+        self.assertRaises(binascii.Error, base64.b16decode, '0102AG')
+        # Incorrect "padding"
+        self.assertRaises(binascii.Error, base64.b16decode, '010')

    def test_a85encode(self):
        eq = self.assertEqual