Commit 150ea1a1 authored by Martin Panter

Issue #22088: Merge base64 docs from 3.5

parents 3d2778c8 ee3074e1
...@@ -24,8 +24,8 @@ POST request. The encoding algorithm is not the same as the ...@@ -24,8 +24,8 @@ POST request. The encoding algorithm is not the same as the
There are two interfaces provided by this module. The modern interface There are two interfaces provided by this module. The modern interface
supports encoding :term:`bytes-like objects <bytes-like object>` to ASCII supports encoding :term:`bytes-like objects <bytes-like object>` to ASCII
:class:`bytes`, and decoding :term:`bytes-like objects <bytes-like object>` or :class:`bytes`, and decoding :term:`bytes-like objects <bytes-like object>` or
strings containing ASCII to :class:`bytes`. All three :rfc:`3548` defined strings containing ASCII to :class:`bytes`. Both base-64 alphabets
alphabets (normal, URL-safe, and filesystem-safe) are supported. defined in :rfc:`3548` (normal, and URL- and filesystem-safe) are supported.
The legacy interface does not support decoding from strings, but it does The legacy interface does not support decoding from strings, but it does
provide functions for encoding and decoding to and from :term:`file objects provide functions for encoding and decoding to and from :term:`file objects
...@@ -69,9 +69,10 @@ The modern interface provides: ...@@ -69,9 +69,10 @@ The modern interface provides:
A :exc:`binascii.Error` exception is raised A :exc:`binascii.Error` exception is raised
if *s* is incorrectly padded. if *s* is incorrectly padded.
If *validate* is ``False`` (the default), non-base64-alphabet characters are If *validate* is ``False`` (the default), characters that are neither
in the normal base-64 alphabet nor the alternative alphabet are
discarded prior to the padding check. If *validate* is ``True``, discarded prior to the padding check. If *validate* is ``True``,
non-base64-alphabet characters in the input result in a these non-alphabet characters in the input result in a
:exc:`binascii.Error`. :exc:`binascii.Error`.
...@@ -89,7 +90,8 @@ The modern interface provides: ...@@ -89,7 +90,8 @@ The modern interface provides:
.. function:: urlsafe_b64encode(s) .. function:: urlsafe_b64encode(s)
Encode :term:`bytes-like object` *s* using a URL-safe alphabet, which Encode :term:`bytes-like object` *s* using the
URL- and filesystem-safe alphabet, which
substitutes ``-`` instead of ``+`` and ``_`` instead of ``/`` in the substitutes ``-`` instead of ``+`` and ``_`` instead of ``/`` in the
standard Base64 alphabet, and return the encoded :class:`bytes`. The result standard Base64 alphabet, and return the encoded :class:`bytes`. The result
can still contain ``=``. can still contain ``=``.
...@@ -97,7 +99,8 @@ The modern interface provides: ...@@ -97,7 +99,8 @@ The modern interface provides:
.. function:: urlsafe_b64decode(s) .. function:: urlsafe_b64decode(s)
Decode :term:`bytes-like object` or ASCII string *s* using a URL-safe Decode :term:`bytes-like object` or ASCII string *s*
using the URL- and filesystem-safe
alphabet, which substitutes ``-`` instead of ``+`` and ``_`` instead of alphabet, which substitutes ``-`` instead of ``+`` and ``_`` instead of
``/`` in the standard Base64 alphabet, and return the decoded ``/`` in the standard Base64 alphabet, and return the decoded
:class:`bytes`. :class:`bytes`.
...@@ -145,14 +148,14 @@ The modern interface provides: ...@@ -145,14 +148,14 @@ The modern interface provides:
lowercase alphabet is acceptable as input. For security purposes, the default lowercase alphabet is acceptable as input. For security purposes, the default
is ``False``. is ``False``.
A :exc:`TypeError` is raised if *s* is A :exc:`binascii.Error` is raised if *s* is
incorrectly padded or if there are non-alphabet characters present in the incorrectly padded or if there are non-alphabet characters present in the
input. input.
.. function:: a85encode(s, *, foldspaces=False, wrapcol=0, pad=False, adobe=False) .. function:: a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False)
Encode the :term:`bytes-like object` *s* using Ascii85 and return the Encode the :term:`bytes-like object` *b* using Ascii85 and return the
encoded :class:`bytes`. encoded :class:`bytes`.
*foldspaces* is an optional flag that uses the special short sequence 'y' *foldspaces* is an optional flag that uses the special short sequence 'y'
...@@ -172,9 +175,9 @@ The modern interface provides: ...@@ -172,9 +175,9 @@ The modern interface provides:
.. versionadded:: 3.4 .. versionadded:: 3.4
.. function:: a85decode(s, *, foldspaces=False, adobe=False, ignorechars=b' \\t\\n\\r\\v') .. function:: a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \\t\\n\\r\\v')
Decode the Ascii85 encoded :term:`bytes-like object` or ASCII string *s* and Decode the Ascii85 encoded :term:`bytes-like object` or ASCII string *b* and
return the decoded :class:`bytes`. return the decoded :class:`bytes`.
*foldspaces* is a flag that specifies whether the 'y' short sequence *foldspaces* is a flag that specifies whether the 'y' short sequence
...@@ -192,9 +195,9 @@ The modern interface provides: ...@@ -192,9 +195,9 @@ The modern interface provides:
.. versionadded:: 3.4 .. versionadded:: 3.4
.. function:: b85encode(s, pad=False) .. function:: b85encode(b, pad=False)
Encode the :term:`bytes-like object` *s* using base85 (as used in e.g. Encode the :term:`bytes-like object` *b* using base85 (as used in e.g.
git-style binary diffs) and return the encoded :class:`bytes`. git-style binary diffs) and return the encoded :class:`bytes`.
If *pad* is true, the input is padded with ``b'\0'`` so its length is a If *pad* is true, the input is padded with ``b'\0'`` so its length is a
......
...@@ -12,7 +12,7 @@ import binascii ...@@ -12,7 +12,7 @@ import binascii
__all__ = [ __all__ = [
# Legacy interface exports traditional RFC 1521 Base64 encodings # Legacy interface exports traditional RFC 2045 Base64 encodings
'encode', 'decode', 'encodebytes', 'decodebytes', 'encode', 'decode', 'encodebytes', 'decodebytes',
# Generalized interface for other encodings # Generalized interface for other encodings
'b64encode', 'b64decode', 'b32encode', 'b32decode', 'b64encode', 'b64decode', 'b32encode', 'b32decode',
...@@ -49,14 +49,11 @@ def _bytes_from_decode_data(s): ...@@ -49,14 +49,11 @@ def _bytes_from_decode_data(s):
# Base64 encoding/decoding uses binascii # Base64 encoding/decoding uses binascii
def b64encode(s, altchars=None): def b64encode(s, altchars=None):
"""Encode a byte string using Base64. """Encode the bytes-like object s using Base64 and return a bytes object.
s is the byte string to encode. Optional altchars must be a byte Optional altchars should be a byte string of length 2 which specifies an
string of length 2 which specifies an alternative alphabet for the alternative alphabet for the '+' and '/' characters. This allows an
'+' and '/' characters. This allows an application to application to e.g. generate url or filesystem safe Base64 strings.
e.g. generate url or filesystem safe Base64 strings.
The encoded byte string is returned.
""" """
encoded = binascii.b2a_base64(s, newline=False) encoded = binascii.b2a_base64(s, newline=False)
if altchars is not None: if altchars is not None:
...@@ -66,18 +63,19 @@ def b64encode(s, altchars=None): ...@@ -66,18 +63,19 @@ def b64encode(s, altchars=None):
def b64decode(s, altchars=None, validate=False): def b64decode(s, altchars=None, validate=False):
"""Decode a Base64 encoded byte string. """Decode the Base64 encoded bytes-like object or ASCII string s.
s is the byte string to decode. Optional altchars must be a Optional altchars must be a bytes-like object or ASCII string of length 2
string of length 2 which specifies the alternative alphabet used which specifies the alternative alphabet used instead of the '+' and '/'
instead of the '+' and '/' characters. characters.
The decoded string is returned. A binascii.Error is raised if s is The result is returned as a bytes object. A binascii.Error is raised if
incorrectly padded. s is incorrectly padded.
If validate is False (the default), non-base64-alphabet characters are If validate is False (the default), characters that are neither in the
discarded prior to the padding check. If validate is True, normal base-64 alphabet nor the alternative alphabet are discarded prior
non-base64-alphabet characters in the input result in a binascii.Error. to the padding check. If validate is True, these non-alphabet characters
in the input result in a binascii.Error.
""" """
s = _bytes_from_decode_data(s) s = _bytes_from_decode_data(s)
if altchars is not None: if altchars is not None:
...@@ -90,19 +88,19 @@ def b64decode(s, altchars=None, validate=False): ...@@ -90,19 +88,19 @@ def b64decode(s, altchars=None, validate=False):
def standard_b64encode(s): def standard_b64encode(s):
"""Encode a byte string using the standard Base64 alphabet. """Encode bytes-like object s using the standard Base64 alphabet.
s is the byte string to encode. The encoded byte string is returned. The result is returned as a bytes object.
""" """
return b64encode(s) return b64encode(s)
def standard_b64decode(s): def standard_b64decode(s):
"""Decode a byte string encoded with the standard Base64 alphabet. """Decode bytes encoded with the standard Base64 alphabet.
s is the byte string to decode. The decoded byte string is Argument s is a bytes-like object or ASCII string to decode. The result
returned. binascii.Error is raised if the input is incorrectly is returned as a bytes object. A binascii.Error is raised if the input
padded or if there are non-alphabet characters present in the is incorrectly padded. Characters that are not in the standard alphabet
input. are discarded prior to the padding check.
""" """
return b64decode(s) return b64decode(s)
...@@ -111,21 +109,22 @@ _urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_') ...@@ -111,21 +109,22 @@ _urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/') _urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')
def urlsafe_b64encode(s): def urlsafe_b64encode(s):
"""Encode a byte string using a url-safe Base64 alphabet. """Encode bytes using the URL- and filesystem-safe Base64 alphabet.
s is the byte string to encode. The encoded byte string is Argument s is a bytes-like object to encode. The result is returned as a
returned. The alphabet uses '-' instead of '+' and '_' instead of bytes object. The alphabet uses '-' instead of '+' and '_' instead of
'/'. '/'.
""" """
return b64encode(s).translate(_urlsafe_encode_translation) return b64encode(s).translate(_urlsafe_encode_translation)
def urlsafe_b64decode(s): def urlsafe_b64decode(s):
"""Decode a byte string encoded with the standard Base64 alphabet. """Decode bytes using the URL- and filesystem-safe Base64 alphabet.
s is the byte string to decode. The decoded byte string is Argument s is a bytes-like object or ASCII string to decode. The result
returned. binascii.Error is raised if the input is incorrectly is returned as a bytes object. A binascii.Error is raised if the input
padded or if there are non-alphabet characters present in the is incorrectly padded. Characters that are not in the URL-safe base-64
input. alphabet, and are not a plus '+' or slash '/', are discarded prior to the
padding check.
The alphabet uses '-' instead of '+' and '_' instead of '/'. The alphabet uses '-' instead of '+' and '_' instead of '/'.
""" """
...@@ -141,9 +140,7 @@ _b32tab2 = None ...@@ -141,9 +140,7 @@ _b32tab2 = None
_b32rev = None _b32rev = None
def b32encode(s): def b32encode(s):
"""Encode a byte string using Base32. """Encode the bytes-like object s using Base32 and return a bytes object.
s is the byte string to encode. The encoded byte string is returned.
""" """
global _b32tab2 global _b32tab2
# Delay the initialization of the table to not waste memory # Delay the initialization of the table to not waste memory
...@@ -181,11 +178,10 @@ def b32encode(s): ...@@ -181,11 +178,10 @@ def b32encode(s):
return bytes(encoded) return bytes(encoded)
def b32decode(s, casefold=False, map01=None): def b32decode(s, casefold=False, map01=None):
"""Decode a Base32 encoded byte string. """Decode the Base32 encoded bytes-like object or ASCII string s.
s is the byte string to decode. Optional casefold is a flag Optional casefold is a flag specifying whether a lowercase alphabet is
specifying whether a lowercase alphabet is acceptable as input. acceptable as input. For security purposes, the default is False.
For security purposes, the default is False.
RFC 3548 allows for optional mapping of the digit 0 (zero) to the RFC 3548 allows for optional mapping of the digit 0 (zero) to the
letter O (oh), and for optional mapping of the digit 1 (one) to letter O (oh), and for optional mapping of the digit 1 (one) to
...@@ -195,7 +191,7 @@ def b32decode(s, casefold=False, map01=None): ...@@ -195,7 +191,7 @@ def b32decode(s, casefold=False, map01=None):
the letter O). For security purposes the default is None, so that the letter O). For security purposes the default is None, so that
0 and 1 are not allowed in the input. 0 and 1 are not allowed in the input.
The decoded byte string is returned. binascii.Error is raised if The result is returned as a bytes object. A binascii.Error is raised if
the input is incorrectly padded or if there are non-alphabet the input is incorrectly padded or if there are non-alphabet
characters present in the input. characters present in the input.
""" """
...@@ -256,23 +252,20 @@ def b32decode(s, casefold=False, map01=None): ...@@ -256,23 +252,20 @@ def b32decode(s, casefold=False, map01=None):
# lowercase. The RFC also recommends against accepting input case # lowercase. The RFC also recommends against accepting input case
# insensitively. # insensitively.
def b16encode(s): def b16encode(s):
"""Encode a byte string using Base16. """Encode the bytes-like object s using Base16 and return a bytes object.
s is the byte string to encode. The encoded byte string is returned.
""" """
return binascii.hexlify(s).upper() return binascii.hexlify(s).upper()
def b16decode(s, casefold=False): def b16decode(s, casefold=False):
"""Decode a Base16 encoded byte string. """Decode the Base16 encoded bytes-like object or ASCII string s.
s is the byte string to decode. Optional casefold is a flag Optional casefold is a flag specifying whether a lowercase alphabet is
specifying whether a lowercase alphabet is acceptable as input. acceptable as input. For security purposes, the default is False.
For security purposes, the default is False.
The decoded byte string is returned. binascii.Error is raised if The result is returned as a bytes object. A binascii.Error is raised if
s were incorrectly padded or if there are non-alphabet characters s is incorrectly padded or if there are non-alphabet characters present
present in the string. in the input.
""" """
s = _bytes_from_decode_data(s) s = _bytes_from_decode_data(s)
if casefold: if casefold:
...@@ -315,19 +308,17 @@ def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False): ...@@ -315,19 +308,17 @@ def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
return b''.join(chunks) return b''.join(chunks)
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
"""Encode a byte string using Ascii85. """Encode bytes-like object b using Ascii85 and return a bytes object.
b is the byte string to encode. The encoded byte string is returned.
foldspaces is an optional flag that uses the special short sequence 'y' foldspaces is an optional flag that uses the special short sequence 'y'
instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This
feature is not supported by the "standard" Adobe encoding. feature is not supported by the "standard" Adobe encoding.
wrapcol controls whether the output should have newline ('\\n') characters wrapcol controls whether the output should have newline (b'\\n') characters
added to it. If this is non-zero, each output line will be at most this added to it. If this is non-zero, each output line will be at most this
many characters long. many characters long.
pad controls whether the input string is padded to a multiple of 4 before pad controls whether the input is padded to a multiple of 4 before
encoding. Note that the btoa implementation always pads. encoding. Note that the btoa implementation always pads.
adobe controls whether the encoded byte sequence is framed with <~ and ~>, adobe controls whether the encoded byte sequence is framed with <~ and ~>,
...@@ -358,9 +349,7 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): ...@@ -358,9 +349,7 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
return result return result
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
"""Decode an Ascii85 encoded byte string. """Decode the Ascii85 encoded bytes-like object or ASCII string b.
s is the byte string to decode.
foldspaces is a flag that specifies whether the 'y' short sequence should be foldspaces is a flag that specifies whether the 'y' short sequence should be
accepted as shorthand for 4 consecutive spaces (ASCII 0x20). This feature is accepted as shorthand for 4 consecutive spaces (ASCII 0x20). This feature is
...@@ -372,6 +361,8 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): ...@@ -372,6 +361,8 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
ignorechars should be a byte string containing characters to ignore from the ignorechars should be a byte string containing characters to ignore from the
input. This should only contain whitespace characters, and by default input. This should only contain whitespace characters, and by default
contains all whitespace characters in ASCII. contains all whitespace characters in ASCII.
The result is returned as a bytes object.
""" """
b = _bytes_from_decode_data(b) b = _bytes_from_decode_data(b)
if adobe: if adobe:
...@@ -431,10 +422,10 @@ _b85chars2 = None ...@@ -431,10 +422,10 @@ _b85chars2 = None
_b85dec = None _b85dec = None
def b85encode(b, pad=False): def b85encode(b, pad=False):
"""Encode an ASCII-encoded byte array in base85 format. """Encode bytes-like object b in base85 format and return a bytes object.
If pad is true, the input is padded with "\\0" so its length is a multiple of If pad is true, the input is padded with b'\\0' so its length is a multiple of
4 characters before encoding. 4 bytes before encoding.
""" """
global _b85chars, _b85chars2 global _b85chars, _b85chars2
# Delay the initialization of tables to not waste memory # Delay the initialization of tables to not waste memory
...@@ -445,7 +436,10 @@ def b85encode(b, pad=False): ...@@ -445,7 +436,10 @@ def b85encode(b, pad=False):
return _85encode(b, _b85chars, _b85chars2, pad) return _85encode(b, _b85chars, _b85chars2, pad)
def b85decode(b): def b85decode(b):
"""Decode base85-encoded byte array""" """Decode the base85-encoded bytes-like object or ASCII string b.
The result is returned as a bytes object.
"""
global _b85dec global _b85dec
# Delay the initialization of tables to not waste memory # Delay the initialization of tables to not waste memory
# if the function is never called # if the function is never called
...@@ -530,7 +524,7 @@ def _input_type_check(s): ...@@ -530,7 +524,7 @@ def _input_type_check(s):
def encodebytes(s): def encodebytes(s):
"""Encode a bytestring into a bytestring containing multiple lines """Encode a bytestring into a bytes object containing multiple lines
of base-64 data.""" of base-64 data."""
_input_type_check(s) _input_type_check(s)
pieces = [] pieces = []
...@@ -548,7 +542,7 @@ def encodestring(s): ...@@ -548,7 +542,7 @@ def encodestring(s):
def decodebytes(s): def decodebytes(s):
"""Decode a bytestring of base-64 data into a bytestring.""" """Decode a bytestring of base-64 data into a bytes object."""
_input_type_check(s) _input_type_check(s)
return binascii.a2b_base64(s) return binascii.a2b_base64(s)
......
...@@ -243,14 +243,26 @@ class BaseXYTestCase(unittest.TestCase): ...@@ -243,14 +243,26 @@ class BaseXYTestCase(unittest.TestCase):
(b'@@', b''), (b'@@', b''),
(b'!', b''), (b'!', b''),
(b'YWJj\nYWI=', b'abcab')) (b'YWJj\nYWI=', b'abcab'))
funcs = (
base64.b64decode,
base64.standard_b64decode,
base64.urlsafe_b64decode,
)
for bstr, res in tests: for bstr, res in tests:
self.assertEqual(base64.b64decode(bstr), res) for func in funcs:
self.assertEqual(base64.b64decode(bstr.decode('ascii')), res) with self.subTest(bstr=bstr, func=func):
self.assertEqual(func(bstr), res)
self.assertEqual(func(bstr.decode('ascii')), res)
with self.assertRaises(binascii.Error): with self.assertRaises(binascii.Error):
base64.b64decode(bstr, validate=True) base64.b64decode(bstr, validate=True)
with self.assertRaises(binascii.Error): with self.assertRaises(binascii.Error):
base64.b64decode(bstr.decode('ascii'), validate=True) base64.b64decode(bstr.decode('ascii'), validate=True)
# Normal alphabet characters not discarded when alternative given
res = b'\xFB\xEF\xBE\xFF\xFF\xFF'
self.assertEqual(base64.b64decode(b'++[[//]]', b'[]'), res)
self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res)
def test_b32encode(self): def test_b32encode(self):
eq = self.assertEqual eq = self.assertEqual
eq(base64.b32encode(b''), b'') eq(base64.b32encode(b''), b'')
...@@ -360,6 +372,10 @@ class BaseXYTestCase(unittest.TestCase): ...@@ -360,6 +372,10 @@ class BaseXYTestCase(unittest.TestCase):
b'\x01\x02\xab\xcd\xef') b'\x01\x02\xab\xcd\xef')
eq(base64.b16decode(array('B', b"0102abcdef"), True), eq(base64.b16decode(array('B', b"0102abcdef"), True),
b'\x01\x02\xab\xcd\xef') b'\x01\x02\xab\xcd\xef')
# Non-alphabet characters
self.assertRaises(binascii.Error, base64.b16decode, '0102AG')
# Incorrect "padding"
self.assertRaises(binascii.Error, base64.b16decode, '010')
def test_a85encode(self): def test_a85encode(self):
eq = self.assertEqual eq = self.assertEqual
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment