Issue #26331: Implement the parsing part of PEP 515.

Thanks to Georg Brandl for the patch.

Issue #26331: Implement the parsing part of PEP 515.
Thanks to Georg Brandl for the patch.
a721abac · Brett Cannon · ee73a657 · a721abac · a721abac · a721abac
Commit a721abac authored Sep 09, 2016 by Brett Cannon
22 changed files
--- a/Doc/library/decimal.rst
+++ b/Doc/library/decimal.rst
@@ -345,7 +345,7 @@ Decimal objects
   *value* can be an integer, string, tuple, :class:`float`, or another :class:`Decimal`
   object. If no *value* is given, returns ``Decimal('0')``.  If *value* is a
   string, it should conform to the decimal numeric string syntax after leading
-   and trailing whitespace characters are removed::
+   and trailing whitespace characters, as well as underscores throughout, are removed::

      sign           ::=  '+' | '-'
      digit          ::=  '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
@@ -394,6 +394,10 @@ Decimal objects
      :class:`float` arguments raise an exception if the :exc:`FloatOperation`
      trap is set. By default the trap is off.

+   .. versionchanged:: 3.6
+      Underscores are allowed for grouping, as with integral and floating-point
+      literals in code.
+
   Decimal floating point objects share many properties with the other built-in
   numeric types such as :class:`float` and :class:`int`.  All of the usual math
   operations and special methods apply.  Likewise, decimal objects can be
@@ -1075,8 +1079,8 @@ In addition to the three supplied contexts, new contexts can be created with the
         Decimal('4.44')

      This method implements the to-number operation of the IBM specification.
-      If the argument is a string, no leading or trailing whitespace is
-      permitted.
+      If the argument is a string, no leading or trailing whitespace or
+      underscores are permitted.

   .. method:: create_decimal_from_float(f)


--- a/Doc/library/functions.rst
+++ b/Doc/library/functions.rst
@@ -271,6 +271,9 @@ are always available.  They are listed here in alphabetical order.

   The complex type is described in :ref:`typesnumeric`.

+   .. versionchanged:: 3.6
+      Grouping digits with underscores as in code literals is allowed.
+

 .. function:: delattr(object, name)

@@ -531,11 +534,14 @@ are always available.  They are listed here in alphabetical order.

   The float type is described in :ref:`typesnumeric`.

-   .. index::
-      single: __format__
-      single: string; format() (built-in function)
+   .. versionchanged:: 3.6
+      Grouping digits with underscores as in code literals is allowed.


+.. index::
+   single: __format__
+   single: string; format() (built-in function)
+
 .. function:: format(value[, format_spec])

   Convert a *value* to a "formatted" representation, as controlled by
@@ -702,6 +708,10 @@ are always available.  They are listed here in alphabetical order.
      :meth:`base.__int__ <object.__int__>` instead of :meth:`base.__index__
      <object.__index__>`.

+   .. versionchanged:: 3.6
+      Grouping digits with underscores as in code literals is allowed.
+
+
 .. function:: isinstance(object, classinfo)

   Return true if the *object* argument is an instance of the *classinfo*

--- a/Doc/reference/lexical_analysis.rst
+++ b/Doc/reference/lexical_analysis.rst
@@ -721,20 +721,24 @@ Integer literals
 Integer literals are described by the following lexical definitions:

 .. productionlist::
-   integer: `decimalinteger` | `octinteger` | `hexinteger` | `bininteger`
-   decimalinteger: `nonzerodigit` `digit`* | "0"+
+   integer: `decinteger` | `bininteger` | `octinteger` | `hexinteger`
+   decinteger: `nonzerodigit` (["_"] `digit`)* | "0"+ (["_"] "0")*
+   bininteger: "0" ("b" | "B") (["_"] `bindigit`)+
+   octinteger: "0" ("o" | "O") (["_"] `octdigit`)+
+   hexinteger: "0" ("x" | "X") (["_"] `hexdigit`)+
   nonzerodigit: "1"..."9"
   digit: "0"..."9"
-   octinteger: "0" ("o" | "O") `octdigit`+
-   hexinteger: "0" ("x" | "X") `hexdigit`+
-   bininteger: "0" ("b" | "B") `bindigit`+
+   bindigit: "0" | "1"
   octdigit: "0"..."7"
   hexdigit: `digit` | "a"..."f" | "A"..."F"
-   bindigit: "0" | "1"

 There is no limit for the length of integer literals apart from what can be
 stored in available memory.

+Underscores are ignored for determining the numeric value of the literal.  They
+can be used to group digits for enhanced readability.  One underscore can occur
+between digits, and after base specifiers like ``0x``.
+
 Note that leading zeros in a non-zero decimal number are not allowed. This is
 for disambiguation with C-style octal literals, which Python used before version
 3.0.
@@ -743,6 +747,10 @@ Some examples of integer literals::

   7     2147483647                        0o177    0b100110111
   3     79228162514264337593543950336     0o377    0xdeadbeef
+         100_000_000_000                   0b_1110_0101
+
+.. versionchanged:: 3.6
+   Underscores are now allowed for grouping purposes in literals.


 .. _floating:
@@ -754,23 +762,28 @@ Floating point literals are described by the following lexical definitions:

 .. productionlist::
   floatnumber: `pointfloat` | `exponentfloat`
-   pointfloat: [`intpart`] `fraction` | `intpart` "."
-   exponentfloat: (`intpart` | `pointfloat`) `exponent`
-   intpart: `digit`+
-   fraction: "." `digit`+
-   exponent: ("e" | "E") ["+" | "-"] `digit`+
+   pointfloat: [`digitpart`] `fraction` | `digitpart` "."
+   exponentfloat: (`digitpart` | `pointfloat`) `exponent`
+   digitpart: `digit` (["_"] `digit`)*
+   fraction: "." `digitpart`
+   exponent: ("e" | "E") ["+" | "-"] `digitpart`

 Note that the integer and exponent parts are always interpreted using radix 10.
 For example, ``077e010`` is legal, and denotes the same number as ``77e10``. The
-allowed range of floating point literals is implementation-dependent. Some
-examples of floating point literals::
+allowed range of floating point literals is implementation-dependent.  As in
+integer literals, underscores are supported for digit grouping.
+
+Some examples of floating point literals::

-   3.14    10.    .001    1e100    3.14e-10    0e0
+   3.14    10.    .001    1e100    3.14e-10    0e0    3.14_15_93

 Note that numeric literals do not include a sign; a phrase like ``-1`` is
 actually an expression composed of the unary operator ``-`` and the literal
 ``1``.

+.. versionchanged:: 3.6
+   Underscores are now allowed for grouping purposes in literals.
+

 .. _imaginary:

@@ -780,7 +793,7 @@ Imaginary literals
 Imaginary literals are described by the following lexical definitions:

 .. productionlist::
-   imagnumber: (`floatnumber` | `intpart`) ("j" | "J")
+   imagnumber: (`floatnumber` | `digitpart`) ("j" | "J")

 An imaginary literal yields a complex number with a real part of 0.0.  Complex
 numbers are represented as a pair of floating point numbers and have the same
@@ -788,7 +801,7 @@ restrictions on their range.  To create a complex number with a nonzero real
 part, add a floating point number to it, e.g., ``(3+4j)``.  Some examples of
 imaginary literals::

-   3.14j   10.j    10j     .001j   1e100j  3.14e-10j
+   3.14j   10.j    10j     .001j   1e100j   3.14e-10j   3.14_15_93j


 .. _operators:

--- a/Doc/whatsnew/3.6.rst
+++ b/Doc/whatsnew/3.6.rst
@@ -124,6 +124,29 @@ Windows improvements:
 New Features
 ============

+.. _pep-515:
+
+PEP 515: Underscores in Numeric Literals
+========================================
+
+Prior to PEP 515, there was no support for writing long numeric
+literals with some form of separator to improve readability. For
+instance, how big is ``1000000000000000``? With :pep:`515`, though,
+you can use underscores to separate digits as desired to make numeric
+literals easier to read: ``1_000_000_000_000_000``. Underscores can be
+used with other numeric literals beyond integers, e.g.
+``0x_FF_FF_FF_FF``.
+
+Single underscores are allowed between digits and after any base
+specifier. Leading, trailing, or multiple underscores in a row are not
+allowed.
+
+.. seealso::
+
+   :pep:`515` - Underscores in Numeric Literals
+   PEP written by Georg Brandl & Serhiy Storchaka.
+
+
 .. _pep-523:

 PEP 523: Adding a frame evaluation API to CPython

--- a/Include/pystrtod.h
+++ b/Include/pystrtod.h
@@ -19,6 +19,10 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
                                         int *type);

 #ifndef Py_LIMITED_API
+PyAPI_FUNC(PyObject *) _Py_string_to_number_with_underscores(
+    const char *str, Py_ssize_t len, const char *what, PyObject *obj, void *arg,
+    PyObject *(*innerfunc)(const char *, Py_ssize_t, void *));
+
 PyAPI_FUNC(double) _Py_parse_inf_or_nan(const char *p, char **endptr);
 #endif


--- a/Lib/_pydecimal.py
+++ b/Lib/_pydecimal.py
@@ -589,7 +589,7 @@ class Decimal(object):
        # From a string
        # REs insist on real strings, so we can too.
        if isinstance(value, str):
-            m = _parser(value.strip())
+            m = _parser(value.strip().replace("_", ""))
            if m is None:
                if context is None:
                    context = getcontext()
@@ -4125,7 +4125,7 @@ class Context(object):
        This will make it round up for that operation.
        """
        rounding = self.rounding
-        self.rounding= type
+        self.rounding = type
        return rounding

    def create_decimal(self, num='0'):
@@ -4134,10 +4134,10 @@ class Context(object):
        This method implements the to-number operation of the
        IBM Decimal specification."""

-        if isinstance(num, str) and num != num.strip():
+        if isinstance(num, str) and (num != num.strip() or '_' in num):
            return self._raise_error(ConversionSyntax,
-                                     "no trailing or leading whitespace is "
-                                     "permitted.")
+                                     "trailing or leading whitespace and "
+                                     "underscores are not permitted.")

        d = Decimal(num, context=self)
        if d._isnan() and len(d._int) > self.prec - self.clamp:

--- a/Lib/test/test_complex.py
+++ b/Lib/test/test_complex.py
 import unittest
 from test import support
+from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
+                               INVALID_UNDERSCORE_LITERALS)

 from random import random
 from math import atan2, isnan, copysign
@@ -377,6 +379,18 @@ class ComplexTest(unittest.TestCase):
        self.assertAlmostEqual(complex(complex1(1j)), 2j)
        self.assertRaises(TypeError, complex, complex2(1j))

+    def test_underscores(self):
+        # check underscores
+        for lit in VALID_UNDERSCORE_LITERALS:
+            if not any(ch in lit for ch in 'xXoObB'):
+                self.assertEqual(complex(lit), eval(lit))
+                self.assertEqual(complex(lit), complex(lit.replace('_', '')))
+        for lit in INVALID_UNDERSCORE_LITERALS:
+            if lit in ('0_7', '09_99'):  # octals are not recognized here
+                continue
+            if not any(ch in lit for ch in 'xXoObB'):
+                self.assertRaises(ValueError, complex, lit)
+
    def test_hash(self):
        for x in range(-30, 30):
            self.assertEqual(hash(x), hash(complex(x, 0)))

--- a/Lib/test/test_decimal.py
+++ b/Lib/test/test_decimal.py
@@ -554,6 +554,10 @@ class ExplicitConstructionTest(unittest.TestCase):
        self.assertEqual(str(Decimal('  -7.89')), '-7.89')
        self.assertEqual(str(Decimal("  3.45679  ")), '3.45679')

+        # underscores
+        self.assertEqual(str(Decimal('1_3.3e4_0')), '1.33E+41')
+        self.assertEqual(str(Decimal('1_0_0_0')), '1000')
+
        # unicode whitespace
        for lead in ["", ' ', '\u00a0', '\u205f']:
            for trail in ["", ' ', '\u00a0', '\u205f']:
@@ -578,6 +582,9 @@ class ExplicitConstructionTest(unittest.TestCase):
            # embedded NUL
            self.assertRaises(InvalidOperation, Decimal, "12\u00003")

+            # underscores don't prevent errors
+            self.assertRaises(InvalidOperation, Decimal, "1_2_\u00003")
+
    @cpython_only
    def test_from_legacy_strings(self):
        import _testcapi
@@ -772,6 +779,9 @@ class ExplicitConstructionTest(unittest.TestCase):
        self.assertRaises(InvalidOperation, nc.create_decimal, "xyz")
        self.assertRaises(ValueError, nc.create_decimal, (1, "xyz", -25))
        self.assertRaises(TypeError, nc.create_decimal, "1234", "5678")
+        # no whitespace and underscore stripping is done with this method
+        self.assertRaises(InvalidOperation, nc.create_decimal, " 1234")
+        self.assertRaises(InvalidOperation, nc.create_decimal, "12_34")

        # too many NaN payload digits
        nc.prec = 3

--- a/Lib/test/test_float.py
+++ b/Lib/test/test_float.py
-
 import fractions
 import operator
 import os
@@ -9,6 +8,8 @@ import time
 import unittest

 from test import support
+from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
+                               INVALID_UNDERSCORE_LITERALS)
 from math import isinf, isnan, copysign, ldexp

 INF = float("inf")
@@ -60,6 +61,27 @@ class GeneralFloatCases(unittest.TestCase):
        float(b'.' + b'1'*1000)
        float('.' + '1'*1000)

+    def test_underscores(self):
+        for lit in VALID_UNDERSCORE_LITERALS:
+            if not any(ch in lit for ch in 'jJxXoObB'):
+                self.assertEqual(float(lit), eval(lit))
+                self.assertEqual(float(lit), float(lit.replace('_', '')))
+        for lit in INVALID_UNDERSCORE_LITERALS:
+            if lit in ('0_7', '09_99'):  # octals are not recognized here
+                continue
+            if not any(ch in lit for ch in 'jJxXoObB'):
+                self.assertRaises(ValueError, float, lit)
+        # Additional test cases; nan and inf are never valid as literals,
+        # only in the float() constructor, but we don't allow underscores
+        # in or around them.
+        self.assertRaises(ValueError, float, '_NaN')
+        self.assertRaises(ValueError, float, 'Na_N')
+        self.assertRaises(ValueError, float, 'IN_F')
+        self.assertRaises(ValueError, float, '-_INF')
+        self.assertRaises(ValueError, float, '-INF_')
+        # Check that we handle bytes values correctly.
+        self.assertRaises(ValueError, float, b'0_.\xff9')
+
    def test_non_numeric_input_types(self):
        # Test possible non-numeric types for the argument x, including
        # subclasses of the explicitly documented accepted types.

--- a/Lib/test/test_grammar.py
+++ b/Lib/test/test_grammar.py
@@ -16,6 +16,87 @@ from collections import ChainMap
 from test import ann_module2
 import test

+# These are shared with test_tokenize and other test modules.
+#
+# Note: since several test cases filter out floats by looking for "e" and ".",
+# don't add hexadecimal literals that contain "e" or "E".
+VALID_UNDERSCORE_LITERALS = [
+    '0_0_0',
+    '4_2',
+    '1_0000_0000',
+    '0b1001_0100',
+    '0xffff_ffff',
+    '0o5_7_7',
+    '1_00_00.5',
+    '1_00_00.5e5',
+    '1_00_00e5_1',
+    '1e1_0',
+    '.1_4',
+    '.1_4e1',
+    '0b_0',
+    '0x_f',
+    '0o_5',
+    '1_00_00j',
+    '1_00_00.5j',
+    '1_00_00e5_1j',
+    '.1_4j',
+    '(1_2.5+3_3j)',
+    '(.5_6j)',
+]
+INVALID_UNDERSCORE_LITERALS = [
+    # Trailing underscores:
+    '0_',
+    '42_',
+    '1.4j_',
+    '0x_',
+    '0b1_',
+    '0xf_',
+    '0o5_',
+    '0 if 1_Else 1',
+    # Underscores in the base selector:
+    '0_b0',
+    '0_xf',
+    '0_o5',
+    # Old-style octal, still disallowed:
+    '0_7',
+    '09_99',
+    # Multiple consecutive underscores:
+    '4_______2',
+    '0.1__4',
+    '0.1__4j',
+    '0b1001__0100',
+    '0xffff__ffff',
+    '0x___',
+    '0o5__77',
+    '1e1__0',
+    '1e1__0j',
+    # Underscore right before a dot:
+    '1_.4',
+    '1_.4j',
+    # Underscore right after a dot:
+    '1._4',
+    '1._4j',
+    '._5',
+    '._5j',
+    # Underscore right after a sign:
+    '1.0e+_1',
+    '1.0e+_1j',
+    # Underscore right before j:
+    '1.4_j',
+    '1.4e5_j',
+    # Underscore right before e:
+    '1_e1',
+    '1.4_e1',
+    '1.4_e1j',
+    # Underscore right after e:
+    '1e_1',
+    '1.4e_1',
+    '1.4e_1j',
+    # Complex cases with parens:
+    '(1+1.5_j_)',
+    '(1+1.5_j)',
+]
+

 class TokenTests(unittest.TestCase):

@@ -95,6 +176,14 @@ class TokenTests(unittest.TestCase):
        self.assertEqual(1 if 0else 0, 0)
        self.assertRaises(SyntaxError, eval, "0 if 1Else 0")

+    def test_underscore_literals(self):
+        for lit in VALID_UNDERSCORE_LITERALS:
+            self.assertEqual(eval(lit), eval(lit.replace('_', '')))
+        for lit in INVALID_UNDERSCORE_LITERALS:
+            self.assertRaises(SyntaxError, eval, lit)
+        # Sanity check: no literal begins with an underscore
+        self.assertRaises(NameError, eval, "_0")
+
    def test_string_literals(self):
        x = ''; y = ""; self.assertTrue(len(x) == 0 and x == y)
        x = '\''; y = "'"; self.assertTrue(len(x) == 1 and x == y and ord(x) == 39)

--- a/Lib/test/test_int.py
+++ b/Lib/test/test_int.py
@@ -2,6 +2,8 @@ import sys

 import unittest
 from test import support
+from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
+                               INVALID_UNDERSCORE_LITERALS)

 L = [
        ('0', 0),
@@ -212,6 +214,25 @@ class IntTestCases(unittest.TestCase):
        self.assertEqual(int('2br45qc', 35), 4294967297)
        self.assertEqual(int('1z141z5', 36), 4294967297)

+    def test_underscores(self):
+        for lit in VALID_UNDERSCORE_LITERALS:
+            if any(ch in lit for ch in '.eEjJ'):
+                continue
+            self.assertEqual(int(lit, 0), eval(lit))
+            self.assertEqual(int(lit, 0), int(lit.replace('_', ''), 0))
+        for lit in INVALID_UNDERSCORE_LITERALS:
+            if any(ch in lit for ch in '.eEjJ'):
+                continue
+            self.assertRaises(ValueError, int, lit, 0)
+        # Additional test cases with bases != 0, only for the constructor:
+        self.assertEqual(int("1_00", 3), 9)
+        self.assertEqual(int("0_100"), 100)  # not valid as a literal!
+        self.assertEqual(int(b"1_00"), 100)  # byte underscore
+        self.assertRaises(ValueError, int, "_100")
+        self.assertRaises(ValueError, int, "+_100")
+        self.assertRaises(ValueError, int, "1__00")
+        self.assertRaises(ValueError, int, "100_")
+
    @support.cpython_only
    def test_small_ints(self):
        # Bug #3236: Return small longs from PyLong_FromString

--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -3,7 +3,9 @@ from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                     STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                     open as tokenize_open, Untokenizer)
 from io import BytesIO
-from unittest import TestCase, mock, main
+from unittest import TestCase, mock
+from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
+                               INVALID_UNDERSCORE_LITERALS)
 import os
 import token

@@ -185,6 +187,21 @@ def k(x):
    NUMBER     '3.14e159'    (1, 4) (1, 12)
    """)

+    def test_underscore_literals(self):
+        def number_token(s):
+            f = BytesIO(s.encode('utf-8'))
+            for toktype, token, start, end, line in tokenize(f.readline):
+                if toktype == NUMBER:
+                    return token
+            return 'invalid token'
+        for lit in VALID_UNDERSCORE_LITERALS:
+            if '(' in lit:
+                # this won't work with compound complex inputs
+                continue
+            self.assertEqual(number_token(lit), lit)
+        for lit in INVALID_UNDERSCORE_LITERALS:
+            self.assertNotEqual(number_token(lit), lit)
+
    def test_string(self):
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
@@ -1529,11 +1546,10 @@ class TestRoundtrip(TestCase):
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

-        # Tokenize is broken on test_unicode_identifiers.py because regular
-        # expressions are broken on the obscure unicode identifiers in it.
-        # *sigh* With roundtrip extended to test the 5-tuple mode of
-        # untokenize, 7 more testfiles fail.  Remove them also until the
-        # failure is diagnosed.
+        # Tokenize is broken on test_unicode_identifiers.py because regular
+        # expressions are broken on the obscure unicode identifiers in it.
+        # *sigh* With roundtrip extended to test the 5-tuple mode of untokenize,
+        # 7 more testfiles fail.  Remove them also until the failure is diagnosed.

        testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
        for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
@@ -1565,4 +1581,5 @@ class TestRoundtrip(TestCase):
 
 
 if __name__ == "__main__":
-    main()
+    import unittest  # only TestCase and mock are imported by name above
+    unittest.main()
--- a/Lib/test/test_types.py
+++ b/Lib/test/test_types.py
@@ -48,6 +48,7 @@ class TypesTests(unittest.TestCase):
    def test_float_constructor(self):
        self.assertRaises(ValueError, float, '')
        self.assertRaises(ValueError, float, '5\0')
+        self.assertRaises(ValueError, float, '5_5\0')

    def test_zero_division(self):
        try: 5.0 / 0.0

--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -120,16 +120,17 @@ Comment = r'#[^\r\n]*'
 Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
 Name = r'\w+'

-Hexnumber = r'0[xX][0-9a-fA-F]+'
-Binnumber = r'0[bB][01]+'
-Octnumber = r'0[oO][0-7]+'
-Decnumber = r'(?:0+|[1-9][0-9]*)'
+Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
+Binnumber = r'0[bB](?:_?[01])+'
+Octnumber = r'0[oO](?:_?[0-7])+'
+Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
 Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
-Exponent = r'[eE][-+]?[0-9]+'
-Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
-Expfloat = r'[0-9]+' + Exponent
+Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
+Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
+                   r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
+Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
 Floatnumber = group(Pointfloat, Expfloat)
-Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
+Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
 Number = group(Imagnumber, Floatnumber, Intnumber)

 # Return the empty string, plus all of the valid string prefixes.

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -17,6 +17,8 @@ Core and Builtins
  efficient bytecode. Patch by Demur Rumed, design by Serhiy Storchaka,
  reviewed by Serhiy Storchaka and Victor Stinner.

+- Issue #26331: Implement tokenizing support for PEP 515. Patch by Georg Brandl.
+
 - Issue #27999: Make "global after use" a SyntaxError, and ditto for nonlocal.
  Patch by Ivan Levkivskyi.

@@ -2678,7 +2680,7 @@ Library
 - Issue #24774: Fix docstring in http.server.test. Patch from Chiu-Hsiang Hsu.

 - Issue #21159: Improve message in configparser.InterpolationMissingOptionError.
-  Patch from Łukasz Langa.
+  Patch from Łukasz Langa.

 - Issue #20362: Honour TestCase.longMessage correctly in assertRegex.
  Patch from Ilia Kurenkov.
@@ -4606,7 +4608,7 @@ Library
  Based on patch by Martin Panter.

 - Issue #17293: uuid.getnode() now determines MAC address on AIX using netstat.
-  Based on patch by Aivars Kalvāns.
+  Based on patch by Aivars Kalvāns.

 - Issue #22769: Fixed ttk.Treeview.tag_has() when called without arguments.


--- a/Modules/_decimal/_decimal.c
+++ b/Modules/_decimal/_decimal.c
@@ -1889,12 +1889,13 @@ is_space(enum PyUnicode_Kind kind, void *data, Py_ssize_t pos)
 /* Return the ASCII representation of a numeric Unicode string. The numeric
   string may contain ascii characters in the range [1, 127], any Unicode
   space and any unicode digit. If strip_ws is true, leading and trailing
-   whitespace is stripped.
+   whitespace is stripped. If ignore_underscores is true, underscores are
+   ignored.

   Return NULL if malloc fails and an empty string if invalid characters
   are found. */
 static char *
-numeric_as_ascii(const PyObject *u, int strip_ws)
+numeric_as_ascii(const PyObject *u, int strip_ws, int ignore_underscores)
 {
    enum PyUnicode_Kind kind;
    void *data;
@@ -1929,6 +1930,9 @@ numeric_as_ascii(const PyObject *u, int strip_ws)

    for (; j < len; j++) {
        ch = PyUnicode_READ(kind, data, j);
+        if (ignore_underscores && ch == '_') {
+            continue;
+        }
        if (0 < ch && ch <= 127) {
            *cp++ = ch;
            continue;
@@ -2011,7 +2015,7 @@ PyDecType_FromUnicode(PyTypeObject *type, const PyObject *u,
    PyObject *dec;
    char *s;

-    s = numeric_as_ascii(u, 0);
+    s = numeric_as_ascii(u, 0, 0);
    if (s == NULL) {
        return NULL;
    }
@@ -2031,7 +2035,7 @@ PyDecType_FromUnicodeExactWS(PyTypeObject *type, const PyObject *u,
    PyObject *dec;
    char *s;

-    s = numeric_as_ascii(u, 1);
+    s = numeric_as_ascii(u, 1, 1);
    if (s == NULL) {
        return NULL;
    }

--- a/Objects/complexobject.c
+++ b/Objects/complexobject.c
@@ -759,29 +759,12 @@ static PyMemberDef complex_members[] = {
 };

 static PyObject *
-complex_subtype_from_string(PyTypeObject *type, PyObject *v)
+complex_from_string_inner(const char *s, Py_ssize_t len, void *type)
 {
-    const char *s, *start;
-    char *end;
    double x=0.0, y=0.0, z;
    int got_bracket=0;
-    PyObject *s_buffer = NULL;
-    Py_ssize_t len;
-
-    if (PyUnicode_Check(v)) {
-        s_buffer = _PyUnicode_TransformDecimalAndSpaceToASCII(v);
-        if (s_buffer == NULL)
-            return NULL;
-        s = PyUnicode_AsUTF8AndSize(s_buffer, &len);
-        if (s == NULL)
-            goto error;
-    }
-    else {
-        PyErr_Format(PyExc_TypeError,
-            "complex() argument must be a string or a number, not '%.200s'",
-            Py_TYPE(v)->tp_name);
-        return NULL;
-    }
+    const char *start;
+    char *end;

    /* position on first nonblank */
    start = s;
@@ -822,7 +805,7 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
        if (PyErr_ExceptionMatches(PyExc_ValueError))
            PyErr_Clear();
        else
-            goto error;
+            return NULL;
    }
    if (end != s) {
        /* all 4 forms starting with <float> land here */
@@ -835,7 +818,7 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
                if (PyErr_ExceptionMatches(PyExc_ValueError))
                    PyErr_Clear();
                else
-                    goto error;
+                    return NULL;
            }
            if (end != s)
                /* <float><signed-float>j */
@@ -890,17 +873,45 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
    if (s-start != len)
        goto parse_error;

-    Py_XDECREF(s_buffer);
-    return complex_subtype_from_doubles(type, x, y);
+    return complex_subtype_from_doubles((PyTypeObject *)type, x, y);

  parse_error:
    PyErr_SetString(PyExc_ValueError,
                    "complex() arg is a malformed string");
-  error:
-    Py_XDECREF(s_buffer);
    return NULL;
 }

+static PyObject *
+complex_subtype_from_string(PyTypeObject *type, PyObject *v)
+{
+    const char *s;
+    PyObject *s_buffer = NULL, *result = NULL;
+    Py_ssize_t len;
+
+    if (PyUnicode_Check(v)) {
+        s_buffer = _PyUnicode_TransformDecimalAndSpaceToASCII(v);
+        if (s_buffer == NULL) {
+            return NULL;
+        }
+        s = PyUnicode_AsUTF8AndSize(s_buffer, &len);
+        if (s == NULL) {
+            goto exit;
+        }
+    }
+    else {
+        PyErr_Format(PyExc_TypeError,
+            "complex() argument must be a string or a number, not '%.200s'",
+            Py_TYPE(v)->tp_name);
+        return NULL;
+    }
+
+    result = _Py_string_to_number_with_underscores(s, len, "complex", v, type,
+                                                   complex_from_string_inner);
+  exit:
+    Py_DECREF(s_buffer);
+    return result;
+}
+
 static PyObject *
 complex_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {

--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -124,11 +124,43 @@ PyFloat_FromDouble(double fval)
    return (PyObject *) op;
 }

+static PyObject *
+float_from_string_inner(const char *s, Py_ssize_t len, void *obj)
+{
+    double x;
+    const char *end;
+    const char *last = s + len;
+    /* strip space */
+    while (s < last && Py_ISSPACE(*s)) {
+        s++;
+    }
+
+    while (s < last - 1 && Py_ISSPACE(last[-1])) {
+        last--;
+    }
+
+    /* We don't care about overflow or underflow.  If the platform
+     * supports them, infinities and signed zeroes (on underflow) are
+     * fine. */
+    x = PyOS_string_to_double(s, (char **)&end, NULL);
+    if (end != last) {
+        PyErr_Format(PyExc_ValueError,
+                     "could not convert string to float: "
+                     "%R", obj);
+        return NULL;
+    }
+    else if (x == -1.0 && PyErr_Occurred()) {
+        return NULL;
+    }
+    else {
+        return PyFloat_FromDouble(x);
+    }
+}
+
 PyObject *
 PyFloat_FromString(PyObject *v)
 {
-    const char *s, *last, *end;
-    double x;
+    const char *s;
    PyObject *s_buffer = NULL;
    Py_ssize_t len;
    Py_buffer view = {NULL, NULL};
@@ -169,27 +201,8 @@ PyFloat_FromString(PyObject *v)
            Py_TYPE(v)->tp_name);
        return NULL;
    }
-    last = s + len;
-    /* strip space */
-    while (s < last && Py_ISSPACE(*s))
-        s++;
-    while (s < last - 1 && Py_ISSPACE(last[-1]))
-        last--;
-    /* We don't care about overflow or underflow.  If the platform
-     * supports them, infinities and signed zeroes (on underflow) are
-     * fine. */
-    x = PyOS_string_to_double(s, (char **)&end, NULL);
-    if (end != last) {
-        PyErr_Format(PyExc_ValueError,
-                     "could not convert string to float: "
-                     "%R", v);
-        result = NULL;
-    }
-    else if (x == -1.0 && PyErr_Occurred())
-        result = NULL;
-    else
-        result = PyFloat_FromDouble(x);
-
+    result = _Py_string_to_number_with_underscores(s, len, "float", v, v,
+                                                   float_from_string_inner);
    PyBuffer_Release(&view);
    Py_XDECREF(s_buffer);
    return result;

--- a/Objects/longobject.c
+++ b/Objects/longobject.c
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -4018,7 +4018,7 @@ ast_for_stmt(struct compiling *c, const node *n)
 }

 static PyObject *
-parsenumber(struct compiling *c, const char *s)
+parsenumber_raw(struct compiling *c, const char *s)
 {
    const char *end;
    long x;
@@ -4060,6 +4060,42 @@ parsenumber(struct compiling *c, const char *s)
     }
 }
 
+/* Parse the numeric literal `s`, which may contain underscores.
+
+   If `s` has no underscore it is passed straight to parsenumber_raw().
+   Otherwise every underscore is dropped into a temporary copy and that
+   copy is parsed instead; underscore placement is not checked here.
+
+   Returns the result of parsenumber_raw(), or NULL with MemoryError set
+   if the temporary copy cannot be allocated. */
+static PyObject *
+parsenumber(struct compiling *c, const char *s)
+{
+    char *dup, *end;
+    PyObject *res = NULL;
+
+    assert(s != NULL);
+
+    if (strchr(s, '_') == NULL) {
+        return parsenumber_raw(c, s);
+    }
+    /* Create a duplicate without underscores. */
+    dup = PyMem_Malloc(strlen(s) + 1);
+    if (dup == NULL) {
+        return PyErr_NoMemory();
+    }
+    end = dup;
+    for (; *s; s++) {
+        if (*s != '_') {
+            *end++ = *s;
+        }
+    }
+    *end = '\0';
+    res = parsenumber_raw(c, dup);
+    PyMem_Free(dup);
+    return res;
+}
+
 static PyObject *
 decode_utf8(struct compiling *c, const char **sPtr, const char *end)
 {

--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@@ -370,6 +370,83 @@ PyOS_string_to_double(const char *s,
     return result;
 }
 
+/* Remove underscores that follow the underscore placement rule from
+   the string and then call the `innerfunc` function on the result.
+   It should return a new object or NULL on exception.
+
+   `s` must be NUL-terminated: the scan stops at the first NUL byte, and
+   when underscores are present the input is rejected unless that NUL is
+   at offset `orig_len` (i.e. no embedded NULs).
+
+   `what` is used for the error message emitted when underscores are detected
+   that don't follow the rule. `obj` is the object shown (via %R) in that
+   message. `arg` is an opaque pointer passed to the inner function.
+
+   Returns the result of `innerfunc`; or NULL with ValueError set when the
+   underscores are misplaced; or NULL with MemoryError set when the
+   temporary buffer cannot be allocated.
+
+   This is used to implement underscore-agnostic conversion for floats
+   and complex numbers.
+*/
+PyObject *
+_Py_string_to_number_with_underscores(
+    const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg,
+    PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
+{
+    char prev;
+    const char *p, *last;
+    char *dup, *end;
+    PyObject *result;
+
+    if (strchr(s, '_') == NULL) {
+        return innerfunc(s, orig_len, arg);
+    }
+
+    dup = PyMem_Malloc(orig_len + 1);
+    if (dup == NULL) {
+        return PyErr_NoMemory();
+    }
+    end = dup;
+    prev = '\0';
+    last = s + orig_len;
+    for (p = s; *p; p++) {
+        if (*p == '_') {
+            /* Underscores are only allowed after digits. */
+            if (!(prev >= '0' && prev <= '9')) {
+                goto error;
+            }
+        }
+        else {
+            *end++ = *p;
+            /* Underscores are only allowed before digits. */
+            if (prev == '_' && !(*p >= '0' && *p <= '9')) {
+                goto error;
+            }
+        }
+        prev = *p;
+    }
+    /* Underscores are not allowed at the end. */
+    if (prev == '_') {
+        goto error;
+    }
+    /* No embedded NULs allowed. */
+    if (p != last) {
+        goto error;
+    }
+    *end = '\0';
+    result = innerfunc(dup, end - dup, arg);
+    PyMem_Free(dup);
+    return result;
+
+  error:
+    PyMem_Free(dup);
+    PyErr_Format(PyExc_ValueError,
+                 "could not convert string to %s: "
+                 "%R", what, obj);
+    return NULL;
+}
+
 #ifdef PY_NO_SHORT_FLOAT_REPR

 /* Given a string that may have a decimal point in the current