Commit 0c1a8b4d authored by Stefan Behnel

Implement PEP 515: allow underscores in number literals

https://www.python.org/dev/peps/pep-0515/
https://bugs.python.org/issue26331

Practically accepted as of 2016-03-19:
http://thread.gmane.org/gmane.comp.python.devel/156533/focus=156734
parent 87dc8089
...@@ -24,21 +24,25 @@ def make_lexicon(): ...@@ -24,21 +24,25 @@ def make_lexicon():
bindigit = Any("01") bindigit = Any("01")
octdigit = Any("01234567") octdigit = Any("01234567")
hexdigit = Any("0123456789ABCDEFabcdef") hexdigit = Any("0123456789ABCDEFabcdef")
allow_ = Rep(Str("_"))
indentation = Bol + Rep(Any(" \t")) indentation = Bol + Rep(Any(" \t"))
decimal = Rep1(digit) def underscore_digits(d):
return d + Rep(Str("_") | d)
decimal = underscore_digits(digit)
dot = Str(".") dot = Str(".")
exponent = Any("Ee") + Opt(Any("+-")) + decimal exponent = allow_ + Any("Ee") + Opt(Any("+-")) + decimal
decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal) decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal)
name = letter + Rep(letter | digit) name = letter + Rep(letter | digit)
intconst = decimal | (Str("0") + ((Any("Xx") + Rep1(hexdigit)) | intconst = decimal | (Str("0") + ((Any("Xx") + allow_ + underscore_digits(hexdigit)) |
(Any("Oo") + Rep1(octdigit)) | (Any("Oo") + allow_ + underscore_digits(octdigit)) |
(Any("Bb") + Rep1(bindigit)) )) (Any("Bb") + allow_ + underscore_digits(bindigit)) ))
intsuffix = (Opt(Any("Uu")) + Opt(Any("Ll")) + Opt(Any("Ll"))) | (Opt(Any("Ll")) + Opt(Any("Ll")) + Opt(Any("Uu"))) intsuffix = (Opt(Any("Uu")) + Opt(Any("Ll")) + Opt(Any("Ll"))) | (Opt(Any("Ll")) + Opt(Any("Ll")) + Opt(Any("Uu")))
intliteral = intconst + intsuffix intliteral = intconst + intsuffix
fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent) fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent)
imagconst = (intconst | fltconst) + Any("jJ") imagconst = (intconst | fltconst) + allow_ + Any("jJ")
beginstring = Opt(Any(string_prefixes) + Opt(Any(raw_prefixes)) | beginstring = Opt(Any(string_prefixes) + Opt(Any(raw_prefixes)) |
Any(raw_prefixes) + Opt(Any(bytes_prefixes)) | Any(raw_prefixes) + Opt(Any(bytes_prefixes)) |
...@@ -67,9 +71,9 @@ def make_lexicon(): ...@@ -67,9 +71,9 @@ def make_lexicon():
return Lexicon([ return Lexicon([
(name, IDENT), (name, IDENT),
(intliteral, 'INT'), (intliteral, Method('strip_underscores', symbol='INT')),
(fltconst, 'FLOAT'), (fltconst, Method('strip_underscores', symbol='FLOAT')),
(imagconst, 'IMAG'), (imagconst, Method('strip_underscores', symbol='IMAG')),
(punct | diphthong, TEXT), (punct | diphthong, TEXT),
(bra, Method('open_bracket_action')), (bra, Method('open_bracket_action')),
......
...@@ -53,12 +53,15 @@ pyx_reserved_words = py_reserved_words + [ ...@@ -53,12 +53,15 @@ pyx_reserved_words = py_reserved_words + [
class Method(object): class Method(object):
def __init__(self, name): def __init__(self, name, **kwargs):
self.name = name self.name = name
self.kwargs = kwargs or None
self.__name__ = name # for Plex tracing self.__name__ = name # for Plex tracing
def __call__(self, stream, text): def __call__(self, stream, text):
return getattr(stream, self.name)(text) method = getattr(stream, self.name)
# self.kwargs is almost always unused => avoid call overhead
return method(text, **self.kwargs) if self.kwargs is not None else method(text)
#------------------------------------------------------------------ #------------------------------------------------------------------
...@@ -340,6 +343,9 @@ class PyrexScanner(Scanner): ...@@ -340,6 +343,9 @@ class PyrexScanner(Scanner):
if self.parse_comments: if self.parse_comments:
self.produce('commentline', text) self.produce('commentline', text)
def strip_underscores(self, text, symbol):
    """Produce a ``symbol`` token whose text is ``text`` without underscores.

    Registered in the lexicon as the action for the INT, FLOAT and IMAG
    number literal patterns, with the token kind passed in as ``symbol``.
    """
    without_underscores = text.replace('_', '')
    self.produce(symbol, without_underscores)
def current_level(self): def current_level(self):
return self.indentation_stack[-1] return self.indentation_stack[-1]
......
# mode: run
# tag: syntax
"""
Uses TreeFragment to test that valid number literals are accepted and that invalid ones are rejected.
"""
from __future__ import absolute_import
from ...TestUtils import CythonTest
from ..Errors import CompileError
# Number literals containing underscores that the parser must accept.
# Copied from CPython's test_grammar.py
VALID_UNDERSCORE_LITERALS = [
    # Plain decimal integers:
    '0_0_0', '4_2', '4_______2', '1_0000_0000',
    # Integers with a base prefix (underscore allowed right after it):
    '0b_1001_0100', '0x_ffff_ffff', '0o_5_7_7',
    # Underscores before the decimal point:
    '1__.4',
    # Underscore before the imaginary suffix:
    '42_j', '1.4_j', '1.4e5_j',
    # Floats with fraction and/or exponent:
    '1_00_00_.5', '1_e10', '1_E10', '1_e1_0', '.1_4',
    # Trailing underscores:
    '0_', '42_', '0b1_', '0xf_', '0o5_',
]
# Number literals containing underscores that the parser must reject.
# Copied from CPython's test_grammar.py
INVALID_UNDERSCORE_LITERALS = [
    # Trailing underscores: none listed here; literals such as '0_' and
    # '42_' are in VALID_UNDERSCORE_LITERALS instead.

    # Underscores in the base selector:
    '0_b0', '0_xf', '0_o5',

    # Old-style octal, still disallowed (entries left disabled):
    #'0_7',
    #'09_99',

    # Underscore after non-digit:
    '1.4j_', '1.4e_1', '.1_4e_1', '1.0e+_1',
    '1._4', '1._4j', '1._4e5_j', '._5',
]
class TestGrammar(CythonTest):
    """Check handling of underscores in number literals.

    Every literal from the module-level lists is embedded in a handful of
    simple assignment statements, and each statement is passed to
    ``self.fragment()`` behind a ``language_level=3`` directive comment.

    NOTE(review): the checks use ``self.fail()`` / ``self.assertTrue()``
    rather than bare ``assert`` statements so that they are not stripped
    when running under ``python -O``.  This assumes CythonTest derives from
    ``unittest.TestCase`` (the module runs itself via ``unittest.main()``).
    """

    # Templates placing the literal alone and on either side of a binary
    # operator; shared by both tests.
    _EXPRESSION_TEMPLATES = ('%s', '1 + %s', '%s + 1', '2 * %s', '%s * 2')

    # Directive comment prepended to every generated code snippet.
    _DIRECTIVE_HEADER = u'# cython: language_level=3\n'

    def _assignments(self, literals):
        """Yield one ``x = <expression>`` statement per literal and template."""
        for literal in literals:
            for template in self._EXPRESSION_TEMPLATES:
                yield 'x = ' + template % literal

    def test_invalid_number_literals(self):
        """Each invalid literal must raise a CompileError quoting the code line."""
        for code in self._assignments(INVALID_UNDERSCORE_LITERALS):
            try:
                self.fragment(self._DIRECTIVE_HEADER + code)
            except CompileError as exc:
                message = str(exc)
                # The error report is expected to contain the offending
                # source line verbatim (ignoring surrounding whitespace).
                reported_lines = [line.strip() for line in message.splitlines()]
                self.assertTrue(code in reported_lines, message)
            else:
                self.fail("Invalid Cython code '%s' failed to raise an exception" % code)

    def test_valid_number_literals(self):
        """Each valid literal must produce a fragment without raising."""
        for code in self._assignments(VALID_UNDERSCORE_LITERALS):
            # Pass the snippet as the failure message so the failing literal
            # can be identified.
            self.assertTrue(
                self.fragment(self._DIRECTIVE_HEADER + code) is not None, code)
# Allow running this test module directly as a script.
if __name__ == "__main__":
    import unittest
    unittest.main()
# mode: run
# tag: syntax
from __future__ import absolute_import
cimport cython cimport cython
from cython cimport typeof from cython cimport typeof
import sys import sys
def valid_underscore_literals():
    """
    >>> valid_underscore_literals()
    """
    # Each assert compares a literal written with underscores against the
    # same value written without them.
    # Copied from CPython's test_grammar.py

    # Plain decimal integers.
    assert 0_0_0 == 0
    assert 4_2 == 42
    assert 4_______2 == 42
    assert 1_0000_0000 == 100000000
    # Integers with a base prefix, underscore directly after the prefix.
    assert 0b_1001_0100 == 0b10010100
    assert 0x_ffff_ffff == 0xffffffff
    assert 0o_5_7_7 == 0o577
    # Floats and imaginary literals.
    assert 1__.4 == 1.4
    assert 42_j == 42j
    assert 1.4_j == 1.4j
    assert 1.4e5_j == 1.4e5j
    assert 1_00_00_.5 == 10000.5
    assert 1_e10 == 1e10
    assert 1_E10 == 1E10
    assert 1_e1_0 == 1e10
    assert .1_4 == .14
    # Trailing underscores.
    assert 0_ == 0
    assert 42_ == 42
    assert 0b1_ == 0b1
    assert 0xf_ == 0xf
    assert 0o5_ == 0o5
@cython.test_assert_path_exists( @cython.test_assert_path_exists(
'//IntNode[@longness = "LL"]', '//IntNode[@longness = "LL"]',
'//IntNode[@longness = "L"]', '//IntNode[@longness = "L"]',
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment