Commit 50e67150 authored by Robert Bradshaw

Merge pull request #499 from cython/pep515_underscores_in_numbers

Implement PEP 515: allow underscores in number literals
parents fe3a65f7 0c03ca0d
...@@ -26,15 +26,18 @@ def make_lexicon(): ...@@ -26,15 +26,18 @@ def make_lexicon():
hexdigit = Any("0123456789ABCDEFabcdef") hexdigit = Any("0123456789ABCDEFabcdef")
indentation = Bol + Rep(Any(" \t")) indentation = Bol + Rep(Any(" \t"))
decimal = Rep1(digit) def underscore_digits(d):
return Rep1(d) + Rep(Str("_") + Rep1(d))
decimal = underscore_digits(digit)
dot = Str(".") dot = Str(".")
exponent = Any("Ee") + Opt(Any("+-")) + decimal exponent = Any("Ee") + Opt(Any("+-")) + decimal
decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal) decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal)
name = letter + Rep(letter | digit) name = letter + Rep(letter | digit)
intconst = decimal | (Str("0") + ((Any("Xx") + Rep1(hexdigit)) | intconst = decimal | (Str("0") + ((Any("Xx") + underscore_digits(hexdigit)) |
(Any("Oo") + Rep1(octdigit)) | (Any("Oo") + underscore_digits(octdigit)) |
(Any("Bb") + Rep1(bindigit)) )) (Any("Bb") + underscore_digits(bindigit)) ))
intsuffix = (Opt(Any("Uu")) + Opt(Any("Ll")) + Opt(Any("Ll"))) | (Opt(Any("Ll")) + Opt(Any("Ll")) + Opt(Any("Uu"))) intsuffix = (Opt(Any("Uu")) + Opt(Any("Ll")) + Opt(Any("Ll"))) | (Opt(Any("Ll")) + Opt(Any("Ll")) + Opt(Any("Uu")))
intliteral = intconst + intsuffix intliteral = intconst + intsuffix
fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent) fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent)
...@@ -67,9 +70,9 @@ def make_lexicon(): ...@@ -67,9 +70,9 @@ def make_lexicon():
return Lexicon([ return Lexicon([
(name, IDENT), (name, IDENT),
(intliteral, 'INT'), (intliteral, Method('strip_underscores', symbol='INT')),
(fltconst, 'FLOAT'), (fltconst, Method('strip_underscores', symbol='FLOAT')),
(imagconst, 'IMAG'), (imagconst, Method('strip_underscores', symbol='IMAG')),
(punct | diphthong, TEXT), (punct | diphthong, TEXT),
(bra, Method('open_bracket_action')), (bra, Method('open_bracket_action')),
......
...@@ -11,6 +11,7 @@ cdef initial_compile_time_env() ...@@ -11,6 +11,7 @@ cdef initial_compile_time_env()
cdef class Method: cdef class Method:
cdef object name cdef object name
cdef dict kwargs
cdef readonly object __name__ # for tracing the scanner cdef readonly object __name__ # for tracing the scanner
@cython.final @cython.final
......
...@@ -53,12 +53,15 @@ pyx_reserved_words = py_reserved_words + [ ...@@ -53,12 +53,15 @@ pyx_reserved_words = py_reserved_words + [
class Method(object): class Method(object):
def __init__(self, name): def __init__(self, name, **kwargs):
self.name = name self.name = name
self.kwargs = kwargs or None
self.__name__ = name # for Plex tracing self.__name__ = name # for Plex tracing
def __call__(self, stream, text): def __call__(self, stream, text):
return getattr(stream, self.name)(text) method = getattr(stream, self.name)
# self.kwargs is almost always unused => avoid call overhead
return method(text, **self.kwargs) if self.kwargs is not None else method(text)
#------------------------------------------------------------------ #------------------------------------------------------------------
...@@ -340,6 +343,9 @@ class PyrexScanner(Scanner): ...@@ -340,6 +343,9 @@ class PyrexScanner(Scanner):
if self.parse_comments: if self.parse_comments:
self.produce('commentline', text) self.produce('commentline', text)
def strip_underscores(self, text, symbol):
self.produce(symbol, text.replace('_', ''))
def current_level(self): def current_level(self):
return self.indentation_stack[-1] return self.indentation_stack[-1]
......
# mode: run
# tag: syntax
"""
Uses TreeFragment to test valid and invalid number literal syntax.
"""
from __future__ import absolute_import
from ...TestUtils import CythonTest
from ..Errors import CompileError
from .. import ExprNodes
# Number literals containing underscores that the parser is expected to accept.
# Each entry is substituted into several small expressions by
# TestGrammar.test_valid_number_literals().
# Copied from CPython's test_grammar.py
VALID_UNDERSCORE_LITERALS = [
    # Integers (decimal, binary, hexadecimal, octal):
    '0_0_0',
    '4_2',
    '1_0000_0000',
    '0b1001_0100',
    '0xffff_ffff',
    '0o5_7_7',
    # Floats and imaginary numbers, with and without exponent:
    '1_00_00.5',
    '1_00_00.5j',
    '1_00_00.5e5',
    '1_00_00j',
    '1_00_00e5_1',
    '1e1_0',
    # Literals starting with the decimal point:
    '.1_4',
    '.1_4e1',
    '.1_4j',
]
# Number-like source snippets with misplaced underscores (or whitespace) that
# must be rejected; TestGrammar.test_invalid_number_literals() expects a
# CompileError for every one of them.
# Copied from CPython's test_grammar.py
INVALID_UNDERSCORE_LITERALS = [
    # Trailing underscores:
    '0_',
    '42_',
    '1.4j_',
    '0b1_',
    '0xf_',
    '0o5_',
    # Underscores in the base selector:
    '0_b0',
    '0_xf',
    '0_o5',
    # Underscore right after the base selector:
    '0b_0',
    '0x_f',
    '0o_5',
    # Old-style octal, still disallowed:
    # NOTE(review): the two entries below are disabled; presumably the
    # parser does not reject them here - confirm before re-enabling.
    #'0_7',
    #'09_99',
    # Special case with exponent:
    '0 if 1_Else 1',
    # Underscore right before a dot:
    '1_.4',
    '1_.4j',
    # Underscore right after a dot:
    '1._4',
    '1._4j',
    '._5',
    # Underscore right after a sign:
    '1.0e+_1',
    # Multiple consecutive underscores:
    '4_______2',
    '0.1__4',
    '0b1001__0100',
    '0xffff__ffff',
    '0o5__77',
    '1e1__0',
    # Underscore right before j:
    '1.4_j',
    '1.4e5_j',
    # Underscore right before e:
    '1_e1',
    '1.4_e1',
    # Underscore right after e:
    '1e_1',
    '1.4e_1',
    # Whitespace in literals
    '1_ 2',
    '1 _2',
    '1_2.2_ 1',
    '1_2.2 _1',
    '1_2e _1',
    '1_2e2 _1',
    '1_2e 2_1',
]
class TestGrammar(CythonTest):
    """Parser tests for underscores in number literals (PEP 515).

    Every literal from the module-level lists is substituted into a few
    small expressions, assigned to ``x`` and parsed through
    ``self.fragment()`` with ``language_level=3``.
    """

    def _parse(self, code):
        """Parse *code* with the language level 3 directive prepended.

        Returns whatever ``self.fragment()`` returns; parse errors propagate
        to the caller.
        """
        return self.fragment(u'# cython: language_level=3\n' + code)

    def test_invalid_number_literals(self):
        """Each invalid literal must raise a CompileError that quotes the code."""
        for literal in INVALID_UNDERSCORE_LITERALS:
            for expression in ['%s', '1 + %s', '%s + 1', '2 * %s', '%s * 2']:
                code = 'x = ' + expression % literal
                try:
                    self._parse(code)
                except CompileError as exc:
                    # The offending source line must show up in the error text.
                    reported_lines = [s.strip() for s in str(exc).splitlines()]
                    assert code in reported_lines, str(exc)
                else:
                    # Raise explicitly: "assert False" is stripped under -O,
                    # which would let an accepted invalid literal go unnoticed.
                    raise AssertionError(
                        "Invalid Cython code '%s' failed to raise an exception" % code)

    def test_valid_number_literals(self):
        """Each valid literal must parse into the matching literal node type."""
        for literal in VALID_UNDERSCORE_LITERALS:
            for i, expression in enumerate(['%s', '1 + %s', '%s + 1', '2 * %s', '%s * 2']):
                code = 'x = ' + expression % literal
                node = self._parse(code).root
                assert node is not None, code

                literal_node = node.stats[0].rhs  # StatListNode([SingleAssignmentNode('x', expr)])
                if i > 0:
                    # Add/MulNode() -> literal is first or second operand
                    # (odd template index: literal on the right-hand side).
                    literal_node = literal_node.operand2 if i % 2 else literal_node.operand1

                is_hex = '0x' in literal or '0X' in literal
                has_exponent = 'e' in literal or 'E' in literal
                if 'j' in literal or 'J' in literal:
                    expected_type = ExprNodes.ImagNode
                elif '.' in literal or (has_exponent and not is_hex):
                    # 'e'/'E' is an ordinary digit in hex literals, so it only
                    # marks a float outside of them.  The parentheses matter:
                    # "and" binds tighter than "or".
                    expected_type = ExprNodes.FloatNode
                else:
                    expected_type = ExprNodes.IntNode
                assert isinstance(literal_node, expected_type), (code, literal_node)
# Allow running this test module directly as a script.
if __name__ == "__main__":
    # unittest is only needed for direct execution, hence the local import.
    import unittest
    unittest.main()
...@@ -2100,3 +2100,4 @@ if __name__ == '__main__': ...@@ -2100,3 +2100,4 @@ if __name__ == '__main__':
except PendingThreadsError: except PendingThreadsError:
# normal program exit won't kill the threads, do it the hard way here # normal program exit won't kill the threads, do it the hard way here
flush_and_terminate(1) flush_and_terminate(1)
sys.exit(1)
# mode: run
# tag: syntax
from __future__ import absolute_import
cimport cython cimport cython
from cython cimport typeof from cython cimport typeof
import sys import sys
def valid_underscore_literals():
    """
    >>> valid_underscore_literals()
    """
    # Each assertion compares a literal written with underscores against the
    # same value written without them.
    # Copied from CPython's test_grammar.py
    # Integers in all bases:
    assert 0_0_0 == 0
    assert 4_2 == 42
    assert 1_0000_0000 == 100000000
    assert 0b1001_0100 == 0b10010100
    assert 0xffff_ffff == 0xffffffff
    assert 0o5_7_7 == 0o577
    # Floats, with underscores in the integer part, fraction and exponent:
    assert 1_00_00.5 == 10000.5
    assert 1_00_00.5e5 == 10000.5e5
    assert 1_00_00e5_1 == 10000e51
    assert 1e1_0 == 1e10
    assert .1_4 == .14
    assert .1_4e1 == .14e1
    # Imaginary literals:
    assert 1_00_00.5j == 10000.5j
    assert 1_00_00j == 10000j
    assert .1_4j == .14j
@cython.test_assert_path_exists( @cython.test_assert_path_exists(
'//IntNode[@longness = "LL"]', '//IntNode[@longness = "LL"]',
'//IntNode[@longness = "L"]', '//IntNode[@longness = "L"]',
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment