Commit c3361b9a authored by Raymond Hettinger

merge

parents bbeac6eb c566431b
@@ -1333,7 +1333,7 @@ successive matches::
 
     Token = collections.namedtuple('Token', ['typ', 'value', 'line', 'column'])
 
-    def tokenize(s):
+    def tokenize(code):
         keywords = {'IF', 'THEN', 'ENDIF', 'FOR', 'NEXT', 'GOSUB', 'RETURN'}
         token_specification = [
             ('NUMBER',  r'\d+(\.\d*)?'),  # Integer or decimal number
@@ -1343,26 +1343,27 @@ successive matches::
             ('OP',      r'[+\-*/]'),      # Arithmetic operators
             ('NEWLINE', r'\n'),           # Line endings
             ('SKIP',    r'[ \t]+'),       # Skip over spaces and tabs
+            ('MISMATCH',r'.'),            # Any other character
         ]
         tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)
         get_token = re.compile(tok_regex).match
-        line = 1
-        pos = line_start = 0
-        mo = get_token(s)
-        while mo is not None:
-            typ = mo.lastgroup
-            if typ == 'NEWLINE':
-                line_start = pos
-                line += 1
-            elif typ != 'SKIP':
-                val = mo.group(typ)
-                if typ == 'ID' and val in keywords:
-                    typ = val
-                yield Token(typ, val, line, mo.start()-line_start)
-            pos = mo.end()
-            mo = get_token(s, pos)
-        if pos != len(s):
-            raise RuntimeError('Unexpected character %r on line %d' %(s[pos], line))
+        line_num = 1
+        line_start = 0
+        for mo in re.finditer(tok_regex, code):
+            kind = mo.lastgroup
+            value = mo.group(kind)
+            if kind == 'NEWLINE':
+                line_start = mo.end()
+                line_num += 1
+            elif kind == 'SKIP':
+                pass
+            elif kind == 'MISMATCH':
+                raise RuntimeError('%r unexpected on line %d' % (value, line_num))
+            else:
+                if kind == 'ID' and value in keywords:
+                    kind = value
+                column = mo.start() - line_start
+                yield Token(kind, value, line_num, column)
 
     statements = '''
         IF quantity THEN
@@ -1376,22 +1377,22 @@ successive matches::
 
 The tokenizer produces the following output::
 
-    Token(typ='IF', value='IF', line=2, column=5)
-    Token(typ='ID', value='quantity', line=2, column=8)
-    Token(typ='THEN', value='THEN', line=2, column=17)
-    Token(typ='ID', value='total', line=3, column=9)
-    Token(typ='ASSIGN', value=':=', line=3, column=15)
-    Token(typ='ID', value='total', line=3, column=18)
-    Token(typ='OP', value='+', line=3, column=24)
-    Token(typ='ID', value='price', line=3, column=26)
-    Token(typ='OP', value='*', line=3, column=32)
-    Token(typ='ID', value='quantity', line=3, column=34)
-    Token(typ='END', value=';', line=3, column=42)
-    Token(typ='ID', value='tax', line=4, column=9)
-    Token(typ='ASSIGN', value=':=', line=4, column=13)
-    Token(typ='ID', value='price', line=4, column=16)
-    Token(typ='OP', value='*', line=4, column=22)
-    Token(typ='NUMBER', value='0.05', line=4, column=24)
-    Token(typ='END', value=';', line=4, column=28)
-    Token(typ='ENDIF', value='ENDIF', line=5, column=5)
-    Token(typ='END', value=';', line=5, column=10)
+    Token(typ='IF', value='IF', line=2, column=4)
+    Token(typ='ID', value='quantity', line=2, column=7)
+    Token(typ='THEN', value='THEN', line=2, column=16)
+    Token(typ='ID', value='total', line=3, column=8)
+    Token(typ='ASSIGN', value=':=', line=3, column=14)
+    Token(typ='ID', value='total', line=3, column=17)
+    Token(typ='OP', value='+', line=3, column=23)
+    Token(typ='ID', value='price', line=3, column=25)
+    Token(typ='OP', value='*', line=3, column=31)
+    Token(typ='ID', value='quantity', line=3, column=33)
+    Token(typ='END', value=';', line=3, column=41)
+    Token(typ='ID', value='tax', line=4, column=8)
+    Token(typ='ASSIGN', value=':=', line=4, column=12)
+    Token(typ='ID', value='price', line=4, column=15)
+    Token(typ='OP', value='*', line=4, column=21)
+    Token(typ='NUMBER', value='0.05', line=4, column=23)
+    Token(typ='END', value=';', line=4, column=27)
+    Token(typ='ENDIF', value='ENDIF', line=5, column=4)
+    Token(typ='END', value=';', line=5, column=9)
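
The rewritten example scans the whole input with a single re.finditer() pass over one alternation of named groups, uses match.lastgroup to tell which token kind matched, and reports stray characters through a catch-all MISMATCH group instead of the old check that the match position reached the end of the string. Because line_start is now taken from mo.end() of the NEWLINE match rather than the position of the newline character itself, every expected column in the last hunk drops by one. The following is a minimal standalone sketch, not part of the commit, of the finditer()/lastgroup mechanism; the shortened token list and input string are illustrative only::

    import re

    # Illustrative subset of the token specification from the documentation example.
    token_specification = [
        ('NUMBER',   r'\d+(\.\d*)?'),  # Integer or decimal number
        ('OP',       r'[+\-*/]'),      # Arithmetic operator
        ('SKIP',     r'[ \t]+'),       # Whitespace to skip
        ('MISMATCH', r'.'),            # Any other character
    ]
    tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)

    for mo in re.finditer(tok_regex, '3 + 4.5 * 2'):
        # mo.lastgroup names the alternative that matched, e.g. 'NUMBER' or 'OP'.
        print(mo.lastgroup, repr(mo.group()))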