Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
f7c54042
Commit
f7c54042
authored
May 23, 2011
by
Raymond Hettinger
Browse files
Options
Browse Files
Download
Plain Diff
Clean-up example.
parents
fb7ac269
90b898b9
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
41 additions
and
31 deletions
+41
-31
Doc/library/re.rst
Doc/library/re.rst
+41
-31
No files found.
Doc/library/re.rst
View file @
f7c54042
...
...
@@ -1301,24 +1301,27 @@ The text categories are specified with regular expressions. The technique is
to combine those into a single master regular expression and to loop over
successive matches::
-    Token = collections.namedtuple('Token', 'typ value line column')
+    import collections
+    import re
+
+    Token = collections.namedtuple('Token', ['typ', 'value', 'line', 'column'])
def tokenize(s):
-    keywords = {'IF', 'THEN', 'FOR', 'NEXT', 'GOSUB', 'RETURN'}
-    tok_spec = [
+    keywords = {'IF', 'THEN', 'ENDIF', 'FOR', 'NEXT', 'GOSUB', 'RETURN'}
+    token_specification = [
('NUMBER', r'\d+(\.\d*)?'), # Integer or decimal number
('ASSIGN', r':='), # Assignment operator
-        ('END', ';'),            # Statement terminator
+        ('END', r';'),           # Statement terminator
('ID', r'[A-Za-z]+'), # Identifiers
('OP', r'[+*\/\-]'), # Arithmetic operators
('NEWLINE', r'\n'), # Line endings
('SKIP', r'[ \t]'), # Skip over spaces and tabs
]
-    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
-    gettok = re.compile(tok_re).match
+    tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)
+    get_token = re.compile(tok_regex).match
line = 1
pos = line_start = 0
-    mo = gettok(s)
+    mo = get_token(s)
while mo is not None:
typ = mo.lastgroup
if typ == 'NEWLINE':
...
...
@@ -1330,13 +1333,15 @@ successive matches::
typ = val
yield Token(typ, val, line, mo.start()-line_start)
pos = mo.end()
-        mo = gettok(s, pos)
+        mo = get_token(s, pos)
if pos != len(s):
raise RuntimeError('Unexpected character %r on line %d' %(s[pos], line))
-    statements = '''\
+    statements = '''
+        IF quantity THEN
             total := total + price * quantity;
             tax := price * 0.05;
+        ENDIF;
     '''
for token in tokenize(statements):
...
...
@@ -1344,17 +1349,22 @@ successive matches::
The tokenizer produces the following output::
Token(typ='ID', value='total', line=1, column=8)
Token(typ='ASSIGN', value=':=', line=1, column=14)
Token(typ='ID', value='total', line=1, column=17)
Token(typ='OP', value='+', line=1, column=23)
Token(typ='ID', value='price', line=1, column=25)
Token(typ='OP', value='*', line=1, column=31)
Token(typ='ID', value='quantity', line=1, column=33)
Token(typ='END', value=';', line=1, column=41)
Token(typ='ID', value='tax', line=2, column=9)
Token(typ='ASSIGN', value=':=', line=2, column=13)
Token(typ='ID', value='price', line=2, column=16)
Token(typ='OP', value='*', line=2, column=22)
Token(typ='NUMBER', value='0.05', line=2, column=24)
Token(typ='END', value=';', line=2, column=28)
Token(typ='IF', value='IF', line=2, column=5)
Token(typ='ID', value='quantity', line=2, column=8)
Token(typ='THEN', value='THEN', line=2, column=17)
Token(typ='ID', value='total', line=3, column=9)
Token(typ='ASSIGN', value=':=', line=3, column=15)
Token(typ='ID', value='total', line=3, column=18)
Token(typ='OP', value='+', line=3, column=24)
Token(typ='ID', value='price', line=3, column=26)
Token(typ='OP', value='*', line=3, column=32)
Token(typ='ID', value='quantity', line=3, column=34)
Token(typ='END', value=';', line=3, column=42)
Token(typ='ID', value='tax', line=4, column=9)
Token(typ='ASSIGN', value=':=', line=4, column=13)
Token(typ='ID', value='price', line=4, column=16)
Token(typ='OP', value='*', line=4, column=22)
Token(typ='NUMBER', value='0.05', line=4, column=24)
Token(typ='END', value=';', line=4, column=28)
Token(typ='ENDIF', value='ENDIF', line=5, column=5)
Token(typ='END', value=';', line=5, column=10)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment