Commit ce7c8f08 authored by jbrockmendel, committed by Stefan Behnel

CLN: implement setup.cfg, lint cleanup of Plex directory (#2827)

implement setup.cfg, lint cleanup of Plex directory
parent f34c5b13
# cython: language_level=3str
# cython: auto_pickle=False
#=======================================================================
#
# Python Lexical Analyser
#
# Actions for use in token specifications
#
#=======================================================================
"""
Python Lexical Analyser
Actions for use in token specifications
"""
class Action(object):
def perform(self, token_stream, text):
@@ -88,7 +87,6 @@ class Ignore(Action):
IGNORE = Ignore()
#IGNORE.__doc__ = Ignore.__doc__
class Text(Action):
@@ -106,6 +104,3 @@ class Text(Action):
TEXT = Text()
#TEXT.__doc__ = Text.__doc__
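For context, TEXT and IGNORE are singleton Action instances: TEXT returns the matched text as the token value, IGNORE produces no token at all. A minimal sketch of that pattern (simplified names; the real Plex classes carry extra repr machinery):

class ActionSketch(object):
    # Base protocol: perform() receives the scanner and the matched text.
    def perform(self, token_stream, text):
        raise NotImplementedError

class TextSketch(ActionSketch):
    def perform(self, token_stream, text):
        return text    # token value is the matched text itself

class IgnoreSketch(ActionSketch):
    def perform(self, token_stream, text):
        return None    # swallow the match; no token is produced

TEXT_SKETCH = TextSketch()
IGNORE_SKETCH = IgnoreSketch()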
#=======================================================================
#
# Python Lexical Analyser
#
# Converting NFA to DFA
#
#=======================================================================
"""
Python Lexical Analyser
Converting NFA to DFA
"""
from __future__ import absolute_import
from . import Machines
@@ -29,12 +26,14 @@ def nfa_to_dfa(old_machine, debug=None):
# is reached.
new_machine = Machines.FastMachine()
state_map = StateMap(new_machine)
# Seed the process using the initial states of the old machine.
# Make the corresponding new states into initial states of the new
# machine with the same names.
for (key, old_state) in old_machine.initial_states.items():
new_state = state_map.old_to_new(epsilon_closure(old_state))
new_machine.make_initial_state(key, new_state)
# Tricky bit here: we add things to the end of this list while we're
# iterating over it. The iteration stops when closure is achieved.
for new_state in new_machine.states:
@@ -45,6 +44,7 @@ def nfa_to_dfa(old_machine, debug=None):
transitions.add_set(event, set_epsilon_closure(old_target_states))
for event, old_states in transitions.items():
new_machine.add_transitions(new_state, event, state_map.old_to_new(old_states))
if debug:
debug.write("\n===== State Mapping =====\n")
state_map.dump(debug)
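The "tricky bit" comment is the heart of the subset construction: the worklist of DFA states grows while it is being processed, and the loop terminates once no new state sets appear. A self-contained sketch of the same algorithm, assuming a toy NFA encoded as a dict of dicts with '' marking epsilon edges (not Plex's Machines API):

def epsilon_closure(states, nfa):
    # All states reachable from `states` via '' (epsilon) edges.
    closure = set(states)
    stack = list(states)
    while stack:
        s = stack.pop()
        for t in nfa.get(s, {}).get('', ()):
            if t not in closure:
                closure.add(t)
                stack.append(t)
    return frozenset(closure)

def subset_construction(nfa, start):
    # Each DFA state is a frozenset of NFA states; the worklist grows
    # as unseen subsets are discovered, until closure is achieved.
    start_set = epsilon_closure({start}, nfa)
    dfa, worklist = {start_set: {}}, [start_set]
    while worklist:
        current = worklist.pop()
        events = {e for s in current for e in nfa.get(s, {}) if e != ''}
        for event in events:
            targets = {t for s in current for t in nfa.get(s, {}).get(event, ())}
            new_set = epsilon_closure(targets, nfa)
            dfa[current][event] = new_set
            if new_set not in dfa:
                dfa[new_set] = {}
                worklist.append(new_set)
    return dfa, start_set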
@@ -119,8 +119,6 @@ class StateMap(object):
new_state = self.new_machine.new_state(action)
self.old_to_new_dict[key] = new_state
self.new_to_old_dict[id(new_state)] = old_state_set
#for old_state in old_state_set.keys():
#new_state.merge_actions(old_state)
return new_state
def highest_priority_action(self, state_set):
@@ -133,13 +131,6 @@ class StateMap(object):
best_priority = priority
return best_action
# def old_to_new_set(self, old_state_set):
# """
# Return the new state corresponding to a set of old states as
# a singleton set.
# """
# return {self.old_to_new(old_state_set):1}
def new_to_old(self, new_state):
"""Given a new state, return a set of corresponding old states."""
return self.new_to_old_dict[id(new_state)]
@@ -160,5 +151,3 @@ class StateMap(object):
old_state_set = self.new_to_old_dict[id(new_state)]
file.write(" State %s <-- %s\n" % (
new_state['number'], state_set_str(old_state_set)))
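StateMap's job is memoization: each distinct set of old NFA states must map to exactly one new DFA state. A sketch of that shape using a frozenset key (the real class derives a hashable key from the state set and allocates genuine machine states):

class StateMapSketch(object):
    def __init__(self):
        self.old_to_new_dict = {}   # frozen old-state-set -> new state
        self.new_to_old_dict = {}   # id(new state) -> old state set
        self.next_number = 0

    def old_to_new(self, old_state_set):
        key = frozenset(old_state_set)
        new_state = self.old_to_new_dict.get(key)
        if new_state is None:
            self.next_number += 1
            new_state = {'number': self.next_number}  # stand-in DFA state
            self.old_to_new_dict[key] = new_state
            self.new_to_old_dict[id(new_state)] = old_state_set
        return new_state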
#=======================================================================
#
# Python Lexical Analyser
#
# Exception classes
#
#=======================================================================
"""
Python Lexical Analyser
Exception classes
"""
class PlexError(Exception):
...
#=======================================================================
#
# Python Lexical Analyser
#
# Lexical Analyser Specification
#
#=======================================================================
"""
Python Lexical Analyser
Lexical Analyser Specification
"""
from __future__ import absolute_import
import types
from . import Actions
from . import DFA
from . import Errors
@@ -122,9 +117,11 @@ class Lexicon(object):
total_time = 0.0
time1 = time()
nfa = Machines.Machine()
default_initial_state = nfa.new_initial_state('')
token_number = 1
for spec in specifications:
if isinstance(spec, State):
user_initial_state = nfa.new_initial_state(spec.name)
@@ -140,6 +137,7 @@ class Lexicon(object):
raise Errors.InvalidToken(
token_number,
"Expected a token definition (tuple) or State instance")
if timings:
time2 = time()
total_time = total_time + (time2 - time1)
@@ -147,6 +145,7 @@ class Lexicon(object):
if debug and (debug_flags & 1):
debug.write("\n============= NFA ===========\n")
nfa.dump(debug)
dfa = DFA.nfa_to_dfa(nfa, debug=(debug_flags & 3) == 3 and debug)
if timings:
time4 = time()
@@ -158,15 +157,12 @@ class Lexicon(object):
timings.write("Constructing NFA : %5.2f\n" % (time2 - time1))
timings.write("Converting to DFA: %5.2f\n" % (time4 - time3))
timings.write("TOTAL : %5.2f\n" % total_time)
self.machine = dfa
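A hypothetical usage sketch of the constructor above, assuming the usual Plex exports; an action may be an Actions.Action instance, a callable, or any other plain value, which is simply returned as the token:

from Cython.Plex import Lexicon, State, Rep1, Any, AnyBut, Str, IGNORE

lexicon = Lexicon([
    (Rep1(Any("0123456789")), 'number'),  # plain value: returned as the token
    (Any(" \t\n"), IGNORE),               # whitespace produces no token
    State('comment', [                    # registers a named initial state
        (AnyBut('\n'), IGNORE),
        (Str('\n'), IGNORE),
    ]),
])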
def add_token_to_machine(self, machine, initial_state, token_spec, token_number):
try:
(re, action_spec) = self.parse_token_definition(token_spec)
# Disabled this -- matching empty strings can be useful
#if re.nullable:
# raise Errors.InvalidToken(
# token_number, "Pattern can match 0 input symbols")
if isinstance(action_spec, Actions.Action):
action = action_spec
else:
@@ -188,6 +184,7 @@ class Lexicon(object):
raise Errors.InvalidToken("Token definition is not a tuple")
if len(token_spec) != 2:
raise Errors.InvalidToken("Wrong number of items in token definition")
pattern, action = token_spec
if not isinstance(pattern, Regexps.RE):
raise Errors.InvalidToken("Pattern is not an RE instance")
@@ -195,6 +192,3 @@ class Lexicon(object):
def get_initial_state(self, name):
return self.machine.get_initial_state(name)
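get_initial_state() is what connects State(...) specifications to scanning: a Begin(name) action switches the running scanner to the initial state registered under that name, and Begin('') returns to the default state created under the name '' above. A hypothetical sketch continuing the earlier example:

from Cython.Plex import State, Begin, Str, AnyChar, IGNORE

comment_state = State('comment', [
    (Str("*/"), Begin('')),   # leave the comment, back to the default state
    (AnyChar, IGNORE),        # discard everything else inside it
])
enter_comment = (Str("/*"), Begin('comment'))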
#=======================================================================
#
# Python Lexical Analyser
#
# Classes for building NFAs and DFAs
#
#=======================================================================
"""
Python Lexical Analyser
Classes for building NFAs and DFAs
"""
from __future__ import absolute_import
import sys
from .Transitions import TransitionMap
try:
@@ -36,7 +31,6 @@ class Machine(object):
self.initial_states = {}
def __del__(self):
#print "Destroying", self ###
for state in self.states:
state.destroy()
@@ -81,12 +75,10 @@ class Node(object):
def __init__(self):
# Preinitialise the list of empty transitions, because
# the nfa-to-dfa algorithm needs it
#self.transitions = {'':[]}
self.transitions = TransitionMap()
self.action_priority = LOWEST_PRIORITY
def destroy(self):
#print "Destroying", self ###
self.transitions = None
self.action = None
self.epsilon_closure = None
...
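The removed #self.transitions = {'':[]} comment shows the old dict representation; the preinitialised epsilon entry lets NFA fragments be glued together with zero-width edges. A toy sketch of that idea (plain dicts rather than Plex's TransitionMap):

class NodeSketch(object):
    def __init__(self):
        self.transitions = {'': []}   # '' = epsilon edges, preinitialised

    def add_transition(self, event, target):
        self.transitions.setdefault(event, []).append(target)

    def link_to(self, other):
        # Zero-width edge used when composing machines for Seq/Alt/Rep.
        self.add_transition('', other)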
#=======================================================================
#
# Python Lexical Analyser
#
# Regular Expressions
#
#=======================================================================
"""
Python Lexical Analyser
Regular Expressions
"""
from __future__ import absolute_import
import types
@@ -186,37 +183,6 @@ class RE(object):
# These are the basic REs from which all others are built.
#
## class Char(RE):
## """
## Char(c) is an RE which matches the character |c|.
## """
## nullable = 0
## def __init__(self, char):
## self.char = char
## self.match_nl = char == '\n'
## def build_machine(self, m, initial_state, final_state, match_bol, nocase):
## c = self.char
## if match_bol and c != BOL:
## s1 = self.build_opt(m, initial_state, BOL)
## else:
## s1 = initial_state
## if c == '\n' or c == EOF:
## s1 = self.build_opt(m, s1, EOL)
## if len(c) == 1:
## code = ord(self.char)
## s1.add_transition((code, code+1), final_state)
## if nocase and is_letter_code(code):
## code2 = other_case_code(code)
## s1.add_transition((code2, code2+1), final_state)
## else:
## s1.add_transition(c, final_state)
## def calc_str(self):
## return "Char(%s)" % repr(self.char)
def Char(c):
"""
@@ -428,6 +394,7 @@ class SwitchCase(RE):
name = "Case"
return "%s(%s)" % (name, self.re)
#
# Composite RE constructors
# -------------------------
@@ -469,7 +436,6 @@ def Any(s):
"""
Any(s) is an RE which matches any character in the string |s|.
"""
#result = apply(Alt, tuple(map(Char, s)))
result = CodeRanges(chars_to_ranges(s))
result.str = "Any(%s)" % repr(s)
return result
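Any() relies on chars_to_ranges to turn a character set into code ranges. A sketch of that idea, assuming half-open (code, code + 1) ranges like those fed to add_transition in the removed Char class above:

def chars_to_ranges_sketch(s):
    # Sort the distinct character codes, then coalesce runs of consecutive
    # codes into half-open (lo, hi) ranges.
    codes = sorted(ord(c) for c in set(s))
    ranges = []
    for code in codes:
        if ranges and ranges[-1][1] == code:
            ranges[-1] = (ranges[-1][0], code + 1)  # extend the current run
        else:
            ranges.append((code, code + 1))
    return ranges

# chars_to_ranges_sketch("abcz") -> [(97, 100), (122, 123)]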
@@ -549,6 +515,7 @@ def Case(re):
"""
return SwitchCase(re, nocase=0)
#
# RE Constants
#
@@ -573,4 +540,3 @@ Eof.__doc__ = \
Eof is an RE which matches the end of the file.
"""
Eof.str = "Eof"
# cython: language_level=3str
# cython: auto_pickle=False
#=======================================================================
#
# Python Lexical Analyser
#
#
# Scanning an input stream
#
#=======================================================================
"""
Python Lexical Analyser
Scanning an input stream
"""
from __future__ import absolute_import
import cython
-cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object)
+cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object) # noqa:E402
from . import Errors
from .Regexps import BOL, EOL, EOF
@@ -174,26 +170,28 @@ class Scanner(object):
buf_len = len(buffer)
b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
None, 0, 0, 0, u'', 0, 0
trace = self.trace
while 1:
-if trace: #TRACE#
-print("State %d, %d/%d:%s -->" % ( #TRACE#
-state['number'], input_state, cur_pos, repr(cur_char))) #TRACE#
+if trace:
+print("State %d, %d/%d:%s -->" % (
+state['number'], input_state, cur_pos, repr(cur_char)))
# Begin inlined self.save_for_backup()
-#action = state.action #@slow
-action = state['action'] #@fast
+action = state['action']
if action is not None:
b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos
# End inlined self.save_for_backup()
c = cur_char
-#new_state = state.new_state(c) #@slow
-new_state = state.get(c, NOT_FOUND) #@fast
-if new_state is NOT_FOUND: #@fast
-new_state = c and state.get('else') #@fast
+new_state = state.get(c, NOT_FOUND)
+if new_state is NOT_FOUND:
+new_state = c and state.get('else')
if new_state:
-if trace: #TRACE#
-print("State %d" % new_state['number']) #TRACE#
+if trace:
+print("State %d" % new_state['number'])
state = new_state
# Begin inlined: self.next_char()
if input_state == 1:
@@ -241,8 +239,8 @@ class Scanner(object):
cur_char = u''
# End inlined self.next_char()
else: # not new_state
-if trace: #TRACE#
-print("blocked") #TRACE#
+if trace:
+print("blocked")
# Begin inlined: action = self.back_up()
if b_action is not None:
(action, cur_pos, cur_line, cur_line_start,
@@ -253,15 +251,16 @@ class Scanner(object):
action = None
break # while 1
# End inlined: action = self.back_up()
self.cur_pos = cur_pos
self.cur_line = cur_line
self.cur_line_start = cur_line_start
self.cur_char = cur_char
self.input_state = input_state
self.next_pos = next_pos
-if trace: #TRACE#
-if action is not None: #TRACE#
-print("Doing %s" % action) #TRACE#
+if trace:
+if action is not None:
+print("Doing %s" % action)
return action
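The inlined save_for_backup/back_up pair implements maximal munch: remember the most recent accepting position, keep consuming input, and fall back to that position when the machine blocks. A self-contained sketch over a toy dict-based DFA (not Plex's state encoding):

def longest_match_sketch(dfa, start, text):
    state, pos = start, 0
    last_accept = None                    # (action, chars consumed) so far
    while state is not None and pos <= len(text):
        action = dfa[state].get('action')
        if action is not None:
            last_accept = (action, pos)   # the inlined save_for_backup
        ch = text[pos] if pos < len(text) else ''
        state = dfa[state].get(ch)        # None means the machine blocked
        pos += 1
    if last_accept is None:
        raise ValueError("unrecognized input")
    return last_accept                    # the inlined back_up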
def next_char(self):
@@ -307,7 +306,8 @@ class Scanner(object):
return (self.name, self.start_line, self.start_col)
def get_position(self):
"""Python accessible wrapper around position(), only for error reporting.
"""
Python accessible wrapper around position(), only for error reporting.
"""
return self.position()
@@ -337,3 +337,4 @@ class Scanner(object):
Override this method if you want something to be done at
end of file.
"""
pass
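A hypothetical driver loop for the class, assuming the classic Plex convention that read() returns a (value, text) pair with a None value at end of file; `lexicon` is the sketch from the Lexicons section above:

from io import StringIO
from Cython.Plex import Scanner

scanner = Scanner(lexicon, StringIO(u"12 34"), name='<test>')
while True:
    value, text = scanner.read()
    if value is None:          # end of file reached
        break
    print(value, repr(text))   # e.g. number '12', then number '34'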
#
# Get time in platform-dependent way
#
"""
Get time in platform-dependent way
"""
from __future__ import absolute_import
@@ -9,14 +9,19 @@ from sys import platform, exit, stderr
if platform == 'mac':
import MacOS
def time():
return MacOS.GetTicks() / 60.0
timekind = "real"
elif hasattr(os, 'times'):
def time():
t = os.times()
return t[0] + t[1]
timekind = "cpu"
else:
stderr.write(
"Don't know how to get time on platform %s\n" % repr(platform))
...
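On modern Python the same fallback chain collapses into the standard library; a hedged equivalent (time.process_time exists on Python 3.3+):

import time as _time

# Prefer CPU time where the platform provides it, else wall-clock time.
clock = getattr(_time, 'process_time', _time.time)
timekind = "cpu" if hasattr(_time, 'process_time') else "real"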
#=======================================================================
#
# Python Lexical Analyser
#
# Traditional Regular Expression Syntax
#
#=======================================================================
"""
Python Lexical Analyser
Traditional Regular Expression Syntax
"""
from __future__ import absolute_import
from .Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char
@@ -51,7 +48,7 @@ class REParser(object):
def parse_seq(self):
"""Parse a sequence of regexps."""
re_list = []
-while not self.end and not self.c in "|)":
+while not self.end and self.c not in "|)":
re_list.append(self.parse_mod())
return Seq(*re_list)
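parse_seq sits in the middle of a classic recursive-descent precedence tower: alternation binds loosest, concatenation next, postfix repetition tightest, so "ab|c*" parses as Alt(Seq(a, b), Rep(c)). A sketch of the alternation level, written as a free function over the parser's c/end lookahead used above (the next() advance method is an assumption here):

from Cython.Plex.Regexps import Alt

def parse_alt(parser):
    # One or more '|'-separated sequences folded into Alt nodes.
    re = parser.parse_seq()
    while not parser.end and parser.c == '|':
        parser.next()                    # consume the '|'
        re = Alt(re, parser.parse_seq())
    return re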
@@ -108,6 +105,7 @@ class REParser(object):
char_list.append(chr(a))
else:
char_list.append(c1)
chars = ''.join(char_list)
if invert:
return AnyBut(chars)
@@ -153,6 +151,3 @@ class REParser(object):
"""Raise exception to signal syntax error in regexp."""
raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
repr(self.s), self.i, mess))
#
# Plex - Transition Maps
#
# This version represents state sets directly as dicts for speed.
#
"""
Plex - Transition Maps
This version represents state sets directly as dicts for speed.
"""
from __future__ import absolute_import
try:
@@ -50,7 +49,6 @@ class TransitionMap(object):
special = {}
self.map = map
self.special = special
#self.check() ###
def add(self, event, new_state,
TupleType=tuple):
@@ -84,12 +82,11 @@ class TransitionMap(object):
else:
self.get_special(event).update(new_set)
-def get_epsilon(self,
-none=None):
+def get_epsilon(self):
"""
Return the mapping for epsilon, or None.
"""
-return self.special.get('', none)
+return self.special.get('')
def iteritems(self,
len=len):
@@ -132,6 +129,7 @@ class TransitionMap(object):
# Special case: code == map[-1]
if code == maxint:
return hi
# General case
lo = 0
# loop invariant: map[lo] <= code < map[hi] and hi - lo >= 2
@@ -147,7 +145,6 @@ class TransitionMap(object):
return lo
else:
map[hi:hi] = [code, map[hi - 1].copy()]
#self.check() ###
return hi
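The binary search above finds the range containing a character code in a sorted list of boundaries; the loop invariant quoted in the comment is what makes it correct. A self-contained sketch (Plex interleaves boundary codes with target dicts in a single list; here the boundaries are separated out):

def split_index(bounds, code):
    # Precondition: bounds is sorted and bounds[0] <= code < bounds[-1].
    lo, hi = 0, len(bounds) - 1
    # Invariant: bounds[lo] <= code < bounds[hi].
    while hi - lo >= 2:
        mid = (lo + hi) // 2
        if bounds[mid] <= code:
            lo = mid
        else:
            hi = mid
    return lo   # bounds[lo] <= code < bounds[lo + 1]

# split_index([0, 10, 20, 30], 15) -> 1 (code 15 falls in [10, 20))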
def get_special(self, event):
@@ -243,9 +240,5 @@ class TransitionMap(object):
# State set manipulation functions
#
#def merge_state_sets(set1, set2):
# for state in set2.keys():
# set1[state] = 1
def state_set_str(set):
return "[%s]" % ','.join(["S%d" % state.number for state in set])
#=======================================================================
#
# Python Lexical Analyser
#
#=======================================================================
"""
Python Lexical Analyser
The Plex module provides lexical analysers with similar capabilities
to GNU Flex. The following classes and functions are exported;
see the attached docstrings for more information.
@@ -29,7 +25,7 @@ see the attached docstrings for more information.
Actions for associating with patterns when
creating a Lexicon.
"""
# flake8: noqa:F401
from __future__ import absolute_import
from .Actions import TEXT, IGNORE, Begin
...
[flake8]
exclude = .git,build,__pycache__
max-complexity = 10
max-line-length = 120
ignore =
W504,
# W504 line break after binary operator
S001,
# S001 found module formatter