Commit ce7c8f08 authored by jbrockmendel, committed by Stefan Behnel

CLN: implement setup.cfg, lint cleanup of Plex directory (#2827)

implement setup.cfg, lint cleanup of Plex directory
parent f34c5b13
# cython: language_level=3str
# cython: auto_pickle=False

-#=======================================================================
-#
-#   Python Lexical Analyser
-#
-#   Actions for use in token specifications
-#
-#=======================================================================
+"""
+Python Lexical Analyser
+
+Actions for use in token specifications
+"""

class Action(object):
    def perform(self, token_stream, text):

@@ -88,7 +87,6 @@ class Ignore(Action):
IGNORE = Ignore()
-#IGNORE.__doc__ = Ignore.__doc__

class Text(Action):

@@ -106,6 +104,3 @@ class Text(Action):
TEXT = Text()
-#TEXT.__doc__ = Text.__doc__
-#=======================================================================
-#
-#   Python Lexical Analyser
-#
-#   Converting NFA to DFA
-#
-#=======================================================================
+"""
+Python Lexical Analyser
+
+Converting NFA to DFA
+"""
from __future__ import absolute_import

from . import Machines

@@ -29,12 +26,14 @@ def nfa_to_dfa(old_machine, debug=None):
    # is reached.
    new_machine = Machines.FastMachine()
    state_map = StateMap(new_machine)

    # Seed the process using the initial states of the old machine.
    # Make the corresponding new states into initial states of the new
    # machine with the same names.
    for (key, old_state) in old_machine.initial_states.items():
        new_state = state_map.old_to_new(epsilon_closure(old_state))
        new_machine.make_initial_state(key, new_state)

    # Tricky bit here: we add things to the end of this list while we're
    # iterating over it. The iteration stops when closure is achieved.
    for new_state in new_machine.states:

@@ -45,6 +44,7 @@ def nfa_to_dfa(old_machine, debug=None):
            transitions.add_set(event, set_epsilon_closure(old_target_states))
        for event, old_states in transitions.items():
            new_machine.add_transitions(new_state, event, state_map.old_to_new(old_states))

    if debug:
        debug.write("\n===== State Mapping =====\n")
        state_map.dump(debug)
@@ -119,8 +119,6 @@ class StateMap(object):
        new_state = self.new_machine.new_state(action)
        self.old_to_new_dict[key] = new_state
        self.new_to_old_dict[id(new_state)] = old_state_set
-        #for old_state in old_state_set.keys():
-        #    new_state.merge_actions(old_state)
        return new_state

    def highest_priority_action(self, state_set):

@@ -133,13 +131,6 @@ class StateMap(object):
                best_priority = priority
        return best_action

-    # def old_to_new_set(self, old_state_set):
-    #     """
-    #     Return the new state corresponding to a set of old states as
-    #     a singleton set.
-    #     """
-    #     return {self.old_to_new(old_state_set):1}

    def new_to_old(self, new_state):
        """Given a new state, return a set of corresponding old states."""
        return self.new_to_old_dict[id(new_state)]

@@ -160,5 +151,3 @@ class StateMap(object):
            old_state_set = self.new_to_old_dict[id(new_state)]
            file.write(" State %s <-- %s\n" % (
                new_state['number'], state_set_str(old_state_set)))
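The comments in this hunk describe the standard subset construction: seed the new machine with the epsilon-closures of the old initial states, then walk a worklist that grows while it is being iterated. A minimal, self-contained sketch of that idea (plain dicts and sets rather than Plex's Machines/StateMap classes, so purely illustrative):

    # Illustration only: a generic subset construction over a toy NFA encoding,
    # not Plex's own data structures.  The NFA is {state: {symbol: set_of_states}},
    # with '' used for epsilon transitions.
    def epsilon_closure(states, nfa):
        """All states reachable from `states` through epsilon ('') moves."""
        closure, stack = set(states), list(states)
        while stack:
            for target in nfa.get(stack.pop(), {}).get('', ()):
                if target not in closure:
                    closure.add(target)
                    stack.append(target)
        return frozenset(closure)

    def subset_construction(nfa, initial):
        start = epsilon_closure({initial}, nfa)
        dfa = {}
        # Same "tricky bit" as above: we append to the list while iterating
        # over it; the loop stops once no unprocessed state sets remain.
        worklist = [start]
        for state_set in worklist:
            moves = {}
            for state in state_set:
                for symbol, targets in nfa.get(state, {}).items():
                    if symbol != '':
                        moves.setdefault(symbol, set()).update(targets)
            dfa[state_set] = {}
            for symbol, targets in moves.items():
                new_set = epsilon_closure(targets, nfa)
                if new_set not in worklist:
                    worklist.append(new_set)
                dfa[state_set][symbol] = new_set
        return start, dfa

Plex's real version additionally has to decide which action each merged state set carries, which is what StateMap.highest_priority_action resolves.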
-#=======================================================================
-#
-#   Python Lexical Analyser
-#
-#   Exception classes
-#
-#=======================================================================
+"""
+Python Lexical Analyser
+
+Exception classes
+"""

class PlexError(Exception):

...
-#=======================================================================
-#
-#   Python Lexical Analyser
-#
-#   Lexical Analyser Specification
-#
-#=======================================================================
+"""
+Python Lexical Analyser
+
+Lexical Analyser Specification
+"""
from __future__ import absolute_import

-import types

from . import Actions
from . import DFA
from . import Errors

@@ -122,9 +117,11 @@ class Lexicon(object):
            total_time = 0.0
            time1 = time()
        nfa = Machines.Machine()
        default_initial_state = nfa.new_initial_state('')
        token_number = 1

        for spec in specifications:
            if isinstance(spec, State):
                user_initial_state = nfa.new_initial_state(spec.name)

@@ -140,6 +137,7 @@ class Lexicon(object):
                raise Errors.InvalidToken(
                    token_number,
                    "Expected a token definition (tuple) or State instance")

        if timings:
            time2 = time()
            total_time = total_time + (time2 - time1)

@@ -147,6 +145,7 @@ class Lexicon(object):
        if debug and (debug_flags & 1):
            debug.write("\n============= NFA ===========\n")
            nfa.dump(debug)

        dfa = DFA.nfa_to_dfa(nfa, debug=(debug_flags & 3) == 3 and debug)
        if timings:
            time4 = time()

@@ -158,15 +157,12 @@ class Lexicon(object):
            timings.write("Constructing NFA : %5.2f\n" % (time2 - time1))
            timings.write("Converting to DFA: %5.2f\n" % (time4 - time3))
            timings.write("TOTAL : %5.2f\n" % total_time)

        self.machine = dfa

    def add_token_to_machine(self, machine, initial_state, token_spec, token_number):
        try:
            (re, action_spec) = self.parse_token_definition(token_spec)
-            # Disabled this -- matching empty strings can be useful
-            #if re.nullable:
-            #    raise Errors.InvalidToken(
-            #        token_number, "Pattern can match 0 input symbols")
            if isinstance(action_spec, Actions.Action):
                action = action_spec
            else:

@@ -188,6 +184,7 @@ class Lexicon(object):
            raise Errors.InvalidToken("Token definition is not a tuple")
        if len(token_spec) != 2:
            raise Errors.InvalidToken("Wrong number of items in token definition")

        pattern, action = token_spec
        if not isinstance(pattern, Regexps.RE):
            raise Errors.InvalidToken("Pattern is not an RE instance")

@@ -195,6 +192,3 @@ class Lexicon(object):
    def get_initial_state(self, name):
        return self.machine.get_initial_state(name)
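For reference, the tuples validated by parse_token_definition() are ordinary Plex token specifications: an RE pattern paired with an action. A small usage sketch follows; the imports assume the names exported by the package __init__ shown near the end of this diff (Lexicon, Scanner, Str, Any, Rep1, IGNORE), which is an assumption about the public API rather than part of this commit:

    # Hypothetical usage sketch; the (pattern, action) tuples are exactly what
    # parse_token_definition() checks above.
    from io import StringIO

    from Cython.Plex import Lexicon, Scanner, Str, Any, Rep1, IGNORE  # assumed exports

    lexicon = Lexicon([
        (Rep1(Any("0123456789")), 'number'),    # the action may be a value to return,
        (Str("+", "-"), 'operator'),
        (Rep1(Any(" \t\n")), IGNORE),           # or a predefined Action such as IGNORE
    ])

    scanner = Scanner(lexicon, StringIO(u"12 + 34"), "<input>")
    while True:
        value, text = scanner.read()            # returns (None, '') at end of input
        if value is None:
            break
        print(value, repr(text))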
-#=======================================================================
-#
-#   Python Lexical Analyser
-#
-#   Classes for building NFAs and DFAs
-#
-#=======================================================================
+"""
+Python Lexical Analyser
+
+Classes for building NFAs and DFAs
+"""
from __future__ import absolute_import

-import sys

from .Transitions import TransitionMap

try:

@@ -36,7 +31,6 @@ class Machine(object):
        self.initial_states = {}

    def __del__(self):
-        #print "Destroying", self ###
        for state in self.states:
            state.destroy()

@@ -81,12 +75,10 @@ class Node(object):
    def __init__(self):
        # Preinitialise the list of empty transitions, because
        # the nfa-to-dfa algorithm needs it
-        #self.transitions = {'':[]}
        self.transitions = TransitionMap()
        self.action_priority = LOWEST_PRIORITY

    def destroy(self):
-        #print "Destroying", self ###
        self.transitions = None
        self.action = None
        self.epsilon_closure = None

...
-#=======================================================================
-#
-#   Python Lexical Analyser
-#
-#   Regular Expressions
-#
-#=======================================================================
+"""
+Python Lexical Analyser
+
+Regular Expressions
+"""
from __future__ import absolute_import

import types

@@ -186,37 +183,6 @@ class RE(object):
# These are the basic REs from which all others are built.
#

-## class Char(RE):
-##     """
-##     Char(c) is an RE which matches the character |c|.
-##     """
-##     nullable = 0
-##     def __init__(self, char):
-##         self.char = char
-##         self.match_nl = char == '\n'
-##     def build_machine(self, m, initial_state, final_state, match_bol, nocase):
-##         c = self.char
-##         if match_bol and c != BOL:
-##             s1 = self.build_opt(m, initial_state, BOL)
-##         else:
-##             s1 = initial_state
-##         if c == '\n' or c == EOF:
-##             s1 = self.build_opt(m, s1, EOL)
-##         if len(c) == 1:
-##             code = ord(self.char)
-##             s1.add_transition((code, code+1), final_state)
-##             if nocase and is_letter_code(code):
-##                 code2 = other_case_code(code)
-##                 s1.add_transition((code2, code2+1), final_state)
-##         else:
-##             s1.add_transition(c, final_state)
-##     def calc_str(self):
-##         return "Char(%s)" % repr(self.char)

def Char(c):
    """
@@ -428,6 +394,7 @@ class SwitchCase(RE):
            name = "Case"
        return "%s(%s)" % (name, self.re)

#
# Composite RE constructors
# -------------------------

@@ -469,7 +436,6 @@ def Any(s):
    """
    Any(s) is an RE which matches any character in the string |s|.
    """
-    #result = apply(Alt, tuple(map(Char, s)))
    result = CodeRanges(chars_to_ranges(s))
    result.str = "Any(%s)" % repr(s)
    return result

@@ -549,6 +515,7 @@ def Case(re):
    """
    return SwitchCase(re, nocase=0)

#
# RE Constants
#

@@ -573,4 +540,3 @@ Eof.__doc__ = \
    Eof is an RE which matches the end of the file.
    """
Eof.str = "Eof"
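Any(s) above is built as CodeRanges(chars_to_ranges(s)). As a rough illustration of what such a helper has to produce, namely a flat list of half-open character-code ranges (a sketch, not necessarily Plex's exact implementation):

    def chars_to_ranges(s):
        """Return [lo1, hi1, lo2, hi2, ...], half-open code ranges covering
        exactly the characters of s."""
        codes = sorted(set(map(ord, s)))
        ranges = []
        i = 0
        while i < len(codes):
            lo = hi = codes[i]
            i += 1
            while i < len(codes) and codes[i] == hi + 1:
                hi = codes[i]
                i += 1
            ranges.extend([lo, hi + 1])  # half-open: [lo, hi + 1)
        return ranges

    assert chars_to_ranges("abcx") == [ord('a'), ord('d'), ord('x'), ord('y')]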
# cython: language_level=3str
# cython: auto_pickle=False

-#=======================================================================
-#
-#   Python Lexical Analyser
-#
-#   Scanning an input stream
-#
-#=======================================================================
+"""
+Python Lexical Analyser
+
+Scanning an input stream
+"""
from __future__ import absolute_import

import cython
-cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object)
+cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object)  # noqa:E402

from . import Errors
from .Regexps import BOL, EOL, EOF

@@ -174,26 +170,28 @@ class Scanner(object):
        buf_len = len(buffer)
        b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
            None, 0, 0, 0, u'', 0, 0

        trace = self.trace
        while 1:
-            if trace:  #TRACE#
-                print("State %d, %d/%d:%s -->" % (  #TRACE#
-                    state['number'], input_state, cur_pos, repr(cur_char)))  #TRACE#
+            if trace:
+                print("State %d, %d/%d:%s -->" % (
+                    state['number'], input_state, cur_pos, repr(cur_char)))

            # Begin inlined self.save_for_backup()
-            #action = state.action        #@slow
-            action = state['action']      #@fast
+            action = state['action']
            if action is not None:
                b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
                    action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos
            # End inlined self.save_for_backup()

            c = cur_char
-            #new_state = state.new_state(c)            #@slow
-            new_state = state.get(c, NOT_FOUND)        #@fast
-            if new_state is NOT_FOUND:                 #@fast
-                new_state = c and state.get('else')    #@fast
+            new_state = state.get(c, NOT_FOUND)
+            if new_state is NOT_FOUND:
+                new_state = c and state.get('else')

            if new_state:
-                if trace:  #TRACE#
-                    print("State %d" % new_state['number'])  #TRACE#
+                if trace:
+                    print("State %d" % new_state['number'])
                state = new_state
                # Begin inlined: self.next_char()
                if input_state == 1:
@@ -241,8 +239,8 @@ class Scanner(object):
                    cur_char = u''
                # End inlined self.next_char()
            else:  # not new_state
-                if trace:  #TRACE#
-                    print("blocked")  #TRACE#
+                if trace:
+                    print("blocked")
                # Begin inlined: action = self.back_up()
                if b_action is not None:
                    (action, cur_pos, cur_line, cur_line_start,

@@ -253,15 +251,16 @@ class Scanner(object):
                    action = None
                    break  # while 1
                # End inlined: action = self.back_up()

        self.cur_pos = cur_pos
        self.cur_line = cur_line
        self.cur_line_start = cur_line_start
        self.cur_char = cur_char
        self.input_state = input_state
        self.next_pos = next_pos

-        if trace:  #TRACE#
-            if action is not None:  #TRACE#
-                print("Doing %s" % action)  #TRACE#
+        if trace:
+            if action is not None:
+                print("Doing %s" % action)
        return action

    def next_char(self):

@@ -307,7 +306,8 @@ class Scanner(object):
        return (self.name, self.start_line, self.start_col)

    def get_position(self):
-        """Python accessible wrapper around position(), only for error reporting.
+        """
+        Python accessible wrapper around position(), only for error reporting.
        """
        return self.position()

@@ -337,3 +337,4 @@ class Scanner(object):
        Override this method if you want something to be done at
        end of file.
        """
+        pass
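The inlined run loop above implements longest-match scanning with backup: whenever the machine passes through a state that carries an action, the current position is saved in the b_* variables, and when the machine blocks it falls back to that saved point. Stripped of the inlining and of Plex's state objects, the idea is roughly this (simplified sketch, not the Scanner API):

    def longest_match(dfa, start, text):
        """dfa maps each state to a dict with an optional 'action' key plus
        single-character transitions; purely illustrative."""
        state, pos = start, 0
        best_action, best_pos = None, 0        # the "b_*" backup variables
        while True:
            action = dfa[state].get('action')
            if action is not None:             # inlined save_for_backup()
                best_action, best_pos = action, pos
            if pos >= len(text):
                break
            new_state = dfa[state].get(text[pos])
            if new_state is None:              # blocked: inlined back_up()
                break
            state, pos = new_state, pos + 1
        return best_action, text[:best_pos]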
# """
# Get time in platform-dependent way Get time in platform-dependent way
# """
from __future__ import absolute_import from __future__ import absolute_import
...@@ -9,14 +9,19 @@ from sys import platform, exit, stderr ...@@ -9,14 +9,19 @@ from sys import platform, exit, stderr
if platform == 'mac': if platform == 'mac':
import MacOS import MacOS
def time(): def time():
return MacOS.GetTicks() / 60.0 return MacOS.GetTicks() / 60.0
timekind = "real" timekind = "real"
elif hasattr(os, 'times'): elif hasattr(os, 'times'):
def time(): def time():
t = os.times() t = os.times()
return t[0] + t[1] return t[0] + t[1]
timekind = "cpu" timekind = "cpu"
else: else:
stderr.write( stderr.write(
"Don't know how to get time on platform %s\n" % repr(platform)) "Don't know how to get time on platform %s\n" % repr(platform))
......
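(Aside, not part of the commit: on current Python the "cpu" branch above, os.times()[0] + os.times()[1], can also be obtained portably from the stdlib.)

    import time

    def cpu_time():
        # Sum of the user and system CPU time of the current process.
        return time.process_time()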
-#=======================================================================
-#
-#   Python Lexical Analyser
-#
-#   Traditional Regular Expression Syntax
-#
-#=======================================================================
+"""
+Python Lexical Analyser
+
+Traditional Regular Expression Syntax
+"""
from __future__ import absolute_import

from .Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char

@@ -51,7 +48,7 @@ class REParser(object):
    def parse_seq(self):
        """Parse a sequence of regexps."""
        re_list = []
-        while not self.end and not self.c in "|)":
+        while not self.end and self.c not in "|)":
            re_list.append(self.parse_mod())
        return Seq(*re_list)

@@ -108,6 +105,7 @@ class REParser(object):
                char_list.append(chr(a))
            else:
                char_list.append(c1)

        chars = ''.join(char_list)
        if invert:
            return AnyBut(chars)

@@ -153,6 +151,3 @@ class REParser(object):
        """Raise exception to signal syntax error in regexp."""
        raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
            repr(self.s), self.i, mess))
# """
# Plex - Transition Maps Plex - Transition Maps
#
# This version represents state sets directly as dicts for speed.
#
This version represents state sets directly as dicts for speed.
"""
from __future__ import absolute_import

try:

@@ -50,7 +49,6 @@ class TransitionMap(object):
            special = {}
        self.map = map
        self.special = special
-        #self.check() ###

    def add(self, event, new_state,
            TupleType=tuple):

@@ -84,12 +82,11 @@ class TransitionMap(object):
        else:
            self.get_special(event).update(new_set)

-    def get_epsilon(self,
-                    none=None):
+    def get_epsilon(self):
        """
        Return the mapping for epsilon, or None.
        """
-        return self.special.get('', none)
+        return self.special.get('')

    def iteritems(self,
                  len=len):

@@ -132,6 +129,7 @@ class TransitionMap(object):
        # Special case: code == map[-1]
        if code == maxint:
            return hi

        # General case
        lo = 0
        # loop invariant: map[lo] <= code < map[hi] and hi - lo >= 2

@@ -147,7 +145,6 @@ class TransitionMap(object):
            return lo
        else:
            map[hi:hi] = [code, map[hi - 1].copy()]
-            #self.check() ###
            return hi

    def get_special(self, event):

@@ -243,9 +240,5 @@ class TransitionMap(object):
# State set manipulation functions
#

-#def merge_state_sets(set1, set2):
-#    for state in set2.keys():
-#        set1[state] = 1

def state_set_str(set):
    return "[%s]" % ','.join(["S%d" % state.number for state in set])
-#=======================================================================
-#
-#   Python Lexical Analyser
-#
-#=======================================================================
"""
+Python Lexical Analyser
+
The Plex module provides lexical analysers with similar capabilities
to GNU Flex. The following classes and functions are exported;
see the attached docstrings for more information.

@@ -29,7 +25,7 @@ see the attached docstrings for more information.
    Actions for associating with patterns when
    creating a Lexicon.
"""

+# flake8: noqa:F401
from __future__ import absolute_import

from .Actions import TEXT, IGNORE, Begin

...
+[flake8]
+exclude = .git,build,__pycache__
+max-complexity = 10
+max-line-length = 120
+ignore =
+    W504,
+    # W504 line break after binary operator
+    S001,
+    # S001 found module formatter