Commit 5607fabd authored by Stefan Behnel

reformat Plex code files

parent 727e57d9
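The hunks below are style-only cleanups: spaces around `=` are dropped for keyword and default arguments, `x = x + 1` patterns become augmented assignments, and top-level definitions gain separating blank lines. As a minimal before/after sketch of these patterns (the function names here are illustrative, not taken from the Plex sources):

# Illustrative sketch only -- 'advance' and 'skip' are made-up names, not Plex code.

# Before: spaced '=' in defaults, explicit re-assignment, no blank lines between defs.
def advance(pos, step = 1):
    pos = pos + step
    return pos
def skip(pos):
    return advance(pos, step = 2)

# After: PEP 8 keyword/default spacing, augmented assignment, two blank lines between defs.
def advance(pos, step=1):
    pos += step
    return pos


def skip(pos):
    return advance(pos, step=2)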
......@@ -7,7 +7,6 @@
#=======================================================================
class Action(object):
def perform(self, token_stream, text):
pass # abstract
......@@ -78,15 +77,18 @@ class Ignore(Action):
to be ignored. See the docstring of Plex.Lexicon for more
information.
"""
def perform(self, token_stream, text):
return None
def __repr__(self):
return "IGNORE"
IGNORE = Ignore()
#IGNORE.__doc__ = Ignore.__doc__
class Text(Action):
"""
TEXT is a Plex action which causes the text of a token to
......@@ -100,6 +102,7 @@ class Text(Action):
def __repr__(self):
return "TEXT"
TEXT = Text()
#TEXT.__doc__ = Text.__doc__
......@@ -13,7 +13,7 @@ from .Machines import LOWEST_PRIORITY
from .Transitions import TransitionMap
def nfa_to_dfa(old_machine, debug = None):
def nfa_to_dfa(old_machine, debug=None):
"""
Given a nondeterministic Machine, return a new equivalent
Machine which is deterministic.
......@@ -50,6 +50,7 @@ def nfa_to_dfa(old_machine, debug = None):
state_map.dump(debug)
return new_machine
def set_epsilon_closure(state_set):
"""
Given a set of states, return the union of the epsilon
......@@ -61,6 +62,7 @@ def set_epsilon_closure(state_set):
result[state2] = 1
return result
def epsilon_closure(state):
"""
Return the set of states reachable from the given state
......@@ -74,6 +76,7 @@ def epsilon_closure(state):
add_to_epsilon_closure(result, state)
return result
def add_to_epsilon_closure(state_set, state):
"""
Recursively add to |state_set| states reachable from the given state
......@@ -86,6 +89,7 @@ def add_to_epsilon_closure(state_set, state):
for state2 in state_set_2:
add_to_epsilon_closure(state_set, state2)
class StateMap(object):
"""
Helper class used by nfa_to_dfa() to map back and forth between
......@@ -98,7 +102,7 @@ class StateMap(object):
def __init__(self, new_machine):
self.new_machine = new_machine
self.old_to_new_dict = {}
self.new_to_old_dict= {}
self.new_to_old_dict = {}
def old_to_new(self, old_state_set):
"""
......@@ -129,12 +133,12 @@ class StateMap(object):
best_priority = priority
return best_action
# def old_to_new_set(self, old_state_set):
# """
# Return the new state corresponding to a set of old states as
# a singleton set.
# """
# return {self.old_to_new(old_state_set):1}
# def old_to_new_set(self, old_state_set):
# """
# Return the new state corresponding to a set of old states as
# a singleton set.
# """
# return {self.old_to_new(old_state_set):1}
def new_to_old(self, new_state):
"""Given a new state, return a set of corresponding old states."""
......@@ -151,6 +155,7 @@ class StateMap(object):
def dump(self, file):
from .Transitions import state_set_str
for new_state in self.new_machine.states:
old_state_set = self.new_to_old_dict[id(new_state)]
file.write(" State %s <-- %s\n" % (
......@@ -6,32 +6,39 @@
#
#=======================================================================
class PlexError(Exception):
message = ""
class PlexTypeError(PlexError, TypeError):
pass
class PlexValueError(PlexError, ValueError):
pass
class InvalidRegex(PlexError):
pass
class InvalidToken(PlexError):
class InvalidToken(PlexError):
def __init__(self, token_number, message):
PlexError.__init__(self, "Token number %d: %s" % (token_number, message))
class InvalidScanner(PlexError):
pass
class AmbiguousAction(PlexError):
message = "Two tokens with different actions can match the same string"
def __init__(self):
pass
class UnrecognizedInput(PlexError):
scanner = None
position = None
......@@ -43,8 +50,5 @@ class UnrecognizedInput(PlexError):
self.state_name = state_name
def __str__(self):
return ("'%s', line %d, char %d: Token not recognised in state %s"
% (self.position + (repr(self.state_name),)))
return ("'%s', line %d, char %d: Token not recognised in state %s" % (
self.position + (repr(self.state_name),)))
......@@ -38,6 +38,7 @@ class State(object):
self.name = name
self.tokens = tokens
class Lexicon(object):
"""
Lexicon(specification) builds a lexical analyser from the given
......@@ -113,11 +114,12 @@ class Lexicon(object):
machine = None # Machine
tables = None # StateTableMachine
def __init__(self, specifications, debug = None, debug_flags = 7, timings = None):
def __init__(self, specifications, debug=None, debug_flags=7, timings=None):
if type(specifications) != types.ListType:
raise Errors.InvalidScanner("Scanner definition is not a list")
if timings:
from .Timing import time
total_time = 0.0
time1 = time()
nfa = Machines.Machine()
......@@ -129,11 +131,11 @@ class Lexicon(object):
for token in spec.tokens:
self.add_token_to_machine(
nfa, user_initial_state, token, token_number)
token_number = token_number + 1
token_number += 1
elif type(spec) == types.TupleType:
self.add_token_to_machine(
nfa, default_initial_state, spec, token_number)
token_number = token_number + 1
token_number += 1
else:
raise Errors.InvalidToken(
token_number,
......@@ -145,7 +147,7 @@ class Lexicon(object):
if debug and (debug_flags & 1):
debug.write("\n============= NFA ===========\n")
nfa.dump(debug)
dfa = DFA.nfa_to_dfa(nfa, debug = (debug_flags & 3) == 3 and debug)
dfa = DFA.nfa_to_dfa(nfa, debug=(debug_flags & 3) == 3 and debug)
if timings:
time4 = time()
total_time = total_time + (time4 - time3)
......@@ -176,8 +178,8 @@ class Lexicon(object):
action = Actions.Call(action_spec)
final_state = machine.new_state()
re.build_machine(machine, initial_state, final_state,
match_bol = 1, nocase = 0)
final_state.set_action(action, priority = -token_number)
match_bol=1, nocase=0)
final_state.set_action(action, priority=-token_number)
except Errors.PlexError, e:
raise e.__class__("Token number %d: %s" % (token_number, e))
......@@ -59,6 +59,7 @@ class Machine(object):
for s in self.states:
s.dump(file)
class Node(object):
"""A state of an NFA or DFA."""
transitions = None # TransitionMap
......@@ -111,7 +112,7 @@ class Node(object):
# Header
file.write(" State %d:\n" % self.number)
# Transitions
# self.dump_transitions(file)
# self.dump_transitions(file)
self.transitions.dump(file)
# Action
action = self.action
......@@ -122,21 +123,21 @@ class Node(object):
def __lt__(self, other):
return self.number < other.number
class FastMachine(object):
"""
FastMachine is a deterministic machine represented in a way that
allows fast scanning.
"""
initial_states = None # {state_name:state}
states = None # [state]
# where state = {event:state, 'else':state, 'action':Action}
states = None # [state] where state = {event:state, 'else':state, 'action':Action}
next_number = 1 # for debugging
new_state_template = {
'':None, 'bol':None, 'eol':None, 'eof':None, 'else':None
'': None, 'bol': None, 'eol': None, 'eof': None, 'else': None
}
def __init__(self, old_machine = None):
def __init__(self, old_machine=None):
self.initial_states = initial_states = {}
self.states = []
if old_machine:
......@@ -159,7 +160,7 @@ class FastMachine(object):
for state in self.states:
state.clear()
def new_state(self, action = None):
def new_state(self, action=None):
number = self.next_number
self.next_number = number + 1
result = self.new_state_template.copy()
......@@ -179,7 +180,7 @@ class FastMachine(object):
elif code1 != maxint:
while code0 < code1:
state[unichr(code0)] = new_state
code0 = code0 + 1
code0 += 1
else:
state[event] = new_state
......@@ -241,10 +242,10 @@ class FastMachine(object):
while i < n:
c1 = ord(char_list[i])
c2 = c1
i = i + 1
i += 1
while i < n and ord(char_list[i]) == c2 + 1:
i = i + 1
c2 = c2 + 1
i += 1
c2 += 1
result.append((chr(c1), chr(c2)))
return tuple(result)
......@@ -42,14 +42,15 @@ def chars_to_ranges(s):
while i < n:
code1 = ord(char_list[i])
code2 = code1 + 1
i = i + 1
i += 1
while i < n and code2 >= ord(char_list[i]):
code2 = code2 + 1
i = i + 1
code2 += 1
i += 1
result.append(code1)
result.append(code2)
return result
def uppercase_range(code1, code2):
"""
If the range of characters from code1 to code2-1 includes any
......@@ -63,6 +64,7 @@ def uppercase_range(code1, code2):
else:
return None
def lowercase_range(code1, code2):
"""
If the range of characters from code1 to code2-1 includes any
......@@ -76,6 +78,7 @@ def lowercase_range(code1, code2):
else:
return None
def CodeRanges(code_list):
"""
Given a list of codes as returned by chars_to_ranges, return
......@@ -86,6 +89,7 @@ def CodeRanges(code_list):
re_list.append(CodeRange(code_list[i], code_list[i + 1]))
return Alt(*re_list)
def CodeRange(code1, code2):
"""
CodeRange(code1, code2) is an RE which matches any character
......@@ -98,6 +102,7 @@ def CodeRange(code1, code2):
else:
return RawCodeRange(code1, code2)
#
# Abstract classes
#
......@@ -211,6 +216,7 @@ class RE(object):
## def calc_str(self):
## return "Char(%s)" % repr(self.char)
def Char(c):
"""
Char(c) is an RE which matches the character |c|.
......@@ -222,6 +228,7 @@ def Char(c):
result.str = "Char(%s)" % repr(c)
return result
class RawCodeRange(RE):
"""
RawCodeRange(code1, code2) is a low-level RE which matches any character
......@@ -252,6 +259,7 @@ class RawCodeRange(RE):
def calc_str(self):
return "CodeRange(%d,%d)" % (self.code1, self.code2)
class _RawNewline(RE):
"""
RawNewline is a low-level RE which matches a newline character.
......@@ -266,6 +274,7 @@ class _RawNewline(RE):
s = self.build_opt(m, initial_state, EOL)
s.add_transition((nl_code, nl_code + 1), final_state)
RawNewline = _RawNewline()
......@@ -304,7 +313,7 @@ class Seq(RE):
i = len(re_list)
match_nl = 0
while i:
i = i - 1
i -= 1
re = re_list[i]
if re.match_nl:
match_nl = 1
......@@ -354,7 +363,7 @@ class Alt(RE):
non_nullable_res.append(re)
if re.match_nl:
match_nl = 1
i = i + 1
i += 1
self.nullable_res = nullable_res
self.non_nullable_res = non_nullable_res
self.nullable = nullable
......@@ -434,6 +443,7 @@ Empty.__doc__ = \
"""
Empty.str = "Empty"
def Str1(s):
"""
Str1(s) is an RE which matches the literal string |s|.
......@@ -442,6 +452,7 @@ def Str1(s):
result.str = "Str(%s)" % repr(s)
return result
def Str(*strs):
"""
Str(s) is an RE which matches the literal string |s|.
......@@ -454,6 +465,7 @@ def Str(*strs):
result.str = "Str(%s)" % ','.join(map(repr, strs))
return result
def Any(s):
"""
Any(s) is an RE which matches any character in the string |s|.
......@@ -463,6 +475,7 @@ def Any(s):
result.str = "Any(%s)" % repr(s)
return result
def AnyBut(s):
"""
AnyBut(s) is an RE which matches any character (including
......@@ -475,6 +488,7 @@ def AnyBut(s):
result.str = "AnyBut(%s)" % repr(s)
return result
AnyChar = AnyBut("")
AnyChar.__doc__ = \
"""
......@@ -482,7 +496,8 @@ AnyChar.__doc__ = \
"""
AnyChar.str = "AnyChar"
def Range(s1, s2 = None):
def Range(s1, s2=None):
"""
Range(c1, c2) is an RE which matches any single character in the range
|c1| to |c2| inclusive.
......@@ -495,11 +510,12 @@ def Range(s1, s2 = None):
else:
ranges = []
for i in range(0, len(s1), 2):
ranges.append(CodeRange(ord(s1[i]), ord(s1[i+1]) + 1))
ranges.append(CodeRange(ord(s1[i]), ord(s1[i + 1]) + 1))
result = Alt(*ranges)
result.str = "Range(%s)" % repr(s1)
return result
def Opt(re):
"""
Opt(re) is an RE which matches either |re| or the empty string.
......@@ -508,6 +524,7 @@ def Opt(re):
result.str = "Opt(%s)" % re
return result
def Rep(re):
"""
Rep(re) is an RE which matches zero or more repetitions of |re|.
......@@ -516,12 +533,14 @@ def Rep(re):
result.str = "Rep(%s)" % re
return result
def NoCase(re):
"""
NoCase(re) is an RE which matches the same strings as RE, but treating
upper and lower case letters as equivalent.
"""
return SwitchCase(re, nocase = 1)
return SwitchCase(re, nocase=1)
def Case(re):
"""
......@@ -529,7 +548,7 @@ def Case(re):
upper and lower case letters as distinct, i.e. it cancels the effect
of any enclosing NoCase().
"""
return SwitchCase(re, nocase = 0)
return SwitchCase(re, nocase=0)
#
# RE Constants
......@@ -10,6 +10,7 @@
from __future__ import absolute_import
import cython
cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object)
from . import Errors
......@@ -50,25 +51,25 @@ class Scanner(object):
"""
# lexicon = None # Lexicon
# stream = None # file-like object
# name = ''
# buffer = ''
# buf_start_pos = 0 # position in input of start of buffer
# next_pos = 0 # position in input of next char to read
# cur_pos = 0 # position in input of current char
# cur_line = 1 # line number of current char
# cur_line_start = 0 # position in input of start of current line
# start_pos = 0 # position in input of start of token
# start_line = 0 # line number of start of token
# start_col = 0 # position in line of start of token
# text = None # text of last token read
# initial_state = None # Node
# state_name = '' # Name of initial state
# queue = None # list of tokens to be returned
# trace = 0
def __init__(self, lexicon, stream, name = '', initial_pos = None):
# lexicon = None # Lexicon
# stream = None # file-like object
# name = ''
# buffer = ''
# buf_start_pos = 0 # position in input of start of buffer
# next_pos = 0 # position in input of next char to read
# cur_pos = 0 # position in input of current char
# cur_line = 1 # line number of current char
# cur_line_start = 0 # position in input of start of current line
# start_pos = 0 # position in input of start of token
# start_line = 0 # line number of start of token
# start_col = 0 # position in line of start of token
# text = None # text of last token read
# initial_state = None # Node
# state_name = '' # Name of initial state
# queue = None # list of tokens to be returned
# trace = 0
def __init__(self, lexicon, stream, name='', initial_pos=None):
"""
Scanner(lexicon, stream, name = '')
......@@ -143,7 +144,8 @@ class Scanner(object):
if self.trace:
print("Scanner: read: Performing %s %d:%d" % (
action, self.start_pos, self.cur_pos))
text = self.buffer[self.start_pos - self.buf_start_pos :
text = self.buffer[
self.start_pos - self.buf_start_pos:
self.cur_pos - self.buf_start_pos]
return (text, action)
else:
......@@ -198,19 +200,19 @@ class Scanner(object):
buf_index = next_pos - buf_start_pos
if buf_index < buf_len:
c = buffer[buf_index]
next_pos = next_pos + 1
next_pos += 1
else:
discard = self.start_pos - buf_start_pos
data = self.stream.read(0x1000)
buffer = self.buffer[discard:] + data
self.buffer = buffer
buf_start_pos = buf_start_pos + discard
buf_start_pos += discard
self.buf_start_pos = buf_start_pos
buf_len = len(buffer)
buf_index = buf_index - discard
buf_index -= discard
if data:
c = buffer[buf_index]
next_pos = next_pos + 1
next_pos += 1
else:
c = u''
# End inlined: c = self.read_char()
......@@ -226,7 +228,7 @@ class Scanner(object):
cur_char = u'\n'
input_state = 3
elif input_state == 3:
cur_line = cur_line + 1
cur_line += 1
cur_line_start = cur_pos = next_pos
cur_char = BOL
input_state = 1
......@@ -263,7 +265,7 @@ class Scanner(object):
def next_char(self):
input_state = self.input_state
if self.trace:
print("Scanner: next: %s [%d] %d" % (" "*20, input_state, self.cur_pos))
print("Scanner: next: %s [%d] %d" % (" " * 20, input_state, self.cur_pos))
if input_state == 1:
self.cur_pos = self.next_pos
c = self.read_char()
......@@ -279,7 +281,7 @@ class Scanner(object):
self.cur_char = u'\n'
self.input_state = 3
elif input_state == 3:
self.cur_line = self.cur_line + 1
self.cur_line += 1
self.cur_line_start = self.cur_pos = self.next_pos
self.cur_char = BOL
self.input_state = 1
......@@ -313,7 +315,7 @@ class Scanner(object):
self.lexicon.get_initial_state(state_name))
self.state_name = state_name
def produce(self, value, text = None):
def produce(self, value, text=None):
"""
Called from an action procedure, causes |value| to be returned
as the token value from read(). If |text| is supplied, it is
......@@ -25,7 +25,6 @@ def re(s):
class REParser(object):
def __init__(self, s):
self.s = s
self.i = -1
......@@ -40,7 +40,7 @@ class TransitionMap(object):
map = None # The list of codes and states
special = None # Mapping for special events
def __init__(self, map = None, special = None):
def __init__(self, map=None, special=None):
if not map:
map = [-maxint, {}, maxint]
if not special:
......@@ -50,7 +50,7 @@ class TransitionMap(object):
#self.check() ###
def add(self, event, new_state,
TupleType = tuple):
TupleType=tuple):
"""
Add transition to |new_state| on |event|.
"""
......@@ -61,12 +61,12 @@ class TransitionMap(object):
map = self.map
while i < j:
map[i + 1][new_state] = 1
i = i + 2
i += 2
else:
self.get_special(event)[new_state] = 1
def add_set(self, event, new_set,
TupleType = tuple):
TupleType=tuple):
"""
Add transitions to the states in |new_set| on |event|.
"""
......@@ -77,19 +77,19 @@ class TransitionMap(object):
map = self.map
while i < j:
map[i + 1].update(new_set)
i = i + 2
i += 2
else:
self.get_special(event).update(new_set)
def get_epsilon(self,
none = None):
none=None):
"""
Return the mapping for epsilon, or None.
"""
return self.special.get('', none)
def iteritems(self,
len = len):
len=len):
"""
Return the mapping as an iterable of ((code1, code2), state_set) and
(special_event, state_set) pairs.
......@@ -106,17 +106,18 @@ class TransitionMap(object):
if set or else_set:
result.append(((code0, code1), set))
code0 = code1
i = i + 2
i += 2
for event, set in self.special.iteritems():
if set:
result.append((event, set))
return iter(result)
items = iteritems
# ------------------- Private methods --------------------
def split(self, code,
len = len, maxint = maxint):
len=len, maxint=maxint):
"""
Search the list for the position of the split point for |code|,
inserting a new split point if necessary. Returns index |i| such
......@@ -173,10 +174,10 @@ class TransitionMap(object):
else:
code_str = str(code)
map_strs.append(code_str)
i = i + 1
i += 1
if i < n:
map_strs.append(state_set_str(map[i]))
i = i + 1
i += 1
special_strs = {}
for event, set in self.special.iteritems():
special_strs[event] = state_set_str(set)
......@@ -199,7 +200,7 @@ class TransitionMap(object):
n = len(map) - 1
while i < n:
self.dump_range(map[i], map[i + 2], map[i + 1], file)
i = i + 2
i += 2
for event, set in self.special.iteritems():
if set:
if not event:
......@@ -234,6 +235,7 @@ class TransitionMap(object):
def dump_set(self, set):
return state_set_str(set)
#
# State set manipulation functions
#