Commit 5607fabd authored by Stefan Behnel's avatar Stefan Behnel

reformat Plex code files

parent 727e57d9
......@@ -7,98 +7,101 @@
#=======================================================================
class Action(object):
def perform(self, token_stream, text):
pass # abstract
def perform(self, token_stream, text):
pass # abstract
def same_as(self, other):
return self is other
def same_as(self, other):
return self is other
class Return(Action):
"""
Internal Plex action which causes |value| to
be returned as the value of the associated token
"""
"""
Internal Plex action which causes |value| to
be returned as the value of the associated token
"""
def __init__(self, value):
self.value = value
def __init__(self, value):
self.value = value
def perform(self, token_stream, text):
return self.value
def perform(self, token_stream, text):
return self.value
def same_as(self, other):
return isinstance(other, Return) and self.value == other.value
def same_as(self, other):
return isinstance(other, Return) and self.value == other.value
def __repr__(self):
return "Return(%s)" % repr(self.value)
def __repr__(self):
return "Return(%s)" % repr(self.value)
class Call(Action):
"""
Internal Plex action which causes a function to be called.
"""
"""
Internal Plex action which causes a function to be called.
"""
def __init__(self, function):
self.function = function
def __init__(self, function):
self.function = function
def perform(self, token_stream, text):
return self.function(token_stream, text)
def perform(self, token_stream, text):
return self.function(token_stream, text)
def __repr__(self):
return "Call(%s)" % self.function.__name__
def __repr__(self):
return "Call(%s)" % self.function.__name__
def same_as(self, other):
return isinstance(other, Call) and self.function is other.function
def same_as(self, other):
return isinstance(other, Call) and self.function is other.function
class Begin(Action):
"""
Begin(state_name) is a Plex action which causes the Scanner to
enter the state |state_name|. See the docstring of Plex.Lexicon
for more information.
"""
"""
Begin(state_name) is a Plex action which causes the Scanner to
enter the state |state_name|. See the docstring of Plex.Lexicon
for more information.
"""
def __init__(self, state_name):
self.state_name = state_name
def __init__(self, state_name):
self.state_name = state_name
def perform(self, token_stream, text):
token_stream.begin(self.state_name)
def perform(self, token_stream, text):
token_stream.begin(self.state_name)
def __repr__(self):
return "Begin(%s)" % self.state_name
def __repr__(self):
return "Begin(%s)" % self.state_name
def same_as(self, other):
return isinstance(other, Begin) and self.state_name == other.state_name
def same_as(self, other):
return isinstance(other, Begin) and self.state_name == other.state_name
class Ignore(Action):
"""
IGNORE is a Plex action which causes its associated token
to be ignored. See the docstring of Plex.Lexicon for more
information.
"""
def perform(self, token_stream, text):
return None
"""
IGNORE is a Plex action which causes its associated token
to be ignored. See the docstring of Plex.Lexicon for more
information.
"""
def perform(self, token_stream, text):
return None
def __repr__(self):
return "IGNORE"
def __repr__(self):
return "IGNORE"
IGNORE = Ignore()
#IGNORE.__doc__ = Ignore.__doc__
class Text(Action):
"""
TEXT is a Plex action which causes the text of a token to
be returned as the value of the token. See the docstring of
Plex.Lexicon for more information.
"""
"""
TEXT is a Plex action which causes the text of a token to
be returned as the value of the token. See the docstring of
Plex.Lexicon for more information.
"""
def perform(self, token_stream, text):
return text
def perform(self, token_stream, text):
return text
def __repr__(self):
return "TEXT"
def __repr__(self):
return "TEXT"
TEXT = Text()
#TEXT.__doc__ = Text.__doc__
......
This diff is collapsed.
......@@ -6,45 +6,49 @@
#
#=======================================================================
class PlexError(Exception):
message = ""
message = ""
class PlexTypeError(PlexError, TypeError):
pass
pass
class PlexValueError(PlexError, ValueError):
pass
pass
class InvalidRegex(PlexError):
pass
pass
class InvalidToken(PlexError):
def __init__(self, token_number, message):
PlexError.__init__(self, "Token number %d: %s" % (token_number, message))
def __init__(self, token_number, message):
PlexError.__init__(self, "Token number %d: %s" % (token_number, message))
class InvalidScanner(PlexError):
pass
class AmbiguousAction(PlexError):
message = "Two tokens with different actions can match the same string"
def __init__(self):
pass
class UnrecognizedInput(PlexError):
scanner = None
position = None
state_name = None
def __init__(self, scanner, state_name):
self.scanner = scanner
self.position = scanner.get_position()
self.state_name = state_name
def __str__(self):
return ("'%s', line %d, char %d: Token not recognised in state %s"
% (self.position + (repr(self.state_name),)))
class AmbiguousAction(PlexError):
message = "Two tokens with different actions can match the same string"
def __init__(self):
pass
class UnrecognizedInput(PlexError):
scanner = None
position = None
state_name = None
def __init__(self, scanner, state_name):
self.scanner = scanner
self.position = scanner.get_position()
self.state_name = state_name
def __str__(self):
return ("'%s', line %d, char %d: Token not recognised in state %s" % (
self.position + (repr(self.state_name),)))
This diff is collapsed.
This diff is collapsed.
......@@ -42,14 +42,15 @@ def chars_to_ranges(s):
while i < n:
code1 = ord(char_list[i])
code2 = code1 + 1
i = i + 1
i += 1
while i < n and code2 >= ord(char_list[i]):
code2 = code2 + 1
i = i + 1
code2 += 1
i += 1
result.append(code1)
result.append(code2)
return result
def uppercase_range(code1, code2):
"""
If the range of characters from code1 to code2-1 includes any
......@@ -63,6 +64,7 @@ def uppercase_range(code1, code2):
else:
return None
def lowercase_range(code1, code2):
"""
If the range of characters from code1 to code2-1 includes any
......@@ -76,6 +78,7 @@ def lowercase_range(code1, code2):
else:
return None
def CodeRanges(code_list):
"""
Given a list of codes as returned by chars_to_ranges, return
......@@ -86,6 +89,7 @@ def CodeRanges(code_list):
re_list.append(CodeRange(code_list[i], code_list[i + 1]))
return Alt(*re_list)
def CodeRange(code1, code2):
"""
CodeRange(code1, code2) is an RE which matches any character
......@@ -93,11 +97,12 @@ def CodeRange(code1, code2):
"""
if code1 <= nl_code < code2:
return Alt(RawCodeRange(code1, nl_code),
RawNewline,
RawCodeRange(nl_code + 1, code2))
RawNewline,
RawCodeRange(nl_code + 1, code2))
else:
return RawCodeRange(code1, code2)
#
# Abstract classes
#
......@@ -110,12 +115,12 @@ class RE(object):
re1 | re2 is an RE which matches either |re1| or |re2|
"""
nullable = 1 # True if this RE can match 0 input symbols
match_nl = 1 # True if this RE can match a string ending with '\n'
str = None # Set to a string to override the class's __str__ result
nullable = 1 # True if this RE can match 0 input symbols
match_nl = 1 # True if this RE can match a string ending with '\n'
str = None # Set to a string to override the class's __str__ result
def build_machine(self, machine, initial_state, final_state,
match_bol, nocase):
match_bol, nocase):
"""
This method should add states to |machine| to implement this
RE, starting at |initial_state| and ending at |final_state|.
......@@ -124,7 +129,7 @@ class RE(object):
letters should be treated as equivalent.
"""
raise NotImplementedError("%s.build_machine not implemented" %
self.__class__.__name__)
self.__class__.__name__)
def build_opt(self, m, initial_state, c):
"""
......@@ -160,18 +165,18 @@ class RE(object):
self.check_string(num, value)
if len(value) != 1:
raise Errors.PlexValueError("Invalid value for argument %d of Plex.%s."
"Expected a string of length 1, got: %s" % (
num, self.__class__.__name__, repr(value)))
"Expected a string of length 1, got: %s" % (
num, self.__class__.__name__, repr(value)))
def wrong_type(self, num, value, expected):
if type(value) == types.InstanceType:
got = "%s.%s instance" % (
value.__class__.__module__, value.__class__.__name__)
got = "%s.%s instance" % (
value.__class__.__module__, value.__class__.__name__)
else:
got = type(value).__name__
raise Errors.PlexTypeError("Invalid type for argument %d of Plex.%s "
"(expected %s, got %s" % (
num, self.__class__.__name__, expected, got))
"(expected %s, got %s" % (
num, self.__class__.__name__, expected, got))
#
# Primitive RE constructors
......@@ -211,6 +216,7 @@ class RE(object):
## def calc_str(self):
## return "Char(%s)" % repr(self.char)
def Char(c):
"""
Char(c) is an RE which matches the character |c|.
......@@ -222,6 +228,7 @@ def Char(c):
result.str = "Char(%s)" % repr(c)
return result
class RawCodeRange(RE):
"""
RawCodeRange(code1, code2) is a low-level RE which matches any character
......@@ -230,9 +237,9 @@ class RawCodeRange(RE):
"""
nullable = 0
match_nl = 0
range = None # (code, code)
uppercase_range = None # (code, code) or None
lowercase_range = None # (code, code) or None
range = None # (code, code)
uppercase_range = None # (code, code) or None
lowercase_range = None # (code, code) or None
def __init__(self, code1, code2):
self.range = (code1, code2)
......@@ -252,6 +259,7 @@ class RawCodeRange(RE):
def calc_str(self):
return "CodeRange(%d,%d)" % (self.code1, self.code2)
class _RawNewline(RE):
"""
RawNewline is a low-level RE which matches a newline character.
......@@ -266,6 +274,7 @@ class _RawNewline(RE):
s = self.build_opt(m, initial_state, EOL)
s.add_transition((nl_code, nl_code + 1), final_state)
RawNewline = _RawNewline()
......@@ -304,7 +313,7 @@ class Seq(RE):
i = len(re_list)
match_nl = 0
while i:
i = i - 1
i -= 1
re = re_list[i]
if re.match_nl:
match_nl = 1
......@@ -354,7 +363,7 @@ class Alt(RE):
non_nullable_res.append(re)
if re.match_nl:
match_nl = 1
i = i + 1
i += 1
self.nullable_res = nullable_res
self.non_nullable_res = non_nullable_res
self.nullable = nullable
......@@ -411,7 +420,7 @@ class SwitchCase(RE):
def build_machine(self, m, initial_state, final_state, match_bol, nocase):
self.re.build_machine(m, initial_state, final_state, match_bol,
self.nocase)
self.nocase)
def calc_str(self):
if self.nocase:
......@@ -434,6 +443,7 @@ Empty.__doc__ = \
"""
Empty.str = "Empty"
def Str1(s):
"""
Str1(s) is an RE which matches the literal string |s|.
......@@ -442,6 +452,7 @@ def Str1(s):
result.str = "Str(%s)" % repr(s)
return result
def Str(*strs):
"""
Str(s) is an RE which matches the literal string |s|.
......@@ -454,6 +465,7 @@ def Str(*strs):
result.str = "Str(%s)" % ','.join(map(repr, strs))
return result
def Any(s):
"""
Any(s) is an RE which matches any character in the string |s|.
......@@ -463,6 +475,7 @@ def Any(s):
result.str = "Any(%s)" % repr(s)
return result
def AnyBut(s):
"""
AnyBut(s) is an RE which matches any character (including
......@@ -475,6 +488,7 @@ def AnyBut(s):
result.str = "AnyBut(%s)" % repr(s)
return result
AnyChar = AnyBut("")
AnyChar.__doc__ = \
"""
......@@ -482,7 +496,8 @@ AnyChar.__doc__ = \
"""
AnyChar.str = "AnyChar"
def Range(s1, s2 = None):
def Range(s1, s2=None):
"""
Range(c1, c2) is an RE which matches any single character in the range
|c1| to |c2| inclusive.
......@@ -495,11 +510,12 @@ def Range(s1, s2 = None):
else:
ranges = []
for i in range(0, len(s1), 2):
ranges.append(CodeRange(ord(s1[i]), ord(s1[i+1]) + 1))
ranges.append(CodeRange(ord(s1[i]), ord(s1[i + 1]) + 1))
result = Alt(*ranges)
result.str = "Range(%s)" % repr(s1)
return result
def Opt(re):
"""
Opt(re) is an RE which matches either |re| or the empty string.
......@@ -508,6 +524,7 @@ def Opt(re):
result.str = "Opt(%s)" % re
return result
def Rep(re):
"""
Rep(re) is an RE which matches zero or more repetitions of |re|.
......@@ -516,12 +533,14 @@ def Rep(re):
result.str = "Rep(%s)" % re
return result
def NoCase(re):
"""
NoCase(re) is an RE which matches the same strings as RE, but treating
upper and lower case letters as equivalent.
"""
return SwitchCase(re, nocase = 1)
return SwitchCase(re, nocase=1)
def Case(re):
"""
......@@ -529,7 +548,7 @@ def Case(re):
upper and lower case letters as distinct, i.e. it cancels the effect
of any enclosing NoCase().
"""
return SwitchCase(re, nocase = 0)
return SwitchCase(re, nocase=0)
#
# RE Constants
......
This diff is collapsed.
......@@ -13,147 +13,146 @@ from .Errors import PlexError
class RegexpSyntaxError(PlexError):
pass
pass
def re(s):
"""
Convert traditional string representation of regular expression |s|
into Plex representation.
"""
return REParser(s).parse_re()
"""
Convert traditional string representation of regular expression |s|
into Plex representation.
"""
return REParser(s).parse_re()
class REParser(object):
def __init__(self, s):
self.s = s
self.i = -1
self.end = 0
self.next()
def parse_re(self):
re = self.parse_alt()
if not self.end:
self.error("Unexpected %s" % repr(self.c))
return re
def parse_alt(self):
"""Parse a set of alternative regexps."""
re = self.parse_seq()
if self.c == '|':
re_list = [re]
while self.c == '|':
def __init__(self, s):
self.s = s
self.i = -1
self.end = 0
self.next()
re_list.append(self.parse_seq())
re = Alt(*re_list)
return re
def parse_seq(self):
"""Parse a sequence of regexps."""
re_list = []
while not self.end and not self.c in "|)":
re_list.append(self.parse_mod())
return Seq(*re_list)
def parse_mod(self):
"""Parse a primitive regexp followed by *, +, ? modifiers."""
re = self.parse_prim()
while not self.end and self.c in "*+?":
if self.c == '*':
re = Rep(re)
elif self.c == '+':
re = Rep1(re)
else: # self.c == '?'
re = Opt(re)
self.next()
return re
def parse_prim(self):
"""Parse a primitive regexp."""
c = self.get()
if c == '.':
re = AnyBut("\n")
elif c == '^':
re = Bol
elif c == '$':
re = Eol
elif c == '(':
re = self.parse_alt()
self.expect(')')
elif c == '[':
re = self.parse_charset()
self.expect(']')
else:
if c == '\\':
def parse_re(self):
re = self.parse_alt()
if not self.end:
self.error("Unexpected %s" % repr(self.c))
return re
def parse_alt(self):
"""Parse a set of alternative regexps."""
re = self.parse_seq()
if self.c == '|':
re_list = [re]
while self.c == '|':
self.next()
re_list.append(self.parse_seq())
re = Alt(*re_list)
return re
def parse_seq(self):
"""Parse a sequence of regexps."""
re_list = []
while not self.end and not self.c in "|)":
re_list.append(self.parse_mod())
return Seq(*re_list)
def parse_mod(self):
"""Parse a primitive regexp followed by *, +, ? modifiers."""
re = self.parse_prim()
while not self.end and self.c in "*+?":
if self.c == '*':
re = Rep(re)
elif self.c == '+':
re = Rep1(re)
else: # self.c == '?'
re = Opt(re)
self.next()
return re
def parse_prim(self):
"""Parse a primitive regexp."""
c = self.get()
re = Char(c)
return re
def parse_charset(self):
"""Parse a charset. Does not include the surrounding []."""
char_list = []
invert = 0
if self.c == '^':
invert = 1
self.next()
if self.c == ']':
char_list.append(']')
self.next()
while not self.end and self.c != ']':
c1 = self.get()
if self.c == '-' and self.lookahead(1) != ']':
if c == '.':
re = AnyBut("\n")
elif c == '^':
re = Bol
elif c == '$':
re = Eol
elif c == '(':
re = self.parse_alt()
self.expect(')')
elif c == '[':
re = self.parse_charset()
self.expect(']')
else:
if c == '\\':
c = self.get()
re = Char(c)
return re
def parse_charset(self):
"""Parse a charset. Does not include the surrounding []."""
char_list = []
invert = 0
if self.c == '^':
invert = 1
self.next()
if self.c == ']':
char_list.append(']')
self.next()
while not self.end and self.c != ']':
c1 = self.get()
if self.c == '-' and self.lookahead(1) != ']':
self.next()
c2 = self.get()
for a in xrange(ord(c1), ord(c2) + 1):
char_list.append(chr(a))
else:
char_list.append(c1)
chars = ''.join(char_list)
if invert:
return AnyBut(chars)
else:
return Any(chars)
def next(self):
"""Advance to the next char."""
s = self.s
i = self.i = self.i + 1
if i < len(s):
self.c = s[i]
else:
self.c = ''
self.end = 1
def get(self):
if self.end:
self.error("Premature end of string")
c = self.c
self.next()
c2 = self.get()
for a in xrange(ord(c1), ord(c2) + 1):
char_list.append(chr(a))
else:
char_list.append(c1)
chars = ''.join(char_list)
if invert:
return AnyBut(chars)
else:
return Any(chars)
def next(self):
"""Advance to the next char."""
s = self.s
i = self.i = self.i + 1
if i < len(s):
self.c = s[i]
else:
self.c = ''
self.end = 1
def get(self):
if self.end:
self.error("Premature end of string")
c = self.c
self.next()
return c
def lookahead(self, n):
"""Look ahead n chars."""
j = self.i + n
if j < len(self.s):
return self.s[j]
else:
return ''
def expect(self, c):
"""
Expect to find character |c| at current position.
Raises an exception otherwise.
"""
if self.c == c:
self.next()
else:
self.error("Missing %s" % repr(c))
def error(self, mess):
"""Raise exception to signal syntax error in regexp."""
raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
repr(self.s), self.i, mess))
return c
def lookahead(self, n):
"""Look ahead n chars."""
j = self.i + n
if j < len(self.s):
return self.s[j]
else:
return ''
def expect(self, c):
"""
Expect to find character |c| at current position.
Raises an exception otherwise.
"""
if self.c == c:
self.next()
else:
self.error("Missing %s" % repr(c))
def error(self, mess):
"""Raise exception to signal syntax error in regexp."""
raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
repr(self.s), self.i, mess))
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment