Commit 5bc5b14f authored by Guido van Rossum's avatar Guido van Rossum

Checking in AMK's latest installement.

parent c3861078
......@@ -7,9 +7,6 @@ import sys
import string
from pcre import *
[ NORMAL, CHARCLASS, REPLACEMENT ] = range(3)
[ CHAR, MEMORY_REFERENCE, SYNTAX, NOT_SYNTAX, SET, WORD_BOUNDARY, NOT_WORD_BOUNDARY, BEGINNING_OF_BUFFER, END_OF_BUFFER ] = range(9)
#
# First, the public part of the interface:
#
......@@ -231,199 +228,9 @@ def escape(pattern):
result.append(char)
return string.join(result, '')
_idprog = None
def valid_identifier(id):
global _idprog
if not _idprog:
_idprog = compile(r"[a-zA-Z_]\w*$")
if _idprog.match(id):
return 1
else:
return 0
def compile(pattern, flags=0):
groupindex={}
code=pcre_compile(pattern, flags, groupindex)
return RegexObject(pattern, flags, code, groupindex)
def _expand(m, repl):
results = []
index = 0
size = len(repl)
while index < size:
found = string.find(repl, '\\', index)
if found < 0:
results.append(repl[index:])
break
if found > index:
results.append(repl[index:found])
escape_type, value, index = _expand_escape(repl, found+1, REPLACEMENT)
if escape_type == CHAR:
results.append(value)
elif escape_type == MEMORY_REFERENCE:
r = m.group(value)
if r is None:
raise error, ('group "' + str(value) + '" did not contribute '
'to the match')
results.append(m.group(value))
else:
raise error, "bad escape in replacement"
return string.join(results, '')
def _expand_escape(pattern, index, context=NORMAL):
if index >= len(pattern):
raise error, 'escape ends too soon'
elif pattern[index] == 't':
return CHAR, chr(9), index + 1
elif pattern[index] == 'n':
return CHAR, chr(10), index + 1
elif pattern[index] == 'v':
return CHAR, chr(11), index + 1
elif pattern[index] == 'r':
return CHAR, chr(13), index + 1
elif pattern[index] == 'f':
return CHAR, chr(12), index + 1
elif pattern[index] == 'a':
return CHAR, chr(7), index + 1
elif pattern[index] == 'x':
# CAUTION: this is the Python rule, not the Perl rule!
end = index + 1 # Skip over the 'x' character
while (end < len(pattern)) and (pattern[end] in string.hexdigits):
end = end + 1
if end == index:
raise error, "\\x must be followed by hex digit(s)"
# let Python evaluate it, so we don't incorrectly 2nd-guess
# what it's doing (and Python in turn passes it on to sscanf,
# so that *it* doesn't incorrectly 2nd-guess what C does!)
char = eval ('"' + pattern[index-1:end] + '"')
# assert len(char) == 1
return CHAR, char, end
elif pattern[index] == 'b':
if context != NORMAL:
return CHAR, chr(8), index + 1
else:
return WORD_BOUNDARY, '', index + 1
elif pattern[index] == 'B':
if context != NORMAL:
return CHAR, 'B', index + 1
else:
return NOT_WORD_BOUNDARY, '', index + 1
elif pattern[index] == 'A':
if context != NORMAL:
return CHAR, 'A', index + 1
else:
return BEGINNING_OF_BUFFER, '', index + 1
elif pattern[index] == 'Z':
if context != NORMAL:
return CHAR, 'Z', index + 1
else:
return END_OF_BUFFER, '', index + 1
elif pattern[index] in 'GluLUQE':
raise error, ('\\' + pattern[index] + ' is not allowed')
elif pattern[index] == 'w':
return CHAR, 'w', index + 1
elif pattern[index] == 'W':
return CHAR, 'W', index + 1
elif pattern[index] == 's':
return CHAR, 's', index + 1
elif pattern[index] == 'S':
return CHAR, 'S', index + 1
elif pattern[index] == 'd':
return CHAR, 'd', index + 1
elif pattern[index] == 'D':
return CHAR, 'D', index + 1
elif pattern[index] in '0123456789':
if pattern[index] == '0':
if (index + 1 < len(pattern)) and \
(pattern[index + 1] in string.octdigits):
if (index + 2 < len(pattern)) and \
(pattern[index + 2] in string.octdigits):
value = string.atoi(pattern[index:index + 3], 8)
index = index + 3
else:
value = string.atoi(pattern[index:index + 2], 8)
index = index + 2
else:
value = 0
index = index + 1
if value > 255:
raise error, 'octal value out of range'
return CHAR, chr(value), index
else:
if (index + 1 < len(pattern)) and \
(pattern[index + 1] in string.digits):
if (index + 2 < len(pattern)) and \
(pattern[index + 2] in string.octdigits) and \
(pattern[index + 1] in string.octdigits) and \
(pattern[index] in string.octdigits):
value = string.atoi(pattern[index:index + 3], 8)
if value > 255:
raise error, 'octal value out of range'
return CHAR, chr(value), index + 3
else:
value = string.atoi(pattern[index:index + 2])
if (value < 1) or (value > 99):
raise error, 'memory reference out of range'
if context == CHARCLASS:
raise error, ('cannot reference a register from '
'inside a character class')
return MEMORY_REFERENCE, value, index + 2
else:
if context == CHARCLASS:
raise error, ('cannot reference a register from '
'inside a character class')
value = string.atoi(pattern[index])
return MEMORY_REFERENCE, value, index + 1
elif pattern[index] == 'g':
if context != REPLACEMENT:
return CHAR, 'g', index + 1
index = index + 1
if index >= len(pattern):
raise error, 'unfinished symbolic reference'
if pattern[index] != '<':
raise error, 'missing < in symbolic reference'
index = index + 1
end = string.find(pattern, '>', index)
if end == -1:
raise error, 'unfinished symbolic reference'
value = pattern[index:end]
if not valid_identifier(value):
raise error, 'illegal symbolic reference'
return MEMORY_REFERENCE, value, end + 1
else:
return CHAR, pattern[index], index + 1
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment