Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
436c3d58
Commit
436c3d58
authored
Jun 29, 2000
by
Fredrik Lundh
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
towards 1.6b1
parent
102f3ad6
Changes
5
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
570 additions
and
338 deletions
+570
-338
Lib/sre.py
Lib/sre.py
+17
-6
Lib/sre_compile.py
Lib/sre_compile.py
+39
-27
Lib/sre_constants.py
Lib/sre_constants.py
+50
-22
Lib/sre_parse.py
Lib/sre_parse.py
+78
-35
Modules/_sre.c
Modules/_sre.c
+386
-248
No files found.
Lib/sre.py
View file @
436c3d58
...
...
@@ -12,6 +12,7 @@
#
import
sre_compile
import
sre_parse
# flags
I
=
IGNORECASE
=
sre_compile
.
SRE_FLAG_IGNORECASE
...
...
@@ -20,6 +21,13 @@ M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
S
=
DOTALL
=
sre_compile
.
SRE_FLAG_DOTALL
X
=
VERBOSE
=
sre_compile
.
SRE_FLAG_VERBOSE
# sre extensions (may or may not be in 1.6 final)
T
=
TEMPLATE
=
sre_compile
.
SRE_FLAG_TEMPLATE
U
=
UNICODE
=
sre_compile
.
SRE_FLAG_UNICODE
# sre exception
error
=
sre_parse
.
error
# --------------------------------------------------------------------
# public interface
...
...
@@ -46,6 +54,9 @@ def findall(pattern, string, maxsplit=0):
def
compile
(
pattern
,
flags
=
0
):
return
_compile
(
pattern
,
flags
)
def
template
(
pattern
,
flags
=
0
):
return
_compile
(
pattern
,
flags
|
T
)
def
escape
(
pattern
):
s
=
list
(
pattern
)
for
i
in
range
(
len
(
pattern
)):
...
...
@@ -83,18 +94,14 @@ def _sub(pattern, template, string, count=0):
# internal: pattern.sub implementation hook
return
_subn
(
pattern
,
template
,
string
,
count
)[
0
]
def
_expand
(
match
,
template
):
# internal: expand template
return
template
# FIXME
def
_subn
(
pattern
,
template
,
string
,
count
=
0
):
# internal: pattern.subn implementation hook
if
callable
(
template
):
filter
=
template
else
:
# FIXME: prepare template
template
=
sre_parse
.
parse_template
(
template
,
pattern
)
def
filter
(
match
,
template
=
template
):
return
_expand
(
match
,
template
)
return
sre_parse
.
expand_template
(
template
,
match
)
n
=
i
=
0
s
=
[]
append
=
s
.
append
...
...
@@ -108,6 +115,8 @@ def _subn(pattern, template, string, count=0):
append
(
string
[
i
:
j
])
append
(
filter
(
m
))
i
=
m
.
end
()
if
i
<=
j
:
break
n
=
n
+
1
if
i
<
len
(
string
):
append
(
string
[
i
:])
...
...
@@ -126,6 +135,8 @@ def _split(pattern, string, maxsplit=0):
j
=
m
.
start
()
append
(
string
[
i
:
j
])
i
=
m
.
end
()
if
i
<=
j
:
break
n
=
n
+
1
if
i
<
len
(
string
):
append
(
string
[
i
:])
...
...
Lib/sre_compile.py
View file @
436c3d58
...
...
@@ -48,7 +48,7 @@ class Code:
print
self
.
data
raise
def
_compile
(
code
,
pattern
,
flags
,
level
=
0
):
def
_compile
(
code
,
pattern
,
flags
):
append
=
code
.
append
for
op
,
av
in
pattern
:
if
op
is
ANY
:
...
...
@@ -70,23 +70,26 @@ def _compile(code, pattern, flags, level=0):
tail
=
[]
for
av
in
av
[
1
]:
skip
=
len
(
code
);
append
(
0
)
_compile
(
code
,
av
,
flags
,
level
)
append
(
OPCODES
[
JUMP
])
tail
.
append
(
len
(
code
));
append
(
0
)
_compile
(
code
,
av
,
flags
)
## append(OPCODES[SUCCESS])
append
(
OPCODES
[
JUMP
])
tail
.
append
(
len
(
code
));
append
(
0
)
code
[
skip
]
=
len
(
code
)
-
skip
append
(
0
)
# end of branch
for
tail
in
tail
:
for
tail
in
tail
:
code
[
tail
]
=
len
(
code
)
-
tail
elif
op
is
CALL
:
append
(
OPCODES
[
op
])
skip
=
len
(
code
);
append
(
0
)
_compile
(
code
,
av
,
flags
,
level
+
1
)
_compile
(
code
,
av
,
flags
)
append
(
OPCODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
elif
op
is
CATEGORY
:
# not used by current parser
elif
op
is
CATEGORY
:
append
(
OPCODES
[
op
])
if
flags
&
SRE_FLAG_LOCALE
:
append
(
CH_LOCALE
[
CHCODES
[
av
]])
elif
flags
&
SRE_FLAG_UNICODE
:
append
(
CH_UNICODE
[
CHCODES
[
av
]])
else
:
append
(
CHCODES
[
av
])
elif
op
is
GROUP
:
...
...
@@ -98,8 +101,8 @@ def _compile(code, pattern, flags, level=0):
elif
op
is
IN
:
if
flags
&
SRE_FLAG_IGNORECASE
:
append
(
OPCODES
[
OP_IGNORE
[
op
]])
def
fixup
(
literal
):
return
ord
(
literal
.
lower
()
)
def
fixup
(
literal
,
flags
=
flags
):
return
_sre
.
getlower
(
ord
(
literal
),
flags
)
else
:
append
(
OPCODES
[
op
])
fixup
=
ord
...
...
@@ -116,6 +119,8 @@ def _compile(code, pattern, flags, level=0):
elif
op
is
CATEGORY
:
if
flags
&
SRE_FLAG_LOCALE
:
append
(
CH_LOCALE
[
CHCODES
[
av
]])
elif
flags
&
SRE_FLAG_UNICODE
:
append
(
CH_UNICODE
[
CHCODES
[
av
]])
else
:
append
(
CHCODES
[
av
])
else
:
...
...
@@ -125,42 +130,49 @@ def _compile(code, pattern, flags, level=0):
elif
op
in
(
LITERAL
,
NOT_LITERAL
):
if
flags
&
SRE_FLAG_IGNORECASE
:
append
(
OPCODES
[
OP_IGNORE
[
op
]])
append
(
ord
(
av
.
lower
()))
else
:
append
(
OPCODES
[
op
])
append
(
ord
(
av
))
append
(
ord
(
av
))
elif
op
is
MARK
:
append
(
OPCODES
[
op
])
append
(
av
)
elif
op
in
(
REPEAT
,
MIN_REPEAT
,
MAX_REPEAT
):
lo
,
hi
=
av
[
2
].
getwidth
()
if
lo
==
0
:
raise
SyntaxError
,
"cannot repeat zero-width items"
if
lo
==
hi
==
1
and
op
is
MAX_REPEAT
:
append
(
OPCODES
[
MAX_REPEAT_ONE
])
if
flags
&
SRE_FLAG_TEMPLATE
:
append
(
OPCODES
[
REPEAT
])
skip
=
len
(
code
);
append
(
0
)
append
(
av
[
0
])
append
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
,
level
+
1
)
_compile
(
code
,
av
[
2
],
flags
)
append
(
OPCODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
else
:
append
(
OPCODES
[
op
])
skip
=
len
(
code
);
append
(
0
)
append
(
av
[
0
])
append
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
,
level
+
1
)
if
op
is
MIN_REPEAT
:
append
(
OPCODES
[
MIN_UNTIL
])
lo
,
hi
=
av
[
2
].
getwidth
()
if
lo
==
0
:
raise
error
,
"nothing to repeat"
if
0
and
lo
==
hi
==
1
and
op
is
MAX_REPEAT
:
# FIXME: <fl> need a better way to figure out when
# it's safe to use this one (in the parser, probably)
append
(
OPCODES
[
MAX_REPEAT_ONE
])
skip
=
len
(
code
);
append
(
0
)
append
(
av
[
0
])
append
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
)
append
(
OPCODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
else
:
append
(
OPCODES
[
MAX_UNTIL
])
code
[
skip
]
=
len
(
code
)
-
skip
append
(
OPCODES
[
op
])
skip
=
len
(
code
);
append
(
0
)
append
(
av
[
0
])
append
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
)
append
(
OPCODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
elif
op
is
SUBPATTERN
:
group
=
av
[
0
]
if
group
:
append
(
OPCODES
[
MARK
])
append
((
group
-
1
)
*
2
)
_compile
(
code
,
av
[
1
],
flags
,
level
+
1
)
_compile
(
code
,
av
[
1
],
flags
)
if
group
:
append
(
OPCODES
[
MARK
])
append
((
group
-
1
)
*
2
+
1
)
...
...
Lib/sre_constants.py
View file @
436c3d58
...
...
@@ -15,6 +15,11 @@
# other compatibility work.
#
# should this really be here?
class
error
(
Exception
):
pass
# operators
FAILURE
=
"failure"
...
...
@@ -30,20 +35,20 @@ GROUP = "group"
GROUP_IGNORE
=
"group_ignore"
IN
=
"in"
IN_IGNORE
=
"in_ignore"
INFO
=
"info"
JUMP
=
"jump"
LITERAL
=
"literal"
LITERAL_IGNORE
=
"literal_ignore"
MARK
=
"mark"
MAX_REPEAT
=
"max_repeat"
MAX_REPEAT_ONE
=
"max_repeat_one"
MAX_UNTIL
=
"max_until"
MIN_REPEAT
=
"min_repeat"
MIN_UNTIL
=
"min_until"
NEGATE
=
"negate"
NOT_LITERAL
=
"not_literal"
NOT_LITERAL_IGNORE
=
"not_literal_ignore"
RANGE
=
"range"
REPEAT
=
"repeat"
REPEAT_ONE
=
"repeat_one"
SUBPATTERN
=
"subpattern"
# positions
...
...
@@ -63,14 +68,16 @@ CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD
=
"category_not_word"
CATEGORY_LINEBREAK
=
"category_linebreak"
CATEGORY_NOT_LINEBREAK
=
"category_not_linebreak"
CATEGORY_LOC_DIGIT
=
"category_loc_digit"
CATEGORY_LOC_NOT_DIGIT
=
"category_loc_not_digit"
CATEGORY_LOC_SPACE
=
"category_loc_space"
CATEGORY_LOC_NOT_SPACE
=
"category_loc_not_space"
CATEGORY_LOC_WORD
=
"category_loc_word"
CATEGORY_LOC_NOT_WORD
=
"category_loc_not_word"
CATEGORY_LOC_LINEBREAK
=
"category_loc_linebreak"
CATEGORY_LOC_NOT_LINEBREAK
=
"category_loc_not_linebreak"
CATEGORY_UNI_DIGIT
=
"category_uni_digit"
CATEGORY_UNI_NOT_DIGIT
=
"category_uni_not_digit"
CATEGORY_UNI_SPACE
=
"category_uni_space"
CATEGORY_UNI_NOT_SPACE
=
"category_uni_not_space"
CATEGORY_UNI_WORD
=
"category_uni_word"
CATEGORY_UNI_NOT_WORD
=
"category_uni_not_word"
CATEGORY_UNI_LINEBREAK
=
"category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK
=
"category_uni_not_linebreak"
OPCODES
=
[
...
...
@@ -85,12 +92,13 @@ OPCODES = [
CATEGORY
,
GROUP
,
GROUP_IGNORE
,
IN
,
IN_IGNORE
,
INFO
,
JUMP
,
LITERAL
,
LITERAL_IGNORE
,
MARK
,
MAX_REPEAT
,
MAX_UNTIL
,
MAX_REPEAT
,
MAX_REPEAT_ONE
,
MIN_REPEAT
,
MIN_UNTIL
,
MIN_REPEAT
,
NOT_LITERAL
,
NOT_LITERAL_IGNORE
,
NEGATE
,
RANGE
,
...
...
@@ -106,10 +114,11 @@ ATCODES = [
CHCODES
=
[
CATEGORY_DIGIT
,
CATEGORY_NOT_DIGIT
,
CATEGORY_SPACE
,
CATEGORY_NOT_SPACE
,
CATEGORY_WORD
,
CATEGORY_NOT_WORD
,
CATEGORY_LINEBREAK
,
CATEGORY_NOT_LINEBREAK
,
CATEGORY_LOC_DIGIT
,
CATEGORY_LOC_NOT_DIGIT
,
CATEGORY_LOC_SPACE
,
CATEGORY_LOC_NOT_SPACE
,
CATEGORY_LOC_WORD
,
CATEGORY_LOC_NOT_WORD
,
CATEGORY_LOC_LINEBREAK
,
CATEGORY_LOC_NOT_LINEBREAK
CATEGORY_LINEBREAK
,
CATEGORY_NOT_LINEBREAK
,
CATEGORY_LOC_WORD
,
CATEGORY_LOC_NOT_WORD
,
CATEGORY_UNI_DIGIT
,
CATEGORY_UNI_NOT_DIGIT
,
CATEGORY_UNI_SPACE
,
CATEGORY_UNI_NOT_SPACE
,
CATEGORY_UNI_WORD
,
CATEGORY_UNI_NOT_WORD
,
CATEGORY_UNI_LINEBREAK
,
CATEGORY_UNI_NOT_LINEBREAK
]
def
makedict
(
list
):
...
...
@@ -138,23 +147,35 @@ AT_MULTILINE = {
}
CH_LOCALE
=
{
CATEGORY_DIGIT
:
CATEGORY_
LOC_
DIGIT
,
CATEGORY_NOT_DIGIT
:
CATEGORY_
LOC_
NOT_DIGIT
,
CATEGORY_SPACE
:
CATEGORY_
LOC_
SPACE
,
CATEGORY_NOT_SPACE
:
CATEGORY_
LOC_
NOT_SPACE
,
CATEGORY_DIGIT
:
CATEGORY_DIGIT
,
CATEGORY_NOT_DIGIT
:
CATEGORY_NOT_DIGIT
,
CATEGORY_SPACE
:
CATEGORY_SPACE
,
CATEGORY_NOT_SPACE
:
CATEGORY_NOT_SPACE
,
CATEGORY_WORD
:
CATEGORY_LOC_WORD
,
CATEGORY_NOT_WORD
:
CATEGORY_LOC_NOT_WORD
,
CATEGORY_LINEBREAK
:
CATEGORY_LOC_LINEBREAK
,
CATEGORY_NOT_LINEBREAK
:
CATEGORY_LOC_NOT_LINEBREAK
CATEGORY_LINEBREAK
:
CATEGORY_LINEBREAK
,
CATEGORY_NOT_LINEBREAK
:
CATEGORY_NOT_LINEBREAK
}
CH_UNICODE
=
{
CATEGORY_DIGIT
:
CATEGORY_UNI_DIGIT
,
CATEGORY_NOT_DIGIT
:
CATEGORY_UNI_NOT_DIGIT
,
CATEGORY_SPACE
:
CATEGORY_UNI_SPACE
,
CATEGORY_NOT_SPACE
:
CATEGORY_UNI_NOT_SPACE
,
CATEGORY_WORD
:
CATEGORY_UNI_WORD
,
CATEGORY_NOT_WORD
:
CATEGORY_UNI_NOT_WORD
,
CATEGORY_LINEBREAK
:
CATEGORY_UNI_LINEBREAK
,
CATEGORY_NOT_LINEBREAK
:
CATEGORY_UNI_NOT_LINEBREAK
}
# flags
SRE_FLAG_TEMPLATE
=
1
# NYI
SRE_FLAG_TEMPLATE
=
1
SRE_FLAG_IGNORECASE
=
2
SRE_FLAG_LOCALE
=
4
SRE_FLAG_MULTILINE
=
8
SRE_FLAG_DOTALL
=
16
SRE_FLAG_VERBOSE
=
32
SRE_FLAG_UNICODE
=
32
SRE_FLAG_VERBOSE
=
64
if
__name__
==
"__main__"
:
import
string
...
...
@@ -168,5 +189,12 @@ if __name__ == "__main__":
dump
(
f
,
OPCODES
,
"SRE_OP"
)
dump
(
f
,
ATCODES
,
"SRE"
)
dump
(
f
,
CHCODES
,
"SRE"
)
f
.
write
(
"#define SRE_FLAG_TEMPLATE %d
\
n
"
%
SRE_FLAG_TEMPLATE
)
f
.
write
(
"#define SRE_FLAG_IGNORECASE %d
\
n
"
%
SRE_FLAG_IGNORECASE
)
f
.
write
(
"#define SRE_FLAG_LOCALE %d
\
n
"
%
SRE_FLAG_LOCALE
)
f
.
write
(
"#define SRE_FLAG_MULTILINE %d
\
n
"
%
SRE_FLAG_MULTILINE
)
f
.
write
(
"#define SRE_FLAG_DOTALL %d
\
n
"
%
SRE_FLAG_DOTALL
)
f
.
write
(
"#define SRE_FLAG_UNICODE %d
\
n
"
%
SRE_FLAG_UNICODE
)
f
.
write
(
"#define SRE_FLAG_VERBOSE %d
\
n
"
%
SRE_FLAG_VERBOSE
)
f
.
close
()
print
"done"
Lib/sre_parse.py
View file @
436c3d58
...
...
@@ -20,14 +20,15 @@ import _sre
from
sre_constants
import
*
# FIXME:
should be 65535, but the array module currently chokes on
#
unsigned integers larger than 32767...
# FIXME:
<fl> should be 65535, but the array module currently chokes
#
on unsigned integers larger than 32767 [fixed in 1.6b1?]
MAXREPEAT
=
int
(
2L
**
(
_sre
.
getcodesize
()
*
8
-
1
))
-
1
SPECIAL_CHARS
=
".
\
\
[{()*+?^$|"
REPEAT_CHARS
=
"*+?{"
# FIXME: string in tuple tests may explode with if char is unicode :-(
# FIXME: <fl> string in tuple tests may explode with if char is
# unicode [fixed in 1.6b1?]
DIGITS
=
tuple
(
string
.
digits
)
OCTDIGITS
=
tuple
(
"01234567"
)
...
...
@@ -59,12 +60,15 @@ CATEGORIES = {
}
FLAGS
=
{
# standard flags
"i"
:
SRE_FLAG_IGNORECASE
,
"L"
:
SRE_FLAG_LOCALE
,
"m"
:
SRE_FLAG_MULTILINE
,
"s"
:
SRE_FLAG_DOTALL
,
"t"
:
SRE_FLAG_TEMPLATE
,
"x"
:
SRE_FLAG_VERBOSE
,
# extensions
"t"
:
SRE_FLAG_TEMPLATE
,
"u"
:
SRE_FLAG_UNICODE
,
}
class
State
:
...
...
@@ -151,7 +155,7 @@ class Tokenizer:
try
:
c
=
self
.
string
[
self
.
index
+
1
]
except
IndexError
:
raise
SyntaxE
rror
,
"bogus escape"
raise
e
rror
,
"bogus escape"
char
=
char
+
c
self
.
index
=
self
.
index
+
len
(
char
)
return
char
...
...
@@ -205,7 +209,7 @@ def _class_escape(source, escape):
return
LITERAL
,
escape
[
1
]
except
ValueError
:
pass
raise
SyntaxE
rror
,
"bogus escape: %s"
%
repr
(
escape
)
raise
e
rror
,
"bogus escape: %s"
%
repr
(
escape
)
def
_escape
(
source
,
escape
,
state
):
# handle escape code in expression
...
...
@@ -241,13 +245,12 @@ def _escape(source, escape, state):
return
LITERAL
,
escape
[
1
]
except
ValueError
:
pass
raise
SyntaxE
rror
,
"bogus escape: %s"
%
repr
(
escape
)
raise
e
rror
,
"bogus escape: %s"
%
repr
(
escape
)
def
_branch
(
pattern
,
items
):
# form a branch operator from a set of items (FIXME: move this
# optimization to the compiler module!)
# form a branch operator from a set of items
subpattern
=
SubPattern
(
pattern
)
...
...
@@ -332,7 +335,7 @@ def _parse(source, state, flags=0):
elif
this
:
code1
=
LITERAL
,
this
else
:
raise
SyntaxE
rror
,
"unexpected end of regular expression"
raise
e
rror
,
"unexpected end of regular expression"
if
source
.
match
(
"-"
):
# potential range
this
=
source
.
get
()
...
...
@@ -346,9 +349,9 @@ def _parse(source, state, flags=0):
else
:
code2
=
LITERAL
,
this
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
raise
SyntaxE
rror
,
"illegal range"
raise
e
rror
,
"illegal range"
if
len
(
code1
[
1
])
!=
1
or
len
(
code2
[
1
])
!=
1
:
raise
SyntaxE
rror
,
"illegal range"
raise
e
rror
,
"illegal range"
set
.
append
((
RANGE
,
(
code1
[
1
],
code2
[
1
])))
else
:
if
code1
[
0
]
is
IN
:
...
...
@@ -383,19 +386,19 @@ def _parse(source, state, flags=0):
else
:
hi
=
lo
if
not
source
.
match
(
"}"
):
raise
SyntaxE
rror
,
"bogus range"
raise
e
rror
,
"bogus range"
if
lo
:
min
=
int
(
lo
)
if
hi
:
max
=
int
(
hi
)
# FIXME: <fl> check that hi >= lo!
else
:
raise
SyntaxE
rror
,
"not supported"
raise
e
rror
,
"not supported"
# figure out which item to repeat
if
subpattern
:
item
=
subpattern
[
-
1
:]
else
:
raise
SyntaxE
rror
,
"nothing to repeat"
raise
e
rror
,
"nothing to repeat"
if
source
.
match
(
"?"
):
subpattern
[
-
1
]
=
(
MIN_REPEAT
,
(
min
,
max
,
item
))
else
:
...
...
@@ -418,7 +421,7 @@ def _parse(source, state, flags=0):
while
1
:
char
=
source
.
get
()
if
char
is
None
:
raise
SyntaxE
rror
,
"unterminated name"
raise
e
rror
,
"unterminated name"
if
char
==
">"
:
break
# FIXME: check for valid character
...
...
@@ -426,22 +429,21 @@ def _parse(source, state, flags=0):
group
=
1
elif
source
.
match
(
"="
):
# named backreference
raise
SyntaxError
,
"not yet implemented"
raise
error
,
"not yet implemented"
else
:
char
=
source
.
get
()
if
char
is
None
:
raise
SyntaxE
rror
,
"unexpected end of pattern"
raise
SyntaxE
rror
,
"unknown specifier: ?P%s"
%
char
raise
e
rror
,
"unexpected end of pattern"
raise
e
rror
,
"unknown specifier: ?P%s"
%
char
elif
source
.
match
(
":"
):
# non-capturing group
group
=
2
elif
source
.
match
(
"#"
):
# comment
while
1
:
char
=
source
.
get
()
if
char
is
None
or
char
==
")"
:
if
source
.
next
is
None
or
source
.
next
==
")"
:
break
source
.
get
()
else
:
# flags
while
FLAGS
.
has_key
(
source
.
next
):
...
...
@@ -465,13 +467,13 @@ def _parse(source, state, flags=0):
elif
source
.
match
(
"|"
):
b
.
append
(
p
)
else
:
raise
SyntaxE
rror
,
"group not properly closed"
raise
e
rror
,
"group not properly closed"
else
:
while
1
:
char
=
source
.
get
()
if
char
is
None
or
char
==
")"
:
break
# FIXME: skip characters?
raise
error
,
"unknown extension"
elif
this
==
"^"
:
subpattern
.
append
((
AT
,
AT_BEGINNING
))
...
...
@@ -484,7 +486,7 @@ def _parse(source, state, flags=0):
subpattern
.
append
(
code
)
else
:
raise
SyntaxE
rror
,
"parser error"
raise
e
rror
,
"parser error"
return
subpattern
...
...
@@ -499,17 +501,17 @@ def parse(pattern, flags=0):
if
tail
==
"|"
:
b
.
append
(
p
)
elif
tail
==
")"
:
raise
SyntaxE
rror
,
"unbalanced parenthesis"
raise
e
rror
,
"unbalanced parenthesis"
elif
tail
is
None
:
if
b
:
b
.
append
(
p
)
p
=
_branch
(
state
,
b
)
break
else
:
raise
SyntaxE
rror
,
"bogus characters at end of regular expression"
raise
e
rror
,
"bogus characters at end of regular expression"
return
p
def
parse_
replacement
(
source
,
pattern
):
def
parse_
template
(
source
,
pattern
):
# parse 're' replacement string into list of literals and
# group references
s
=
Tokenizer
(
source
)
...
...
@@ -520,15 +522,56 @@ def parse_replacement(source, pattern):
if
this
is
None
:
break
# end of replacement string
if
this
and
this
[
0
]
==
"
\
\
"
:
try
:
a
(
LITERAL
,
ESCAPES
[
this
])
except
KeyError
:
for
char
in
this
:
a
(
LITERAL
,
char
)
if
this
==
"
\
\
g"
:
name
=
""
if
s
.
match
(
"<"
):
while
1
:
char
=
s
.
get
()
if
char
is
None
:
raise
error
,
"unterminated index"
if
char
==
">"
:
break
# FIXME: check for valid character
name
=
name
+
char
if
not
name
:
raise
error
,
"bad index"
try
:
index
=
int
(
name
)
except
ValueError
:
try
:
index
=
pattern
.
groupindex
[
name
]
except
KeyError
:
raise
IndexError
,
"unknown index"
a
((
MARK
,
index
))
elif
len
(
this
)
>
1
and
this
[
1
]
in
DIGITS
:
while
s
.
next
in
DIGITS
:
this
=
this
+
s
.
get
()
a
((
MARK
,
int
(
this
[
1
:])))
else
:
try
:
a
(
ESCAPES
[
this
])
except
KeyError
:
for
char
in
this
:
a
((
LITERAL
,
char
))
else
:
a
(
LITERAL
,
this
)
a
(
(
LITERAL
,
this
)
)
return
p
def
expand_template
(
template
,
match
):
# FIXME: <fl> this is sooooo slow. drop in the slicelist
# code instead
p
=
[]
a
=
p
.
append
for
c
,
s
in
template
:
if
c
is
LITERAL
:
a
(
s
)
elif
c
is
MARK
:
s
=
match
.
group
(
s
)
if
s
is
None
:
raise
error
,
"empty group"
a
(
s
)
return
match
.
string
[:
0
].
join
(
p
)
if
__name__
==
"__main__"
:
from
pprint
import
pprint
from
testpatterns
import
PATTERNS
...
...
@@ -548,7 +591,7 @@ if __name__ == "__main__":
except
:
pass
a
=
a
+
1
except
SyntaxE
rror
,
v
:
except
e
rror
,
v
:
print
"**"
,
repr
(
pattern
),
v
b
=
b
+
1
print
"-"
*
68
...
...
Modules/_sre.c
View file @
436c3d58
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment