Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
5bec97e0
Commit
5bec97e0
authored
Jun 09, 2000
by
Andrew M. Kuchling
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Patch from /F: this patch brings the CVS version of SRE in sync with the
latest public snapshot.""
parent
2cf74f15
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
178 additions
and
118 deletions
+178
-118
Lib/sre_parse.py
Lib/sre_parse.py
+178
-118
No files found.
Lib/sre_parse.py
View file @
5bec97e0
...
@@ -2,9 +2,7 @@
...
@@ -2,9 +2,7 @@
# Secret Labs' Regular Expression Engine
# Secret Labs' Regular Expression Engine
# $Id$
# $Id$
#
#
# convert re-style regular expression to SRE template. the current
# convert re-style regular expression to sre pattern
# implementation is somewhat incomplete, and not very fast. should
# definitely be rewritten before Python 1.6 goes beta.
#
#
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
#
#
...
@@ -16,13 +14,16 @@
...
@@ -16,13 +14,16 @@
# other compatibility work.
# other compatibility work.
#
#
# FIXME: comments marked with the FIXME tag are open issues. all such
# issues should be closed before the final beta.
import
string
,
sys
import
string
,
sys
import
_sre
from
sre_constants
import
*
from
sre_constants
import
*
# FIXME: should be 65535, but the array module currently chokes on
# unsigned integers larger than 32767...
MAXREPEAT
=
int
(
2L
**
(
_sre
.
getcodesize
()
*
8
-
1
))
-
1
SPECIAL_CHARS
=
".
\
\
[{()*+?^$|"
SPECIAL_CHARS
=
".
\
\
[{()*+?^$|"
REPEAT_CHARS
=
"*+?{"
REPEAT_CHARS
=
"*+?{"
...
@@ -32,6 +33,8 @@ DIGITS = tuple(string.digits)
...
@@ -32,6 +33,8 @@ DIGITS = tuple(string.digits)
OCTDIGITS
=
tuple
(
"01234567"
)
OCTDIGITS
=
tuple
(
"01234567"
)
HEXDIGITS
=
tuple
(
"0123456789abcdefABCDEF"
)
HEXDIGITS
=
tuple
(
"0123456789abcdefABCDEF"
)
WHITESPACE
=
tuple
(
string
.
whitespace
)
ESCAPES
=
{
ESCAPES
=
{
"
\
\
a"
:
(
LITERAL
,
chr
(
7
)),
"
\
\
a"
:
(
LITERAL
,
chr
(
7
)),
"
\
\
b"
:
(
LITERAL
,
chr
(
8
)),
"
\
\
b"
:
(
LITERAL
,
chr
(
8
)),
...
@@ -55,10 +58,18 @@ CATEGORIES = {
...
@@ -55,10 +58,18 @@ CATEGORIES = {
"
\
\
Z"
:
(
AT
,
AT_END
),
# end of string
"
\
\
Z"
:
(
AT
,
AT_END
),
# end of string
}
}
class
Pattern
:
FLAGS
=
{
# FIXME: <fl> rename class, and store flags in here too!
"i"
:
SRE_FLAG_IGNORECASE
,
"L"
:
SRE_FLAG_LOCALE
,
"m"
:
SRE_FLAG_MULTILINE
,
"s"
:
SRE_FLAG_DOTALL
,
"t"
:
SRE_FLAG_TEMPLATE
,
"x"
:
SRE_FLAG_VERBOSE
,
}
class
State
:
def
__init__
(
self
):
def
__init__
(
self
):
self
.
flags
=
[]
self
.
flags
=
0
self
.
groups
=
1
self
.
groups
=
1
self
.
groupdict
=
{}
self
.
groupdict
=
{}
def
getgroup
(
self
,
name
=
None
):
def
getgroup
(
self
,
name
=
None
):
...
@@ -67,9 +78,6 @@ class Pattern:
...
@@ -67,9 +78,6 @@ class Pattern:
if
name
:
if
name
:
self
.
groupdict
[
name
]
=
gid
self
.
groupdict
[
name
]
=
gid
return
gid
return
gid
def
setflag
(
self
,
flag
):
if
flag
in
self
.
flags
:
self
.
flags
.
append
(
flag
)
class
SubPattern
:
class
SubPattern
:
# a subpattern, in intermediate form
# a subpattern, in intermediate form
...
@@ -78,7 +86,6 @@ class SubPattern:
...
@@ -78,7 +86,6 @@ class SubPattern:
if
not
data
:
if
not
data
:
data
=
[]
data
=
[]
self
.
data
=
data
self
.
data
=
data
self
.
flags
=
[]
self
.
width
=
None
self
.
width
=
None
def
__repr__
(
self
):
def
__repr__
(
self
):
return
repr
(
self
.
data
)
return
repr
(
self
.
data
)
...
@@ -121,8 +128,8 @@ class SubPattern:
...
@@ -121,8 +128,8 @@ class SubPattern:
hi
=
hi
+
j
hi
=
hi
+
j
elif
op
in
(
MIN_REPEAT
,
MAX_REPEAT
):
elif
op
in
(
MIN_REPEAT
,
MAX_REPEAT
):
i
,
j
=
av
[
2
].
getwidth
()
i
,
j
=
av
[
2
].
getwidth
()
lo
=
lo
+
i
*
av
[
0
]
lo
=
lo
+
long
(
i
)
*
av
[
0
]
hi
=
hi
+
j
*
av
[
1
]
hi
=
hi
+
long
(
j
)
*
av
[
1
]
elif
op
in
(
ANY
,
RANGE
,
IN
,
LITERAL
,
NOT_LITERAL
,
CATEGORY
):
elif
op
in
(
ANY
,
RANGE
,
IN
,
LITERAL
,
NOT_LITERAL
,
CATEGORY
):
lo
=
lo
+
1
lo
=
lo
+
1
hi
=
hi
+
1
hi
=
hi
+
1
...
@@ -130,47 +137,23 @@ class SubPattern:
...
@@ -130,47 +137,23 @@ class SubPattern:
break
break
self
.
width
=
int
(
min
(
lo
,
sys
.
maxint
)),
int
(
min
(
hi
,
sys
.
maxint
))
self
.
width
=
int
(
min
(
lo
,
sys
.
maxint
)),
int
(
min
(
hi
,
sys
.
maxint
))
return
self
.
width
return
self
.
width
def
set
(
self
,
flag
):
if
not
flag
in
self
.
flags
:
self
.
flags
.
append
(
flag
)
def
reset
(
self
,
flag
):
if
flag
in
self
.
flags
:
self
.
flags
.
remove
(
flag
)
class
Tokenizer
:
class
Tokenizer
:
def
__init__
(
self
,
string
):
def
__init__
(
self
,
string
):
self
.
string
=
list
(
string
)
self
.
index
=
0
self
.
string
=
string
self
.
next
=
self
.
__next
()
self
.
next
=
self
.
__next
()
def
__next
(
self
):
def
__next
(
self
):
if
not
self
.
string
:
if
self
.
index
>=
len
(
self
.
string
)
:
return
None
return
None
char
=
self
.
string
[
0
]
char
=
self
.
string
[
self
.
index
]
if
char
[
0
]
==
"
\
\
"
:
if
char
[
0
]
==
"
\
\
"
:
try
:
try
:
c
=
self
.
string
[
1
]
c
=
self
.
string
[
self
.
index
+
1
]
except
IndexError
:
except
IndexError
:
raise
SyntaxError
,
"bogus escape"
raise
SyntaxError
,
"bogus escape"
char
=
char
+
c
char
=
char
+
c
try
:
self
.
index
=
self
.
index
+
len
(
char
)
if
c
==
"x"
:
# hexadecimal constant
for
i
in
xrange
(
2
,
sys
.
maxint
):
c
=
self
.
string
[
i
]
if
str
(
c
)
not
in
HEXDIGITS
:
break
char
=
char
+
c
elif
str
(
c
)
in
DIGITS
:
# decimal (or octal) number
for
i
in
xrange
(
2
,
sys
.
maxint
):
c
=
self
.
string
[
i
]
# FIXME: if larger than current number of
# groups, interpret as an octal number
if
str
(
c
)
not
in
DIGITS
:
break
char
=
char
+
c
except
IndexError
:
pass
# use what we've got this far
del
self
.
string
[
0
:
len
(
char
)]
return
char
return
char
def
match
(
self
,
char
):
def
match
(
self
,
char
):
if
char
==
self
.
next
:
if
char
==
self
.
next
:
...
@@ -187,46 +170,87 @@ class Tokenizer:
...
@@ -187,46 +170,87 @@ class Tokenizer:
self
.
next
=
self
.
__next
()
self
.
next
=
self
.
__next
()
return
this
return
this
def
_fixescape
(
escape
,
character_class
=
0
):
def
_group
(
escape
,
state
):
# convert escape to (type, value)
# check if the escape string represents a valid group
if
character_class
:
try
:
# inside a character class, we'll look in the character
group
=
int
(
escape
[
1
:])
# escapes dictionary first
if
group
and
group
<
state
.
groups
:
code
=
ESCAPES
.
get
(
escape
)
return
group
if
code
:
except
ValueError
:
return
code
pass
code
=
CATEGORIES
.
get
(
escape
)
return
None
# not a valid group
else
:
code
=
CATEGORIES
.
get
(
escape
)
def
_class_escape
(
source
,
escape
):
if
code
:
# handle escape code inside character class
return
code
code
=
ESCAPES
.
get
(
escape
)
code
=
ESCAPES
.
get
(
escape
)
if
code
:
return
code
code
=
CATEGORIES
.
get
(
escape
)
if
code
:
return
code
try
:
if
escape
[
1
:
2
]
==
"x"
:
while
source
.
next
in
HEXDIGITS
:
escape
=
escape
+
source
.
get
()
escape
=
escape
[
2
:]
# FIXME: support unicode characters!
return
LITERAL
,
chr
(
int
(
escape
[
-
4
:],
16
)
&
0xff
)
elif
str
(
escape
[
1
:
2
])
in
OCTDIGITS
:
while
source
.
next
in
OCTDIGITS
:
escape
=
escape
+
source
.
get
()
escape
=
escape
[
1
:]
# FIXME: support unicode characters!
return
LITERAL
,
chr
(
int
(
escape
[
-
6
:],
8
)
&
0xff
)
if
len
(
escape
)
==
2
:
return
LITERAL
,
escape
[
1
]
except
ValueError
:
pass
raise
SyntaxError
,
"bogus escape: %s"
%
repr
(
escape
)
def
_escape
(
source
,
escape
,
state
):
# handle escape code in expression
code
=
CATEGORIES
.
get
(
escape
)
if
code
:
return
code
code
=
ESCAPES
.
get
(
escape
)
if
code
:
if
code
:
return
code
return
code
if
not
character_class
:
try
:
group
=
int
(
escape
[
1
:])
# FIXME: only valid if group <= current number of groups
return
GROUP
,
group
except
ValueError
:
pass
try
:
try
:
if
escape
[
1
:
2
]
==
"x"
:
if
escape
[
1
:
2
]
==
"x"
:
while
source
.
next
in
HEXDIGITS
:
escape
=
escape
+
source
.
get
()
escape
=
escape
[
2
:]
escape
=
escape
[
2
:]
return
LITERAL
,
chr
(
int
(
escape
[
-
2
:],
16
)
&
0xff
)
# FIXME: support unicode characters!
return
LITERAL
,
chr
(
int
(
escape
[
-
4
:],
16
)
&
0xff
)
elif
str
(
escape
[
1
:
2
])
in
DIGITS
:
elif
str
(
escape
[
1
:
2
])
in
DIGITS
:
return
LITERAL
,
chr
(
int
(
escape
[
1
:],
8
)
&
0xff
)
while
1
:
elif
len
(
escape
)
==
2
:
group
=
_group
(
escape
,
state
)
if
group
:
if
(
not
source
.
next
or
not
_group
(
escape
+
source
.
next
,
state
)):
return
GROUP
,
group
escape
=
escape
+
source
.
get
()
elif
source
.
next
in
OCTDIGITS
:
escape
=
escape
+
source
.
get
()
else
:
break
escape
=
escape
[
1
:]
# FIXME: support unicode characters!
return
LITERAL
,
chr
(
int
(
escape
[
-
6
:],
8
)
&
0xff
)
if
len
(
escape
)
==
2
:
return
LITERAL
,
escape
[
1
]
return
LITERAL
,
escape
[
1
]
except
ValueError
:
except
ValueError
:
pass
pass
raise
SyntaxError
,
"bogus escape: %s"
%
repr
(
escape
)
raise
SyntaxError
,
"bogus escape: %s"
%
repr
(
escape
)
def
_branch
(
subpattern
,
items
):
def
_branch
(
pattern
,
items
):
# form a branch operator from a set of items (FIXME: move this
# form a branch operator from a set of items (FIXME: move this
# optimization to the compiler module!)
# optimization to the compiler module!)
subpattern
=
SubPattern
(
pattern
)
# check if all items share a common prefix
# check if all items share a common prefix
while
1
:
while
1
:
prefix
=
None
prefix
=
None
...
@@ -257,17 +281,16 @@ def _branch(subpattern, items):
...
@@ -257,17 +281,16 @@ def _branch(subpattern, items):
for
item
in
items
:
for
item
in
items
:
set
.
append
(
item
[
0
])
set
.
append
(
item
[
0
])
subpattern
.
append
((
IN
,
set
))
subpattern
.
append
((
IN
,
set
))
return
return
subpattern
subpattern
.
append
((
BRANCH
,
(
None
,
items
)))
subpattern
.
append
((
BRANCH
,
(
None
,
items
)))
return
subpattern
def
_parse
(
source
,
pattern
,
flags
=
()
):
def
_parse
(
source
,
state
,
flags
=
0
):
# parse regular expression pattern into an operator list.
# parse regular expression pattern into an operator list.
subpattern
=
SubPattern
(
pattern
)
subpattern
=
SubPattern
(
state
)
this
=
None
while
1
:
while
1
:
...
@@ -277,6 +300,17 @@ def _parse(source, pattern, flags=()):
...
@@ -277,6 +300,17 @@ def _parse(source, pattern, flags=()):
if
this
is
None
:
if
this
is
None
:
break
# end of pattern
break
# end of pattern
if
state
.
flags
&
SRE_FLAG_VERBOSE
:
# skip whitespace and comments
if
this
in
WHITESPACE
:
continue
if
this
==
"#"
:
while
1
:
this
=
source
.
get
()
if
this
in
(
None
,
"
\
n
"
):
break
continue
if
this
and
this
[
0
]
not
in
SPECIAL_CHARS
:
if
this
and
this
[
0
]
not
in
SPECIAL_CHARS
:
subpattern
.
append
((
LITERAL
,
this
))
subpattern
.
append
((
LITERAL
,
this
))
...
@@ -294,7 +328,7 @@ def _parse(source, pattern, flags=()):
...
@@ -294,7 +328,7 @@ def _parse(source, pattern, flags=()):
if
this
==
"]"
and
set
!=
start
:
if
this
==
"]"
and
set
!=
start
:
break
break
elif
this
and
this
[
0
]
==
"
\
\
"
:
elif
this
and
this
[
0
]
==
"
\
\
"
:
code1
=
_
fixescape
(
this
,
1
)
code1
=
_
class_escape
(
source
,
this
)
elif
this
:
elif
this
:
code1
=
LITERAL
,
this
code1
=
LITERAL
,
this
else
:
else
:
...
@@ -308,7 +342,7 @@ def _parse(source, pattern, flags=()):
...
@@ -308,7 +342,7 @@ def _parse(source, pattern, flags=()):
break
break
else
:
else
:
if
this
[
0
]
==
"
\
\
"
:
if
this
[
0
]
==
"
\
\
"
:
code2
=
_
fixescape
(
this
,
1
)
code2
=
_
class_escape
(
source
,
this
)
else
:
else
:
code2
=
LITERAL
,
this
code2
=
LITERAL
,
this
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
...
@@ -321,7 +355,7 @@ def _parse(source, pattern, flags=()):
...
@@ -321,7 +355,7 @@ def _parse(source, pattern, flags=()):
code1
=
code1
[
1
][
0
]
code1
=
code1
[
1
][
0
]
set
.
append
(
code1
)
set
.
append
(
code1
)
# FIXME: <fl> move set optimization to
support function
# FIXME: <fl> move set optimization to
compiler!
if
len
(
set
)
==
1
and
set
[
0
][
0
]
is
LITERAL
:
if
len
(
set
)
==
1
and
set
[
0
][
0
]
is
LITERAL
:
subpattern
.
append
(
set
[
0
])
# optimization
subpattern
.
append
(
set
[
0
])
# optimization
elif
len
(
set
)
==
2
and
set
[
0
][
0
]
is
NEGATE
and
set
[
1
][
0
]
is
LITERAL
:
elif
len
(
set
)
==
2
and
set
[
0
][
0
]
is
NEGATE
and
set
[
1
][
0
]
is
LITERAL
:
...
@@ -335,11 +369,11 @@ def _parse(source, pattern, flags=()):
...
@@ -335,11 +369,11 @@ def _parse(source, pattern, flags=()):
if
this
==
"?"
:
if
this
==
"?"
:
min
,
max
=
0
,
1
min
,
max
=
0
,
1
elif
this
==
"*"
:
elif
this
==
"*"
:
min
,
max
=
0
,
sys
.
maxint
min
,
max
=
0
,
MAXREPEAT
elif
this
==
"+"
:
elif
this
==
"+"
:
min
,
max
=
1
,
sys
.
maxint
min
,
max
=
1
,
MAXREPEAT
elif
this
==
"{"
:
elif
this
==
"{"
:
min
,
max
=
0
,
sys
.
maxint
min
,
max
=
0
,
MAXREPEAT
lo
=
hi
=
""
lo
=
hi
=
""
while
str
(
source
.
next
)
in
DIGITS
:
while
str
(
source
.
next
)
in
DIGITS
:
lo
=
lo
+
source
.
get
()
lo
=
lo
+
source
.
get
()
...
@@ -358,20 +392,18 @@ def _parse(source, pattern, flags=()):
...
@@ -358,20 +392,18 @@ def _parse(source, pattern, flags=()):
else
:
else
:
raise
SyntaxError
,
"not supported"
raise
SyntaxError
,
"not supported"
# figure out which item to repeat
# figure out which item to repeat
# FIXME: should back up to the right mark, right?
if
subpattern
:
if
subpattern
:
index
=
len
(
subpattern
)
-
1
item
=
subpattern
[
-
1
:]
while
subpattern
[
index
][
0
]
is
MARK
:
index
=
index
-
1
item
=
subpattern
[
index
:
index
+
1
]
else
:
else
:
raise
SyntaxError
,
"nothing to repeat"
raise
SyntaxError
,
"nothing to repeat"
if
source
.
match
(
"?"
):
if
source
.
match
(
"?"
):
subpattern
[
index
]
=
(
MIN_REPEAT
,
(
min
,
max
,
item
))
subpattern
[
-
1
]
=
(
MIN_REPEAT
,
(
min
,
max
,
item
))
else
:
else
:
subpattern
[
index
]
=
(
MAX_REPEAT
,
(
min
,
max
,
item
))
subpattern
[
-
1
]
=
(
MAX_REPEAT
,
(
min
,
max
,
item
))
elif
this
==
"."
:
elif
this
==
"."
:
subpattern
.
append
((
ANY
,
None
))
subpattern
.
append
((
ANY
,
None
))
elif
this
==
"("
:
elif
this
==
"("
:
group
=
1
group
=
1
name
=
None
name
=
None
...
@@ -379,28 +411,41 @@ def _parse(source, pattern, flags=()):
...
@@ -379,28 +411,41 @@ def _parse(source, pattern, flags=()):
group
=
0
group
=
0
# options
# options
if
source
.
match
(
"P"
):
if
source
.
match
(
"P"
):
#
named group: skip forward to end of name
#
python extensions
if
source
.
match
(
"<"
):
if
source
.
match
(
"<"
):
# named group: skip forward to end of name
name
=
""
name
=
""
while
1
:
while
1
:
char
=
source
.
get
()
char
=
source
.
get
()
if
char
is
None
or
char
==
">"
:
if
char
is
None
:
raise
SyntaxError
,
"unterminated name"
if
char
==
">"
:
break
break
# FIXME: check for valid character
name
=
name
+
char
name
=
name
+
char
group
=
1
group
=
1
elif
source
.
match
(
"="
):
# named backreference
raise
SyntaxError
,
"not yet implemented"
else
:
char
=
source
.
get
()
if
char
is
None
:
raise
SyntaxError
,
"unexpected end of pattern"
raise
SyntaxError
,
"unknown specifier: ?P%s"
%
char
elif
source
.
match
(
":"
):
elif
source
.
match
(
":"
):
# non-capturing group
# non-capturing group
group
=
2
group
=
2
elif
source
.
match
_set
(
"iI
"
):
elif
source
.
match
(
"#
"
):
pattern
.
setflag
(
"i"
)
# comment
elif
source
.
match_set
(
"lL"
)
:
while
1
:
pattern
.
setflag
(
"l"
)
char
=
source
.
get
(
)
elif
source
.
match_set
(
"mM"
)
:
if
char
is
None
or
char
==
")"
:
pattern
.
setflag
(
"m"
)
break
el
if
source
.
match_set
(
"sS"
)
:
el
se
:
pattern
.
setflag
(
"s"
)
# flags
elif
source
.
match_set
(
"xX"
):
while
FLAGS
.
has_key
(
source
.
next
):
pattern
.
setflag
(
"x"
)
state
.
flags
=
state
.
flags
|
FLAGS
[
source
.
get
()]
if
group
:
if
group
:
# parse group contents
# parse group contents
b
=
[]
b
=
[]
...
@@ -408,30 +453,25 @@ def _parse(source, pattern, flags=()):
...
@@ -408,30 +453,25 @@ def _parse(source, pattern, flags=()):
# anonymous group
# anonymous group
group
=
None
group
=
None
else
:
else
:
group
=
pattern
.
getgroup
(
name
)
group
=
state
.
getgroup
(
name
)
if
group
:
subpattern
.
append
((
MARK
,
(
group
-
1
)
*
2
))
while
1
:
while
1
:
p
=
_parse
(
source
,
pattern
,
flags
)
p
=
_parse
(
source
,
state
,
flags
)
if
source
.
match
(
")"
):
if
source
.
match
(
")"
):
if
b
:
if
b
:
b
.
append
(
p
)
b
.
append
(
p
)
_branch
(
subpattern
,
b
)
p
=
_branch
(
state
,
b
)
else
:
subpattern
.
append
((
SUBPATTERN
,
(
group
,
p
)))
subpattern
.
append
((
SUBPATTERN
,
(
group
,
p
)))
break
break
elif
source
.
match
(
"|"
):
elif
source
.
match
(
"|"
):
b
.
append
(
p
)
b
.
append
(
p
)
else
:
else
:
raise
SyntaxError
,
"group not properly closed"
raise
SyntaxError
,
"group not properly closed"
if
group
:
subpattern
.
append
((
MARK
,
(
group
-
1
)
*
2
+
1
))
else
:
else
:
# FIXME: should this really be a while loop?
while
1
:
while
1
:
char
=
source
.
get
()
char
=
source
.
get
()
if
char
is
None
or
char
==
")"
:
if
char
is
None
or
char
==
")"
:
break
break
# FIXME: skip characters?
elif
this
==
"^"
:
elif
this
==
"^"
:
subpattern
.
append
((
AT
,
AT_BEGINNING
))
subpattern
.
append
((
AT
,
AT_BEGINNING
))
...
@@ -440,7 +480,7 @@ def _parse(source, pattern, flags=()):
...
@@ -440,7 +480,7 @@ def _parse(source, pattern, flags=()):
subpattern
.
append
((
AT
,
AT_END
))
subpattern
.
append
((
AT
,
AT_END
))
elif
this
and
this
[
0
]
==
"
\
\
"
:
elif
this
and
this
[
0
]
==
"
\
\
"
:
code
=
_fixescape
(
this
)
code
=
_escape
(
source
,
this
,
state
)
subpattern
.
append
(
code
)
subpattern
.
append
(
code
)
else
:
else
:
...
@@ -448,13 +488,14 @@ def _parse(source, pattern, flags=()):
...
@@ -448,13 +488,14 @@ def _parse(source, pattern, flags=()):
return
subpattern
return
subpattern
def
parse
(
source
,
flags
=
()):
def
parse
(
pattern
,
flags
=
0
):
s
=
Tokenizer
(
source
)
# parse 're' pattern into list of (opcode, argument) tuples
g
=
Pattern
()
source
=
Tokenizer
(
pattern
)
state
=
State
()
b
=
[]
b
=
[]
while
1
:
while
1
:
p
=
_parse
(
s
,
g
,
flags
)
p
=
_parse
(
s
ource
,
state
,
flags
)
tail
=
s
.
get
()
tail
=
s
ource
.
get
()
if
tail
==
"|"
:
if
tail
==
"|"
:
b
.
append
(
p
)
b
.
append
(
p
)
elif
tail
==
")"
:
elif
tail
==
")"
:
...
@@ -462,13 +503,32 @@ def parse(source, flags=()):
...
@@ -462,13 +503,32 @@ def parse(source, flags=()):
elif
tail
is
None
:
elif
tail
is
None
:
if
b
:
if
b
:
b
.
append
(
p
)
b
.
append
(
p
)
p
=
SubPattern
(
g
)
p
=
_branch
(
state
,
b
)
_branch
(
p
,
b
)
break
break
else
:
else
:
raise
SyntaxError
,
"bogus characters at end of regular expression"
raise
SyntaxError
,
"bogus characters at end of regular expression"
return
p
return
p
def
parse_replacement
(
source
,
pattern
):
# parse 're' replacement string into list of literals and
# group references
s
=
Tokenizer
(
source
)
p
=
[]
a
=
p
.
append
while
1
:
this
=
s
.
get
()
if
this
is
None
:
break
# end of replacement string
if
this
and
this
[
0
]
==
"
\
\
"
:
try
:
a
(
LITERAL
,
ESCAPES
[
this
])
except
KeyError
:
for
char
in
this
:
a
(
LITERAL
,
char
)
else
:
a
(
LITERAL
,
this
)
return
p
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
from
pprint
import
pprint
from
pprint
import
pprint
from
testpatterns
import
PATTERNS
from
testpatterns
import
PATTERNS
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment