Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
8f417748
Commit
8f417748
authored
Apr 02, 2000
by
Andrew M. Kuchling
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
This patch looks large, but it just deletes the ^M characters and
untabifies the files. No actual code changes were made.
parent
bd83b7ee
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
482 additions
and
482 deletions
+482
-482
Lib/sre_compile.py
Lib/sre_compile.py
+124
-124
Lib/sre_constants.py
Lib/sre_constants.py
+1
-1
Lib/sre_parse.py
Lib/sre_parse.py
+357
-357
No files found.
Lib/sre_compile.py
View file @
8f417748
...
@@ -26,7 +26,7 @@ from sre_constants import *
...
@@ -26,7 +26,7 @@ from sre_constants import *
# find an array type code that matches the engine's code size
# find an array type code that matches the engine's code size
for
WORDSIZE
in
"BHil"
:
for
WORDSIZE
in
"BHil"
:
if
len
(
array
.
array
(
WORDSIZE
,
[
0
]).
tostring
())
==
_sre
.
getcodesize
():
if
len
(
array
.
array
(
WORDSIZE
,
[
0
]).
tostring
())
==
_sre
.
getcodesize
():
break
break
else
:
else
:
raise
RuntimeError
,
"cannot find a useable array type"
raise
RuntimeError
,
"cannot find a useable array type"
...
@@ -34,18 +34,18 @@ else:
...
@@ -34,18 +34,18 @@ else:
class
Code
:
class
Code
:
def
__init__
(
self
):
def
__init__
(
self
):
self
.
data
=
[]
self
.
data
=
[]
def
__len__
(
self
):
def
__len__
(
self
):
return
len
(
self
.
data
)
return
len
(
self
.
data
)
def
__getitem__
(
self
,
index
):
def
__getitem__
(
self
,
index
):
return
self
.
data
[
index
]
return
self
.
data
[
index
]
def
__setitem__
(
self
,
index
,
code
):
def
__setitem__
(
self
,
index
,
code
):
self
.
data
[
index
]
=
code
self
.
data
[
index
]
=
code
def
append
(
self
,
code
):
def
append
(
self
,
code
):
self
.
data
.
append
(
code
)
self
.
data
.
append
(
code
)
def
todata
(
self
):
def
todata
(
self
):
# print self.data
# print self.data
return
array
.
array
(
WORDSIZE
,
self
.
data
).
tostring
()
return
array
.
array
(
WORDSIZE
,
self
.
data
).
tostring
()
def
_lower
(
literal
):
def
_lower
(
literal
):
# return _sre._lower(literal) # FIXME
# return _sre._lower(literal) # FIXME
...
@@ -54,122 +54,122 @@ def _lower(literal):
...
@@ -54,122 +54,122 @@ def _lower(literal):
def
_compile
(
code
,
pattern
,
flags
):
def
_compile
(
code
,
pattern
,
flags
):
append
=
code
.
append
append
=
code
.
append
for
op
,
av
in
pattern
:
for
op
,
av
in
pattern
:
if
op
is
ANY
:
if
op
is
ANY
:
if
"s"
in
flags
:
if
"s"
in
flags
:
append
(
CODES
[
op
])
# any character at all!
append
(
CODES
[
op
])
# any character at all!
else
:
else
:
append
(
CODES
[
NOT_LITERAL
])
append
(
CODES
[
NOT_LITERAL
])
append
(
10
)
append
(
10
)
elif
op
in
(
SUCCESS
,
FAILURE
):
elif
op
in
(
SUCCESS
,
FAILURE
):
append
(
CODES
[
op
])
append
(
CODES
[
op
])
elif
op
is
AT
:
elif
op
is
AT
:
append
(
CODES
[
op
])
append
(
CODES
[
op
])
append
(
POSITIONS
[
av
])
append
(
POSITIONS
[
av
])
elif
op
is
BRANCH
:
elif
op
is
BRANCH
:
append
(
CODES
[
op
])
append
(
CODES
[
op
])
tail
=
[]
tail
=
[]
for
av
in
av
[
1
]:
for
av
in
av
[
1
]:
skip
=
len
(
code
);
append
(
0
)
skip
=
len
(
code
);
append
(
0
)
_compile
(
code
,
av
,
flags
)
_compile
(
code
,
av
,
flags
)
append
(
CODES
[
JUMP
])
append
(
CODES
[
JUMP
])
tail
.
append
(
len
(
code
));
append
(
0
)
tail
.
append
(
len
(
code
));
append
(
0
)
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
append
(
0
)
# end of branch
append
(
0
)
# end of branch
for
tail
in
tail
:
for
tail
in
tail
:
code
[
tail
]
=
len
(
code
)
-
tail
code
[
tail
]
=
len
(
code
)
-
tail
elif
op
is
CALL
:
elif
op
is
CALL
:
append
(
CODES
[
op
])
append
(
CODES
[
op
])
skip
=
len
(
code
);
append
(
0
)
skip
=
len
(
code
);
append
(
0
)
_compile
(
code
,
av
,
flags
)
_compile
(
code
,
av
,
flags
)
append
(
CODES
[
SUCCESS
])
append
(
CODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
elif
op
is
CATEGORY
:
# not used by current parser
elif
op
is
CATEGORY
:
# not used by current parser
append
(
CODES
[
op
])
append
(
CODES
[
op
])
append
(
CATEGORIES
[
av
])
append
(
CATEGORIES
[
av
])
elif
op
is
GROUP
:
elif
op
is
GROUP
:
if
"i"
in
flags
:
if
"i"
in
flags
:
append
(
CODES
[
MAP_IGNORE
[
op
]])
append
(
CODES
[
MAP_IGNORE
[
op
]])
else
:
else
:
append
(
CODES
[
op
])
append
(
CODES
[
op
])
append
(
av
)
append
(
av
)
elif
op
is
IN
:
elif
op
is
IN
:
if
"i"
in
flags
:
if
"i"
in
flags
:
append
(
CODES
[
MAP_IGNORE
[
op
]])
append
(
CODES
[
MAP_IGNORE
[
op
]])
def
fixup
(
literal
):
def
fixup
(
literal
):
return
ord
(
_lower
(
literal
))
return
ord
(
_lower
(
literal
))
else
:
else
:
append
(
CODES
[
op
])
append
(
CODES
[
op
])
fixup
=
ord
fixup
=
ord
skip
=
len
(
code
);
append
(
0
)
skip
=
len
(
code
);
append
(
0
)
for
op
,
av
in
av
:
for
op
,
av
in
av
:
append
(
CODES
[
op
])
append
(
CODES
[
op
])
if
op
is
NEGATE
:
if
op
is
NEGATE
:
pass
pass
elif
op
is
LITERAL
:
elif
op
is
LITERAL
:
append
(
fixup
(
av
))
append
(
fixup
(
av
))
elif
op
is
RANGE
:
elif
op
is
RANGE
:
append
(
fixup
(
av
[
0
]))
append
(
fixup
(
av
[
0
]))
append
(
fixup
(
av
[
1
]))
append
(
fixup
(
av
[
1
]))
elif
op
is
CATEGORY
:
elif
op
is
CATEGORY
:
append
(
CATEGORIES
[
av
])
append
(
CATEGORIES
[
av
])
else
:
else
:
raise
ValueError
,
"unsupported set operator"
raise
ValueError
,
"unsupported set operator"
append
(
CODES
[
FAILURE
])
append
(
CODES
[
FAILURE
])
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
elif
op
in
(
LITERAL
,
NOT_LITERAL
):
elif
op
in
(
LITERAL
,
NOT_LITERAL
):
if
"i"
in
flags
:
if
"i"
in
flags
:
append
(
CODES
[
MAP_IGNORE
[
op
]])
append
(
CODES
[
MAP_IGNORE
[
op
]])
append
(
ord
(
_lower
(
av
)))
append
(
ord
(
_lower
(
av
)))
else
:
else
:
append
(
CODES
[
op
])
append
(
CODES
[
op
])
append
(
ord
(
av
))
append
(
ord
(
av
))
elif
op
is
MARK
:
elif
op
is
MARK
:
append
(
CODES
[
op
])
append
(
CODES
[
op
])
append
(
av
)
append
(
av
)
elif
op
in
(
REPEAT
,
MIN_REPEAT
,
MAX_REPEAT
):
elif
op
in
(
REPEAT
,
MIN_REPEAT
,
MAX_REPEAT
):
lo
,
hi
=
av
[
2
].
getwidth
()
lo
,
hi
=
av
[
2
].
getwidth
()
if
lo
==
0
:
if
lo
==
0
:
raise
SyntaxError
,
"cannot repeat zero-width items"
raise
SyntaxError
,
"cannot repeat zero-width items"
if
lo
==
hi
==
1
and
op
is
MAX_REPEAT
:
if
lo
==
hi
==
1
and
op
is
MAX_REPEAT
:
append
(
CODES
[
MAX_REPEAT_ONE
])
append
(
CODES
[
MAX_REPEAT_ONE
])
skip
=
len
(
code
);
append
(
0
)
skip
=
len
(
code
);
append
(
0
)
append
(
av
[
0
])
append
(
av
[
0
])
append
(
av
[
1
])
append
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
)
_compile
(
code
,
av
[
2
],
flags
)
append
(
CODES
[
SUCCESS
])
append
(
CODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
else
:
else
:
append
(
CODES
[
op
])
append
(
CODES
[
op
])
skip
=
len
(
code
);
append
(
0
)
skip
=
len
(
code
);
append
(
0
)
append
(
av
[
0
])
append
(
av
[
0
])
append
(
av
[
1
])
append
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
)
_compile
(
code
,
av
[
2
],
flags
)
if
op
is
MIN_REPEAT
:
if
op
is
MIN_REPEAT
:
append
(
CODES
[
MIN_UNTIL
])
append
(
CODES
[
MIN_UNTIL
])
else
:
else
:
# FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
# FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
append
(
CODES
[
MAX_UNTIL
])
append
(
CODES
[
MAX_UNTIL
])
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
elif
op
is
SUBPATTERN
:
elif
op
is
SUBPATTERN
:
##
group = av[0]
##
group = av[0]
##
if group:
##
if group:
##
append(CODES[MARK])
##
append(CODES[MARK])
##
append((group-1)*2)
##
append((group-1)*2)
_compile
(
code
,
av
[
1
],
flags
)
_compile
(
code
,
av
[
1
],
flags
)
##
if group:
##
if group:
##
append(CODES[MARK])
##
append(CODES[MARK])
##
append((group-1)*2+1)
##
append((group-1)*2+1)
else
:
else
:
raise
ValueError
,
(
"unsupported operand type"
,
op
)
raise
ValueError
,
(
"unsupported operand type"
,
op
)
def
compile
(
p
,
flags
=
()):
def
compile
(
p
,
flags
=
()):
# convert pattern list to internal format
# convert pattern list to internal format
if
type
(
p
)
is
type
(
""
):
if
type
(
p
)
is
type
(
""
):
import
sre_parse
import
sre_parse
pattern
=
p
pattern
=
p
p
=
sre_parse
.
parse
(
p
)
p
=
sre_parse
.
parse
(
p
)
else
:
else
:
pattern
=
None
pattern
=
None
# print p.getwidth()
# print p.getwidth()
# print p
# print p
code
=
Code
()
code
=
Code
()
...
@@ -178,10 +178,10 @@ def compile(p, flags=()):
...
@@ -178,10 +178,10 @@ def compile(p, flags=()):
# print list(code.data)
# print list(code.data)
data
=
code
.
todata
()
data
=
code
.
todata
()
if
0
:
# debugging
if
0
:
# debugging
print
print
print
"-"
*
68
print
"-"
*
68
import
sre_disasm
import
sre_disasm
sre_disasm
.
disasm
(
data
)
sre_disasm
.
disasm
(
data
)
print
"-"
*
68
print
"-"
*
68
# print len(data), p.pattern.groups, len(p.pattern.groupdict)
# print len(data), p.pattern.groups, len(p.pattern.groupdict)
return
_sre
.
compile
(
pattern
,
data
,
p
.
pattern
.
groups
-
1
,
p
.
pattern
.
groupdict
)
return
_sre
.
compile
(
pattern
,
data
,
p
.
pattern
.
groups
-
1
,
p
.
pattern
.
groupdict
)
Lib/sre_constants.py
View file @
8f417748
...
@@ -126,6 +126,6 @@ if __name__ == "__main__":
...
@@ -126,6 +126,6 @@ if __name__ == "__main__":
f
=
open
(
"sre_constants.h"
,
"w"
)
f
=
open
(
"sre_constants.h"
,
"w"
)
f
.
write
(
"/* generated by sre_constants.py */
\
n
"
)
f
.
write
(
"/* generated by sre_constants.py */
\
n
"
)
for
k
,
v
in
items
:
for
k
,
v
in
items
:
f
.
write
(
"#define SRE_OP_"
+
string
.
upper
(
k
)
+
" "
+
str
(
v
)
+
"
\
n
"
)
f
.
write
(
"#define SRE_OP_"
+
string
.
upper
(
k
)
+
" "
+
str
(
v
)
+
"
\
n
"
)
f
.
close
()
f
.
close
()
print
"done"
print
"done"
Lib/sre_parse.py
View file @
8f417748
...
@@ -55,168 +55,168 @@ CATEGORIES = {
...
@@ -55,168 +55,168 @@ CATEGORIES = {
class
Pattern
:
class
Pattern
:
# FIXME: <fl> rename class, and store flags in here too!
# FIXME: <fl> rename class, and store flags in here too!
def
__init__
(
self
):
def
__init__
(
self
):
self
.
flags
=
[]
self
.
flags
=
[]
self
.
groups
=
1
self
.
groups
=
1
self
.
groupdict
=
{}
self
.
groupdict
=
{}
def
getgroup
(
self
,
name
=
None
):
def
getgroup
(
self
,
name
=
None
):
gid
=
self
.
groups
gid
=
self
.
groups
self
.
groups
=
gid
+
1
self
.
groups
=
gid
+
1
if
name
:
if
name
:
self
.
groupdict
[
name
]
=
gid
self
.
groupdict
[
name
]
=
gid
return
gid
return
gid
def
setflag
(
self
,
flag
):
def
setflag
(
self
,
flag
):
if
flag
not
in
self
.
flags
:
if
flag
not
in
self
.
flags
:
self
.
flags
.
append
(
flag
)
self
.
flags
.
append
(
flag
)
class
SubPattern
:
class
SubPattern
:
# a subpattern, in intermediate form
# a subpattern, in intermediate form
def
__init__
(
self
,
pattern
,
data
=
None
):
def
__init__
(
self
,
pattern
,
data
=
None
):
self
.
pattern
=
pattern
self
.
pattern
=
pattern
if
not
data
:
if
not
data
:
data
=
[]
data
=
[]
self
.
data
=
data
self
.
data
=
data
self
.
flags
=
[]
self
.
flags
=
[]
self
.
width
=
None
self
.
width
=
None
def
__repr__
(
self
):
def
__repr__
(
self
):
return
repr
(
self
.
data
)
return
repr
(
self
.
data
)
def
__len__
(
self
):
def
__len__
(
self
):
return
len
(
self
.
data
)
return
len
(
self
.
data
)
def
__delitem__
(
self
,
index
):
def
__delitem__
(
self
,
index
):
del
self
.
data
[
index
]
del
self
.
data
[
index
]
def
__getitem__
(
self
,
index
):
def
__getitem__
(
self
,
index
):
return
self
.
data
[
index
]
return
self
.
data
[
index
]
def
__setitem__
(
self
,
index
,
code
):
def
__setitem__
(
self
,
index
,
code
):
self
.
data
[
index
]
=
code
self
.
data
[
index
]
=
code
def
__getslice__
(
self
,
start
,
stop
):
def
__getslice__
(
self
,
start
,
stop
):
return
SubPattern
(
self
.
pattern
,
self
.
data
[
start
:
stop
])
return
SubPattern
(
self
.
pattern
,
self
.
data
[
start
:
stop
])
def
insert
(
self
,
index
,
code
):
def
insert
(
self
,
index
,
code
):
self
.
data
.
insert
(
index
,
code
)
self
.
data
.
insert
(
index
,
code
)
def
append
(
self
,
code
):
def
append
(
self
,
code
):
self
.
data
.
append
(
code
)
self
.
data
.
append
(
code
)
def
getwidth
(
self
):
def
getwidth
(
self
):
# determine the width (min, max) for this subpattern
# determine the width (min, max) for this subpattern
if
self
.
width
:
if
self
.
width
:
return
self
.
width
return
self
.
width
lo
=
hi
=
0L
lo
=
hi
=
0L
for
op
,
av
in
self
.
data
:
for
op
,
av
in
self
.
data
:
if
op
is
BRANCH
:
if
op
is
BRANCH
:
l
=
sys
.
maxint
l
=
sys
.
maxint
h
=
0
h
=
0
for
av
in
av
[
1
]:
for
av
in
av
[
1
]:
i
,
j
=
av
.
getwidth
()
i
,
j
=
av
.
getwidth
()
l
=
min
(
l
,
i
)
l
=
min
(
l
,
i
)
h
=
min
(
h
,
j
)
h
=
min
(
h
,
j
)
lo
=
lo
+
i
lo
=
lo
+
i
hi
=
hi
+
j
hi
=
hi
+
j
elif
op
is
CALL
:
elif
op
is
CALL
:
i
,
j
=
av
.
getwidth
()
i
,
j
=
av
.
getwidth
()
lo
=
lo
+
i
lo
=
lo
+
i
hi
=
hi
+
j
hi
=
hi
+
j
elif
op
is
SUBPATTERN
:
elif
op
is
SUBPATTERN
:
i
,
j
=
av
[
1
].
getwidth
()
i
,
j
=
av
[
1
].
getwidth
()
lo
=
lo
+
i
lo
=
lo
+
i
hi
=
hi
+
j
hi
=
hi
+
j
elif
op
in
(
MIN_REPEAT
,
MAX_REPEAT
):
elif
op
in
(
MIN_REPEAT
,
MAX_REPEAT
):
i
,
j
=
av
[
2
].
getwidth
()
i
,
j
=
av
[
2
].
getwidth
()
lo
=
lo
+
i
*
av
[
0
]
lo
=
lo
+
i
*
av
[
0
]
hi
=
hi
+
j
*
av
[
1
]
hi
=
hi
+
j
*
av
[
1
]
elif
op
in
(
ANY
,
RANGE
,
IN
,
LITERAL
,
NOT_LITERAL
,
CATEGORY
):
elif
op
in
(
ANY
,
RANGE
,
IN
,
LITERAL
,
NOT_LITERAL
,
CATEGORY
):
lo
=
lo
+
1
lo
=
lo
+
1
hi
=
hi
+
1
hi
=
hi
+
1
elif
op
==
SUCCESS
:
elif
op
==
SUCCESS
:
break
break
self
.
width
=
int
(
min
(
lo
,
sys
.
maxint
)),
int
(
min
(
hi
,
sys
.
maxint
))
self
.
width
=
int
(
min
(
lo
,
sys
.
maxint
)),
int
(
min
(
hi
,
sys
.
maxint
))
return
self
.
width
return
self
.
width
def
set
(
self
,
flag
):
def
set
(
self
,
flag
):
if
not
flag
in
self
.
flags
:
if
not
flag
in
self
.
flags
:
self
.
flags
.
append
(
flag
)
self
.
flags
.
append
(
flag
)
def
reset
(
self
,
flag
):
def
reset
(
self
,
flag
):
if
flag
in
self
.
flags
:
if
flag
in
self
.
flags
:
self
.
flags
.
remove
(
flag
)
self
.
flags
.
remove
(
flag
)
class
Tokenizer
:
class
Tokenizer
:
def
__init__
(
self
,
string
):
def
__init__
(
self
,
string
):
self
.
string
=
list
(
string
)
self
.
string
=
list
(
string
)
self
.
next
=
self
.
__next
()
self
.
next
=
self
.
__next
()
def
__next
(
self
):
def
__next
(
self
):
if
not
self
.
string
:
if
not
self
.
string
:
return
None
return
None
char
=
self
.
string
[
0
]
char
=
self
.
string
[
0
]
if
char
[
0
]
==
"
\
\
"
:
if
char
[
0
]
==
"
\
\
"
:
try
:
try
:
c
=
self
.
string
[
1
]
c
=
self
.
string
[
1
]
except
IndexError
:
except
IndexError
:
raise
SyntaxError
,
"bogus escape"
raise
SyntaxError
,
"bogus escape"
char
=
char
+
c
char
=
char
+
c
try
:
try
:
if
c
==
"x"
:
if
c
==
"x"
:
# hexadecimal constant
# hexadecimal constant
for
i
in
xrange
(
2
,
sys
.
maxint
):
for
i
in
xrange
(
2
,
sys
.
maxint
):
c
=
self
.
string
[
i
]
c
=
self
.
string
[
i
]
if
c
not
in
HEXDIGITS
:
if
c
not
in
HEXDIGITS
:
break
break
char
=
char
+
c
char
=
char
+
c
elif
c
in
string
.
digits
:
elif
c
in
string
.
digits
:
# decimal (or octal) number
# decimal (or octal) number
for
i
in
xrange
(
2
,
sys
.
maxint
):
for
i
in
xrange
(
2
,
sys
.
maxint
):
c
=
self
.
string
[
i
]
c
=
self
.
string
[
i
]
# FIXME: if larger than current number of
# FIXME: if larger than current number of
# groups, interpret as an octal number
# groups, interpret as an octal number
if
c
not
in
string
.
digits
:
if
c
not
in
string
.
digits
:
break
break
char
=
char
+
c
char
=
char
+
c
except
IndexError
:
except
IndexError
:
pass
# use what we've got this far
pass
# use what we've got this far
del
self
.
string
[
0
:
len
(
char
)]
del
self
.
string
[
0
:
len
(
char
)]
return
char
return
char
def
match
(
self
,
char
):
def
match
(
self
,
char
):
if
char
==
self
.
next
:
if
char
==
self
.
next
:
self
.
next
=
self
.
__next
()
self
.
next
=
self
.
__next
()
return
1
return
1
return
0
return
0
def
match_set
(
self
,
set
):
def
match_set
(
self
,
set
):
if
self
.
next
in
set
:
if
self
.
next
in
set
:
self
.
next
=
self
.
__next
()
self
.
next
=
self
.
__next
()
return
1
return
1
return
0
return
0
def
get
(
self
):
def
get
(
self
):
this
=
self
.
next
this
=
self
.
next
self
.
next
=
self
.
__next
()
self
.
next
=
self
.
__next
()
return
this
return
this
def
_fixescape
(
escape
,
character_class
=
0
):
def
_fixescape
(
escape
,
character_class
=
0
):
# convert escape to (type, value)
# convert escape to (type, value)
if
character_class
:
if
character_class
:
# inside a character class, we'll look in the character
# inside a character class, we'll look in the character
# escapes dictionary first
# escapes dictionary first
code
=
ESCAPES
.
get
(
escape
)
code
=
ESCAPES
.
get
(
escape
)
if
code
:
if
code
:
return
code
return
code
code
=
CATEGORIES
.
get
(
escape
)
code
=
CATEGORIES
.
get
(
escape
)
else
:
else
:
code
=
CATEGORIES
.
get
(
escape
)
code
=
CATEGORIES
.
get
(
escape
)
if
code
:
if
code
:
return
code
return
code
code
=
ESCAPES
.
get
(
escape
)
code
=
ESCAPES
.
get
(
escape
)
if
code
:
if
code
:
return
code
return
code
if
not
character_class
:
if
not
character_class
:
try
:
try
:
group
=
int
(
escape
[
1
:])
group
=
int
(
escape
[
1
:])
# FIXME: only valid if group <= current number of groups
# FIXME: only valid if group <= current number of groups
return
GROUP
,
group
return
GROUP
,
group
except
ValueError
:
except
ValueError
:
pass
pass
try
:
try
:
if
escape
[
1
:
2
]
==
"x"
:
if
escape
[
1
:
2
]
==
"x"
:
escape
=
escape
[
2
:]
escape
=
escape
[
2
:]
return
LITERAL
,
chr
(
string
.
atoi
(
escape
[
-
2
:],
16
)
&
0xff
)
return
LITERAL
,
chr
(
string
.
atoi
(
escape
[
-
2
:],
16
)
&
0xff
)
elif
escape
[
1
:
2
]
in
string
.
digits
:
elif
escape
[
1
:
2
]
in
string
.
digits
:
return
LITERAL
,
chr
(
string
.
atoi
(
escape
[
1
:],
8
)
&
0xff
)
return
LITERAL
,
chr
(
string
.
atoi
(
escape
[
1
:],
8
)
&
0xff
)
elif
len
(
escape
)
==
2
:
elif
len
(
escape
)
==
2
:
return
LITERAL
,
escape
[
1
]
return
LITERAL
,
escape
[
1
]
except
ValueError
:
except
ValueError
:
pass
pass
raise
SyntaxError
,
"bogus escape: %s"
%
repr
(
escape
)
raise
SyntaxError
,
"bogus escape: %s"
%
repr
(
escape
)
def
_branch
(
subpattern
,
items
):
def
_branch
(
subpattern
,
items
):
...
@@ -226,35 +226,35 @@ def _branch(subpattern, items):
...
@@ -226,35 +226,35 @@ def _branch(subpattern, items):
# check if all items share a common prefix
# check if all items share a common prefix
while
1
:
while
1
:
prefix
=
None
prefix
=
None
for
item
in
items
:
for
item
in
items
:
if
not
item
:
if
not
item
:
break
break
if
prefix
is
None
:
if
prefix
is
None
:
prefix
=
item
[
0
]
prefix
=
item
[
0
]
elif
item
[
0
]
!=
prefix
:
elif
item
[
0
]
!=
prefix
:
break
break
else
:
else
:
# all subitems start with a common "prefix".
# all subitems start with a common "prefix".
# move it out of the branch
# move it out of the branch
for
item
in
items
:
for
item
in
items
:
del
item
[
0
]
del
item
[
0
]
subpattern
.
append
(
prefix
)
subpattern
.
append
(
prefix
)
continue
# check next one
continue
# check next one
break
break
# check if the branch can be replaced by a character set
# check if the branch can be replaced by a character set
for
item
in
items
:
for
item
in
items
:
if
len
(
item
)
!=
1
or
item
[
0
][
0
]
!=
LITERAL
:
if
len
(
item
)
!=
1
or
item
[
0
][
0
]
!=
LITERAL
:
break
break
else
:
else
:
# we can store this as a character set instead of a
# we can store this as a character set instead of a
# branch (FIXME: use a range if possible)
# branch (FIXME: use a range if possible)
set
=
[]
set
=
[]
for
item
in
items
:
for
item
in
items
:
set
.
append
(
item
[
0
])
set
.
append
(
item
[
0
])
subpattern
.
append
((
IN
,
set
))
subpattern
.
append
((
IN
,
set
))
return
return
subpattern
.
append
((
BRANCH
,
(
None
,
items
)))
subpattern
.
append
((
BRANCH
,
(
None
,
items
)))
...
@@ -268,178 +268,178 @@ def _parse(source, pattern, flags=()):
...
@@ -268,178 +268,178 @@ def _parse(source, pattern, flags=()):
while
1
:
while
1
:
if
source
.
next
in
(
"|"
,
")"
):
if
source
.
next
in
(
"|"
,
")"
):
break
# end of subpattern
break
# end of subpattern
this
=
source
.
get
()
this
=
source
.
get
()
if
this
is
None
:
if
this
is
None
:
break
# end of pattern
break
# end of pattern
if
this
and
this
[
0
]
not
in
SPECIAL_CHARS
:
if
this
and
this
[
0
]
not
in
SPECIAL_CHARS
:
subpattern
.
append
((
LITERAL
,
this
))
subpattern
.
append
((
LITERAL
,
this
))
elif
this
==
"["
:
elif
this
==
"["
:
# character set
# character set
set
=
[]
set
=
[]
##
if source.match(":"):
##
if source.match(":"):
##
pass # handle character classes
##
pass # handle character classes
if
source
.
match
(
"^"
):
if
source
.
match
(
"^"
):
set
.
append
((
NEGATE
,
None
))
set
.
append
((
NEGATE
,
None
))
# check remaining characters
# check remaining characters
start
=
set
[:]
start
=
set
[:]
while
1
:
while
1
:
this
=
source
.
get
()
this
=
source
.
get
()
if
this
==
"]"
and
set
!=
start
:
if
this
==
"]"
and
set
!=
start
:
break
break
elif
this
and
this
[
0
]
==
"
\
\
"
:
elif
this
and
this
[
0
]
==
"
\
\
"
:
code1
=
_fixescape
(
this
,
1
)
code1
=
_fixescape
(
this
,
1
)
elif
this
:
elif
this
:
code1
=
LITERAL
,
this
code1
=
LITERAL
,
this
else
:
else
:
raise
SyntaxError
,
"unexpected end of regular expression"
raise
SyntaxError
,
"unexpected end of regular expression"
if
source
.
match
(
"-"
):
if
source
.
match
(
"-"
):
# potential range
# potential range
this
=
source
.
get
()
this
=
source
.
get
()
if
this
==
"]"
:
if
this
==
"]"
:
set
.
append
(
code1
)
set
.
append
(
code1
)
set
.
append
((
LITERAL
,
"-"
))
set
.
append
((
LITERAL
,
"-"
))
break
break
else
:
else
:
if
this
[
0
]
==
"
\
\
"
:
if
this
[
0
]
==
"
\
\
"
:
code2
=
_fixescape
(
this
,
1
)
code2
=
_fixescape
(
this
,
1
)
else
:
else
:
code2
=
LITERAL
,
this
code2
=
LITERAL
,
this
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
if
code1
[
0
]
!=
LITERAL
or
code2
[
0
]
!=
LITERAL
:
raise
SyntaxError
,
"illegal range"
raise
SyntaxError
,
"illegal range"
if
len
(
code1
[
1
])
!=
1
or
len
(
code2
[
1
])
!=
1
:
if
len
(
code1
[
1
])
!=
1
or
len
(
code2
[
1
])
!=
1
:
raise
SyntaxError
,
"illegal range"
raise
SyntaxError
,
"illegal range"
set
.
append
((
RANGE
,
(
code1
[
1
],
code2
[
1
])))
set
.
append
((
RANGE
,
(
code1
[
1
],
code2
[
1
])))
else
:
else
:
if
code1
[
0
]
is
IN
:
if
code1
[
0
]
is
IN
:
code1
=
code1
[
1
][
0
]
code1
=
code1
[
1
][
0
]
set
.
append
(
code1
)
set
.
append
(
code1
)
# FIXME: <fl> move set optimization to support function
# FIXME: <fl> move set optimization to support function
if
len
(
set
)
==
1
and
set
[
0
][
0
]
is
LITERAL
:
if
len
(
set
)
==
1
and
set
[
0
][
0
]
is
LITERAL
:
subpattern
.
append
(
set
[
0
])
# optimization
subpattern
.
append
(
set
[
0
])
# optimization
elif
len
(
set
)
==
2
and
set
[
0
][
0
]
is
NEGATE
and
set
[
1
][
0
]
is
LITERAL
:
elif
len
(
set
)
==
2
and
set
[
0
][
0
]
is
NEGATE
and
set
[
1
][
0
]
is
LITERAL
:
subpattern
.
append
((
NOT_LITERAL
,
set
[
1
][
1
]))
# optimization
subpattern
.
append
((
NOT_LITERAL
,
set
[
1
][
1
]))
# optimization
else
:
else
:
# FIXME: <fl> add charmap optimization
# FIXME: <fl> add charmap optimization
subpattern
.
append
((
IN
,
set
))
subpattern
.
append
((
IN
,
set
))
elif
this
and
this
[
0
]
in
REPEAT_CHARS
:
elif
this
and
this
[
0
]
in
REPEAT_CHARS
:
# repeat previous item
# repeat previous item
if
this
==
"?"
:
if
this
==
"?"
:
min
,
max
=
0
,
1
min
,
max
=
0
,
1
elif
this
==
"*"
:
elif
this
==
"*"
:
min
,
max
=
0
,
sys
.
maxint
min
,
max
=
0
,
sys
.
maxint
elif
this
==
"+"
:
elif
this
==
"+"
:
min
,
max
=
1
,
sys
.
maxint
min
,
max
=
1
,
sys
.
maxint
elif
this
==
"{"
:
elif
this
==
"{"
:
min
,
max
=
0
,
sys
.
maxint
min
,
max
=
0
,
sys
.
maxint
lo
=
hi
=
""
lo
=
hi
=
""
while
source
.
next
in
string
.
digits
:
while
source
.
next
in
string
.
digits
:
lo
=
lo
+
source
.
get
()
lo
=
lo
+
source
.
get
()
if
source
.
match
(
","
):
if
source
.
match
(
","
):
while
source
.
next
in
string
.
digits
:
while
source
.
next
in
string
.
digits
:
hi
=
hi
+
source
.
get
()
hi
=
hi
+
source
.
get
()
else
:
else
:
hi
=
lo
hi
=
lo
if
not
source
.
match
(
"}"
):
if
not
source
.
match
(
"}"
):
raise
SyntaxError
,
"bogus range"
raise
SyntaxError
,
"bogus range"
if
lo
:
if
lo
:
min
=
int
(
lo
)
min
=
int
(
lo
)
if
hi
:
if
hi
:
max
=
int
(
hi
)
max
=
int
(
hi
)
# FIXME: <fl> check that hi >= lo!
# FIXME: <fl> check that hi >= lo!
else
:
else
:
raise
SyntaxError
,
"not supported"
raise
SyntaxError
,
"not supported"
# figure out which item to repeat
# figure out which item to repeat
# FIXME: should back up to the right mark, right?
# FIXME: should back up to the right mark, right?
if
subpattern
:
if
subpattern
:
index
=
len
(
subpattern
)
-
1
index
=
len
(
subpattern
)
-
1
while
subpattern
[
index
][
0
]
is
MARK
:
while
subpattern
[
index
][
0
]
is
MARK
:
index
=
index
-
1
index
=
index
-
1
item
=
subpattern
[
index
:
index
+
1
]
item
=
subpattern
[
index
:
index
+
1
]
else
:
else
:
raise
SyntaxError
,
"nothing to repeat"
raise
SyntaxError
,
"nothing to repeat"
if
source
.
match
(
"?"
):
if
source
.
match
(
"?"
):
subpattern
[
index
]
=
(
MIN_REPEAT
,
(
min
,
max
,
item
))
subpattern
[
index
]
=
(
MIN_REPEAT
,
(
min
,
max
,
item
))
else
:
else
:
subpattern
[
index
]
=
(
MAX_REPEAT
,
(
min
,
max
,
item
))
subpattern
[
index
]
=
(
MAX_REPEAT
,
(
min
,
max
,
item
))
elif
this
==
"."
:
elif
this
==
"."
:
subpattern
.
append
((
ANY
,
None
))
subpattern
.
append
((
ANY
,
None
))
elif
this
==
"("
:
elif
this
==
"("
:
group
=
1
group
=
1
name
=
None
name
=
None
if
source
.
match
(
"?"
):
if
source
.
match
(
"?"
):
group
=
0
group
=
0
# options
# options
if
source
.
match
(
"P"
):
if
source
.
match
(
"P"
):
# named group: skip forward to end of name
# named group: skip forward to end of name
if
source
.
match
(
"<"
):
if
source
.
match
(
"<"
):
name
=
""
name
=
""
while
1
:
while
1
:
char
=
source
.
get
()
char
=
source
.
get
()
if
char
in
(
">"
,
None
):
if
char
in
(
">"
,
None
):
break
break
name
=
name
+
char
name
=
name
+
char
group
=
1
group
=
1
elif
source
.
match
(
":"
):
elif
source
.
match
(
":"
):
# non-capturing group
# non-capturing group
group
=
2
group
=
2
elif
source
.
match_set
(
"iI"
):
elif
source
.
match_set
(
"iI"
):
pattern
.
setflag
(
"i"
)
pattern
.
setflag
(
"i"
)
elif
source
.
match_set
(
"lL"
):
elif
source
.
match_set
(
"lL"
):
pattern
.
setflag
(
"l"
)
pattern
.
setflag
(
"l"
)
elif
source
.
match_set
(
"mM"
):
elif
source
.
match_set
(
"mM"
):
pattern
.
setflag
(
"m"
)
pattern
.
setflag
(
"m"
)
elif
source
.
match_set
(
"sS"
):
elif
source
.
match_set
(
"sS"
):
pattern
.
setflag
(
"s"
)
pattern
.
setflag
(
"s"
)
elif
source
.
match_set
(
"xX"
):
elif
source
.
match_set
(
"xX"
):
pattern
.
setflag
(
"x"
)
pattern
.
setflag
(
"x"
)
if
group
:
if
group
:
# parse group contents
# parse group contents
b
=
[]
b
=
[]
if
group
==
2
:
if
group
==
2
:
# anonymous group
# anonymous group
group
=
None
group
=
None
else
:
else
:
group
=
pattern
.
getgroup
(
name
)
group
=
pattern
.
getgroup
(
name
)
if
group
:
if
group
:
subpattern
.
append
((
MARK
,
(
group
-
1
)
*
2
))
subpattern
.
append
((
MARK
,
(
group
-
1
)
*
2
))
while
1
:
while
1
:
p
=
_parse
(
source
,
pattern
,
flags
)
p
=
_parse
(
source
,
pattern
,
flags
)
if
source
.
match
(
")"
):
if
source
.
match
(
")"
):
if
b
:
if
b
:
b
.
append
(
p
)
b
.
append
(
p
)
_branch
(
subpattern
,
b
)
_branch
(
subpattern
,
b
)
else
:
else
:
subpattern
.
append
((
SUBPATTERN
,
(
group
,
p
)))
subpattern
.
append
((
SUBPATTERN
,
(
group
,
p
)))
break
break
elif
source
.
match
(
"|"
):
elif
source
.
match
(
"|"
):
b
.
append
(
p
)
b
.
append
(
p
)
else
:
else
:
raise
SyntaxError
,
"group not properly closed"
raise
SyntaxError
,
"group not properly closed"
if
group
:
if
group
:
subpattern
.
append
((
MARK
,
(
group
-
1
)
*
2
+
1
))
subpattern
.
append
((
MARK
,
(
group
-
1
)
*
2
+
1
))
else
:
else
:
# FIXME: should this really be a while loop?
# FIXME: should this really be a while loop?
while
source
.
get
()
not
in
(
")"
,
None
):
while
source
.
get
()
not
in
(
")"
,
None
):
pass
pass
elif
this
==
"^"
:
elif
this
==
"^"
:
subpattern
.
append
((
AT
,
AT_BEGINNING
))
subpattern
.
append
((
AT
,
AT_BEGINNING
))
elif
this
==
"$"
:
elif
this
==
"$"
:
subpattern
.
append
((
AT
,
AT_END
))
subpattern
.
append
((
AT
,
AT_END
))
elif
this
and
this
[
0
]
==
"
\
\
"
:
elif
this
and
this
[
0
]
==
"
\
\
"
:
code
=
_fixescape
(
this
)
code
=
_fixescape
(
this
)
subpattern
.
append
(
code
)
subpattern
.
append
(
code
)
else
:
else
:
raise
SyntaxError
,
"parser error"
raise
SyntaxError
,
"parser error"
return
subpattern
return
subpattern
...
@@ -448,20 +448,20 @@ def parse(source, flags=()):
...
@@ -448,20 +448,20 @@ def parse(source, flags=()):
g
=
Pattern
()
g
=
Pattern
()
b
=
[]
b
=
[]
while
1
:
while
1
:
p
=
_parse
(
s
,
g
,
flags
)
p
=
_parse
(
s
,
g
,
flags
)
tail
=
s
.
get
()
tail
=
s
.
get
()
if
tail
==
"|"
:
if
tail
==
"|"
:
b
.
append
(
p
)
b
.
append
(
p
)
elif
tail
==
")"
:
elif
tail
==
")"
:
raise
SyntaxError
,
"unbalanced parenthesis"
raise
SyntaxError
,
"unbalanced parenthesis"
elif
tail
is
None
:
elif
tail
is
None
:
if
b
:
if
b
:
b
.
append
(
p
)
b
.
append
(
p
)
p
=
SubPattern
(
g
)
p
=
SubPattern
(
g
)
_branch
(
p
,
b
)
_branch
(
p
,
b
)
break
break
else
:
else
:
raise
SyntaxError
,
"bogus characters at end of regular expression"
raise
SyntaxError
,
"bogus characters at end of regular expression"
return
p
return
p
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
@@ -469,23 +469,23 @@ if __name__ == "__main__":
...
@@ -469,23 +469,23 @@ if __name__ == "__main__":
from
testpatterns
import
PATTERNS
from
testpatterns
import
PATTERNS
a
=
b
=
c
=
0
a
=
b
=
c
=
0
for
pattern
,
flags
in
PATTERNS
:
for
pattern
,
flags
in
PATTERNS
:
if
flags
:
if
flags
:
continue
continue
print
"-"
*
68
print
"-"
*
68
try
:
try
:
p
=
parse
(
pattern
)
p
=
parse
(
pattern
)
print
repr
(
pattern
),
"->"
print
repr
(
pattern
),
"->"
pprint
(
p
.
data
)
pprint
(
p
.
data
)
import
sre_compile
import
sre_compile
try
:
try
:
code
=
sre_compile
.
compile
(
p
)
code
=
sre_compile
.
compile
(
p
)
c
=
c
+
1
c
=
c
+
1
except
:
except
:
pass
pass
a
=
a
+
1
a
=
a
+
1
except
SyntaxError
,
v
:
except
SyntaxError
,
v
:
print
"**"
,
repr
(
pattern
),
v
print
"**"
,
repr
(
pattern
),
v
b
=
b
+
1
b
=
b
+
1
print
"-"
*
68
print
"-"
*
68
print
a
,
"of"
,
b
,
"patterns successfully parsed"
print
a
,
"of"
,
b
,
"patterns successfully parsed"
print
c
,
"of"
,
b
,
"patterns successfully compiled"
print
c
,
"of"
,
b
,
"patterns successfully compiled"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment