Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
b1aa1951
Commit
b1aa1951
authored
Jun 01, 2000
by
Jeremy Hylton
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fredrik Lundh: here's the 96.6% version of SRE
parent
0292d78e
Changes
6
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
743 additions
and
303 deletions
+743
-303
Lib/sre.py
Lib/sre.py
+105
-18
Lib/sre_compile.py
Lib/sre_compile.py
+70
-64
Lib/sre_constants.py
Lib/sre_constants.py
+68
-27
Modules/_sre.c
Modules/_sre.c
+448
-188
Modules/sre.h
Modules/sre.h
+29
-5
Modules/sre_constants.h
Modules/sre_constants.h
+23
-1
No files found.
Lib/sre.py
View file @
b1aa1951
# -*- Mode: Python; tab-width: 4 -*-
#
#
# Secret Labs' Regular Expression Engine
# Secret Labs' Regular Expression Engine
# $Id$
# $Id$
...
@@ -7,39 +6,127 @@
...
@@ -7,39 +6,127 @@
#
#
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
#
#
# This code can only be used for 1.6 alpha testing. All other use
# require explicit permission from Secret Labs AB.
#
# Portions of this engine have been developed in cooperation with
# Portions of this engine have been developed in cooperation with
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
# other compatibility work.
# other compatibility work.
#
#
"""
this is a long string
"""
import
sre_compile
import
sre_compile
# flags
I
=
IGNORECASE
=
sre_compile
.
SRE_FLAG_IGNORECASE
L
=
LOCALE
=
sre_compile
.
SRE_FLAG_LOCALE
M
=
MULTILINE
=
sre_compile
.
SRE_FLAG_MULTILINE
S
=
DOTALL
=
sre_compile
.
SRE_FLAG_DOTALL
X
=
VERBOSE
=
sre_compile
.
SRE_FLAG_VERBOSE
# --------------------------------------------------------------------
# --------------------------------------------------------------------
# public interface
# public interface
def
compile
(
pattern
,
flags
=
0
):
# FIXME: add docstrings
return
sre_compile
.
compile
(
pattern
,
_fixflags
(
flags
))
def
match
(
pattern
,
string
,
flags
=
0
):
def
match
(
pattern
,
string
,
flags
=
0
):
return
compile
(
pattern
,
_fixflags
(
flags
)
).
match
(
string
)
return
_compile
(
pattern
,
flags
).
match
(
string
)
def
search
(
pattern
,
string
,
flags
=
0
):
def
search
(
pattern
,
string
,
flags
=
0
):
return
compile
(
pattern
,
_fixflags
(
flags
)).
search
(
string
)
return
_compile
(
pattern
,
flags
).
search
(
string
)
def
sub
(
pattern
,
repl
,
string
,
count
=
0
):
return
_compile
(
pattern
).
sub
(
repl
,
string
,
count
)
def
subn
(
pattern
,
repl
,
string
,
count
=
0
):
return
_compile
(
pattern
).
subn
(
repl
,
string
,
count
)
def
split
(
pattern
,
string
,
maxsplit
=
0
):
return
_compile
(
pattern
).
split
(
string
,
maxsplit
)
# FIXME: etc
def
findall
(
pattern
,
string
,
maxsplit
=
0
):
return
_compile
(
pattern
).
findall
(
string
,
maxsplit
)
def
compile
(
pattern
,
flags
=
0
):
return
_compile
(
pattern
,
flags
)
def
escape
(
pattern
):
s
=
list
(
pattern
)
for
i
in
range
(
len
(
pattern
)):
c
=
pattern
[
i
]
if
not
(
"a"
<=
c
<=
"z"
or
"A"
<=
c
<=
"Z"
or
"0"
<=
c
<=
"9"
):
if
c
==
"
\
000
"
:
s
[
i
]
=
"
\
\
000"
else
:
s
[
i
]
=
"
\
\
"
+
c
return
pattern
[:
0
].
join
(
s
)
# --------------------------------------------------------------------
# --------------------------------------------------------------------
# helpers
# internals
_cache
=
{}
_MAXCACHE
=
100
def
_compile
(
pattern
,
flags
=
0
):
# internal: compile pattern
tp
=
type
(
pattern
)
if
tp
not
in
(
type
(
""
),
type
(
u""
)):
return
pattern
key
=
(
tp
,
pattern
,
flags
)
try
:
return
_cache
[
key
]
except
KeyError
:
pass
p
=
sre_compile
.
compile
(
pattern
,
flags
)
if
len
(
_cache
)
>=
_MAXCACHE
:
_cache
.
clear
()
_cache
[
key
]
=
p
return
p
def
_sub
(
pattern
,
template
,
string
,
count
=
0
):
# internal: pattern.sub implementation hook
return
_subn
(
pattern
,
template
,
string
,
count
)[
0
]
def
_expand
(
match
,
template
):
# internal: expand template
return
template
# FIXME
def
_fixflags
(
flags
):
def
_subn
(
pattern
,
template
,
string
,
count
=
0
):
# convert flag bitmask to sequence
# internal: pattern.subn implementation hook
assert
not
flags
if
callable
(
template
):
return
()
filter
=
callable
else
:
# FIXME: prepare template
def
filter
(
match
,
template
=
template
):
return
_expand
(
match
,
template
)
n
=
i
=
0
s
=
[]
append
=
s
.
append
c
=
pattern
.
cursor
(
string
)
while
not
count
or
n
<
count
:
m
=
c
.
search
()
if
not
m
:
break
j
=
m
.
start
()
if
j
>
i
:
append
(
string
[
i
:
j
])
append
(
filter
(
m
))
i
=
m
.
end
()
n
=
n
+
1
if
i
<
len
(
string
):
append
(
string
[
i
:])
return
string
[:
0
].
join
(
s
),
n
def
_split
(
pattern
,
string
,
maxsplit
=
0
):
# internal: pattern.split implementation hook
n
=
i
=
0
s
=
[]
append
=
s
.
append
c
=
pattern
.
cursor
(
string
)
while
not
maxsplit
or
n
<
maxsplit
:
m
=
c
.
search
()
if
not
m
:
break
j
=
m
.
start
()
append
(
string
[
i
:
j
])
i
=
m
.
end
()
n
=
n
+
1
if
i
<
len
(
string
):
append
(
string
[
i
:])
return
s
Lib/sre_compile.py
View file @
b1aa1951
...
@@ -14,9 +14,6 @@
...
@@ -14,9 +14,6 @@
# other compatibility work.
# other compatibility work.
#
#
# FIXME: <fl> formalize (objectify?) and document the compiler code
# format, so that other frontends can use this compiler
import
array
,
string
,
sys
import
array
,
string
,
sys
import
_sre
import
_sre
...
@@ -45,64 +42,70 @@ class Code:
...
@@ -45,64 +42,70 @@ class Code:
self
.
data
.
append
(
code
)
self
.
data
.
append
(
code
)
def
todata
(
self
):
def
todata
(
self
):
# print self.data
# print self.data
try
:
return
array
.
array
(
WORDSIZE
,
self
.
data
).
tostring
()
return
array
.
array
(
WORDSIZE
,
self
.
data
).
tostring
()
except
OverflowError
:
print
self
.
data
raise
def
_lower
(
literal
):
def
_compile
(
code
,
pattern
,
flags
,
level
=
0
):
# return _sre._lower(literal) # FIXME
return
string
.
lower
(
literal
)
def
_compile
(
code
,
pattern
,
flags
):
append
=
code
.
append
append
=
code
.
append
for
op
,
av
in
pattern
:
for
op
,
av
in
pattern
:
if
op
is
ANY
:
if
op
is
ANY
:
if
"s"
in
flags
:
if
flags
&
SRE_FLAG_DOTALL
:
append
(
CODES
[
op
])
# any character at all!
append
(
OP
CODES
[
op
])
# any character at all!
else
:
else
:
append
(
CODES
[
NOT_LITERAL
])
append
(
OPCODES
[
CATEGORY
])
append
(
10
)
append
(
CHCODES
[
CATEGORY_NOT_LINEBREAK
]
)
elif
op
in
(
SUCCESS
,
FAILURE
):
elif
op
in
(
SUCCESS
,
FAILURE
):
append
(
CODES
[
op
])
append
(
OP
CODES
[
op
])
elif
op
is
AT
:
elif
op
is
AT
:
append
(
CODES
[
op
])
append
(
OPCODES
[
op
])
append
(
POSITIONS
[
av
])
if
flags
&
SRE_FLAG_MULTILINE
:
append
(
ATCODES
[
AT_MULTILINE
[
av
]])
else
:
append
(
ATCODES
[
av
])
elif
op
is
BRANCH
:
elif
op
is
BRANCH
:
append
(
CODES
[
op
])
append
(
OP
CODES
[
op
])
tail
=
[]
tail
=
[]
for
av
in
av
[
1
]:
for
av
in
av
[
1
]:
skip
=
len
(
code
);
append
(
0
)
skip
=
len
(
code
);
append
(
0
)
_compile
(
code
,
av
,
flags
)
_compile
(
code
,
av
,
flags
,
level
)
append
(
CODES
[
JUMP
])
append
(
OP
CODES
[
JUMP
])
tail
.
append
(
len
(
code
));
append
(
0
)
tail
.
append
(
len
(
code
));
append
(
0
)
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
append
(
0
)
# end of branch
append
(
0
)
# end of branch
for
tail
in
tail
:
for
tail
in
tail
:
code
[
tail
]
=
len
(
code
)
-
tail
code
[
tail
]
=
len
(
code
)
-
tail
elif
op
is
CALL
:
elif
op
is
CALL
:
append
(
CODES
[
op
])
append
(
OP
CODES
[
op
])
skip
=
len
(
code
);
append
(
0
)
skip
=
len
(
code
);
append
(
0
)
_compile
(
code
,
av
,
flags
)
_compile
(
code
,
av
,
flags
,
level
+
1
)
append
(
CODES
[
SUCCESS
])
append
(
OP
CODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
elif
op
is
CATEGORY
:
# not used by current parser
elif
op
is
CATEGORY
:
# not used by current parser
append
(
CODES
[
op
])
append
(
OPCODES
[
op
])
append
(
CATEGORIES
[
av
])
if
flags
&
SRE_FLAG_LOCALE
:
append
(
CH_LOCALE
[
CHCODES
[
av
]])
else
:
append
(
CHCODES
[
av
])
elif
op
is
GROUP
:
elif
op
is
GROUP
:
if
"i"
in
flags
:
if
flags
&
SRE_FLAG_IGNORECASE
:
append
(
CODES
[
MA
P_IGNORE
[
op
]])
append
(
OPCODES
[
O
P_IGNORE
[
op
]])
else
:
else
:
append
(
CODES
[
op
])
append
(
OP
CODES
[
op
])
append
(
av
)
append
(
av
-
1
)
elif
op
is
IN
:
elif
op
is
IN
:
if
"i"
in
flags
:
if
flags
&
SRE_FLAG_IGNORECASE
:
append
(
CODES
[
MA
P_IGNORE
[
op
]])
append
(
OPCODES
[
O
P_IGNORE
[
op
]])
def
fixup
(
literal
):
def
fixup
(
literal
):
return
ord
(
_lower
(
literal
))
return
ord
(
literal
.
lower
(
))
else
:
else
:
append
(
CODES
[
op
])
append
(
OP
CODES
[
op
])
fixup
=
ord
fixup
=
ord
skip
=
len
(
code
);
append
(
0
)
skip
=
len
(
code
);
append
(
0
)
for
op
,
av
in
av
:
for
op
,
av
in
av
:
append
(
CODES
[
op
])
append
(
OP
CODES
[
op
])
if
op
is
NEGATE
:
if
op
is
NEGATE
:
pass
pass
elif
op
is
LITERAL
:
elif
op
is
LITERAL
:
...
@@ -111,58 +114,60 @@ def _compile(code, pattern, flags):
...
@@ -111,58 +114,60 @@ def _compile(code, pattern, flags):
append
(
fixup
(
av
[
0
]))
append
(
fixup
(
av
[
0
]))
append
(
fixup
(
av
[
1
]))
append
(
fixup
(
av
[
1
]))
elif
op
is
CATEGORY
:
elif
op
is
CATEGORY
:
append
(
CATEGORIES
[
av
])
if
flags
&
SRE_FLAG_LOCALE
:
append
(
CH_LOCALE
[
CHCODES
[
av
]])
else
:
append
(
CHCODES
[
av
])
else
:
else
:
raise
ValueError
,
"unsupported set operator"
raise
ValueError
,
"unsupported set operator"
append
(
CODES
[
FAILURE
])
append
(
OP
CODES
[
FAILURE
])
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
elif
op
in
(
LITERAL
,
NOT_LITERAL
):
elif
op
in
(
LITERAL
,
NOT_LITERAL
):
if
"i"
in
flags
:
if
flags
&
SRE_FLAG_IGNORECASE
:
append
(
CODES
[
MA
P_IGNORE
[
op
]])
append
(
OPCODES
[
O
P_IGNORE
[
op
]])
append
(
ord
(
_lower
(
av
)))
append
(
ord
(
av
.
lower
(
)))
else
:
else
:
append
(
CODES
[
op
])
append
(
OP
CODES
[
op
])
append
(
ord
(
av
))
append
(
ord
(
av
))
elif
op
is
MARK
:
elif
op
is
MARK
:
append
(
CODES
[
op
])
append
(
OP
CODES
[
op
])
append
(
av
)
append
(
av
)
elif
op
in
(
REPEAT
,
MIN_REPEAT
,
MAX_REPEAT
):
elif
op
in
(
REPEAT
,
MIN_REPEAT
,
MAX_REPEAT
):
lo
,
hi
=
av
[
2
].
getwidth
()
lo
,
hi
=
av
[
2
].
getwidth
()
if
lo
==
0
:
if
lo
==
0
:
raise
SyntaxError
,
"cannot repeat zero-width items"
raise
SyntaxError
,
"cannot repeat zero-width items"
if
lo
==
hi
==
1
and
op
is
MAX_REPEAT
:
if
lo
==
hi
==
1
and
op
is
MAX_REPEAT
:
append
(
CODES
[
MAX_REPEAT_ONE
])
append
(
OP
CODES
[
MAX_REPEAT_ONE
])
skip
=
len
(
code
);
append
(
0
)
skip
=
len
(
code
);
append
(
0
)
append
(
av
[
0
])
append
(
av
[
0
])
append
(
av
[
1
])
append
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
)
_compile
(
code
,
av
[
2
],
flags
,
level
+
1
)
append
(
CODES
[
SUCCESS
])
append
(
OP
CODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
else
:
else
:
append
(
CODES
[
op
])
append
(
OP
CODES
[
op
])
skip
=
len
(
code
);
append
(
0
)
skip
=
len
(
code
);
append
(
0
)
append
(
av
[
0
])
append
(
av
[
0
])
append
(
av
[
1
])
append
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
)
_compile
(
code
,
av
[
2
],
flags
,
level
+
1
)
if
op
is
MIN_REPEAT
:
if
op
is
MIN_REPEAT
:
append
(
CODES
[
MIN_UNTIL
])
append
(
OP
CODES
[
MIN_UNTIL
])
else
:
else
:
# FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
append
(
OPCODES
[
MAX_UNTIL
])
append
(
CODES
[
MAX_UNTIL
])
code
[
skip
]
=
len
(
code
)
-
skip
code
[
skip
]
=
len
(
code
)
-
skip
elif
op
is
SUBPATTERN
:
elif
op
is
SUBPATTERN
:
##
group = av[0]
group
=
av
[
0
]
##
if group:
if
group
:
## append(
CODES[MARK])
append
(
OP
CODES
[
MARK
])
##
append((group-1)*2)
append
((
group
-
1
)
*
2
)
_compile
(
code
,
av
[
1
],
flags
)
_compile
(
code
,
av
[
1
],
flags
,
level
+
1
)
##
if group:
if
group
:
## append(
CODES[MARK])
append
(
OP
CODES
[
MARK
])
##
append((group-1)*2+1)
append
((
group
-
1
)
*
2
+
1
)
else
:
else
:
raise
ValueError
,
(
"unsupported operand type"
,
op
)
raise
ValueError
,
(
"unsupported operand type"
,
op
)
def
compile
(
p
,
flags
=
()
):
def
compile
(
p
,
flags
=
0
):
# convert pattern list to internal format
# convert pattern list to internal format
if
type
(
p
)
in
(
type
(
""
),
type
(
u""
)):
if
type
(
p
)
in
(
type
(
""
),
type
(
u""
)):
import
sre_parse
import
sre_parse
...
@@ -170,12 +175,10 @@ def compile(p, flags=()):
...
@@ -170,12 +175,10 @@ def compile(p, flags=()):
p
=
sre_parse
.
parse
(
p
)
p
=
sre_parse
.
parse
(
p
)
else
:
else
:
pattern
=
None
pattern
=
None
# print p.getwidth()
flags
=
p
.
pattern
.
flags
|
flags
# print p
code
=
Code
()
code
=
Code
()
_compile
(
code
,
p
.
data
,
p
.
pattern
.
flags
)
_compile
(
code
,
p
.
data
,
flags
)
code
.
append
(
CODES
[
SUCCESS
])
code
.
append
(
OPCODES
[
SUCCESS
])
# print list(code.data)
data
=
code
.
todata
()
data
=
code
.
todata
()
if
0
:
# debugging
if
0
:
# debugging
print
print
...
@@ -183,5 +186,8 @@ def compile(p, flags=()):
...
@@ -183,5 +186,8 @@ def compile(p, flags=()):
import
sre_disasm
import
sre_disasm
sre_disasm
.
disasm
(
data
)
sre_disasm
.
disasm
(
data
)
print
"-"
*
68
print
"-"
*
68
# print len(data), p.pattern.groups, len(p.pattern.groupdict)
return
_sre
.
compile
(
return
_sre
.
compile
(
pattern
,
data
,
p
.
pattern
.
groups
-
1
,
p
.
pattern
.
groupdict
)
pattern
,
flags
,
data
,
p
.
pattern
.
groups
-
1
,
p
.
pattern
.
groupdict
)
Lib/sre_constants.py
View file @
b1aa1951
...
@@ -48,20 +48,31 @@ SUBPATTERN = "subpattern"
...
@@ -48,20 +48,31 @@ SUBPATTERN = "subpattern"
# positions
# positions
AT_BEGINNING
=
"at_beginning"
AT_BEGINNING
=
"at_beginning"
AT_BEGINNING_LINE
=
"at_beginning_line"
AT_BOUNDARY
=
"at_boundary"
AT_BOUNDARY
=
"at_boundary"
AT_NON_BOUNDARY
=
"at_non_boundary"
AT_NON_BOUNDARY
=
"at_non_boundary"
AT_END
=
"at_end"
AT_END
=
"at_end"
AT_END_LINE
=
"at_end_line"
# categories
# categories
CATEGORY_DIGIT
=
"category_digit"
CATEGORY_DIGIT
=
"category_digit"
CATEGORY_NOT_DIGIT
=
"category_not_digit"
CATEGORY_NOT_DIGIT
=
"category_not_digit"
CATEGORY_SPACE
=
"category_space"
CATEGORY_SPACE
=
"category_space"
CATEGORY_NOT_SPACE
=
"category_not_space"
CATEGORY_NOT_SPACE
=
"category_not_space"
CATEGORY_WORD
=
"category_word"
CATEGORY_WORD
=
"category_word"
CATEGORY_NOT_WORD
=
"category_not_word"
CATEGORY_NOT_WORD
=
"category_not_word"
CATEGORY_LINEBREAK
=
"category_linebreak"
CATEGORY_NOT_LINEBREAK
=
"category_not_linebreak"
CATEGORY_LOC_DIGIT
=
"category_loc_digit"
CATEGORY_LOC_NOT_DIGIT
=
"category_loc_not_digit"
CATEGORY_LOC_SPACE
=
"category_loc_space"
CATEGORY_LOC_NOT_SPACE
=
"category_loc_not_space"
CATEGORY_LOC_WORD
=
"category_loc_word"
CATEGORY_LOC_NOT_WORD
=
"category_loc_not_word"
CATEGORY_LOC_LINEBREAK
=
"category_loc_linebreak"
CATEGORY_LOC_NOT_LINEBREAK
=
"category_loc_not_linebreak"
CODES
=
[
OP
CODES
=
[
# failure=0 success=1 (just because it looks better that way :-)
# failure=0 success=1 (just because it looks better that way :-)
FAILURE
,
SUCCESS
,
FAILURE
,
SUCCESS
,
...
@@ -87,45 +98,75 @@ CODES = [
...
@@ -87,45 +98,75 @@ CODES = [
]
]
# convert to dictionary
ATCODES
=
[
c
=
{}
AT_BEGINNING
,
AT_BEGINNING_LINE
,
AT_BOUNDARY
,
i
=
0
AT_NON_BOUNDARY
,
AT_END
,
AT_END_LINE
for
code
in
CODES
:
]
c
[
code
]
=
i
CHCODES
=
[
CATEGORY_DIGIT
,
CATEGORY_NOT_DIGIT
,
CATEGORY_SPACE
,
CATEGORY_NOT_SPACE
,
CATEGORY_WORD
,
CATEGORY_NOT_WORD
,
CATEGORY_LINEBREAK
,
CATEGORY_NOT_LINEBREAK
,
CATEGORY_LOC_DIGIT
,
CATEGORY_LOC_NOT_DIGIT
,
CATEGORY_LOC_SPACE
,
CATEGORY_LOC_NOT_SPACE
,
CATEGORY_LOC_WORD
,
CATEGORY_LOC_NOT_WORD
,
CATEGORY_LOC_LINEBREAK
,
CATEGORY_LOC_NOT_LINEBREAK
]
def
makedict
(
list
):
d
=
{}
i
=
0
for
item
in
list
:
d
[
item
]
=
i
i
=
i
+
1
i
=
i
+
1
CODES
=
c
return
d
OPCODES
=
makedict
(
OPCODES
)
ATCODES
=
makedict
(
ATCODES
)
CHCODES
=
makedict
(
CHCODES
)
# replacement operations for "ignore case" mode
# replacement operations for "ignore case" mode
MA
P_IGNORE
=
{
O
P_IGNORE
=
{
GROUP
:
GROUP_IGNORE
,
GROUP
:
GROUP_IGNORE
,
IN
:
IN_IGNORE
,
IN
:
IN_IGNORE
,
LITERAL
:
LITERAL_IGNORE
,
LITERAL
:
LITERAL_IGNORE
,
NOT_LITERAL
:
NOT_LITERAL_IGNORE
NOT_LITERAL
:
NOT_LITERAL_IGNORE
}
}
POSITIONS
=
{
AT_MULTILINE
=
{
AT_BEGINNING
:
ord
(
"a"
),
AT_BEGINNING
:
AT_BEGINNING_LINE
,
AT_BOUNDARY
:
ord
(
"b"
),
AT_END
:
AT_END_LINE
AT_NON_BOUNDARY
:
ord
(
"B"
),
AT_END
:
ord
(
"z"
),
}
}
CATEGORIES
=
{
CH_LOCALE
=
{
CATEGORY_DIGIT
:
ord
(
"d"
),
CATEGORY_DIGIT
:
CATEGORY_LOC_DIGIT
,
CATEGORY_NOT_DIGIT
:
ord
(
"D"
),
CATEGORY_NOT_DIGIT
:
CATEGORY_LOC_NOT_DIGIT
,
CATEGORY_SPACE
:
ord
(
"s"
),
CATEGORY_SPACE
:
CATEGORY_LOC_SPACE
,
CATEGORY_NOT_SPACE
:
ord
(
"S"
),
CATEGORY_NOT_SPACE
:
CATEGORY_LOC_NOT_SPACE
,
CATEGORY_WORD
:
ord
(
"w"
),
CATEGORY_WORD
:
CATEGORY_LOC_WORD
,
CATEGORY_NOT_WORD
:
ord
(
"W"
),
CATEGORY_NOT_WORD
:
CATEGORY_LOC_NOT_WORD
,
CATEGORY_LINEBREAK
:
CATEGORY_LOC_LINEBREAK
,
CATEGORY_NOT_LINEBREAK
:
CATEGORY_LOC_NOT_LINEBREAK
}
}
# flags
SRE_FLAG_TEMPLATE
=
1
# NYI
SRE_FLAG_IGNORECASE
=
2
SRE_FLAG_LOCALE
=
4
SRE_FLAG_MULTILINE
=
8
SRE_FLAG_DOTALL
=
16
SRE_FLAG_VERBOSE
=
32
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
import
string
import
string
items
=
CODES
.
items
()
def
dump
(
f
,
d
,
prefix
):
items
=
d
.
items
()
items
.
sort
(
lambda
a
,
b
:
cmp
(
a
[
1
],
b
[
1
]))
items
.
sort
(
lambda
a
,
b
:
cmp
(
a
[
1
],
b
[
1
]))
f
=
open
(
"sre_constants.h"
,
"w"
)
f
.
write
(
"/* generated by sre_constants.py */
\
n
"
)
for
k
,
v
in
items
:
for
k
,
v
in
items
:
f
.
write
(
"#define SRE_OP_"
+
string
.
upper
(
k
)
+
" "
+
str
(
v
)
+
"
\
n
"
)
f
.
write
(
"#define %s_%s %s
\
n
"
%
(
prefix
,
string
.
upper
(
k
),
v
))
f
=
open
(
"sre_constants.h"
,
"w"
)
f
.
write
(
"/* generated from sre_constants.py */
\
n
"
)
dump
(
f
,
OPCODES
,
"SRE_OP"
)
dump
(
f
,
ATCODES
,
"SRE"
)
dump
(
f
,
CHCODES
,
"SRE"
)
f
.
close
()
f
.
close
()
print
"done"
print
"done"
Modules/_sre.c
View file @
b1aa1951
This diff is collapsed.
Click to expand it.
Modules/sre.h
View file @
b1aa1951
...
@@ -14,17 +14,18 @@
...
@@ -14,17 +14,18 @@
#include "sre_constants.h"
#include "sre_constants.h"
/* Python objects */
typedef
struct
{
typedef
struct
{
PyObject_HEAD
PyObject_HEAD
PyObject
*
code
;
/* link to the code string object */
PyObject
*
code
;
/* link to the code string object */
PyObject
*
pattern
;
/* link to the pattern source (or None) */
int
groups
;
int
groups
;
PyObject
*
groupindex
;
PyObject
*
groupindex
;
/* compatibility */
PyObject
*
pattern
;
/* pattern source (or None) */
int
flags
;
/* flags used when compiling pattern source */
}
PatternObject
;
}
PatternObject
;
#define PatternObject_GetCode(o) ((void*) PyString_AS_STRING((o)->code))
#define PatternObject_GetCode(o)\
((void*) PyString_AS_STRING(((PatternObject*)(o))->code))
typedef
struct
{
typedef
struct
{
PyObject_HEAD
PyObject_HEAD
...
@@ -34,5 +35,28 @@ typedef struct {
...
@@ -34,5 +35,28 @@ typedef struct {
int
mark
[
2
];
int
mark
[
2
];
}
MatchObject
;
}
MatchObject
;
#endif
typedef
struct
{
/* string pointers */
void
*
ptr
;
/* current position (also end of current slice) */
void
*
beginning
;
/* start of original string */
void
*
start
;
/* start of current slice */
void
*
end
;
/* end of original string */
/* character size */
int
charsize
;
/* registers */
int
marks
;
void
*
mark
[
64
];
/* FIXME: <fl> should be dynamically allocated! */
/* backtracking stack */
void
**
stack
;
int
stacksize
;
int
stackbase
;
}
SRE_STATE
;
typedef
struct
{
PyObject_HEAD
PyObject
*
pattern
;
PyObject
*
string
;
SRE_STATE
state
;
}
CursorObject
;
#endif
Modules/sre_constants.h
View file @
b1aa1951
/* generated
by
sre_constants.py */
/* generated
from
sre_constants.py */
#define SRE_OP_FAILURE 0
#define SRE_OP_FAILURE 0
#define SRE_OP_SUCCESS 1
#define SRE_OP_SUCCESS 1
#define SRE_OP_ANY 2
#define SRE_OP_ANY 2
...
@@ -25,3 +25,25 @@
...
@@ -25,3 +25,25 @@
#define SRE_OP_NEGATE 23
#define SRE_OP_NEGATE 23
#define SRE_OP_RANGE 24
#define SRE_OP_RANGE 24
#define SRE_OP_REPEAT 25
#define SRE_OP_REPEAT 25
#define SRE_AT_BEGINNING 0
#define SRE_AT_BEGINNING_LINE 1
#define SRE_AT_BOUNDARY 2
#define SRE_AT_NON_BOUNDARY 3
#define SRE_AT_END 4
#define SRE_AT_END_LINE 5
#define SRE_CATEGORY_DIGIT 0
#define SRE_CATEGORY_NOT_DIGIT 1
#define SRE_CATEGORY_SPACE 2
#define SRE_CATEGORY_NOT_SPACE 3
#define SRE_CATEGORY_WORD 4
#define SRE_CATEGORY_NOT_WORD 5
#define SRE_CATEGORY_LINEBREAK 6
#define SRE_CATEGORY_NOT_LINEBREAK 7
#define SRE_CATEGORY_LOC_DIGIT 8
#define SRE_CATEGORY_LOC_NOT_DIGIT 9
#define SRE_CATEGORY_LOC_SPACE 10
#define SRE_CATEGORY_LOC_NOT_SPACE 11
#define SRE_CATEGORY_LOC_WORD 12
#define SRE_CATEGORY_LOC_NOT_WORD 13
#define SRE_CATEGORY_LOC_LINEBREAK 14
#define SRE_CATEGORY_LOC_NOT_LINEBREAK 15
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment