Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
c7f7d389
Commit
c7f7d389
authored
Nov 09, 2014
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #22434: Constants in sre_constants are now named constants (enum-like).
parent
bf764a19
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
130 additions
and
189 deletions
+130
-189
Lib/sre_compile.py
Lib/sre_compile.py
+37
-38
Lib/sre_constants.py
Lib/sre_constants.py
+73
-130
Lib/sre_parse.py
Lib/sre_parse.py
+4
-5
Lib/test/test_re.py
Lib/test/test_re.py
+16
-16
No files found.
Lib/sre_compile.py
View file @
c7f7d389
...
...
@@ -13,7 +13,6 @@
import
_sre
import
sre_parse
from
sre_constants
import
*
from
_sre
import
MAXREPEAT
assert
_sre
.
MAGIC
==
MAGIC
,
"SRE module mismatch"
...
...
@@ -38,65 +37,65 @@ def _compile(code, pattern, flags):
for
op
,
av
in
pattern
:
if
op
in
LITERAL_CODES
:
if
flags
&
SRE_FLAG_IGNORECASE
:
emit
(
OP
CODES
[
OP_IGNORE
[
op
]
])
emit
(
OP
_IGNORE
[
op
])
emit
(
_sre
.
getlower
(
av
,
flags
))
else
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
emit
(
av
)
elif
op
is
IN
:
if
flags
&
SRE_FLAG_IGNORECASE
:
emit
(
OP
CODES
[
OP_IGNORE
[
op
]
])
emit
(
OP
_IGNORE
[
op
])
def
fixup
(
literal
,
flags
=
flags
):
return
_sre
.
getlower
(
literal
,
flags
)
else
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
fixup
=
None
skip
=
_len
(
code
);
emit
(
0
)
_compile_charset
(
av
,
flags
,
code
,
fixup
)
code
[
skip
]
=
_len
(
code
)
-
skip
elif
op
is
ANY
:
if
flags
&
SRE_FLAG_DOTALL
:
emit
(
OPCODES
[
ANY_ALL
]
)
emit
(
ANY_ALL
)
else
:
emit
(
OPCODES
[
ANY
]
)
emit
(
ANY
)
elif
op
in
REPEATING_CODES
:
if
flags
&
SRE_FLAG_TEMPLATE
:
raise
error
(
"internal: unsupported template operator"
)
elif
_simple
(
av
)
and
op
is
not
REPEAT
:
if
op
is
MAX_REPEAT
:
emit
(
OPCODES
[
REPEAT_ONE
]
)
emit
(
REPEAT_ONE
)
else
:
emit
(
OPCODES
[
MIN_REPEAT_ONE
]
)
emit
(
MIN_REPEAT_ONE
)
skip
=
_len
(
code
);
emit
(
0
)
emit
(
av
[
0
])
emit
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
)
emit
(
OPCODES
[
SUCCESS
]
)
emit
(
SUCCESS
)
code
[
skip
]
=
_len
(
code
)
-
skip
else
:
emit
(
OPCODES
[
REPEAT
]
)
emit
(
REPEAT
)
skip
=
_len
(
code
);
emit
(
0
)
emit
(
av
[
0
])
emit
(
av
[
1
])
_compile
(
code
,
av
[
2
],
flags
)
code
[
skip
]
=
_len
(
code
)
-
skip
if
op
is
MAX_REPEAT
:
emit
(
OPCODES
[
MAX_UNTIL
]
)
emit
(
MAX_UNTIL
)
else
:
emit
(
OPCODES
[
MIN_UNTIL
]
)
emit
(
MIN_UNTIL
)
elif
op
is
SUBPATTERN
:
if
av
[
0
]:
emit
(
OPCODES
[
MARK
]
)
emit
(
MARK
)
emit
((
av
[
0
]
-
1
)
*
2
)
# _compile_info(code, av[1], flags)
_compile
(
code
,
av
[
1
],
flags
)
if
av
[
0
]:
emit
(
OPCODES
[
MARK
]
)
emit
(
MARK
)
emit
((
av
[
0
]
-
1
)
*
2
+
1
)
elif
op
in
SUCCESS_CODES
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
elif
op
in
ASSERT_CODES
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
skip
=
_len
(
code
);
emit
(
0
)
if
av
[
0
]
>=
0
:
emit
(
0
)
# look ahead
...
...
@@ -106,57 +105,57 @@ def _compile(code, pattern, flags):
raise
error
(
"look-behind requires fixed-width pattern"
)
emit
(
lo
)
# look behind
_compile
(
code
,
av
[
1
],
flags
)
emit
(
OPCODES
[
SUCCESS
]
)
emit
(
SUCCESS
)
code
[
skip
]
=
_len
(
code
)
-
skip
elif
op
is
CALL
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
skip
=
_len
(
code
);
emit
(
0
)
_compile
(
code
,
av
,
flags
)
emit
(
OPCODES
[
SUCCESS
]
)
emit
(
SUCCESS
)
code
[
skip
]
=
_len
(
code
)
-
skip
elif
op
is
AT
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
if
flags
&
SRE_FLAG_MULTILINE
:
av
=
AT_MULTILINE
.
get
(
av
,
av
)
if
flags
&
SRE_FLAG_LOCALE
:
av
=
AT_LOCALE
.
get
(
av
,
av
)
elif
flags
&
SRE_FLAG_UNICODE
:
av
=
AT_UNICODE
.
get
(
av
,
av
)
emit
(
ATCODES
[
av
]
)
emit
(
av
)
elif
op
is
BRANCH
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
tail
=
[]
tailappend
=
tail
.
append
for
av
in
av
[
1
]:
skip
=
_len
(
code
);
emit
(
0
)
# _compile_info(code, av, flags)
_compile
(
code
,
av
,
flags
)
emit
(
OPCODES
[
JUMP
]
)
emit
(
JUMP
)
tailappend
(
_len
(
code
));
emit
(
0
)
code
[
skip
]
=
_len
(
code
)
-
skip
emit
(
0
)
# end of branch
for
tail
in
tail
:
code
[
tail
]
=
_len
(
code
)
-
tail
elif
op
is
CATEGORY
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
if
flags
&
SRE_FLAG_LOCALE
:
av
=
CH_LOCALE
[
av
]
elif
flags
&
SRE_FLAG_UNICODE
:
av
=
CH_UNICODE
[
av
]
emit
(
CHCODES
[
av
]
)
emit
(
av
)
elif
op
is
GROUPREF
:
if
flags
&
SRE_FLAG_IGNORECASE
:
emit
(
OP
CODES
[
OP_IGNORE
[
op
]
])
emit
(
OP
_IGNORE
[
op
])
else
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
emit
(
av
-
1
)
elif
op
is
GROUPREF_EXISTS
:
emit
(
OPCODES
[
op
]
)
emit
(
op
)
emit
(
av
[
0
]
-
1
)
skipyes
=
_len
(
code
);
emit
(
0
)
_compile
(
code
,
av
[
1
],
flags
)
if
av
[
2
]:
emit
(
OPCODES
[
JUMP
]
)
emit
(
JUMP
)
skipno
=
_len
(
code
);
emit
(
0
)
code
[
skipyes
]
=
_len
(
code
)
-
skipyes
+
1
_compile
(
code
,
av
[
2
],
flags
)
...
...
@@ -170,7 +169,7 @@ def _compile_charset(charset, flags, code, fixup=None):
# compile charset subprogram
emit
=
code
.
append
for
op
,
av
in
_optimize_charset
(
charset
,
fixup
):
emit
(
OPCODES
[
op
]
)
emit
(
op
)
if
op
is
NEGATE
:
pass
elif
op
is
LITERAL
:
...
...
@@ -184,14 +183,14 @@ def _compile_charset(charset, flags, code, fixup=None):
code
.
extend
(
av
)
elif
op
is
CATEGORY
:
if
flags
&
SRE_FLAG_LOCALE
:
emit
(
CH
CODES
[
CH_LOCALE
[
av
]
])
emit
(
CH
_LOCALE
[
av
])
elif
flags
&
SRE_FLAG_UNICODE
:
emit
(
CH
CODES
[
CH_UNICODE
[
av
]
])
emit
(
CH
_UNICODE
[
av
])
else
:
emit
(
CHCODES
[
av
]
)
emit
(
av
)
else
:
raise
error
(
"internal: unsupported set operator"
)
emit
(
OPCODES
[
FAILURE
]
)
emit
(
FAILURE
)
def
_optimize_charset
(
charset
,
fixup
):
# internal: optimize character set
...
...
@@ -414,7 +413,7 @@ def _compile_info(code, pattern, flags):
## print "*** CHARSET", charset
# add an info block
emit
=
code
.
append
emit
(
OPCODES
[
INFO
]
)
emit
(
INFO
)
skip
=
len
(
code
);
emit
(
0
)
# literal flag
mask
=
0
...
...
@@ -460,7 +459,7 @@ def _code(p, flags):
# compile the pattern
_compile
(
code
,
p
.
data
,
flags
)
code
.
append
(
OPCODES
[
SUCCESS
]
)
code
.
append
(
SUCCESS
)
return
code
...
...
@@ -475,7 +474,7 @@ def compile(p, flags=0):
code
=
_code
(
p
,
flags
)
# print
code
# print
(code)
# map in either direction
groupindex
=
p
.
pattern
.
groupdict
...
...
Lib/sre_constants.py
View file @
c7f7d389
...
...
@@ -23,138 +23,81 @@ from _sre import MAXREPEAT, MAXGROUPS
# Exception type raised when a pattern cannot be compiled, e.g.
# error("look-behind requires fixed-width pattern").
class error(Exception):
    pass
# operators
FAILURE
=
"failure"
SUCCESS
=
"success"
ANY
=
"any"
ANY_ALL
=
"any_all"
ASSERT
=
"assert"
ASSERT_NOT
=
"assert_not"
AT
=
"at"
BIGCHARSET
=
"bigcharset"
BRANCH
=
"branch"
CALL
=
"call"
CATEGORY
=
"category"
CHARSET
=
"charset"
GROUPREF
=
"groupref"
GROUPREF_IGNORE
=
"groupref_ignore"
GROUPREF_EXISTS
=
"groupref_exists"
IN
=
"in"
IN_IGNORE
=
"in_ignore"
INFO
=
"info"
JUMP
=
"jump"
LITERAL
=
"literal"
LITERAL_IGNORE
=
"literal_ignore"
MARK
=
"mark"
MAX_REPEAT
=
"max_repeat"
MAX_UNTIL
=
"max_until"
MIN_REPEAT
=
"min_repeat"
MIN_UNTIL
=
"min_until"
NEGATE
=
"negate"
NOT_LITERAL
=
"not_literal"
NOT_LITERAL_IGNORE
=
"not_literal_ignore"
RANGE
=
"range"
RANGE_IGNORE
=
"range_ignore"
REPEAT
=
"repeat"
REPEAT_ONE
=
"repeat_one"
SUBPATTERN
=
"subpattern"
MIN_REPEAT_ONE
=
"min_repeat_one"
class _NamedIntConstant(int):
    """An ``int`` that carries a symbolic name and displays as that name.

    The numeric value behaves like any other integer (comparison,
    arithmetic, indexing); only ``str()`` and ``repr()`` are overridden
    to return the name instead of the digits.
    """

    def __new__(cls, value, name):
        # int is immutable, so the numeric value has to be fixed in
        # __new__; the name is attached afterwards as a plain attribute.
        obj = super().__new__(cls, value)
        obj.name = name
        return obj

    def __repr__(self):
        return self.name

    # str() and repr() intentionally produce the same text.
    __str__ = __repr__
MAXREPEAT
=
_NamedIntConstant
(
MAXREPEAT
,
'MAXREPEAT'
)
def _makecodes(names):
    """Build numbered named constants from a whitespace-separated string.

    Each name in *names* is paired with its position in the string
    (starting at 0) and wrapped in a ``_NamedIntConstant``.  Every
    constant is also published in this module's global namespace under
    its own name.  Returns the list of constants in positional order.
    """
    # split() with no arguments already ignores leading and trailing
    # whitespace, so the multi-line literal needs no separate strip().
    codes = [_NamedIntConstant(number, label)
             for number, label in enumerate(names.split())]
    # Publish only after every constant has been created successfully.
    namespace = globals()
    for code in codes:
        namespace[code.name] = code
    return codes
# operators
# failure=0 success=1 (just because it looks better that way :-)
OPCODES
=
_makecodes
(
"""
FAILURE SUCCESS
ANY ANY_ALL
ASSERT ASSERT_NOT
AT
BRANCH
CALL
CATEGORY
CHARSET BIGCHARSET
GROUPREF GROUPREF_EXISTS GROUPREF_IGNORE
IN IN_IGNORE
INFO
JUMP
LITERAL LITERAL_IGNORE
MARK
MAX_UNTIL
MIN_UNTIL
NOT_LITERAL NOT_LITERAL_IGNORE
NEGATE
RANGE
REPEAT
REPEAT_ONE
SUBPATTERN
MIN_REPEAT_ONE
RANGE_IGNORE
MIN_REPEAT MAX_REPEAT
"""
)
del
OPCODES
[
-
2
:]
# remove MIN_REPEAT and MAX_REPEAT
# positions
AT_BEGINNING
=
"at_beginning"
AT_BEGINNING_LINE
=
"at_beginning_line"
AT_BEGINNING_STRING
=
"at_beginning_string"
AT_BOUNDARY
=
"at_boundary"
AT_NON_BOUNDARY
=
"at_non_boundary"
AT_END
=
"at_end"
AT_END_LINE
=
"at_end_line"
AT_END_STRING
=
"at_end_string"
AT_LOC_BOUNDARY
=
"at_loc_boundary"
AT_LOC_NON_BOUNDARY
=
"at_loc_non_boundary"
AT_UNI_BOUNDARY
=
"at_uni_boundary"
AT_UNI_NON_BOUNDARY
=
"at_uni_non_boundary"
ATCODES
=
_makecodes
(
"""
AT_BEGINNING AT_BEGINNING_LINE AT_BEGINNING_STRING
AT_BOUNDARY AT_NON_BOUNDARY
AT_END AT_END_LINE AT_END_STRING
AT_LOC_BOUNDARY AT_LOC_NON_BOUNDARY
AT_UNI_BOUNDARY AT_UNI_NON_BOUNDARY
"""
)
# categories
CATEGORY_DIGIT
=
"category_digit"
CATEGORY_NOT_DIGIT
=
"category_not_digit"
CATEGORY_SPACE
=
"category_space"
CATEGORY_NOT_SPACE
=
"category_not_space"
CATEGORY_WORD
=
"category_word"
CATEGORY_NOT_WORD
=
"category_not_word"
CATEGORY_LINEBREAK
=
"category_linebreak"
CATEGORY_NOT_LINEBREAK
=
"category_not_linebreak"
CATEGORY_LOC_WORD
=
"category_loc_word"
CATEGORY_LOC_NOT_WORD
=
"category_loc_not_word"
CATEGORY_UNI_DIGIT
=
"category_uni_digit"
CATEGORY_UNI_NOT_DIGIT
=
"category_uni_not_digit"
CATEGORY_UNI_SPACE
=
"category_uni_space"
CATEGORY_UNI_NOT_SPACE
=
"category_uni_not_space"
CATEGORY_UNI_WORD
=
"category_uni_word"
CATEGORY_UNI_NOT_WORD
=
"category_uni_not_word"
CATEGORY_UNI_LINEBREAK
=
"category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK
=
"category_uni_not_linebreak"
OPCODES
=
[
# failure=0 success=1 (just because it looks better that way :-)
FAILURE
,
SUCCESS
,
ANY
,
ANY_ALL
,
ASSERT
,
ASSERT_NOT
,
AT
,
BRANCH
,
CALL
,
CATEGORY
,
CHARSET
,
BIGCHARSET
,
GROUPREF
,
GROUPREF_EXISTS
,
GROUPREF_IGNORE
,
IN
,
IN_IGNORE
,
INFO
,
JUMP
,
LITERAL
,
LITERAL_IGNORE
,
MARK
,
MAX_UNTIL
,
MIN_UNTIL
,
NOT_LITERAL
,
NOT_LITERAL_IGNORE
,
NEGATE
,
RANGE
,
REPEAT
,
REPEAT_ONE
,
SUBPATTERN
,
MIN_REPEAT_ONE
,
RANGE_IGNORE
,
]
ATCODES
=
[
AT_BEGINNING
,
AT_BEGINNING_LINE
,
AT_BEGINNING_STRING
,
AT_BOUNDARY
,
AT_NON_BOUNDARY
,
AT_END
,
AT_END_LINE
,
AT_END_STRING
,
AT_LOC_BOUNDARY
,
AT_LOC_NON_BOUNDARY
,
AT_UNI_BOUNDARY
,
AT_UNI_NON_BOUNDARY
]
CHCODES
=
[
CATEGORY_DIGIT
,
CATEGORY_NOT_DIGIT
,
CATEGORY_SPACE
,
CATEGORY_NOT_SPACE
,
CATEGORY_WORD
,
CATEGORY_NOT_WORD
,
CATEGORY_LINEBREAK
,
CATEGORY_NOT_LINEBREAK
,
CATEGORY_LOC_WORD
,
CATEGORY_LOC_NOT_WORD
,
CATEGORY_UNI_DIGIT
,
CATEGORY_UNI_NOT_DIGIT
,
CATEGORY_UNI_SPACE
,
CATEGORY_UNI_NOT_SPACE
,
CATEGORY_UNI_WORD
,
CATEGORY_UNI_NOT_WORD
,
CATEGORY_UNI_LINEBREAK
,
CATEGORY_UNI_NOT_LINEBREAK
]
def makedict(list):
    """Return a dict mapping each item of *list* to its zero-based index.

    *list* may be any iterable of hashable items.  If an item occurs more
    than once, the index of its last occurrence is kept.
    """
    # The parameter keeps its original name (although it shadows the
    # builtin) so that existing keyword callers continue to work.
    return {item: index for index, item in enumerate(list)}
OPCODES
=
makedict
(
OPCODES
)
ATCODES
=
makedict
(
ATCODES
)
CHCODES
=
makedict
(
CHCODES
)
CHCODES
=
_makecodes
(
"""
CATEGORY_DIGIT CATEGORY_NOT_DIGIT
CATEGORY_SPACE CATEGORY_NOT_SPACE
CATEGORY_WORD CATEGORY_NOT_WORD
CATEGORY_LINEBREAK CATEGORY_NOT_LINEBREAK
CATEGORY_LOC_WORD CATEGORY_LOC_NOT_WORD
CATEGORY_UNI_DIGIT CATEGORY_UNI_NOT_DIGIT
CATEGORY_UNI_SPACE CATEGORY_UNI_NOT_SPACE
CATEGORY_UNI_WORD CATEGORY_UNI_NOT_WORD
CATEGORY_UNI_LINEBREAK CATEGORY_UNI_NOT_LINEBREAK
"""
)
# replacement operations for "ignore case" mode
OP_IGNORE
=
{
...
...
@@ -220,9 +163,9 @@ SRE_INFO_CHARSET = 4 # pattern starts with character from given set
if
__name__
==
"__main__"
:
def
dump
(
f
,
d
,
prefix
):
items
=
sorted
(
d
.
items
(),
key
=
lambda
a
:
a
[
1
]
)
for
k
,
v
in
items
:
f
.
write
(
"#define %s_%s %
s
\
n
"
%
(
prefix
,
k
.
upper
(),
v
))
items
=
sorted
(
d
)
for
item
in
items
:
f
.
write
(
"#define %s_%s %
d
\
n
"
%
(
prefix
,
item
,
item
))
f
=
open
(
"sre_constants.h"
,
"w"
)
f
.
write
(
"""
\
/*
...
...
Lib/sre_parse.py
View file @
c7f7d389
...
...
@@ -13,7 +13,6 @@
# XXX: show string offset and offending character for all errors
from
sre_constants
import
*
from
_sre
import
MAXREPEAT
SPECIAL_CHARS
=
".
\
\
[{()*+?^$|"
REPEAT_CHARS
=
"*+?{"
...
...
@@ -103,24 +102,24 @@ class SubPattern:
nl = True
seqtypes = (tuple, list)
for op, av in self.data:
print(level*"
" +
op
, end='')
print(level*"
" +
str(op)
, end='')
if op == IN:
# member sublanguage
print()
for op, a in av:
print((level+1)*"
" +
op
, a)
print((level+1)*"
" +
str(op)
, a)
elif op == BRANCH:
print()
for i, a in enumerate(av[1]):
if i:
print(level*"
" + "
or
")
print(level*"
" + "
OR
")
a.dump(level+1)
elif op == GROUPREF_EXISTS:
condgroup, item_yes, item_no = av
print('', condgroup)
item_yes.dump(level+1)
if item_no:
print(level*"
" + "
else
")
print(level*"
" + "
ELSE
")
item_no.dump(level+1)
elif isinstance(av, seqtypes):
nl = False
...
...
Lib/test/test_re.py
View file @
c7f7d389
...
...
@@ -1285,22 +1285,22 @@ class ReTests(unittest.TestCase):
with captured_stdout() as out:
re.compile(pat, re.DEBUG)
dump = '''
\
subpattern
1
literal
46
subpattern
None
branch
in
literal
99
literal
104
or
literal
112
literal
121
subpattern
None
groupref_exists
1
at at_end
else
literal
58
literal
32
SUBPATTERN
1
LITERAL
46
SUBPATTERN
None
BRANCH
IN
LITERAL
99
LITERAL
104
OR
LITERAL
112
LITERAL
121
SUBPATTERN
None
GROUPREF_EXISTS
1
AT AT_END
ELSE
LITERAL
58
LITERAL
32
'''
self.assertEqual(out.getvalue(), dump)
# Debug output is output again even a second time (bypassing
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment