Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
d3ec2572
Commit
d3ec2572
authored
Jun 30, 2000
by
Fredrik Lundh
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
- fixed lookahead assertions (#10, #11, #12)
- untabified sre_constants.py
parent
355f8503
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
146 additions
and
90 deletions
+146
-90
Lib/sre_compile.py
Lib/sre_compile.py
+50
-50
Lib/sre_constants.py
Lib/sre_constants.py
+24
-8
Lib/sre_parse.py
Lib/sre_parse.py
+19
-0
Lib/test/output/test_sre
Lib/test/output/test_sre
+0
-3
Modules/_sre.c
Modules/_sre.c
+18
-7
Modules/sre_constants.h
Modules/sre_constants.h
+35
-22
No files found.
Lib/sre_compile.py
View file @
d3ec2572
...
...
@@ -26,52 +26,12 @@ def _compile(code, pattern, flags):
# internal: compile a (sub)pattern
emit
=
code
.
append
for
op
,
av
in
pattern
:
if
op
is
ANY
:
if
flags
&
SRE_FLAG_DOTALL
:
emit
(
OPCODES
[
op
])
else
:
emit
(
OPCODES
[
CATEGORY
])
emit
(
CHCODES
[
CATEGORY_NOT_LINEBREAK
])
elif
op
in
(
SUCCESS
,
FAILURE
):
emit
(
OPCODES
[
op
])
elif
op
is
AT
:
emit
(
OPCODES
[
op
])
if
flags
&
SRE_FLAG_MULTILINE
:
emit
(
ATCODES
[
AT_MULTILINE
[
av
]])
else
:
emit
(
ATCODES
[
av
])
elif
op
is
BRANCH
:
emit
(
OPCODES
[
op
])
tail
=
[]
for
av
in
av
[
1
]:
skip
=
len
(
code
);
emit
(
0
)
_compile
(
code
,
av
,
flags
)
emit
(
OPCODES
[
JUMP
])
tail
.
append
(
len
(
code
));
emit
(
0
)
code
[
skip
]
=
len
(
code
)
-
skip
emit
(
0
)
# end of branch
for
tail
in
tail
:
code
[
tail
]
=
len
(
code
)
-
tail
elif
op
is
CALL
:
emit
(
OPCODES
[
op
])
skip
=
len
(
code
);
emit
(
0
)
_compile
(
code
,
av
,
flags
)
emit
(
OPCODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
elif
op
is
CATEGORY
:
emit
(
OPCODES
[
op
])
if
flags
&
SRE_FLAG_LOCALE
:
emit
(
CHCODES
[
CH_LOCALE
[
av
]])
elif
flags
&
SRE_FLAG_UNICODE
:
emit
(
CHCODES
[
CH_UNICODE
[
av
]])
else
:
emit
(
CHCODES
[
av
])
elif
op
is
GROUP
:
if
op
in
(
LITERAL
,
NOT_LITERAL
):
if
flags
&
SRE_FLAG_IGNORECASE
:
emit
(
OPCODES
[
OP_IGNORE
[
op
]])
else
:
emit
(
OPCODES
[
op
])
emit
(
av
-
1
)
emit
(
ord
(
av
)
)
elif
op
is
IN
:
if
flags
&
SRE_FLAG_IGNORECASE
:
emit
(
OPCODES
[
OP_IGNORE
[
op
]])
...
...
@@ -101,15 +61,12 @@ def _compile(code, pattern, flags):
raise
error
,
"internal: unsupported set operator"
emit
(
OPCODES
[
FAILURE
])
code
[
skip
]
=
len
(
code
)
-
skip
elif
op
in
(
LITERAL
,
NOT_LITERAL
):
if
flags
&
SRE_FLAG_IGNORECASE
:
emit
(
OPCODES
[
OP_IGNORE
[
op
]])
else
:
elif
op
is
ANY
:
if
flags
&
SRE_FLAG_DOTALL
:
emit
(
OPCODES
[
op
])
emit
(
ord
(
av
))
elif
op
is
MARK
:
emit
(
OPCODES
[
op
])
emit
(
av
)
else
:
emit
(
OPCODES
[
CATEGORY
])
emit
(
CHCODES
[
CATEGORY_NOT_LINEBREAK
])
elif
op
in
(
REPEAT
,
MIN_REPEAT
,
MAX_REPEAT
):
if
flags
&
SRE_FLAG_TEMPLATE
:
emit
(
OPCODES
[
REPEAT
])
...
...
@@ -150,6 +107,49 @@ def _compile(code, pattern, flags):
if
group
:
emit
(
OPCODES
[
MARK
])
emit
((
group
-
1
)
*
2
+
1
)
elif
op
in
(
SUCCESS
,
FAILURE
):
emit
(
OPCODES
[
op
])
elif
op
in
(
ASSERT
,
ASSERT_NOT
,
CALL
):
emit
(
OPCODES
[
op
])
skip
=
len
(
code
);
emit
(
0
)
_compile
(
code
,
av
,
flags
)
emit
(
OPCODES
[
SUCCESS
])
code
[
skip
]
=
len
(
code
)
-
skip
elif
op
is
AT
:
emit
(
OPCODES
[
op
])
if
flags
&
SRE_FLAG_MULTILINE
:
emit
(
ATCODES
[
AT_MULTILINE
[
av
]])
else
:
emit
(
ATCODES
[
av
])
elif
op
is
BRANCH
:
emit
(
OPCODES
[
op
])
tail
=
[]
for
av
in
av
[
1
]:
skip
=
len
(
code
);
emit
(
0
)
_compile
(
code
,
av
,
flags
)
emit
(
OPCODES
[
JUMP
])
tail
.
append
(
len
(
code
));
emit
(
0
)
code
[
skip
]
=
len
(
code
)
-
skip
emit
(
0
)
# end of branch
for
tail
in
tail
:
code
[
tail
]
=
len
(
code
)
-
tail
elif
op
is
CATEGORY
:
emit
(
OPCODES
[
op
])
if
flags
&
SRE_FLAG_LOCALE
:
emit
(
CHCODES
[
CH_LOCALE
[
av
]])
elif
flags
&
SRE_FLAG_UNICODE
:
emit
(
CHCODES
[
CH_UNICODE
[
av
]])
else
:
emit
(
CHCODES
[
av
])
elif
op
is
GROUP
:
if
flags
&
SRE_FLAG_IGNORECASE
:
emit
(
OPCODES
[
OP_IGNORE
[
op
]])
else
:
emit
(
OPCODES
[
op
])
emit
(
av
-
1
)
elif
op
is
MARK
:
emit
(
OPCODES
[
op
])
emit
(
av
)
else
:
raise
ValueError
,
(
"unsupported operand type"
,
op
)
...
...
Lib/sre_constants.py
View file @
d3ec2572
...
...
@@ -23,6 +23,7 @@ SUCCESS = "success"
ANY
=
"any"
ASSERT
=
"assert"
ASSERT_NOT
=
"assert_not"
AT
=
"at"
BRANCH
=
"branch"
CALL
=
"call"
...
...
@@ -81,7 +82,7 @@ OPCODES = [
FAILURE
,
SUCCESS
,
ANY
,
ASSERT
,
ASSERT
,
ASSERT_NOT
,
AT
,
BRANCH
,
CALL
,
...
...
@@ -121,8 +122,8 @@ def makedict(list):
d
=
{}
i
=
0
for
item
in
list
:
d
[
item
]
=
i
i
=
i
+
1
d
[
item
]
=
i
i
=
i
+
1
return
d
OPCODES
=
makedict
(
OPCODES
)
...
...
@@ -176,12 +177,27 @@ SRE_FLAG_VERBOSE = 64
if
__name__
==
"__main__"
:
import
string
def
dump
(
f
,
d
,
prefix
):
items
=
d
.
items
()
items
.
sort
(
lambda
a
,
b
:
cmp
(
a
[
1
],
b
[
1
]))
for
k
,
v
in
items
:
f
.
write
(
"#define %s_%s %s
\
n
"
%
(
prefix
,
string
.
upper
(
k
),
v
))
items
=
d
.
items
()
items
.
sort
(
lambda
a
,
b
:
cmp
(
a
[
1
],
b
[
1
]))
for
k
,
v
in
items
:
f
.
write
(
"#define %s_%s %s
\
n
"
%
(
prefix
,
string
.
upper
(
k
),
v
))
f
=
open
(
"sre_constants.h"
,
"w"
)
f
.
write
(
"/* generated from sre_constants.py */
\
n
"
)
f
.
write
(
"""
\
/*
* Secret Labs' Regular Expression Engine
*
* regular expression matching engine
*
* NOTE: This file is generated by sre_constants.py. If you need
* to change anything in here, edit sre_constants.py and run it.
*
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
*
* See the _sre.c file for information on usage and redistribution.
*/
"""
)
dump
(
f
,
OPCODES
,
"SRE_OP"
)
dump
(
f
,
ATCODES
,
"SRE"
)
dump
(
f
,
CHCODES
,
"SRE"
)
...
...
Lib/sre_parse.py
View file @
d3ec2572
...
...
@@ -470,6 +470,25 @@ def _parse(source, state, flags=0):
if
source
.
next
is
None
or
source
.
next
==
")"
:
break
source
.
get
()
elif
source
.
next
in
(
"="
,
"!"
):
# lookahead assertions
char
=
source
.
get
()
b
=
[]
while
1
:
p
=
_parse
(
source
,
state
,
flags
)
if
source
.
next
==
")"
:
if
b
:
b
.
append
(
p
)
p
=
_branch
(
state
,
b
)
if
char
==
"="
:
subpattern
.
append
((
ASSERT
,
p
))
else
:
subpattern
.
append
((
ASSERT_NOT
,
p
))
break
elif
source
.
match
(
"|"
):
b
.
append
(
p
)
else
:
raise
error
,
"pattern not properly closed"
else
:
# flags
while
FLAGS
.
has_key
(
source
.
next
):
...
...
Lib/test/output/test_sre
View file @
d3ec2572
...
...
@@ -6,7 +6,4 @@ test_support -- test failed re module cPickle
=== grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-trial/' should be 'd:msgs/tdir/sub1/-tdir/'
=== grouping error ('([abc])*bcd', 'abcd', 0, 'found+"-"+g1', 'abcd-a') 'abcd-c' should be 'abcd-a'
=== grouping error ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A') 'ABCD-C' should be 'ABCD-A'
=== Syntax error: ('a(?!b).', 'abad', 0, 'found', 'ad')
=== Syntax error: ('a(?=d).', 'abad', 0, 'found', 'ad')
=== Syntax error: ('a(?=c|d).', 'abad', 0, 'found', 'ad')
=== Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')
Modules/_sre.c
View file @
d3ec2572
...
...
@@ -20,6 +20,7 @@
* 00-06-28 fl fixed findall (0.9.1)
* 00-06-29 fl fixed split, added more scanner features (0.9.2)
* 00-06-30 fl tuning, fast search (0.9.3)
* 00-06-30 fl added assert (lookahead) primitives (0.9.4)
*
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
*
...
...
@@ -30,7 +31,7 @@
#ifndef SRE_RECURSIVE
char
copyright
[]
=
" SRE 0.9.
3
Copyright (c) 1997-2000 by Secret Labs AB "
;
char
copyright
[]
=
" SRE 0.9.
4
Copyright (c) 1997-2000 by Secret Labs AB "
;
#include "Python.h"
...
...
@@ -576,11 +577,10 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
pattern
+=
pattern
[
0
];
break
;
#if 0
case SRE_OP_CALL:
/* match subpattern, without backtracking */
case
SRE_OP_ASSERT
:
/* assert subpattern */
/* args: <skip> <pattern> */
TRACE(("%8d: subpattern\n", PTR(ptr)));
TRACE
((
"%8d:
assert
subpattern
\n
"
,
PTR
(
ptr
)));
state
->
ptr
=
ptr
;
i
=
SRE_MATCH
(
state
,
pattern
+
1
);
if
(
i
<
0
)
...
...
@@ -588,9 +588,20 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
if
(
!
i
)
goto
failure
;
pattern
+=
pattern
[
0
];
ptr = state->ptr;
break
;
#endif
case
SRE_OP_ASSERT_NOT
:
/* assert not subpattern */
/* args: <skip> <pattern> */
TRACE
((
"%8d: assert not subpattern
\n
"
,
PTR
(
ptr
)));
state
->
ptr
=
ptr
;
i
=
SRE_MATCH
(
state
,
pattern
+
1
);
if
(
i
<
0
)
return
i
;
if
(
i
)
goto
failure
;
pattern
+=
pattern
[
0
];
break
;
#if 0
case SRE_OP_MAX_REPEAT_ONE:
...
...
Modules/sre_constants.h
View file @
d3ec2572
/* generated from sre_constants.py */
/*
* Secret Labs' Regular Expression Engine
*
* regular expression matching engine
*
* NOTE: This file is generated by sre_constants.py. If you need
* to change anything in here, edit sre_constants.py and run it.
*
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
*
* See the _sre.c file for information on usage and redistribution.
*/
#define SRE_OP_FAILURE 0
#define SRE_OP_SUCCESS 1
#define SRE_OP_ANY 2
#define SRE_OP_ASSERT 3
#define SRE_OP_AT 4
#define SRE_OP_BRANCH 5
#define SRE_OP_CALL 6
#define SRE_OP_CATEGORY 7
#define SRE_OP_GROUP 8
#define SRE_OP_GROUP_IGNORE 9
#define SRE_OP_IN 10
#define SRE_OP_IN_IGNORE 11
#define SRE_OP_INFO 12
#define SRE_OP_JUMP 13
#define SRE_OP_LITERAL 14
#define SRE_OP_LITERAL_IGNORE 15
#define SRE_OP_MARK 16
#define SRE_OP_MAX_REPEAT 17
#define SRE_OP_MAX_REPEAT_ONE 18
#define SRE_OP_MIN_REPEAT 19
#define SRE_OP_NOT_LITERAL 20
#define SRE_OP_NOT_LITERAL_IGNORE 21
#define SRE_OP_NEGATE 22
#define SRE_OP_RANGE 23
#define SRE_OP_REPEAT 24
#define SRE_OP_ASSERT_NOT 4
#define SRE_OP_AT 5
#define SRE_OP_BRANCH 6
#define SRE_OP_CALL 7
#define SRE_OP_CATEGORY 8
#define SRE_OP_GROUP 9
#define SRE_OP_GROUP_IGNORE 10
#define SRE_OP_IN 11
#define SRE_OP_IN_IGNORE 12
#define SRE_OP_INFO 13
#define SRE_OP_JUMP 14
#define SRE_OP_LITERAL 15
#define SRE_OP_LITERAL_IGNORE 16
#define SRE_OP_MARK 17
#define SRE_OP_MAX_REPEAT 18
#define SRE_OP_MAX_REPEAT_ONE 19
#define SRE_OP_MIN_REPEAT 20
#define SRE_OP_NOT_LITERAL 21
#define SRE_OP_NOT_LITERAL_IGNORE 22
#define SRE_OP_NEGATE 23
#define SRE_OP_RANGE 24
#define SRE_OP_REPEAT 25
#define SRE_AT_BEGINNING 0
#define SRE_AT_BEGINNING_LINE 1
#define SRE_AT_BOUNDARY 2
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment