Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
95e8053a
Commit
95e8053a
authored
Aug 13, 1997
by
Guido van Rossum
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
1.5a3 prerelease 1 from AMK
parent
a74ef66a
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
226 additions
and
114 deletions
+226
-114
Lib/re.py
Lib/re.py
+25
-10
Lib/test/re_tests.py
Lib/test/re_tests.py
+13
-2
Lib/test/regex_tests.py
Lib/test/regex_tests.py
+7
-1
Lib/test/test_re.py
Lib/test/test_re.py
+12
-1
Modules/regexmodule.c
Modules/regexmodule.c
+8
-4
Modules/regexpr.c
Modules/regexpr.c
+109
-60
Modules/regexpr.h
Modules/regexpr.h
+13
-25
Modules/reopmodule.c
Modules/reopmodule.c
+39
-11
No files found.
Lib/re.py
View file @
95e8053a
...
...
@@ -317,10 +317,19 @@ class Eol(Instruction):
class
Set
(
Instruction
):
name
=
'set'
def
__init__
(
self
,
set
):
def
__init__
(
self
,
set
,
flags
=
0
):
self
.
set
=
set
if
flags
&
IGNORECASE
:
self
.
set
=
map
(
string
.
lower
,
self
.
set
)
if
len
(
set
)
==
1
:
# If only one element, use the "exact" opcode (it'll be faster)
Instruction
.
__init__
(
self
,
chr
(
4
),
2
)
else
:
# Use the "set" opcode
Instruction
.
__init__
(
self
,
chr
(
3
),
33
)
def
assemble
(
self
,
position
,
labels
):
if
len
(
self
.
set
)
==
1
:
# If only one character in set, generate an "exact" opcode
return
self
.
opcode
+
self
.
set
[
0
]
result
=
self
.
opcode
temp
=
0
for
i
,
c
in
map
(
lambda
x
:
(
x
,
chr
(
x
)),
range
(
256
)):
...
...
@@ -333,14 +342,16 @@ class Set(Instruction):
def
__repr__
(
self
):
result
=
'%-15s'
%
(
self
.
name
)
self
.
set
.
sort
()
# XXX this should print more intelligently
for
char
in
self
.
set
:
result
=
result
+
char
return
result
class
Exact
(
Instruction
):
name
=
'exact'
def
__init__
(
self
,
char
):
def
__init__
(
self
,
char
,
flags
):
self
.
char
=
char
if
flags
&
IGNORECASE
:
self
.
char
=
string
.
lower
(
self
.
char
)
Instruction
.
__init__
(
self
,
chr
(
4
),
2
)
def
assemble
(
self
,
position
,
labels
):
return
self
.
opcode
+
self
.
char
...
...
@@ -881,7 +892,7 @@ def compile(pattern, flags=0):
escape_type
,
value
,
index
=
expand_escape
(
pattern
,
index
)
if
escape_type
==
CHAR
:
stack
.
append
([
Exact
(
value
)])
stack
.
append
([
Exact
(
value
,
flags
)])
lastop
=
'
\
\
'
+
value
elif
escape_type
==
MEMORY_REFERENCE
:
...
...
@@ -1306,7 +1317,7 @@ def compile(pattern, flags=0):
elif
char
==
'.'
:
if
flags
&
DOTALL
:
stack
.
append
([
Set
(
map
(
chr
,
range
(
256
)))])
stack
.
append
([
Set
(
map
(
chr
,
range
(
256
))
,
flags
)])
else
:
stack
.
append
([
AnyChar
()])
lastop
=
'.'
...
...
@@ -1336,12 +1347,12 @@ def compile(pattern, flags=0):
index
=
end
+
1
# do not change lastop
else
:
stack
.
append
([
Exact
(
char
)])
stack
.
append
([
Exact
(
char
,
flags
)])
lastop
=
'#'
elif
char
in
string
.
whitespace
:
if
not
(
flags
&
VERBOSE
):
stack
.
append
([
Exact
(
char
)])
stack
.
append
([
Exact
(
char
,
flags
)])
lastop
=
char
elif
char
==
'['
:
...
...
@@ -1449,22 +1460,25 @@ def compile(pattern, flags=0):
index
=
index
+
1
if
negate
:
# If case is being ignored, then both upper- and lowercase
# versions of the letters must be excluded.
if
flags
&
IGNORECASE
:
set
=
set
+
map
(
string
.
upper
,
set
)
notset
=
[]
for
char
in
map
(
chr
,
range
(
256
)):
if
char
not
in
set
:
notset
.
append
(
char
)
if
len
(
notset
)
==
0
:
raise
error
,
'empty negated set'
stack
.
append
([
Set
(
notset
)])
stack
.
append
([
Set
(
notset
,
flags
)])
else
:
if
len
(
set
)
==
0
:
raise
error
,
'empty set'
stack
.
append
([
Set
(
set
)])
stack
.
append
([
Set
(
set
,
flags
)])
lastop
=
'[]'
else
:
stack
.
append
([
Exact
(
char
)])
stack
.
append
([
Exact
(
char
,
flags
)])
lastop
=
char
code
=
[]
...
...
@@ -1485,6 +1499,7 @@ def compile(pattern, flags=0):
code
.
append
(
Label
(
label
))
label
=
label
+
1
code
.
append
(
End
())
# print code
return
RegexObject
(
pattern
,
flags
,
code
,
register
,
groupindex
)
# Replace expand_escape and _expand functions with their C equivalents.
...
...
Lib/test/re_tests.py
View file @
95e8053a
...
...
@@ -318,6 +318,7 @@ tests = [
# ('((((((((((a))))))))))\\41', 'aa', FAIL),
# ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
(
'((((((((((a))))))))))
\
\
41'
,
''
,
SYNTAX_ERROR
),
(
'(?i)((((((((((a))))))))))
\
\
41'
,
''
,
SYNTAX_ERROR
),
(
'(((((((((a)))))))))'
,
'a'
,
SUCCEED
,
'found'
,
'a'
),
(
'multiple words of text'
,
'uh-uh'
,
FAIL
),
(
'multiple words'
,
'multiple words, yeah'
,
SUCCEED
,
'found'
,
'multiple words'
),
...
...
@@ -448,7 +449,6 @@ tests = [
(
'(?i)((((((((((a))))))))))
\
\
10'
,
'AA'
,
SUCCEED
,
'found'
,
'AA'
),
#('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
#('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
(
'(?i)((((((((((a))))))))))
\
\
41'
,
''
,
SYNTAX_ERROR
),
(
'(?i)(((((((((a)))))))))'
,
'A'
,
SUCCEED
,
'found'
,
'A'
),
(
'(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))'
,
'A'
,
SUCCEED
,
'g1'
,
'A'
),
(
'(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))'
,
'C'
,
SUCCEED
,
'g1'
,
'C'
),
...
...
@@ -506,10 +506,21 @@ xyzabc
(
'a.b'
,
'a
\
n
b'
,
FAIL
),
(
'(?s)a.b'
,
'a
\
n
b'
,
SUCCEED
,
'found'
,
'a
\
n
b'
),
# test \w, etc.
# test \w, etc.
both inside and outside character classes
(
'
\
\
w+'
,
'--ab_cd0123--'
,
SUCCEED
,
'found'
,
'ab_cd0123'
),
(
'[
\
\
w]+'
,
'--ab_cd0123--'
,
SUCCEED
,
'found'
,
'ab_cd0123'
),
(
'
\
\
D+'
,
'1234abc5678'
,
SUCCEED
,
'found'
,
'abc'
),
(
'[
\
\
D]+'
,
'1234abc5678'
,
SUCCEED
,
'found'
,
'abc'
),
(
'[
\
\
da-fA-F]+'
,
'123abc'
,
SUCCEED
,
'found'
,
'123abc'
),
(
'[
\
\
d-x]'
,
'-'
,
SYNTAX_ERROR
),
(
r'([\
s]*)([
\S]*)([\
s]*)
', '
testing
!
1972
', SUCCEED, '
g3
+
g2
+
g1
', '
testing
!
1972
'),
(r'
(
\
s
*
)(
\
S
*
)(
\
s
*
)
', '
testing
!
1972
', SUCCEED, '
g3
+
g2
+
g1
', '
testing
!
1972
'),
(r'
\
xff
', '
\
377
', SUCCEED, '
found
', chr(255)),
(r'
\
x00ff
', '
\
377
', SUCCEED, '
found
', chr(255)),
(r'
\
t
\
n
\
v
\
r
\
f
\
a
\
g
', '
\
t
\
n
\
v
\
r
\
f
\
ag
', SUCCEED, '
found
', '
\
t
\
n
\
v
\
r
\
f
\
ag
'),
('
\
t
\
n
\
v
\
r
\
f
\
a
\
g
', '
\
t
\
n
\
v
\
r
\
f
\
ag
', SUCCEED, '
found
', '
\
t
\
n
\
v
\
r
\
f
\
ag
'),
(r'
\
t
\
n
\
v
\
r
\
f
\
a
', '
\
t
\
n
\
v
\
r
\
f
\
a
', SUCCEED, '
found
', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
(r'
[
\
t
][
\
n
][
\
v
][
\
r
][
\
f
][
\
a
][
\
A
][
\
b
][
\
B
][
\
Z
][
\
g
]
', '
\
t
\
n
\
v
\
r
\
f
\
aA
\
bBZg
', SUCCEED, '
found
', '
\
t
\
n
\
v
\
r
\
f
\
aA
\
bBZg
'),
]
Lib/test/regex_tests.py
View file @
95e8053a
...
...
@@ -278,6 +278,12 @@ tests = [
(
'
\
\
([xyz]*
\
\
)x'
,
'abcx'
,
SUCCEED
,
'found+"-"+g1'
,
'x-'
),
(
'
\
\
(a
\
\
)+b
\
\
|aac'
,
'aac'
,
SUCCEED
,
'found+"-"+g1'
,
'aac-None'
)
'found+"-"+g1'
,
'aac-None'
),
(
'
\
<
a
'
,
'a'
,
SUCCEED
,
'found'
,
'a'
),
(
'
\
<
a
'
,
'!'
,
FAIL
),
(
'a
\
<
b
'
,
'ab'
,
FAIL
),
(
'a
\
>
'
, '
ab
', FAIL),
('
a
\
>
', '
a
!
', SUCCEED, '
found
', '
a
'),
('
a
\
>
', '
a
', SUCCEED, '
found
', '
a
'),
]
Lib/test/test_re.py
View file @
95e8053a
...
...
@@ -31,6 +31,10 @@ try:
assert re.sub('
(
?
P
<
a
>
x
)
', '
\
g
<
a
>
\
g
<
a
>
', '
xx
') == '
xxxx
'
assert re.sub('
a
', r'
\
t
\
n
\
v
\
r
\
f
\
a
\
b
\
B
\
Z
\
a
\
A
\
w
\
W
\
s
\
S
\
d
\
D
', '
a
') == '
\
t
\
n
\
v
\
r
\
f
\
a
\
bBZ
\
aAwWsSdD
'
assert re.sub('
a
', '
\
t
\
n
\
v
\
r
\
f
\
a
', '
a
') == '
\
t
\
n
\
v
\
r
\
f
\
a
'
assert re.sub('
a
', '
\
t
\
n
\
v
\
r
\
f
\
a
', '
a
') == (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))
except AssertionError:
raise TestFailed, "re.sub"
...
...
@@ -120,7 +124,6 @@ if verbose:
print '
Running
re_tests
test
suite
'
for t in tests:
print t
sys.stdout.flush()
pattern=s=outcome=repl=expected=None
if len(t)==5:
...
...
@@ -136,6 +139,7 @@ for t in tests:
if outcome==SYNTAX_ERROR: pass # Expected a syntax error
else:
print '
===
Syntax
error
:
', t
except KeyboardInterrupt: raise KeyboardInterrupt
except:
print '
***
Unexpected
error
***
'
if verbose:
...
...
@@ -182,3 +186,10 @@ for t in tests:
print repr(repl)+'
should
be
'+repr(expected)
else:
print '
===
Failed
incorrectly
', t
# Try the match with IGNORECASE enabled, and check that it
# still succeeds.
obj=re.compile(pattern, re.IGNORECASE)
result=obj.search(s)
if result==None:
print '
===
Fails
on
case
-
insensitive
match
', t
Modules/regexmodule.c
View file @
95e8053a
...
...
@@ -132,7 +132,9 @@ regobj_match(re, args)
re
->
re_lastok
=
NULL
;
result
=
re_match
(
&
re
->
re_patbuf
,
buffer
,
size
,
offset
,
&
re
->
re_regs
);
if
(
result
<
-
1
)
{
/* Failure like stack overflow */
/* Serious failure of some sort; if re_match didn't
set an exception, raise a generic error */
if
(
!
PyErr_Occurred
())
PyErr_SetString
(
RegexError
,
"match failure"
);
return
NULL
;
}
...
...
@@ -174,7 +176,9 @@ regobj_search(re, args)
result
=
re_search
(
&
re
->
re_patbuf
,
buffer
,
size
,
offset
,
range
,
&
re
->
re_regs
);
if
(
result
<
-
1
)
{
/* Failure like stack overflow */
/* Serious failure of some sort; if re_match didn't
set an exception, raise a generic error */
if
(
!
PyErr_Occurred
())
PyErr_SetString
(
RegexError
,
"match failure"
);
return
NULL
;
}
...
...
Modules/regexpr.c
View file @
95e8053a
...
...
@@ -33,6 +33,7 @@
#include "myproto.h"
/* For PROTO macro --Guido */
#include <stdio.h>
#include "Python.h"
#ifndef NDEBUG
#define NDEBUG 1
...
...
@@ -85,16 +86,16 @@ typedef union item_t
{
int
num
;
int
level
;
char
*
start
;
char
*
end
;
unsigned
char
*
start
;
unsigned
char
*
end
;
}
reg
;
struct
{
int
count
;
int
level
;
int
phantom
;
char
*
code
;
char
*
text
;
unsigned
char
*
code
;
unsigned
char
*
text
;
}
fail
;
struct
{
...
...
@@ -139,8 +140,8 @@ typedef struct match_state
* offsets from the beginning of the string before returning the
* registers to the calling program. */
char
*
start
[
NUM_REGISTERS
];
char
*
end
[
NUM_REGISTERS
];
unsigned
char
*
start
[
NUM_REGISTERS
];
unsigned
char
*
end
[
NUM_REGISTERS
];
/* Keeps track of whether a register has changed recently. */
...
...
@@ -422,7 +423,7 @@ enum regexp_compiled_ops /* opcodes for compiled regexp */
Cwordbound
,
/* match if at word boundary */
Cnotwordbound
,
/* match if not at word boundary */
Csyntaxspec
,
/* matches syntax code (1 byte follows) */
Cnotsyntaxspec
,
/* matches if syntax code does not match (1 byte foll
)
*/
Cnotsyntaxspec
,
/* matches if syntax code does not match (1 byte foll
ows)
*/
Crepeat1
};
...
...
@@ -469,7 +470,7 @@ static int regexp_ansi_sequences;
#define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
char
re_syntax_table
[
256
];
unsigned
char
re_syntax_table
[
256
];
void
re_compile_initialize
(
void
)
{
...
...
@@ -593,11 +594,11 @@ static int hex_char_to_decimal(int ch)
return
16
;
}
static
void
re_compile_fastmap_aux
(
char
*
code
,
static
void
re_compile_fastmap_aux
(
unsigned
char
*
code
,
int
pos
,
char
*
visited
,
char
*
can_be_null
,
char
*
fastmap
)
unsigned
char
*
visited
,
unsigned
char
*
can_be_null
,
unsigned
char
*
fastmap
)
{
int
a
;
int
b
;
...
...
@@ -717,19 +718,20 @@ static void re_compile_fastmap_aux(char *code,
}
default:
{
abort
();
/* probably some opcode is missing from this switch */
PyErr_SetString
(
PyExc_SystemError
,
"Unknown regex opcode: memory corrupted?"
);
return
;
/*NOTREACHED*/
}
}
}
static
int
re_do_compile_fastmap
(
char
*
buffer
,
static
int
re_do_compile_fastmap
(
unsigned
char
*
buffer
,
int
used
,
int
pos
,
char
*
can_be_null
,
char
*
fastmap
)
unsigned
char
*
can_be_null
,
unsigned
char
*
fastmap
)
{
char
small_visited
[
512
],
*
visited
;
unsigned
char
small_visited
[
512
],
*
visited
;
if
(
used
<=
sizeof
(
small_visited
))
visited
=
small_visited
;
...
...
@@ -759,6 +761,7 @@ void re_compile_fastmap(regexp_t bufp)
&
bufp
->
can_be_null
,
bufp
->
fastmap
))
return
;
if
(
PyErr_Occurred
())
return
;
if
(
bufp
->
buffer
[
0
]
==
Cbol
)
bufp
->
anchor
=
1
;
/* begline */
else
...
...
@@ -792,13 +795,13 @@ void re_compile_fastmap(regexp_t bufp)
*
*/
static
int
re_optimize_star_jump
(
regexp_t
bufp
,
char
*
code
)
static
int
re_optimize_star_jump
(
regexp_t
bufp
,
unsigned
char
*
code
)
{
char
map
[
256
];
char
can_be_null
;
char
*
p1
;
char
*
p2
;
char
ch
;
unsigned
char
map
[
256
];
unsigned
char
can_be_null
;
unsigned
char
*
p1
;
unsigned
char
*
p2
;
unsigned
char
ch
;
int
a
;
int
b
;
int
num_instructions
=
0
;
...
...
@@ -808,6 +811,13 @@ static int re_optimize_star_jump(regexp_t bufp, char *code)
a
=
(
int
)
SHORT
(
a
);
p1
=
code
+
a
+
3
;
/* skip the failure_jump */
/* Check that the jump is within the pattern */
if
(
p1
<
bufp
->
buffer
||
bufp
->
buffer
+
bufp
->
used
<
p1
)
{
PyErr_SetString
(
PyExc_SystemError
,
"Regex VM jump out of bounds (failure_jump opt)"
);
return
0
;
}
assert
(
p1
[
-
3
]
==
Cfailure_jump
);
p2
=
code
;
/* p1 points inside loop, p2 points to after loop */
...
...
@@ -923,7 +933,7 @@ static int re_optimize_star_jump(regexp_t bufp, char *code)
}
}
make_update_jump:
/* make_update_jump: */
code
-=
3
;
a
+=
3
;
/* jump to after the Cfailure_jump */
code
[
0
]
=
Cupdate_failure_jump
;
...
...
@@ -948,7 +958,7 @@ static int re_optimize_star_jump(regexp_t bufp, char *code)
static
int
re_optimize
(
regexp_t
bufp
)
{
char
*
code
;
unsigned
char
*
code
;
code
=
bufp
->
buffer
;
...
...
@@ -1073,7 +1083,7 @@ else \
#define GETHEX(var) \
{ \
char gethex_ch, gethex_value; \
unsigned
char gethex_ch, gethex_value; \
NEXTCHAR(gethex_ch); \
gethex_value = hex_char_to_decimal(gethex_ch); \
if (gethex_value == 16) \
...
...
@@ -1147,7 +1157,7 @@ else \
} \
}
char
*
re_compile_pattern
(
char
*
regex
,
int
size
,
regexp_t
bufp
)
unsigned
char
*
re_compile_pattern
(
unsigned
char
*
regex
,
int
size
,
regexp_t
bufp
)
{
int
a
;
int
pos
;
...
...
@@ -1161,8 +1171,8 @@ char *re_compile_pattern(char *regex, int size, regexp_t bufp)
int
future_jumps
[
MAX_NESTING
];
int
num_jumps
;
unsigned
char
ch
=
'\0'
;
char
*
pattern
;
char
*
translate
;
unsigned
char
*
pattern
;
unsigned
char
*
translate
;
int
next_register
;
int
paren_depth
;
int
num_open_registers
;
...
...
@@ -1580,23 +1590,23 @@ if (translate) \
var = translate[var]
int
re_match
(
regexp_t
bufp
,
char
*
string
,
unsigned
char
*
string
,
int
size
,
int
pos
,
regexp_registers_t
old_regs
)
{
char
*
code
;
char
*
translate
;
char
*
text
;
char
*
textstart
;
char
*
textend
;
unsigned
char
*
code
;
unsigned
char
*
translate
;
unsigned
char
*
text
;
unsigned
char
*
textstart
;
unsigned
char
*
textend
;
int
a
;
int
b
;
int
ch
;
int
reg
;
int
match_end
;
char
*
regstart
;
char
*
regend
;
unsigned
char
*
regstart
;
unsigned
char
*
regend
;
int
regsize
;
match_state
state
;
...
...
@@ -1738,18 +1748,36 @@ int re_match(regexp_t bufp,
a
=
(
unsigned
char
)
*
code
++
;
a
|=
(
unsigned
char
)
*
code
++
<<
8
;
code
+=
(
int
)
SHORT
(
a
);
if
(
code
<
bufp
->
buffer
||
bufp
->
buffer
+
bufp
->
used
<
code
)
{
PyErr_SetString
(
PyExc_SystemError
,
"Regex VM jump out of bounds (Cjump)"
);
FREE_STATE
(
state
);
return
-
2
;
}
goto
continue_matching
;
}
case
Cdummy_failure_jump
:
{
unsigned
char
*
failuredest
;
a
=
(
unsigned
char
)
*
code
++
;
a
|=
(
unsigned
char
)
*
code
++
<<
8
;
a
=
(
int
)
SHORT
(
a
);
assert
(
*
code
==
Cfailure_jump
);
b
=
(
unsigned
char
)
code
[
1
];
b
|=
(
unsigned
char
)
code
[
2
]
<<
8
;
PUSH_FAILURE
(
state
,
code
+
(
int
)
SHORT
(
b
)
+
3
,
NULL
,
goto
error
);
failuredest
=
code
+
(
int
)
SHORT
(
b
)
+
3
;
if
(
failuredest
<
bufp
->
buffer
||
bufp
->
buffer
+
bufp
->
used
<
failuredest
)
{
PyErr_SetString
(
PyExc_SystemError
,
"Regex VM jump out of bounds (Cdummy_failure_jump failuredest)"
);
FREE_STATE
(
state
);
return
-
2
;
}
PUSH_FAILURE
(
state
,
failuredest
,
NULL
,
goto
error
);
code
+=
a
;
if
(
code
<
bufp
->
buffer
||
bufp
->
buffer
+
bufp
->
used
<
code
)
{
PyErr_SetString
(
PyExc_SystemError
,
"Regex VM jump out of bounds (Cdummy_failure_jump code)"
);
FREE_STATE
(
state
);
return
-
2
;
}
goto
continue_matching
;
}
case
Cfailure_jump
:
...
...
@@ -1757,16 +1785,26 @@ int re_match(regexp_t bufp,
a
=
(
unsigned
char
)
*
code
++
;
a
|=
(
unsigned
char
)
*
code
++
<<
8
;
a
=
(
int
)
SHORT
(
a
);
if
(
code
+
a
<
bufp
->
buffer
||
bufp
->
buffer
+
bufp
->
used
<
code
+
a
)
{
PyErr_SetString
(
PyExc_SystemError
,
"Regex VM jump out of bounds (Cfailure_jump)"
);
FREE_STATE
(
state
);
return
-
2
;
}
PUSH_FAILURE
(
state
,
code
+
a
,
text
,
goto
error
);
goto
continue_matching
;
}
case
Crepeat1
:
{
char
*
pinst
;
unsigned
char
*
pinst
;
a
=
(
unsigned
char
)
*
code
++
;
a
|=
(
unsigned
char
)
*
code
++
<<
8
;
a
=
(
int
)
SHORT
(
a
);
pinst
=
code
+
a
;
if
(
pinst
<
bufp
->
buffer
||
bufp
->
buffer
+
bufp
->
used
<
pinst
)
{
PyErr_SetString
(
PyExc_SystemError
,
"Regex VM jump out of bounds (Crepeat1)"
);
FREE_STATE
(
state
);
return
-
2
;
}
/* pinst is sole instruction in loop, and it matches a
* single character. Since Crepeat1 was originally a
* Cupdate_failure_jump, we also know that backtracking
...
...
@@ -1858,7 +1896,9 @@ int re_match(regexp_t bufp,
}
default:
{
abort
();
FREE_STATE
(
state
);
PyErr_SetString
(
PyExc_SystemError
,
"Unknown regex opcode: memory corrupted?"
);
return
-
2
;
/*NOTREACHED*/
}
}
...
...
@@ -1900,9 +1940,9 @@ int re_match(regexp_t bufp,
goto
fail
;
if
(
text
==
textend
)
goto
continue_matching
;
if
(
SYNTAX
(
*
text
)
&
Sword
)
goto
fail
;
if
(
!
(
SYNTAX
(
*
text
)
&
Sword
))
goto
continue_matching
;
goto
fail
;
}
case
Cwordbound
:
{
...
...
@@ -1936,16 +1976,20 @@ int re_match(regexp_t bufp,
{
NEXTCHAR
(
ch
);
if
(
SYNTAX
(
ch
)
&
(
unsigned
char
)
*
code
++
)
break
;
goto
fail
;
goto
continue_matching
;
}
default:
{
abort
();
FREE_STATE
(
state
);
PyErr_SetString
(
PyExc_SystemError
,
"Unknown regex opcode: memory corrupted?"
);
return
-
2
;
/*NOTREACHED*/
}
}
#if 0 /* This line is never reached --Guido */
abort();
#endif
...
...
@@ -1953,6 +1997,7 @@ int re_match(regexp_t bufp,
*NOTREACHED
*/
/* Using "break;" in the above switch statement is equivalent to "goto fail;" */
fail:
POP_FAILURE
(
state
,
code
,
text
,
goto
done_matching
,
goto
error
);
goto
continue_matching
;
...
...
@@ -1970,32 +2015,36 @@ int re_match(regexp_t bufp,
return
-
2
;
}
#undef PREFETCH
#undef NEXTCHAR
int
re_search
(
regexp_t
bufp
,
char
*
string
,
unsigned
char
*
string
,
int
size
,
int
pos
,
int
range
,
regexp_registers_t
regs
)
{
char
*
fastmap
;
char
*
translate
;
char
*
text
;
char
*
partstart
;
char
*
partend
;
unsigned
char
*
fastmap
;
unsigned
char
*
translate
;
unsigned
char
*
text
;
unsigned
char
*
partstart
;
unsigned
char
*
partend
;
int
dir
;
int
ret
;
char
anchor
;
unsigned
char
anchor
;
assert
(
size
>=
0
&&
pos
>=
0
);
assert
(
pos
+
range
>=
0
&&
pos
+
range
<=
size
);
/* Bugfix by ylo */
fastmap
=
bufp
->
fastmap
;
translate
=
bufp
->
translate
;
if
(
fastmap
&&
!
bufp
->
fastmap_accurate
)
if
(
fastmap
&&
!
bufp
->
fastmap_accurate
)
{
re_compile_fastmap
(
bufp
);
if
(
PyErr_Occurred
())
return
-
2
;
}
anchor
=
bufp
->
anchor
;
if
(
bufp
->
can_be_null
==
1
)
/* can_be_null == 2: can match null at eob */
fastmap
=
NULL
;
...
...
Modules/regexpr.h
View file @
95e8053a
...
...
@@ -33,16 +33,16 @@ extern "C" {
typedef
struct
re_pattern_buffer
{
char
*
buffer
;
/* compiled pattern */
unsigned
char
*
buffer
;
/* compiled pattern */
int
allocated
;
/* allocated size of compiled pattern */
int
used
;
/* actual length of compiled pattern */
char
*
fastmap
;
/* fastmap[ch] is true if ch can start pattern */
char
*
translate
;
/* translation to apply during compilation/matching */
char
fastmap_accurate
;
/* true if fastmap is valid */
char
can_be_null
;
/* true if can match empty string */
char
uses_registers
;
/* registers are used and need to be initialized */
unsigned
char
*
fastmap
;
/* fastmap[ch] is true if ch can start pattern */
unsigned
char
*
translate
;
/* translation to apply during compilation/matching */
unsigned
char
fastmap_accurate
;
/* true if fastmap is valid */
unsigned
char
can_be_null
;
/* true if can match empty string */
unsigned
char
uses_registers
;
/* registers are used and need to be initialized */
int
num_registers
;
/* number of registers used */
char
anchor
;
/* anchor: 0=none 1=begline 2=begbuf */
unsigned
char
anchor
;
/* anchor: 0=none 1=begline 2=begbuf */
}
*
regexp_t
;
typedef
struct
re_registers
...
...
@@ -93,7 +93,7 @@ extern int re_syntax;
/* This is the actual syntax mask. It was added so that Python could do
* syntax-dependent munging of patterns before compilation. */
extern
char
re_syntax_table
[
256
];
extern
unsigned
char
re_syntax_table
[
256
];
void
re_compile_initialize
(
void
);
...
...
@@ -101,7 +101,7 @@ int re_set_syntax(int syntax);
/* This sets the syntax to use and returns the previous syntax. The
* syntax is specified by a bit mask of the above defined bits. */
char
*
re_compile_pattern
(
char
*
regex
,
int
regex_size
,
regexp_t
compiled
);
unsigned
char
*
re_compile_pattern
(
unsigned
char
*
regex
,
int
regex_size
,
regexp_t
compiled
);
/* This compiles the regexp (given in regex and length in regex_size).
* This returns NULL if the regexp compiled successfully, and an error
* message if an error was encountered. The buffer field must be
...
...
@@ -110,14 +110,14 @@ char *re_compile_pattern(char *regex, int regex_size, regexp_t compiled);
* buffer is NULL). Also, the translate field must be set to point to a
* valid translation table, or NULL if it is not used. */
int
re_match
(
regexp_t
compiled
,
char
*
string
,
int
size
,
int
pos
,
int
re_match
(
regexp_t
compiled
,
unsigned
char
*
string
,
int
size
,
int
pos
,
regexp_registers_t
old_regs
);
/* This tries to match the regexp against the string. This returns the
* length of the matched portion, or -1 if the pattern could not be
* matched and -2 if an error (such as failure stack overflow) is
* encountered. */
int
re_search
(
regexp_t
compiled
,
char
*
string
,
int
size
,
int
startpos
,
int
re_search
(
regexp_t
compiled
,
unsigned
char
*
string
,
int
size
,
int
startpos
,
int
range
,
regexp_registers_t
regs
);
/* This rearches for a substring matching the regexp. This returns the
* first index at which a match is found. range specifies at how many
...
...
@@ -132,28 +132,16 @@ void re_compile_fastmap(regexp_t compiled);
* the calling program must have initialized the fastmap field to point
* to an array of 256 characters. */
char
*
re_comp
(
char
*
s
);
/* BSD 4.2 regex library routine re_comp. This compiles the regexp into
* an internal buffer. This returns NULL if the regexp was compiled
* successfully, and an error message if there was an error. */
int
re_exec
(
char
*
s
);
/* BSD 4.2 regexp library routine re_exec. This returns true if the
* string matches the regular expression (that is, a matching part is
* found anywhere in the string). */
#else
/* HAVE_PROTOTYPES */
extern
int
re_syntax
;
extern
char
re_syntax_table
[
256
];
extern
unsigned
char
re_syntax_table
[
256
];
void
re_compile_initialize
();
int
re_set_syntax
();
char
*
re_compile_pattern
();
unsigned
char
*
re_compile_pattern
();
int
re_match
();
int
re_search
();
void
re_compile_fastmap
();
char
*
re_comp
();
int
re_exec
();
#endif
/* HAVE_PROTOTYPES */
...
...
Modules/reopmodule.c
View file @
95e8053a
...
...
@@ -62,7 +62,7 @@ static PyObject *ReopError; /* Exception */
#define BEGINNING_OF_BUFFER 7
#define END_OF_BUFFER 8
static
char
*
reop_casefold
;
static
unsigned
char
*
reop_casefold
;
static
PyObject
*
makeresult
(
regs
,
num_regs
)
...
...
@@ -105,7 +105,7 @@ reop_match(self, args)
PyObject
*
self
;
PyObject
*
args
;
{
char
*
string
;
unsigned
char
*
string
;
int
fastmaplen
,
stringlen
;
int
can_be_null
,
anchor
,
i
;
int
flags
,
pos
,
result
;
...
...
@@ -163,8 +163,8 @@ reop_match(self, args)
if
(
result
<
-
1
)
{
/* Failure like stack overflow */
if
(
!
PyErr_Occurred
())
PyErr_SetString
(
ReopError
,
"match failure"
);
return
NULL
;
}
if
(
result
==
-
1
)
{
...
...
@@ -174,12 +174,38 @@ reop_match(self, args)
return
makeresult
(
&
re_regs
,
bufp
.
num_registers
);
}
#if 0
static PyObject *
reop_optimize(self, args)
PyObject *self;
PyObject *args;
{
unsigned char *buffer;
int buflen;
struct re_pattern_buffer bufp;
PyObject *opt_code;
if (!PyArg_Parse(args, "(s#)", &buffer, &buflen)) return NULL;
/* Create a new string for the optimized code */
opt_code=PyString_FromStringAndSize(buffer, buflen);
if (opt_code!=NULL)
{
bufp.buffer = PyString_AsString(opt_code);
bufp.used=bufp.allocated=buflen;
}
return opt_code;
}
#endif
static
PyObject
*
reop_search
(
self
,
args
)
PyObject
*
self
;
PyObject
*
args
;
{
char
*
string
;
unsigned
char
*
string
;
int
fastmaplen
,
stringlen
;
int
can_be_null
,
anchor
,
i
;
int
flags
,
pos
,
result
;
...
...
@@ -237,6 +263,7 @@ reop_search(self, args)
if
(
result
<
-
1
)
{
/* Failure like stack overflow */
if
(
!
PyErr_Occurred
())
PyErr_SetString
(
ReopError
,
"match failure"
);
return
NULL
;
}
...
...
@@ -626,7 +653,7 @@ reop__expand(self, args)
{
PyObject
*
results
,
*
match_obj
;
PyObject
*
repl_obj
,
*
newstring
;
char
*
repl
;
unsigned
char
*
repl
;
int
size
,
total_len
,
i
,
start
,
pos
;
if
(
!
PyArg_ParseTuple
(
args
,
"OS"
,
&
match_obj
,
&
repl_obj
))
...
...
@@ -810,7 +837,7 @@ internal_split(args, retain)
reopobject *pattern;
int maxsplit=0, count=0, length, next=0, result;
int match_end=0; /* match_start is defined below */
char *start;
unsigned
char *start;
if (!PyArg_ParseTuple(args, "s#Oi", &start, &length, &pattern,
&maxsplit))
...
...
@@ -911,6 +938,7 @@ static struct PyMethodDef reop_global_methods[] = {
{
"expand_escape"
,
reop_expand_escape
,
1
},
{
"_expand"
,
reop__expand
,
1
},
#if 0
{"_optimize", reop_optimize, 0},
{"split", reop_split, 0},
{"splitx", reop_splitx, 0},
#endif
...
...
@@ -922,8 +950,8 @@ initreop()
{
PyObject
*
m
,
*
d
,
*
k
,
*
v
,
*
o
;
int
i
;
char
*
s
;
char
j
[
2
];
unsigned
char
*
s
;
unsigned
char
j
[
2
];
re_compile_initialize
();
...
...
@@ -936,7 +964,7 @@ initreop()
goto
finally
;
/* Initialize reop.casefold constant */
if
(
!
(
v
=
PyString_FromStringAndSize
((
char
*
)
NULL
,
256
)))
if
(
!
(
v
=
PyString_FromStringAndSize
((
unsigned
char
*
)
NULL
,
256
)))
goto
finally
;
if
(
!
(
s
=
PyString_AsString
(
v
)))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment