Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
95e8053a
Commit
95e8053a
authored
Aug 13, 1997
by
Guido van Rossum
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
1.5a3 prerelease 1 from AMK
parent
a74ef66a
Changes
8
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
226 additions
and
114 deletions
+226
-114
Lib/re.py
Lib/re.py
+25
-10
Lib/test/re_tests.py
Lib/test/re_tests.py
+13
-2
Lib/test/regex_tests.py
Lib/test/regex_tests.py
+7
-1
Lib/test/test_re.py
Lib/test/test_re.py
+12
-1
Modules/regexmodule.c
Modules/regexmodule.c
+8
-4
Modules/regexpr.c
Modules/regexpr.c
+109
-60
Modules/regexpr.h
Modules/regexpr.h
+13
-25
Modules/reopmodule.c
Modules/reopmodule.c
+39
-11
No files found.
Lib/re.py
View file @
95e8053a
...
...
@@ -317,10 +317,19 @@ class Eol(Instruction):
class
Set
(
Instruction
):
name
=
'set'
def
__init__
(
self
,
set
):
def
__init__
(
self
,
set
,
flags
=
0
):
self
.
set
=
set
Instruction
.
__init__
(
self
,
chr
(
3
),
33
)
if
flags
&
IGNORECASE
:
self
.
set
=
map
(
string
.
lower
,
self
.
set
)
if
len
(
set
)
==
1
:
# If only one element, use the "exact" opcode (it'll be faster)
Instruction
.
__init__
(
self
,
chr
(
4
),
2
)
else
:
# Use the "set" opcode
Instruction
.
__init__
(
self
,
chr
(
3
),
33
)
def
assemble
(
self
,
position
,
labels
):
if
len
(
self
.
set
)
==
1
:
# If only one character in set, generate an "exact" opcode
return
self
.
opcode
+
self
.
set
[
0
]
result
=
self
.
opcode
temp
=
0
for
i
,
c
in
map
(
lambda
x
:
(
x
,
chr
(
x
)),
range
(
256
)):
...
...
@@ -333,14 +342,16 @@ class Set(Instruction):
def
__repr__
(
self
):
result
=
'%-15s'
%
(
self
.
name
)
self
.
set
.
sort
()
# XXX this should print more intelligently
for
char
in
self
.
set
:
result
=
result
+
char
return
result
class
Exact
(
Instruction
):
name
=
'exact'
def
__init__
(
self
,
char
):
def
__init__
(
self
,
char
,
flags
):
self
.
char
=
char
if
flags
&
IGNORECASE
:
self
.
char
=
string
.
lower
(
self
.
char
)
Instruction
.
__init__
(
self
,
chr
(
4
),
2
)
def
assemble
(
self
,
position
,
labels
):
return
self
.
opcode
+
self
.
char
...
...
@@ -881,7 +892,7 @@ def compile(pattern, flags=0):
escape_type
,
value
,
index
=
expand_escape
(
pattern
,
index
)
if
escape_type
==
CHAR
:
stack
.
append
([
Exact
(
value
)])
stack
.
append
([
Exact
(
value
,
flags
)])
lastop
=
'
\
\
'
+
value
elif
escape_type
==
MEMORY_REFERENCE
:
...
...
@@ -1306,7 +1317,7 @@ def compile(pattern, flags=0):
elif
char
==
'.'
:
if
flags
&
DOTALL
:
stack
.
append
([
Set
(
map
(
chr
,
range
(
256
)))])
stack
.
append
([
Set
(
map
(
chr
,
range
(
256
))
,
flags
)])
else
:
stack
.
append
([
AnyChar
()])
lastop
=
'.'
...
...
@@ -1336,12 +1347,12 @@ def compile(pattern, flags=0):
index
=
end
+
1
# do not change lastop
else
:
stack
.
append
([
Exact
(
char
)])
stack
.
append
([
Exact
(
char
,
flags
)])
lastop
=
'#'
elif
char
in
string
.
whitespace
:
if
not
(
flags
&
VERBOSE
):
stack
.
append
([
Exact
(
char
)])
stack
.
append
([
Exact
(
char
,
flags
)])
lastop
=
char
elif
char
==
'['
:
...
...
@@ -1449,22 +1460,25 @@ def compile(pattern, flags=0):
index
=
index
+
1
if
negate
:
# If case is being ignored, then both upper- and lowercase
# versions of the letters must be excluded.
if
flags
&
IGNORECASE
:
set
=
set
+
map
(
string
.
upper
,
set
)
notset
=
[]
for
char
in
map
(
chr
,
range
(
256
)):
if
char
not
in
set
:
notset
.
append
(
char
)
if
len
(
notset
)
==
0
:
raise
error
,
'empty negated set'
stack
.
append
([
Set
(
notset
)])
stack
.
append
([
Set
(
notset
,
flags
)])
else
:
if
len
(
set
)
==
0
:
raise
error
,
'empty set'
stack
.
append
([
Set
(
set
)])
stack
.
append
([
Set
(
set
,
flags
)])
lastop
=
'[]'
else
:
stack
.
append
([
Exact
(
char
)])
stack
.
append
([
Exact
(
char
,
flags
)])
lastop
=
char
code
=
[]
...
...
@@ -1485,6 +1499,7 @@ def compile(pattern, flags=0):
code
.
append
(
Label
(
label
))
label
=
label
+
1
code
.
append
(
End
())
# print code
return
RegexObject
(
pattern
,
flags
,
code
,
register
,
groupindex
)
# Replace expand_escape and _expand functions with their C equivalents.
...
...
Lib/test/re_tests.py
View file @
95e8053a
...
...
@@ -318,6 +318,7 @@ tests = [
# ('((((((((((a))))))))))\\41', 'aa', FAIL),
# ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
(
'((((((((((a))))))))))
\
\
41'
,
''
,
SYNTAX_ERROR
),
(
'(?i)((((((((((a))))))))))
\
\
41'
,
''
,
SYNTAX_ERROR
),
(
'(((((((((a)))))))))'
,
'a'
,
SUCCEED
,
'found'
,
'a'
),
(
'multiple words of text'
,
'uh-uh'
,
FAIL
),
(
'multiple words'
,
'multiple words, yeah'
,
SUCCEED
,
'found'
,
'multiple words'
),
...
...
@@ -448,7 +449,6 @@ tests = [
(
'(?i)((((((((((a))))))))))
\
\
10'
,
'AA'
,
SUCCEED
,
'found'
,
'AA'
),
#('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
#('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
(
'(?i)((((((((((a))))))))))
\
\
41'
,
''
,
SYNTAX_ERROR
),
(
'(?i)(((((((((a)))))))))'
,
'A'
,
SUCCEED
,
'found'
,
'A'
),
(
'(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))'
,
'A'
,
SUCCEED
,
'g1'
,
'A'
),
(
'(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))'
,
'C'
,
SUCCEED
,
'g1'
,
'C'
),
...
...
@@ -506,10 +506,21 @@ xyzabc
(
'a.b'
,
'a
\
n
b'
,
FAIL
),
(
'(?s)a.b'
,
'a
\
n
b'
,
SUCCEED
,
'found'
,
'a
\
n
b'
),
# test \w, etc.
# test \w, etc.
both inside and outside character classes
(
'
\
\
w+'
,
'--ab_cd0123--'
,
SUCCEED
,
'found'
,
'ab_cd0123'
),
(
'[
\
\
w]+'
,
'--ab_cd0123--'
,
SUCCEED
,
'found'
,
'ab_cd0123'
),
(
'
\
\
D+'
,
'1234abc5678'
,
SUCCEED
,
'found'
,
'abc'
),
(
'[
\
\
D]+'
,
'1234abc5678'
,
SUCCEED
,
'found'
,
'abc'
),
(
'[
\
\
da-fA-F]+'
,
'123abc'
,
SUCCEED
,
'found'
,
'123abc'
),
(
'[
\
\
d-x]'
,
'-'
,
SYNTAX_ERROR
),
(
r'([\
s]*)([
\S]*)([\
s]*)
', '
testing
!
1972
', SUCCEED, '
g3
+
g2
+
g1
', '
testing
!
1972
'),
(r'
(
\
s
*
)(
\
S
*
)(
\
s
*
)
', '
testing
!
1972
', SUCCEED, '
g3
+
g2
+
g1
', '
testing
!
1972
'),
(r'
\
xff
', '
\
377
', SUCCEED, '
found
', chr(255)),
(r'
\
x00ff
', '
\
377
', SUCCEED, '
found
', chr(255)),
(r'
\
t
\
n
\
v
\
r
\
f
\
a
\
g
', '
\
t
\
n
\
v
\
r
\
f
\
ag
', SUCCEED, '
found
', '
\
t
\
n
\
v
\
r
\
f
\
ag
'),
('
\
t
\
n
\
v
\
r
\
f
\
a
\
g
', '
\
t
\
n
\
v
\
r
\
f
\
ag
', SUCCEED, '
found
', '
\
t
\
n
\
v
\
r
\
f
\
ag
'),
(r'
\
t
\
n
\
v
\
r
\
f
\
a
', '
\
t
\
n
\
v
\
r
\
f
\
a
', SUCCEED, '
found
', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
(r'
[
\
t
][
\
n
][
\
v
][
\
r
][
\
f
][
\
a
][
\
A
][
\
b
][
\
B
][
\
Z
][
\
g
]
', '
\
t
\
n
\
v
\
r
\
f
\
aA
\
bBZg
', SUCCEED, '
found
', '
\
t
\
n
\
v
\
r
\
f
\
aA
\
bBZg
'),
]
Lib/test/regex_tests.py
View file @
95e8053a
...
...
@@ -278,6 +278,12 @@ tests = [
(
'
\
\
([xyz]*
\
\
)x'
,
'abcx'
,
SUCCEED
,
'found+"-"+g1'
,
'x-'
),
(
'
\
\
(a
\
\
)+b
\
\
|aac'
,
'aac'
,
SUCCEED
,
'found+"-"+g1'
,
'aac-None'
)
'found+"-"+g1'
,
'aac-None'
),
(
'
\
<
a
'
,
'a'
,
SUCCEED
,
'found'
,
'a'
),
(
'
\
<
a
'
,
'!'
,
FAIL
),
(
'a
\
<
b
'
,
'ab'
,
FAIL
),
(
'a
\
>
'
, '
ab
', FAIL),
('
a
\
>
', '
a
!
', SUCCEED, '
found
', '
a
'),
('
a
\
>
', '
a
', SUCCEED, '
found
', '
a
'),
]
Lib/test/test_re.py
View file @
95e8053a
...
...
@@ -31,6 +31,10 @@ try:
assert re.sub('
(
?
P
<
a
>
x
)
', '
\
g
<
a
>
\
g
<
a
>
', '
xx
') == '
xxxx
'
assert re.sub('
a
', r'
\
t
\
n
\
v
\
r
\
f
\
a
\
b
\
B
\
Z
\
a
\
A
\
w
\
W
\
s
\
S
\
d
\
D
', '
a
') == '
\
t
\
n
\
v
\
r
\
f
\
a
\
bBZ
\
aAwWsSdD
'
assert re.sub('
a
', '
\
t
\
n
\
v
\
r
\
f
\
a
', '
a
') == '
\
t
\
n
\
v
\
r
\
f
\
a
'
assert re.sub('
a
', '
\
t
\
n
\
v
\
r
\
f
\
a
', '
a
') == (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))
except AssertionError:
raise TestFailed, "re.sub"
...
...
@@ -120,7 +124,6 @@ if verbose:
print '
Running
re_tests
test
suite
'
for t in tests:
print t
sys.stdout.flush()
pattern=s=outcome=repl=expected=None
if len(t)==5:
...
...
@@ -136,6 +139,7 @@ for t in tests:
if outcome==SYNTAX_ERROR: pass # Expected a syntax error
else:
print '
===
Syntax
error
:
', t
except KeyboardInterrupt: raise KeyboardInterrupt
except:
print '
***
Unexpected
error
***
'
if verbose:
...
...
@@ -182,3 +186,10 @@ for t in tests:
print repr(repl)+'
should
be
'+repr(expected)
else:
print '
===
Failed
incorrectly
', t
# Try the match with IGNORECASE enabled, and check that it
# still succeeds.
obj=re.compile(pattern, re.IGNORECASE)
result=obj.search(s)
if result==None:
print '
===
Fails
on
case
-
insensitive
match
', t
Modules/regexmodule.c
View file @
95e8053a
...
...
@@ -132,8 +132,10 @@ regobj_match(re, args)
re
->
re_lastok
=
NULL
;
result
=
re_match
(
&
re
->
re_patbuf
,
buffer
,
size
,
offset
,
&
re
->
re_regs
);
if
(
result
<
-
1
)
{
/* Failure like stack overflow */
PyErr_SetString
(
RegexError
,
"match failure"
);
/* Serious failure of some sort; if re_match didn't
set an exception, raise a generic error */
if
(
!
PyErr_Occurred
())
PyErr_SetString
(
RegexError
,
"match failure"
);
return
NULL
;
}
if
(
result
>=
0
)
{
...
...
@@ -174,8 +176,10 @@ regobj_search(re, args)
result
=
re_search
(
&
re
->
re_patbuf
,
buffer
,
size
,
offset
,
range
,
&
re
->
re_regs
);
if
(
result
<
-
1
)
{
/* Failure like stack overflow */
PyErr_SetString
(
RegexError
,
"match failure"
);
/* Serious failure of some sort; if re_match didn't
set an exception, raise a generic error */
if
(
!
PyErr_Occurred
())
PyErr_SetString
(
RegexError
,
"match failure"
);
return
NULL
;
}
if
(
result
>=
0
)
{
...
...
Modules/regexpr.c
View file @
95e8053a
This diff is collapsed.
Click to expand it.
Modules/regexpr.h
View file @
95e8053a
...
...
@@ -33,16 +33,16 @@ extern "C" {
typedef
struct
re_pattern_buffer
{
char
*
buffer
;
/* compiled pattern */
unsigned
char
*
buffer
;
/* compiled pattern */
int
allocated
;
/* allocated size of compiled pattern */
int
used
;
/* actual length of compiled pattern */
char
*
fastmap
;
/* fastmap[ch] is true if ch can start pattern */
char
*
translate
;
/* translation to apply during compilation/matching */
char
fastmap_accurate
;
/* true if fastmap is valid */
char
can_be_null
;
/* true if can match empty string */
char
uses_registers
;
/* registers are used and need to be initialized */
unsigned
char
*
fastmap
;
/* fastmap[ch] is true if ch can start pattern */
unsigned
char
*
translate
;
/* translation to apply during compilation/matching */
unsigned
char
fastmap_accurate
;
/* true if fastmap is valid */
unsigned
char
can_be_null
;
/* true if can match empty string */
unsigned
char
uses_registers
;
/* registers are used and need to be initialized */
int
num_registers
;
/* number of registers used */
char
anchor
;
/* anchor: 0=none 1=begline 2=begbuf */
unsigned
char
anchor
;
/* anchor: 0=none 1=begline 2=begbuf */
}
*
regexp_t
;
typedef
struct
re_registers
...
...
@@ -93,7 +93,7 @@ extern int re_syntax;
/* This is the actual syntax mask. It was added so that Python could do
* syntax-dependent munging of patterns before compilation. */
extern
char
re_syntax_table
[
256
];
extern
unsigned
char
re_syntax_table
[
256
];
void
re_compile_initialize
(
void
);
...
...
@@ -101,7 +101,7 @@ int re_set_syntax(int syntax);
/* This sets the syntax to use and returns the previous syntax. The
* syntax is specified by a bit mask of the above defined bits. */
char
*
re_compile_pattern
(
char
*
regex
,
int
regex_size
,
regexp_t
compiled
);
unsigned
char
*
re_compile_pattern
(
unsigned
char
*
regex
,
int
regex_size
,
regexp_t
compiled
);
/* This compiles the regexp (given in regex and length in regex_size).
* This returns NULL if the regexp compiled successfully, and an error
* message if an error was encountered. The buffer field must be
...
...
@@ -110,14 +110,14 @@ char *re_compile_pattern(char *regex, int regex_size, regexp_t compiled);
* buffer is NULL). Also, the translate field must be set to point to a
* valid translation table, or NULL if it is not used. */
int
re_match
(
regexp_t
compiled
,
char
*
string
,
int
size
,
int
pos
,
int
re_match
(
regexp_t
compiled
,
unsigned
char
*
string
,
int
size
,
int
pos
,
regexp_registers_t
old_regs
);
/* This tries to match the regexp against the string. This returns the
* length of the matched portion, or -1 if the pattern could not be
* matched and -2 if an error (such as failure stack overflow) is
* encountered. */
int
re_search
(
regexp_t
compiled
,
char
*
string
,
int
size
,
int
startpos
,
int
re_search
(
regexp_t
compiled
,
unsigned
char
*
string
,
int
size
,
int
startpos
,
int
range
,
regexp_registers_t
regs
);
/* This rearches for a substring matching the regexp. This returns the
* first index at which a match is found. range specifies at how many
...
...
@@ -132,28 +132,16 @@ void re_compile_fastmap(regexp_t compiled);
* the calling program must have initialized the fastmap field to point
* to an array of 256 characters. */
char
*
re_comp
(
char
*
s
);
/* BSD 4.2 regex library routine re_comp. This compiles the regexp into
* an internal buffer. This returns NULL if the regexp was compiled
* successfully, and an error message if there was an error. */
int
re_exec
(
char
*
s
);
/* BSD 4.2 regexp library routine re_exec. This returns true if the
* string matches the regular expression (that is, a matching part is
* found anywhere in the string). */
#else
/* HAVE_PROTOTYPES */
extern
int
re_syntax
;
extern
char
re_syntax_table
[
256
];
extern
unsigned
char
re_syntax_table
[
256
];
void
re_compile_initialize
();
int
re_set_syntax
();
char
*
re_compile_pattern
();
unsigned
char
*
re_compile_pattern
();
int
re_match
();
int
re_search
();
void
re_compile_fastmap
();
char
*
re_comp
();
int
re_exec
();
#endif
/* HAVE_PROTOTYPES */
...
...
Modules/reopmodule.c
View file @
95e8053a
...
...
@@ -62,7 +62,7 @@ static PyObject *ReopError; /* Exception */
#define BEGINNING_OF_BUFFER 7
#define END_OF_BUFFER 8
static
char
*
reop_casefold
;
static
unsigned
char
*
reop_casefold
;
static
PyObject
*
makeresult
(
regs
,
num_regs
)
...
...
@@ -105,7 +105,7 @@ reop_match(self, args)
PyObject
*
self
;
PyObject
*
args
;
{
char
*
string
;
unsigned
char
*
string
;
int
fastmaplen
,
stringlen
;
int
can_be_null
,
anchor
,
i
;
int
flags
,
pos
,
result
;
...
...
@@ -163,8 +163,8 @@ reop_match(self, args)
if
(
result
<
-
1
)
{
/* Failure like stack overflow */
PyErr_SetString
(
ReopError
,
"match failure"
);
if
(
!
PyErr_Occurred
())
PyErr_SetString
(
ReopError
,
"match failure"
);
return
NULL
;
}
if
(
result
==
-
1
)
{
...
...
@@ -174,12 +174,38 @@ reop_match(self, args)
return
makeresult
(
&
re_regs
,
bufp
.
num_registers
);
}
#if 0
static PyObject *
reop_optimize(self, args)
PyObject *self;
PyObject *args;
{
unsigned char *buffer;
int buflen;
struct re_pattern_buffer bufp;
PyObject *opt_code;
if (!PyArg_Parse(args, "(s#)", &buffer, &buflen)) return NULL;
/* Create a new string for the optimized code */
opt_code=PyString_FromStringAndSize(buffer, buflen);
if (opt_code!=NULL)
{
bufp.buffer = PyString_AsString(opt_code);
bufp.used=bufp.allocated=buflen;
}
return opt_code;
}
#endif
static
PyObject
*
reop_search
(
self
,
args
)
PyObject
*
self
;
PyObject
*
args
;
{
char
*
string
;
unsigned
char
*
string
;
int
fastmaplen
,
stringlen
;
int
can_be_null
,
anchor
,
i
;
int
flags
,
pos
,
result
;
...
...
@@ -237,7 +263,8 @@ reop_search(self, args)
if
(
result
<
-
1
)
{
/* Failure like stack overflow */
PyErr_SetString
(
ReopError
,
"match failure"
);
if
(
!
PyErr_Occurred
())
PyErr_SetString
(
ReopError
,
"match failure"
);
return
NULL
;
}
...
...
@@ -626,7 +653,7 @@ reop__expand(self, args)
{
PyObject
*
results
,
*
match_obj
;
PyObject
*
repl_obj
,
*
newstring
;
char
*
repl
;
unsigned
char
*
repl
;
int
size
,
total_len
,
i
,
start
,
pos
;
if
(
!
PyArg_ParseTuple
(
args
,
"OS"
,
&
match_obj
,
&
repl_obj
))
...
...
@@ -810,7 +837,7 @@ internal_split(args, retain)
reopobject *pattern;
int maxsplit=0, count=0, length, next=0, result;
int match_end=0; /* match_start is defined below */
char *start;
unsigned
char *start;
if (!PyArg_ParseTuple(args, "s#Oi", &start, &length, &pattern,
&maxsplit))
...
...
@@ -911,6 +938,7 @@ static struct PyMethodDef reop_global_methods[] = {
{
"expand_escape"
,
reop_expand_escape
,
1
},
{
"_expand"
,
reop__expand
,
1
},
#if 0
{"_optimize", reop_optimize, 0},
{"split", reop_split, 0},
{"splitx", reop_splitx, 0},
#endif
...
...
@@ -922,8 +950,8 @@ initreop()
{
PyObject
*
m
,
*
d
,
*
k
,
*
v
,
*
o
;
int
i
;
char
*
s
;
char
j
[
2
];
unsigned
char
*
s
;
unsigned
char
j
[
2
];
re_compile_initialize
();
...
...
@@ -936,7 +964,7 @@ initreop()
goto
finally
;
/* Initialize reop.casefold constant */
if
(
!
(
v
=
PyString_FromStringAndSize
((
char
*
)
NULL
,
256
)))
if
(
!
(
v
=
PyString_FromStringAndSize
((
unsigned
char
*
)
NULL
,
256
)))
goto
finally
;
if
(
!
(
s
=
PyString_AsString
(
v
)))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment