Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
4217bb3c
Commit
4217bb3c
authored
Jul 10, 1997
by
Guido van Rossum
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Straight from Jeffrey Ollie's web page.
parent
cd934069
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
1137 additions
and
0 deletions
+1137
-0
Lib/re.py
Lib/re.py
+1137
-0
No files found.
Lib/re.py
0 → 100644
View file @
4217bb3c
#!/usr/bin/env python
# -*- mode: python -*-
# $Id$
import
string
import
reop
error
=
're error'
# compilation flags
IGNORECASE
=
I
=
0x01
MULTILINE
=
M
=
0x02
DOTALL
=
S
=
0x04
VERBOSE
=
X
=
0x08
#
# Initialize syntax table. This information should really come from the
# syntax table in regexpr.c rather than being duplicated here.
#
syntax_table
=
{}
for
char
in
map
(
chr
,
range
(
0
,
256
)):
syntax_table
[
char
]
=
[]
for
char
in
string
.
lowercase
:
syntax_table
[
char
].
append
(
'word'
)
for
char
in
string
.
uppercase
:
syntax_table
[
char
].
append
(
'word'
)
for
char
in
string
.
digits
:
syntax_table
[
char
].
append
(
'word'
)
syntax_table
[
char
].
append
(
'digit'
)
for
char
in
string
.
whitespace
:
syntax_table
[
char
].
append
(
'whitespace'
)
syntax_table
[
'_'
].
append
(
'word'
)
#
#
#
def
match
(
pattern
,
string
,
flags
=
0
):
return
compile
(
pattern
,
flags
).
match
(
string
)
def
search
(
pattern
,
string
,
flags
=
0
):
return
compile
(
pattern
,
flags
).
search
(
string
)
def
sub
(
pattern
,
repl
,
string
,
count
=
0
):
pass
def
subn
(
pattern
,
repl
,
string
,
count
=
0
):
pass
def
split
(
string
,
pattern
,
maxsplit
=
0
):
pass
#
#
#
class
RegexObject
:
def
__init__
(
self
,
pattern
,
flags
,
code
,
num_regs
,
groupindex
,
callouts
):
self
.
code
=
code
self
.
num_regs
=
num_regs
self
.
flags
=
flags
self
.
pattern
=
pattern
self
.
groupindex
=
groupindex
self
.
callouts
=
callouts
self
.
fastmap
=
build_fastmap
(
code
)
if
code
[
0
].
name
==
'bol'
:
self
.
anchor
=
1
elif
code
[
0
].
name
==
'begbuf'
:
self
.
anchor
=
2
else
:
self
.
anchor
=
0
self
.
buffer
=
assemble
(
code
)
def
match
(
self
,
string
,
pos
=
0
):
regs
=
reop
.
match
(
self
.
buffer
,
self
.
num_regs
,
self
.
flags
,
self
.
fastmap
.
can_be_null
,
self
.
fastmap
.
fastmap
(),
self
.
anchor
,
string
,
pos
)
if
regs
is
None
:
return
None
return
MatchObject
(
self
,
string
,
pos
,
regs
)
def
search
(
self
,
string
,
pos
=
0
):
regs
=
reop
.
search
(
self
.
buffer
,
self
.
num_regs
,
self
.
flags
,
self
.
fastmap
.
can_be_null
,
self
.
fastmap
.
fastmap
(),
self
.
anchor
,
string
,
pos
)
if
regs
is
None
:
return
None
return
MatchObject
(
self
,
string
,
pos
,
regs
)
class
MatchObject
:
def
__init__
(
self
,
re
,
string
,
pos
,
regs
):
self
.
re
=
re
self
.
string
=
string
self
.
pos
=
pos
self
.
regs
=
regs
def
start
(
self
,
i
):
if
type
(
i
)
==
type
(
''
):
try
:
i
=
self
.
re
.
groupindex
[
i
]
except
(
KeyError
,
TypeError
):
raise
IndexError
return
self
.
regs
[
i
][
0
]
def
end
(
self
,
i
):
if
type
(
i
)
==
type
(
''
):
try
:
i
=
self
.
re
.
groupindex
[
i
]
except
(
KeyError
,
TypeError
):
raise
IndexError
return
self
.
regs
[
i
][
1
]
def
span
(
self
,
i
):
if
type
(
i
)
==
type
(
''
):
try
:
i
=
self
.
re
.
groupindex
[
i
]
except
(
KeyError
,
TypeError
):
raise
IndexError
return
self
.
regs
[
i
]
def
group
(
i
):
if
type
(
i
)
==
type
(
''
):
try
:
i
=
self
.
re
.
groupindex
[
i
]
except
(
KeyError
,
TypeError
):
raise
IndexError
return
self
.
string
[
self
.
regs
[
i
][
0
]:
self
.
regs
[
i
][
1
]]
#
# A set of classes to make assembly a bit easier, if a bit verbose.
#
class
Instruction
:
def
__init__
(
self
,
opcode
,
size
=
1
):
self
.
opcode
=
opcode
self
.
size
=
size
def
assemble
(
self
,
position
,
labels
):
return
self
.
opcode
def
__repr__
(
self
):
return
'%-15s'
%
(
self
.
name
)
class
FunctionCallout
(
Instruction
):
name
=
'function'
def
__init__
(
self
,
function
):
self
.
function
=
function
Instruction
.
__init__
(
self
,
chr
(
22
),
2
+
len
(
self
.
function
))
def
assemble
(
self
,
position
,
labels
):
return
self
.
opcode
+
chr
(
len
(
self
.
function
))
+
self
.
function
def
__repr__
(
self
):
return
'%-15s %-10s'
%
(
self
.
name
,
self
.
function
)
class
End
(
Instruction
):
name
=
'end'
def
__init__
(
self
):
Instruction
.
__init__
(
self
,
chr
(
0
))
class
Bol
(
Instruction
):
name
=
'bol'
def
__init__
(
self
):
self
.
name
=
'bol'
Instruction
.
__init__
(
self
,
chr
(
1
))
class
Eol
(
Instruction
):
name
=
'eol'
def
__init__
(
self
):
Instruction
.
__init__
(
self
,
chr
(
2
))
class
Set
(
Instruction
):
name
=
'set'
def
__init__
(
self
,
set
):
self
.
set
=
set
Instruction
.
__init__
(
self
,
chr
(
3
),
33
)
def
assemble_set
(
self
,
position
,
labels
):
result
=
self
.
opcode
temp
=
0
for
i
,
c
in
map
(
lambda
x
:
(
x
,
chr
(
x
)),
range
(
256
)):
if
c
in
self
.
set
[
2
]:
temp
=
temp
|
(
1
<<
(
i
&
7
))
if
(
i
%
8
)
==
7
:
result
=
result
+
chr
(
temp
)
temp
=
0
return
result
def
__repr__
(
self
):
result
=
'%-15s'
%
(
self
.
name
)
self
.
set
.
sort
()
for
char
in
self
.
set
:
result
=
result
+
`char`
return
result
class
Exact
(
Instruction
):
name
=
'exact'
def
__init__
(
self
,
char
):
self
.
char
=
char
Instruction
.
__init__
(
self
,
chr
(
4
),
2
)
def
assemble
(
self
,
position
,
labels
):
return
self
.
opcode
+
self
.
char
def
__repr__
(
self
):
return
'%-15s %s'
%
(
self
.
name
,
`self.char`
)
class
AnyChar
(
Instruction
):
name
=
'anychar'
def
__init__
(
self
):
Instruction
.
__init__
(
self
,
chr
(
5
))
def
assemble
(
self
,
position
,
labels
):
return
self
.
opcode
class
MemoryInstruction
(
Instruction
):
def
__init__
(
self
,
opcode
,
register
):
self
.
register
=
register
Instruction
.
__init__
(
self
,
opcode
,
2
)
def
assemble
(
self
,
position
,
labels
):
return
self
.
opcode
+
chr
(
self
.
register
)
def
__repr__
(
self
):
return
'%-15s %i'
%
(
self
.
name
,
self
.
register
)
class
StartMemory
(
MemoryInstruction
):
name
=
'start_memory'
def
__init__
(
self
,
register
):
MemoryInstruction
.
__init__
(
self
,
chr
(
6
),
register
)
class
EndMemory
(
MemoryInstruction
):
name
=
'end_memory'
def
__init__
(
self
,
register
):
MemoryInstruction
.
__init__
(
self
,
chr
(
7
),
register
)
class
MatchMemory
(
MemoryInstruction
):
name
=
'match_memory'
def
__init__
(
self
,
register
):
MemoryInstruction
.
__init__
(
self
,
chr
(
8
),
register
)
class
JumpInstruction
(
Instruction
):
def
__init__
(
self
,
opcode
,
label
):
self
.
label
=
label
Instruction
.
__init__
(
self
,
opcode
,
3
)
def
compute_offset
(
self
,
start
,
dest
):
return
dest
-
(
start
+
3
)
def
pack_offset
(
self
,
offset
):
if
offset
>
32767
:
raise
error
,
'offset out of range (pos)'
elif
offset
<
-
32768
:
raise
error
,
'offset out of range (neg)'
elif
offset
<
0
:
offset
=
offset
+
65536
return
chr
(
offset
&
0xff
)
+
chr
((
offset
>>
8
)
&
0xff
)
def
assemble
(
self
,
position
,
labels
):
return
self
.
opcode
+
\
self
.
pack_offset
(
self
.
compute_offset
(
position
,
labels
[
self
.
label
]))
def
__repr__
(
self
):
return
'%-15s %i'
%
(
self
.
name
,
self
.
label
)
class
Jump
(
JumpInstruction
):
name
=
'jump'
def
__init__
(
self
,
label
):
JumpInstruction
.
__init__
(
self
,
chr
(
9
),
label
)
class
StarJump
(
JumpInstruction
):
name
=
'star_jump'
def
__init__
(
self
,
label
):
JumpInstruction
.
__init__
(
self
,
chr
(
10
),
label
)
class
FailureJump
(
JumpInstruction
):
name
=
'failure_jump'
def
__init__
(
self
,
label
):
JumpInstruction
.
__init__
(
self
,
chr
(
11
),
label
)
class
UpdateFailureJump
(
JumpInstruction
):
name
=
'update_failure_jump'
def
__init__
(
self
,
label
):
JumpInstruction
.
__init__
(
self
,
chr
(
12
),
label
)
class
DummyFailureJump
(
JumpInstruction
):
name
=
'update_failure_jump'
def
__init__
(
self
,
label
):
JumpInstruction
.
__init__
(
self
,
chr
(
13
),
label
)
class
BegBuf
(
Instruction
):
name
=
'begbuf'
def
__init__
(
self
):
Instruction
.
__init__
(
self
,
chr
(
14
))
class
EndBuf
(
Instruction
):
name
=
'endbuf'
def
__init__
(
self
):
Instruction
.
__init__
(
self
,
chr
(
15
))
class
WordBeg
(
Instruction
):
name
=
'wordbeg'
def
__init__
(
self
):
Instruction
.
__init__
(
self
,
chr
(
16
))
class
WordEnd
(
Instruction
):
name
=
'wordend'
def
__init__
(
self
):
Instruction
.
__init__
(
self
,
chr
(
17
))
class
WordBound
(
Instruction
):
name
=
'wordbound'
def
__init__
(
self
):
Instruction
.
__init__
(
self
,
chr
(
18
))
class
NotWordBound
(
Instruction
):
name
=
'notwordbound'
def
__init__
(
self
):
Instruction
.
__init__
(
self
,
chr
(
18
))
class
SyntaxSpec
(
Instruction
):
name
=
'syntaxspec'
def
__init__
(
self
,
syntax
):
self
.
syntax
=
syntax
Instruction
.
__init__
(
self
,
chr
(
20
),
2
)
def
assemble
(
self
,
postition
,
labels
):
return
self
.
opcode
+
chr
(
self
.
syntax
)
class
SyntaxSpec
(
Instruction
):
name
=
'notsyntaxspec'
def
__init__
(
self
,
syntax
):
self
.
syntax
=
syntax
Instruction
.
__init__
(
self
,
chr
(
21
),
2
)
def
assemble
(
self
,
postition
,
labels
):
return
self
.
opcode
+
chr
(
self
.
syntax
)
class
Label
(
Instruction
):
name
=
'label'
def
__init__
(
self
,
label
):
self
.
label
=
label
Instruction
.
__init__
(
self
,
''
,
0
)
def
__repr__
(
self
):
return
'%-15s %i'
%
(
self
.
name
,
self
.
label
)
class
OpenParen
(
Instruction
):
name
=
'('
def
__init__
(
self
,
register
):
self
.
register
=
register
Instruction
.
__init__
(
self
,
''
,
0
)
class
Alternation
(
Instruction
):
name
=
'|'
def
__init__
(
self
):
Instruction
.
__init__
(
self
,
''
,
0
)
#
#
#
def
assemble
(
instructions
):
labels
=
{}
position
=
0
pass1
=
[]
for
instruction
in
instructions
:
if
instruction
.
name
==
'label'
:
labels
[
instruction
.
label
]
=
position
else
:
pass1
.
append
((
position
,
instruction
))
position
=
position
+
instruction
.
size
pass2
=
''
for
position
,
instruction
in
pass1
:
pass2
=
pass2
+
instruction
.
assemble
(
position
,
labels
)
return
pass2
#
#
#
def
escape
(
pattern
):
result
=
''
for
char
in
pattern
:
if
'word'
not
in
syntax_table
[
char
]:
result
=
result
+
'
\
\
'
+
char
else
:
result
=
result
+
char
return
result
#
#
#
def
registers_used
(
instructions
):
result
=
[]
for
instruction
in
instructions
:
if
(
instruction
.
name
in
[
'set_memory'
,
'end_memory'
])
and
\
(
instruction
.
register
not
in
result
):
result
.
append
(
instruction
.
register
)
return
result
#
#
#
class
Fastmap
:
def
__init__
(
self
):
self
.
map
=
[
'
\
000
'
]
*
256
self
.
can_be_null
=
0
def
add
(
self
,
char
):
self
.
map
[
ord
(
char
)]
=
'
\
001
'
def
fastmap
(
self
):
return
string
.
join
(
self
.
map
,
''
)
def
__getitem__
(
self
,
char
):
return
ord
(
self
.
map
[
ord
(
char
)])
def
__repr__
(
self
):
self
.
map
.
sort
()
return
'Fastmap('
+
`self.can_be_null`
+
', '
+
`self.map`
+
')'
#
#
#
def
find_label
(
code
,
label
):
line
=
0
for
instruction
in
code
:
if
(
instruction
.
name
==
'label'
)
and
(
instruction
.
label
==
label
):
return
line
+
1
line
=
line
+
1
def
build_fastmap_aux
(
code
,
pos
,
visited
,
fastmap
):
if
visited
[
pos
]:
return
while
1
:
instruction
=
code
[
pos
]
visited
[
pos
]
=
1
pos
=
pos
+
1
if
instruction
.
name
==
'end'
:
fastmap
.
can_be_null
=
1
return
elif
instruction
.
name
==
'syntaxspec'
:
for
char
in
map
(
chr
,
range
(
256
)):
if
instruction
.
syntax
in
syntax_table
[
char
]:
fastmap
.
add
(
char
)
return
elif
instruction
.
name
==
'notsyntaxspec'
:
for
char
in
map
(
chr
,
range
(
256
)):
if
instruction
.
syntax
not
in
syntax_table
[
char
]:
fastmap
.
add
(
char
)
return
elif
instruction
.
name
==
'eol'
:
fastmap
.
add
(
'
\
n
'
)
if
fastmap
.
can_be_null
==
0
:
fastmap
.
can_be_null
=
2
return
elif
instruction
.
name
==
'set'
:
for
char
in
instruction
.
set
:
fastmap
.
add
(
char
)
return
elif
instruction
.
name
==
'exact'
:
fastmap
.
add
(
instruction
.
char
)
elif
instruction
.
name
==
'anychar'
:
for
char
in
map
(
chr
,
range
(
256
)):
if
char
!=
'
\
n
'
:
fastmap
.
add
(
char
)
return
elif
instruction
.
name
==
'match_memory'
:
for
char
in
map
(
chr
,
range
(
256
)):
fastmap
.
add
(
char
)
fastmap
.
can_be_null
=
1
return
elif
instruction
.
name
in
[
'jump'
,
'dummy_failure_jump'
,
\
'update_failure_jump'
,
'star_jump'
]:
pos
=
find_label
(
code
,
instruction
.
label
)
if
visited
[
pos
]:
return
visited
[
pos
]
=
1
elif
instruction
.
name
==
'failure_jump'
:
build_fastmap_aux
(
code
,
find_label
(
code
,
instruction
.
label
),
visited
,
fastmap
)
elif
instruction
.
name
==
'function'
:
for
char
in
map
(
chr
,
range
(
256
)):
fastmap
.
add
(
char
)
fastmap
.
can_be_null
=
1
return
def
build_fastmap
(
code
,
pos
=
0
):
visited
=
[
0
]
*
len
(
code
)
fastmap
=
Fastmap
()
build_fastmap_aux
(
code
,
pos
,
visited
,
fastmap
)
return
fastmap
#
#
#
def
compile
(
pattern
,
flags
=
0
):
stack
=
[]
index
=
0
label
=
0
register
=
1
groupindex
=
{}
callouts
=
[]
while
(
index
<
len
(
pattern
)):
char
=
pattern
[
index
]
index
=
index
+
1
if
char
==
'
\
\
'
:
if
index
<
len
(
pattern
):
next
=
pattern
[
index
]
index
=
index
+
1
if
next
==
't'
:
stack
.
append
([
Exact
(
chr
(
9
))])
elif
next
==
'n'
:
stack
.
append
([
Exact
(
chr
(
10
))])
elif
next
==
'r'
:
stack
.
append
([
Exact
(
chr
(
13
))])
elif
next
==
'f'
:
stack
.
append
([
Exact
(
chr
(
12
))])
elif
next
==
'a'
:
stack
.
append
([
Exact
(
chr
(
7
))])
elif
next
==
'e'
:
stack
.
append
([
Exact
(
chr
(
27
))])
elif
next
in
'0123456789'
:
value
=
next
while
(
index
<
len
(
pattern
))
and
\
(
pattern
[
index
]
in
string
.
digits
):
value
=
value
+
pattern
[
index
]
index
=
index
+
1
if
(
len
(
value
)
==
3
)
or
\
((
len
(
value
)
==
2
)
and
(
value
[
0
]
==
'0'
)):
value
=
string
.
atoi
(
value
,
8
)
if
value
>
255
:
raise
error
,
'octal char out of range'
stack
.
append
([
Exact
(
chr
(
value
))])
elif
value
==
'0'
:
stack
.
append
([
Exact
(
chr
(
0
))])
elif
len
(
value
)
>
3
:
raise
error
,
'too many digits'
else
:
value
=
string
.
atoi
(
value
)
if
value
>=
register
:
raise
error
,
(
'cannot reference a register '
'not yet used'
)
elif
value
==
0
:
raise
error
,
(
'register 0 cannot be used '
'during match'
)
stack
.
append
([
MatchMemory
(
value
)])
elif
next
==
'x'
:
value
=
''
while
(
index
<
len
(
pattern
))
and
\
(
pattern
[
index
]
in
string
.
hexdigits
):
value
=
value
+
pattern
[
index
]
index
=
index
+
1
value
=
string
.
atoi
(
value
,
16
)
if
value
>
255
:
raise
error
,
'hex char out of range'
stack
.
append
([
Exact
(
chr
(
value
))])
elif
next
==
'c'
:
if
index
>=
len
(
pattern
):
raise
error
,
'
\
\
c at end of re'
elif
pattern
[
index
]
in
'abcdefghijklmnopqrstuvwxyz'
:
stack
.
append
(
Exact
(
chr
(
ord
(
pattern
[
index
])
-
ord
(
'a'
)
+
1
)))
else
:
stack
.
append
(
Exact
(
chr
(
ord
(
pattern
[
index
])
^
64
)))
index
=
index
+
1
elif
next
==
'A'
:
stack
.
append
([
BegBuf
()])
elif
next
==
'Z'
:
stack
.
append
([
EndBuf
()])
elif
next
==
'b'
:
stack
.
append
([
WordBound
()])
elif
next
==
'B'
:
stack
.
append
([
NotWordBound
()])
elif
next
==
'w'
:
stack
.
append
([
SyntaxSpec
(
'word'
)])
elif
next
==
'W'
:
stack
.
append
([
NotSyntaxSpec
(
'word'
)])
elif
next
==
's'
:
stack
.
append
([
SyntaxSpec
(
'whitespace'
)])
elif
next
==
'S'
:
stack
.
append
([
NotSyntaxSpec
(
'whitespace'
)])
elif
next
==
'd'
:
stack
.
append
([
SyntaxSpec
(
'digit'
)])
elif
next
==
'D'
:
stack
.
append
([
NotSyntaxSpec
(
'digit'
)])
elif
next
in
'GluLUQE'
:
# some perl-isms that we don't support
raise
error
,
'
\
\
'
+
next
+
' not supported'
else
:
stack
.
append
([
Exact
(
pattern
[
index
])])
else
:
raise
error
,
'backslash at the end of a string'
elif
char
==
'|'
:
if
len
(
stack
)
==
0
:
raise
error
,
'nothing to alternate'
expr
=
[]
while
(
len
(
stack
)
!=
0
)
and
\
(
stack
[
-
1
][
0
].
name
!=
'('
)
and
\
(
stack
[
-
1
][
0
].
name
!=
'|'
):
expr
=
stack
[
-
1
]
+
expr
del
stack
[
-
1
]
stack
.
append
([
FailureJump
(
label
)]
+
\
expr
+
\
[
Jump
(
-
1
),
Label
(
label
)])
stack
.
append
([(
'|'
,)])
label
=
label
+
1
elif
char
==
'('
:
if
index
>=
len
(
pattern
):
raise
error
,
'no matching close paren'
elif
pattern
[
index
]
==
'?'
:
# Perl style (?...) extensions
index
=
index
+
1
if
index
>=
len
(
pattern
):
raise
error
,
'extension ends prematurely'
elif
pattern
[
index
]
==
'P'
:
# Python extensions
index
=
index
+
1
if
index
>=
len
(
pattern
):
raise
error
,
'extension ends prematurely'
elif
pattern
[
index
]
==
'<'
:
# Handle Python symbolic group names (?<...>...)
index
=
index
+
1
end
=
string
.
find
(
pattern
,
'>'
,
index
)
if
end
==
-
1
:
raise
error
,
'no end to symbolic group name'
name
=
pattern
[
index
:
end
]
# XXX check syntax of name
index
=
end
+
1
groupindex
[
name
]
=
register
stack
.
append
([
OpenParen
(
register
)])
register
=
register
+
1
elif
pattern
[
index
]
==
'='
:
# backreference to symbolic group name
if
index
>=
len
(
pattern
):
raise
error
,
'(?P= at the end of the pattern'
start
=
index
+
1
end
=
string
.
find
(
pattern
,
')'
,
start
)
if
end
==
-
1
:
raise
error
,
'no ) to end symbolic group name'
name
=
pattern
[
start
:
end
]
if
name
not
in
groupindex
:
raise
error
,
(
'symbolic group name '
+
name
+
\
' has not been used yet'
)
stack
.
append
([
MatchMemory
(
groupindex
[
name
])])
index
=
end
+
1
elif
pattern
[
index
]
==
'!'
:
# function callout
if
index
>=
len
(
pattern
):
raise
error
,
'no function callout name'
start
=
index
+
1
end
=
string
.
find
(
pattern
,
')'
,
start
)
if
end
==
-
1
:
raise
error
,
'no ) to end function callout name'
name
=
pattern
[
start
:
end
]
if
name
not
in
callouts
:
raise
error
,
(
'function callout name not listed '
'in callouts dict'
)
stack
.
append
([
FunctionCallout
(
name
)])
else
:
raise
error
,
'unknown Python extension'
elif
pattern
[
index
]
==
':'
:
# grouping, but no registers
index
=
index
+
1
stack
.
append
([(
'('
,
-
1
)])
elif
pattern
[
index
]
==
'#'
:
# comment
index
=
index
+
1
end
=
string
.
find
(
pattern
,
')'
,
index
)
if
end
==
-
1
:
raise
error
,
'no end to comment'
index
=
end
+
1
elif
pattern
[
index
]
==
'='
:
raise
error
,
(
'zero-width positive lookahead '
'assertion is unsupported'
)
elif
pattern
[
index
]
==
'!'
:
raise
error
,
(
'zero-width negative lookahead '
'assertion is unsupported'
)
elif
pattern
[
index
]
in
'iImMsSxX'
:
while
(
index
<
len
(
pattern
))
and
(
pattern
[
index
]
!=
')'
):
if
pattern
[
index
]
==
'iI'
:
flags
=
flags
|
IGNORECASE
elif
pattern
[
index
]
==
'mM'
:
flags
=
flags
|
MULTILINE
elif
pattern
[
index
]
==
'sS'
:
flags
=
flags
|
DOTALL
elif
pattern
[
index
]
in
'xX'
:
flags
=
flags
|
VERBOSE
else
:
raise
error
,
'unknown flag'
index
=
index
+
1
index
=
index
+
1
else
:
raise
error
,
'unknown extension'
else
:
stack
.
append
([
OpenParen
(
register
)])
register
=
register
+
1
elif
char
==
')'
:
# make one expression out of everything on the stack up to
# the marker left by the last parenthesis
expr
=
[]
while
(
len
(
stack
)
>
0
)
and
(
stack
[
-
1
][
0
].
name
!=
'('
):
expr
=
stack
[
-
1
]
+
expr
del
stack
[
-
1
]
if
len
(
stack
)
==
0
:
raise
error
,
'too many close parens'
if
len
(
expr
)
==
0
:
raise
error
,
'nothing inside parens'
# check to see if alternation used correctly
if
(
expr
[
-
1
].
name
==
'|'
):
raise
error
,
'alternation with nothing on the right'
# remove markers left by alternation
expr
=
filter
(
lambda
x
:
x
.
name
!=
'|'
,
expr
)
# clean up jumps inserted by alternation
need_label
=
0
for
i
in
range
(
len
(
expr
)):
if
(
expr
[
i
].
name
==
'jump'
)
and
(
expr
[
i
].
label
==
-
1
):
expr
[
i
]
=
JumpOpcode
(
label
)
need_label
=
1
if
need_label
:
expr
.
append
(
Label
(
label
))
label
=
label
+
1
if
stack
[
-
1
][
0
][
1
]
>
0
:
expr
=
[
StartMemory
(
stack
[
-
1
][
0
].
register
)]
+
\
expr
+
\
[
EndMemory
(
stack
[
-
1
][
0
].
register
)]
del
stack
[
-
1
]
stack
.
append
(
expr
)
elif
char
==
'{'
:
if
len
(
stack
)
==
0
:
raise
error
,
'no expression to repeat'
end
=
string
.
find
(
pattern
,
'}'
,
index
)
if
end
==
-
1
:
raise
error
,
(
'no close curly bracket to match'
' open curly bracket'
)
fields
=
map
(
string
.
strip
,
string
.
split
(
pattern
[
index
:
end
],
','
))
index
=
end
+
1
minimal
=
0
if
(
index
<
len
(
pattern
))
and
(
pattern
[
index
]
==
'?'
):
minimal
=
1
index
=
index
+
1
if
len
(
fields
)
==
1
:
# {n} or {n}? (there's really no difference)
try
:
count
=
string
.
atoi
(
fields
[
0
])
except
ValueError
:
raise
error
,
(
'count must be an integer '
'inside curly braces'
)
if
count
>
65535
:
raise
error
,
'repeat count out of range'
expr
=
[]
while
count
>
0
:
expr
=
expr
+
stack
[
-
1
]
count
=
count
-
1
del
stack
[
-
1
]
stack
.
append
(
expr
)
elif
len
(
fields
)
==
2
:
# {n,} or {n,m}
if
fields
[
1
]
==
''
:
# {n,}
try
:
min
=
string
.
atoi
(
fields
[
0
])
except
ValueError
:
raise
error
,
(
'minimum must be an integer '
'inside curly braces'
)
if
min
>
65535
:
raise
error
,
'minimum repeat count out of range'
expr
=
[]
while
min
>
0
:
expr
=
expr
+
stack
[
-
1
]
min
=
min
-
1
registers
=
registers_used
(
stack
[
-
1
])
if
minimal
:
expr
=
expr
+
\
([
Jump
(
label
+
1
),
Label
(
label
)]
+
\
stack
[
-
1
]
+
\
[
Label
(
label
+
1
),
FailureJump
(
label
,
registers
)])
else
:
expr
=
expr
+
\
([
Label
(
label
),
FailureJump
(
label
+
1
,
registers
)]
+
stack
[
-
1
]
+
[
StarJump
(
label
),
Label
(
label
+
1
)])
del
stack
[
-
1
]
stack
.
append
(
expr
)
label
=
label
+
2
else
:
# {n,m}
try
:
min
=
string
.
atoi
(
fields
[
0
])
except
ValueError
:
raise
error
,
(
'minimum must be an integer '
'inside curly braces'
)
try
:
max
=
string
.
atoi
(
fields
[
1
])
except
ValueError
:
raise
error
,
(
'maximum must be an integer '
'inside curly braces'
)
if
min
>
65535
:
raise
error
,
(
'minumim repeat count out '
'of range'
)
if
max
>
65535
:
raise
error
,
(
'maximum repeat count out '
'of range'
)
if
min
>
max
:
raise
error
,
(
'minimum repeat count must be '
'less than the maximum '
'repeat count'
)
expr
=
[]
while
min
>
0
:
expr
=
expr
+
stack
[
-
1
]
min
=
min
-
1
max
=
max
-
1
if
minimal
:
while
max
>
0
:
expr
=
expr
+
\
[
FailureJump
(
label
),
Jump
(
label
+
1
),
Label
(
label
)]
+
\
stack
[
-
1
]
+
\
[
Label
(
label
+
1
)]
label
=
label
+
2
del
stack
[
-
1
]
stack
.
append
(
expr
)
else
:
while
max
>
0
:
expr
=
expr
+
\
[
FailureJump
(
label
)]
+
\
stack
[
-
1
]
max
=
max
-
1
del
stack
[
-
1
]
stack
.
append
(
expr
+
[
Label
(
label
)])
label
=
label
+
1
else
:
raise
error
,
(
'there need to be one or two fields '
'in a {} expression'
)
index
=
end
+
1
elif
char
==
'}'
:
raise
error
,
'unbalanced close curly brace'
elif
char
==
'*'
:
# Kleene closure
if
len
(
stack
)
==
0
:
raise
error
,
'the Kleene closure needs something to repeat'
registers
=
registers_used
(
stack
[
-
1
])
if
(
index
<
len
(
pattern
))
and
(
pattern
[
index
]
==
'?'
):
# non-greedy matching
expr
=
[
JumpInstructions
(
label
+
1
),
Label
(
label
)]
+
\
stack
[
-
1
]
+
\
[
Label
(
label
+
1
),
FailureJump
(
label
)]
index
=
index
+
1
else
:
# greedy matching
expr
=
[
Label
(
label
),
FailureJump
(
label
+
1
)]
+
\
stack
[
-
1
]
+
\
[
StarJump
(
label
),
Label
(
label
+
1
)]
del
stack
[
-
1
]
stack
.
append
(
expr
)
label
=
label
+
2
elif
char
==
'+'
:
# positive closure
if
len
(
stack
)
==
0
:
raise
error
,
'the positive closure needs something to repeat'
registers
=
registers_used
(
stack
[
-
1
])
if
(
index
<
len
(
pattern
))
and
(
pattern
[
index
]
==
'?'
):
# non-greedy
expr
=
[
Label
(
label
)]
+
\
stack
[
-
1
]
+
\
[
FailureJump
(
label
)]
label
=
label
+
1
index
=
index
+
1
else
:
# greedy
expr
=
[
DummyFailureJump
(
label
+
1
),
Label
(
label
),
FailureJump
(
label
+
2
),
Label
(
label
+
1
)]
+
\
stack
[
-
1
]
+
\
[
StarJump
(
label
),
Label
(
label
+
2
)]
label
=
label
+
3
del
stack
[
-
1
]
stack
.
append
(
expr
)
elif
char
==
'?'
:
if
len
(
stack
)
==
0
:
raise
error
,
'need something to be optional'
registers
=
registers_used
(
stack
[
-
1
])
if
(
index
<
len
(
pattern
))
and
(
pattern
[
index
]
==
'?'
):
# non-greedy matching
expr
=
[
FailureJump
(
label
),
Jump
(
label
+
1
),
Label
(
label
)]
+
\
stack
[
-
1
]
+
\
[
Label
(
label
+
1
)]
label
=
label
+
2
index
=
index
+
1
else
:
# greedy matching
expr
=
[
FailureJump
(
label
)]
+
\
stack
[
-
1
]
+
\
[
Label
(
label
)]
label
=
label
+
1
del
stack
[
-
1
]
stack
.
append
(
expr
)
elif
char
==
'.'
:
if
flags
&
DOTALL
:
stack
.
append
(
Set
(
map
(
chr
,
range
(
256
))))
else
:
stack
.
append
([
AnyChar
()])
elif
char
==
'^'
:
if
flags
&
MULTILINE
:
stack
.
append
([
Bol
()])
else
:
stack
.
append
([
BegBuf
()])
elif
char
==
'$'
:
if
flags
&
MULTILINE
:
stack
.
append
([
Eol
()])
else
:
stack
.
append
([
EndBuf
()])
elif
char
==
'#'
:
if
flags
&
VERBOSE
:
# comment
index
=
index
+
1
end
=
string
.
find
(
pattern
,
'
\
n
'
,
index
)
if
end
==
-
1
:
index
=
len
(
pattern
)
else
:
index
=
end
+
1
else
:
stack
.
append
([
Exact
(
char
)])
elif
char
in
string
.
whitespace
:
if
flags
&
VERBOSE
:
stack
.
append
([
Exact
(
char
)])
elif
char
==
'['
:
if
index
>=
len
(
pattern
):
raise
error
,
'incomplete set'
negate
=
0
last
=
''
set
=
[]
if
pattern
[
index
]
==
'^'
:
negate
=
1
index
=
index
+
1
if
index
>=
len
(
pattern
):
raise
error
,
'incomplete set'
if
pattern
[
index
]
in
']-'
:
set
.
append
(
pattern
[
index
])
last
=
pattern
[
index
]
index
=
index
+
1
while
(
index
<
len
(
pattern
))
and
(
pattern
[
index
]
!=
']'
):
next
=
pattern
[
index
]
index
=
index
+
1
if
next
==
'-'
:
if
(
index
>=
len
(
pattern
))
or
(
pattern
[
index
]
==
']'
):
raise
error
,
'incomplete range in set'
if
last
>
pattern
[
index
]:
raise
error
,
'range arguments out of order in set'
for
next
in
map
(
chr
,
\
range
(
ord
(
last
),
\
ord
(
pattern
[
index
])
+
1
)):
if
next
not
in
set
:
set
.
append
(
next
)
last
=
''
index
=
index
+
1
elif
next
==
'
\
\
'
:
# expand syntax meta-characters and add to set
if
index
>=
len
(
pattern
):
raise
error
,
'incomplete set'
elif
(
pattern
[
index
]
==
']'
):
raise
error
,
'backslash at the end of a set'
elif
pattern
[
index
]
==
'w'
:
for
next
in
syntax_table
.
keys
():
if
'word'
in
syntax_table
[
next
]:
set
.
append
(
next
)
elif
pattern
[
index
]
==
'W'
:
for
next
in
syntax_table
.
keys
():
if
'word'
not
in
syntax_table
[
next
]:
set
.
append
(
next
)
elif
pattern
[
index
]
==
'd'
:
for
next
in
syntax_table
.
keys
():
if
'digit'
in
syntax_table
[
next
]:
set
.
append
(
next
)
elif
pattern
[
index
]
==
'D'
:
for
next
in
syntax_table
.
keys
():
if
'digit'
not
in
syntax_table
[
next
]:
set
.
append
(
next
)
elif
pattern
[
index
]
==
's'
:
for
next
in
syntax_table
.
keys
():
if
'whitespace'
in
syntax_table
[
next
]:
set
.
append
(
next
)
elif
pattern
[
index
]
==
'S'
:
for
next
in
syntax_table
.
keys
():
if
'whitespace'
not
in
syntax_table
[
next
]:
set
.
append
(
next
)
else
:
raise
error
,
'unknown meta in set'
last
=
''
index
=
index
+
1
else
:
if
next
not
in
set
:
set
.
append
(
next
)
last
=
next
if
pattern
[
index
]
!=
']'
:
raise
error
,
'incomplete set'
index
=
index
+
1
if
negate
:
notset
=
[]
for
char
in
map
(
chr
,
range
(
256
)):
if
char
not
in
set
:
notset
.
append
(
char
)
stack
.
append
([
Set
(
notset
)])
else
:
stack
.
append
([
Set
(
set
)])
else
:
stack
.
append
([
Exact
(
char
)])
code
=
[]
while
len
(
stack
)
>
0
:
if
stack
[
-
1
][
0
].
name
==
'('
:
raise
error
,
'too many open parens'
code
=
stack
[
-
1
]
+
code
del
stack
[
-
1
]
if
len
(
code
)
==
0
:
raise
error
,
'no code generated'
if
(
code
[
-
1
].
name
==
'|'
):
raise
error
,
'alternation with nothing on the right'
code
=
filter
(
lambda
x
:
x
.
name
!=
'|'
,
code
)
need_label
=
0
for
i
in
range
(
len
(
code
)):
if
(
code
[
i
].
name
==
'jump'
)
and
(
code
[
i
].
label
==
-
1
):
code
[
i
]
=
Jump
(
label
)
need_label
=
1
if
need_label
:
code
.
append
(
Label
(
label
))
label
=
label
+
1
code
.
append
(
End
())
return
RegexObject
(
pattern
,
flags
,
code
,
register
,
groupindex
,
callouts
)
if
__name__
==
'__main__'
:
print
compile
(
'a(b)*'
)
print
compile
(
'a{3}'
)
print
compile
(
'(a){2}'
)
print
compile
(
'a{2,4}'
)
print
compile
(
'a|b'
)
print
compile
(
'a(b|c)'
)
print
compile
(
'a*'
)
print
compile
(
'a+'
)
print
compile
(
'a|b|c'
)
print
compile
(
'a(b|c)*'
)
print
compile
(
'
\
\
n'
)
print
compile
(
'a(?# huh huh)b'
)
print
compile
(
'[a-c
\
\
w]'
)
print
compile
(
'[[]'
)
print
compile
(
'[]]'
)
print
compile
(
'(<hello>a)'
)
print
compile
(
'
\
Q*
\
e'
)
print
compile
(
'a{0,}'
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment