Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
ab140881
Commit
ab140881
authored
Nov 11, 2014
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Minor code clean up and improvements in the re module.
parent
9a64ccb9
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
13 additions
and
17 deletions
+13
-17
Lib/re.py
Lib/re.py
+1
-1
Lib/sre_compile.py
Lib/sre_compile.py
+6
-10
Lib/sre_parse.py
Lib/sre_parse.py
+4
-4
Lib/test/test_re.py
Lib/test/test_re.py
+2
-2
No files found.
Lib/re.py
View file @
ab140881
...
...
@@ -363,7 +363,7 @@ class Scanner:
append
=
result
.
append
match
=
self
.
scanner
.
scanner
(
string
).
match
i
=
0
while
1
:
while
True
:
m
=
match
()
if
not
m
:
break
...
...
Lib/sre_compile.py
View file @
ab140881
...
...
@@ -16,11 +16,6 @@ from sre_constants import *
assert
_sre
.
MAGIC
==
MAGIC
,
"SRE module mismatch"
if
_sre
.
CODESIZE
==
2
:
MAXCODE
=
65535
else
:
MAXCODE
=
0xFFFFFFFF
_LITERAL_CODES
=
{
LITERAL
,
NOT_LITERAL
}
_REPEATING_CODES
=
{
REPEAT
,
MIN_REPEAT
,
MAX_REPEAT
}
_SUCCESS_CODES
=
{
SUCCESS
,
FAILURE
}
...
...
@@ -191,7 +186,7 @@ def _compile(code, pattern, flags):
emit
(
JUMP
)
tailappend
(
_len
(
code
));
emit
(
0
)
code
[
skip
]
=
_len
(
code
)
-
skip
emit
(
0
)
# end of branch
emit
(
FAILURE
)
# end of branch
for
tail
in
tail
:
code
[
tail
]
=
_len
(
code
)
-
tail
elif
op
is
CATEGORY
:
...
...
@@ -374,6 +369,7 @@ def _optimize_charset(charset, fixup, fixes):
return
out
_CODEBITS
=
_sre
.
CODESIZE
*
8
MAXCODE
=
(
1
<<
_CODEBITS
)
-
1
_BITS_TRANS
=
b'0'
+
b'1'
*
255
def
_mk_bitmap
(
bits
,
_CODEBITS
=
_CODEBITS
,
_int
=
int
):
s
=
bits
.
translate
(
_BITS_TRANS
)[::
-
1
]
...
...
@@ -477,9 +473,9 @@ def _compile_info(code, pattern, flags):
elif
op
is
IN
:
charset
=
av
## if prefix:
## print
"*** PREFIX", prefix, prefix_skip
## print
("*** PREFIX", prefix, prefix_skip)
## if charset:
## print
"*** CHARSET", charset
## print
("*** CHARSET", charset)
# add an info block
emit
=
code
.
append
emit
(
INFO
)
...
...
@@ -489,9 +485,9 @@ def _compile_info(code, pattern, flags):
if
prefix
:
mask
=
SRE_INFO_PREFIX
if
len
(
prefix
)
==
prefix_skip
==
len
(
pattern
.
data
):
mask
=
mask
+
SRE_INFO_LITERAL
mask
=
mask
|
SRE_INFO_LITERAL
elif
charset
:
mask
=
mask
+
SRE_INFO_CHARSET
mask
=
mask
|
SRE_INFO_CHARSET
emit
(
mask
)
# pattern length
if
lo
<
MAXCODE
:
...
...
Lib/sre_parse.py
View file @
ab140881
...
...
@@ -103,18 +103,18 @@ class SubPattern:
seqtypes = (tuple, list)
for op, av in self.data:
print(level*"
" + str(op), end='')
if op
==
IN:
if op
is
IN:
# member sublanguage
print()
for op, a in av:
print((level+1)*"
" + str(op), a)
elif op
==
BRANCH:
elif op
is
BRANCH:
print()
for i, a in enumerate(av[1]):
if i:
print(level*"
" + "
OR
")
a.dump(level+1)
elif op
==
GROUPREF_EXISTS:
elif op
is
GROUPREF_EXISTS:
condgroup, item_yes, item_no = av
print('', condgroup)
item_yes.dump(level+1)
...
...
@@ -607,7 +607,7 @@ def _parse(source, state):
item
=
subpattern
[
-
1
:]
else
:
item
=
None
if
not
item
or
(
_len
(
item
)
==
1
and
item
[
0
][
0
]
==
AT
):
if
not
item
or
(
_len
(
item
)
==
1
and
item
[
0
][
0
]
is
AT
):
raise
source
.
error
(
"nothing to repeat"
,
source
.
tell
()
-
here
+
len
(
this
))
if
item
[
0
][
0
]
in
_REPEATCODES
:
...
...
Lib/test/test_re.py
View file @
ab140881
...
...
@@ -1101,8 +1101,8 @@ class ReTests(unittest.TestCase):
def test_inline_flags(self):
# Bug #1700
upper_char =
chr(0x1ea0) # Latin Capital Letter A with Dot Bel
low
lower_char =
chr(0x1ea1) # Latin Small Letter A with Dot Bel
low
upper_char =
'
\
u1ea0
' # Latin Capital Letter A with Dot Be
low
lower_char =
'
\
u1ea1
' # Latin Small Letter A with Dot Be
low
p = re.compile(upper_char, re.I | re.U)
q = p.match(lower_char)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment