Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
grumpy
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
grumpy
Commits
2ea88051
Commit
2ea88051
authored
Jan 16, 2017
by
YOU
Committed by
Dylan Trotter
Jan 16, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix several bugs on re module (#122)
parent
08f9c2ed
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
781 additions
and
92 deletions
+781
-92
third_party/pypy/_sre.py
third_party/pypy/_sre.py
+2
-1
third_party/pypy/re.py
third_party/pypy/re.py
+8
-4
third_party/pypy/re_test.py
third_party/pypy/re_test.py
+689
-0
third_party/pypy/sre_compile.py
third_party/pypy/sre_compile.py
+61
-78
third_party/pypy/sre_constants.py
third_party/pypy/sre_constants.py
+3
-1
third_party/pypy/sre_parse.py
third_party/pypy/sre_parse.py
+18
-8
No files found.
third_party/pypy/_sre.py
View file @
2ea88051
...
...
@@ -45,7 +45,8 @@ def compile(pattern, flags, code, groups=0, groupindex={}, indexgroup=[None]):
def
getlower
(
char_ord
,
flags
):
if
(
char_ord
<
128
)
or
(
flags
&
SRE_FLAG_UNICODE
)
\
or
(
flags
&
SRE_FLAG_LOCALE
and
char_ord
<
256
):
return
ord
(
unichr
(
char_ord
).
lower
())
# return ord(unichr(char_ord).lower())
return
ord
(
chr
(
char_ord
).
lower
())
else
:
return
char_ord
...
...
third_party/pypy/re.py
View file @
2ea88051
...
...
@@ -194,8 +194,10 @@ def compile(pattern, flags=0):
def
purge
():
"Clear the regular expression cache"
_cache
.
clear
()
_cache_repl
.
clear
()
# _cache.clear()
# _cache_repl.clear()
globals
()[
'_cache'
]
=
{}
globals
()[
'_cache_repl'
]
=
{}
def
template
(
pattern
,
flags
=
0
):
"Compile a template pattern, returning a pattern object"
...
...
@@ -250,7 +252,8 @@ def _compile(*key):
raise
error
,
v
# invalid expression
if
not
bypass_cache
:
if
len
(
_cache
)
>=
_MAXCACHE
:
_cache
.
clear
()
# _cache.clear()
globals
()[
'_cache'
]
=
{}
if
p
.
flags
&
LOCALE
:
if
not
_locale
:
return
p
...
...
@@ -271,7 +274,8 @@ def _compile_repl(*key):
except
error
,
v
:
raise
error
,
v
# invalid expression
if
len
(
_cache_repl
)
>=
_MAXCACHE
:
_cache_repl
.
clear
()
# _cache_repl.clear()
globals
()[
'_cache_repl'
]
=
{}
_cache_repl
[
key
]
=
p
return
p
...
...
third_party/pypy/re_test.py
0 → 100755
View file @
2ea88051
This diff is collapsed.
Click to expand it.
third_party/pypy/sre_compile.py
View file @
2ea88051
...
...
@@ -11,7 +11,8 @@
"""Internal support module for sre"""
import
_sre
,
sys
import
sys
import
_sre
import
sre_parse
# TODO: Support from foo import * syntax.
...
...
@@ -262,11 +263,9 @@ def _compile_charset(charset, flags, code, fixup=None, fixes=None):
def
_optimize_charset
(
charset
,
fixup
,
fixes
,
isunicode
):
# internal: optimize character set
out
=
[]
outappend
=
out
.
append
tail
=
[]
# charmap = bytearray(256)
charmap
=
[
0
]
*
256
for
op
,
av
in
charset
:
while
True
:
try
:
...
...
@@ -319,85 +318,50 @@ def _optimize_charset(charset, fixup, fixes, isunicode):
break
# compress character map
i
=
p
=
n
=
0
runs
=
[]
runsappend
=
runs
.
append
for
c
in
charmap
:
if
c
:
if
n
==
0
:
p
=
i
n
=
n
+
1
elif
n
:
runsappend
((
p
,
n
))
n
=
0
i
=
i
+
1
if
n
:
runsappend
((
p
,
n
))
if
len
(
runs
)
<=
2
:
q
=
0
def
char_find
(
l
,
s
,
start
):
i
=
start
while
i
<
len
(
l
):
if
l
[
i
]
==
s
:
return
i
i
+=
1
return
-
1
while
True
:
# p = charmap.find(b'\1', q)
p
=
char_find
(
charmap
,
1
,
q
)
if
p
<
0
:
break
if
len
(
runs
)
>=
2
:
runs
=
None
break
# q = charmap.find(b'\0', p)
q
=
char_find
(
charmap
,
0
,
p
)
if
q
<
0
:
runs
.
append
((
p
,
len
(
charmap
)))
break
runs
.
append
((
p
,
q
))
if
runs
is
not
None
:
# use literal/range
for
p
,
n
in
runs
:
if
n
==
1
:
outappend
((
LITERAL
,
p
))
for
p
,
q
in
runs
:
if
q
-
p
==
1
:
out
.
append
((
LITERAL
,
p
))
else
:
outappend
((
RANGE
,
(
p
,
p
+
n
-
1
)))
if
len
(
out
)
<
len
(
charset
):
out
.
append
((
RANGE
,
(
p
,
q
-
1
)))
out
+=
tail
# if the case was changed or new representation is more compact
if
fixup
or
len
(
out
)
<
len
(
charset
):
return
out
else
:
# use bitmap
# else original character set is good enough
return
charset
# use bitmap
if
len
(
charmap
)
==
256
:
data
=
_mk_bitmap
(
charmap
)
outappend
((
CHARSET
,
data
))
out
.
append
((
CHARSET
,
data
))
out
+=
tail
return
out
return
charset
# runs = []
# q = 0
# while True:
# p = charmap.find(b'\1', q)
# if p < 0:
# break
# if len(runs) >= 2:
# runs = None
# break
# q = charmap.find(b'\0', p)
# if q < 0:
# runs.append((p, len(charmap)))
# break
# runs.append((p, q))
# if runs is not None:
# # use literal/range
# for p, q in runs:
# if q - p == 1:
# out.append((LITERAL, p))
# else:
# out.append((RANGE, (p, q - 1)))
# out += tail
# # if the case was changed or new representation is more compact
# if fixup or len(out) < len(charset):
# return out
# # else original character set is good enough
# return charset
# # use bitmap
# if len(charmap) == 256:
# data = _mk_bitmap(charmap)
# out.append((CHARSET, data))
# out += tail
# return out
def
_mk_bitmap
(
bits
):
data
=
[]
dataappend
=
data
.
append
if
_sre
.
CODESIZE
==
2
:
start
=
(
1
,
0
)
else
:
start
=
(
1
,
0
)
m
,
v
=
start
for
c
in
bits
:
if
c
:
v
=
v
+
m
m
=
m
+
m
if
m
>
MAXCODE
:
dataappend
(
v
)
m
,
v
=
start
return
data
# To represent a big charset, first a bitmap of all characters in the
# set is constructed. Then, this bitmap is sliced into chunks of 256
# characters, duplicate chunks are eliminated, and each chunk is
...
...
@@ -422,7 +386,8 @@ def _mk_bitmap(bits):
# of the basic multilingual plane; an efficient representation
# for all of Unicode has not yet been developed.
charmap
=
bytes
(
charmap
)
# should be hashable
# charmap = bytes(charmap) # should be hashable
charmap
=
str
(
charmap
)
# should be hashable
comps
=
{}
# mapping = bytearray(256)
mapping
=
[
0
]
*
256
...
...
@@ -465,8 +430,26 @@ _CODEBITS = _sre.CODESIZE * 8
_BITS_TRANS
=
b'0'
+
b'1'
*
255
# def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int):
# s = bytes(bits).translate(_BITS_TRANS)[::-1]
# return [_int(s[i - _CODEBITS: i], 2)
# r = [_int(s[i - _CODEBITS: i], 2)
# for i in range(len(s), 0, -_CODEBITS)]
# return r
def
_mk_bitmap
(
bits
):
data
=
[]
dataappend
=
data
.
append
# if _sre.CODESIZE == 2:
# start = (1, 0)
# else:
# start = (1, 0)
start
=
(
1
,
0
)
m
,
v
=
start
for
c
in
bits
:
if
c
:
v
=
v
+
m
m
=
m
+
m
if
m
>
MAXCODE
:
dataappend
(
v
)
m
,
v
=
start
return
data
def
_bytes_to_codes
(
b
):
return
b
[:]
...
...
third_party/pypy/sre_constants.py
View file @
2ea88051
...
...
@@ -31,13 +31,15 @@ __all__ = [
'SRE_FLAG_LOCALE'
,
'SRE_FLAG_MULTILINE'
,
'SRE_FLAG_TEMPLATE'
,
'SRE_FLAG_UNICODE'
,
'SRE_FLAG_VERBOSE'
,
'SRE_INFO_CHARSET'
,
'SRE_INFO_LITERAL'
,
'SRE_INFO_PREFIX'
,
'SUBPATTERN'
,
'SUCCESS'
,
'SRE_FLAG_DEBUG'
,
'error'
'SRE_FLAG_DEBUG'
,
'MAXCODE'
,
'error'
]
# update when constants are added or removed
MAGIC
=
20031017
MAXCODE
=
65535
# try:
# from _sre import MAXREPEAT
# except ImportError:
...
...
third_party/pypy/sre_parse.py
View file @
2ea88051
...
...
@@ -143,7 +143,8 @@ class SubPattern(object):
def __len__(self):
return len(self.data)
def __delitem__(self, index):
del self.data[index]
# del self.data[index]
self.data = self.data[:index] + self.data[index+1:]
def __getitem__(self, index):
if isinstance(index, slice):
return SubPattern(self.pattern, self.data[index])
...
...
@@ -345,7 +346,7 @@ def _parse_sub(source, state, nested=1):
# check if all items share a common prefix
while 1:
prefix = None
prefix, common = None, False
for item in items:
if not item:
break
...
...
@@ -356,10 +357,16 @@ def _parse_sub(source, state, nested=1):
else:
# all subitems start with a common "prefix".
# move it out of the branch
for item in items:
del item[0]
# for item in items:
# print "del", item[0], items
# del item[0]
for i in range(len(items)):
items[i] = items[i][1:]
subpatternappend(prefix)
continue # check next one
# continue # check next one
common = True
if common:
continue
break
# check if the branch can be replaced by a character set
...
...
@@ -589,7 +596,8 @@ def _parse(source, state):
"%r"
%
name
)
gid
=
state
.
groupdict
.
get
(
name
)
if
gid
is
None
:
msg
=
"unknown group name: {0!r}"
.
format
(
name
)
# msg = "unknown group name: {0!r}".format(name)
msg
=
"unknown group name: %s"
%
(
name
)
raise
error
(
msg
)
# if state.lookbehind:
# import warnings
...
...
@@ -651,7 +659,8 @@ def _parse(source, state):
if
isname
(
condname
):
condgroup
=
state
.
groupdict
.
get
(
condname
)
if
condgroup
is
None
:
msg
=
"unknown group name: {0!r}"
.
format
(
condname
)
# msg = "unknown group name: {0!r}".format(condname)
msg
=
"unknown group name: %s"
%
(
condname
)
raise
error
(
msg
)
else
:
try
:
...
...
@@ -783,7 +792,8 @@ def parse_template(source, pattern):
try
:
index
=
pattern
.
groupindex
[
name
]
except
KeyError
:
msg
=
"unknown group name: {0!r}"
.
format
(
name
)
# msg = "unknown group name: {0!r}".format(name)
msg
=
"unknown group name: %s"
%
(
name
)
raise
IndexError
(
msg
)
a
((
MARK
,
index
))
elif
c
==
"0"
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment