Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
1e133cab
Commit
1e133cab
authored
Aug 12, 2014
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Plain Diff
Issue #21448: Fixed FeedParser feed() to avoid O(N**2) behavior when parsing long line.
Original patch by Raymond Hettinger.
parents
2a140fb5
320a1c0f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
80 additions
and
12 deletions
+80
-12
Lib/email/feedparser.py
Lib/email/feedparser.py
+18
-8
Lib/test/test_email/test_email.py
Lib/test/test_email/test_email.py
+59
-4
Misc/NEWS
Misc/NEWS
+3
-0
No files found.
Lib/email/feedparser.py
View file @
1e133cab
...
...
@@ -50,8 +50,8 @@ class BufferedSubFile(object):
simple abstraction -- it parses until EOF closes the current message.
"""
def __init__(self):
#
T
he last partial line pushed into this object.
self._partial =
''
#
Chunks of t
he last partial line pushed into this object.
self._partial =
[]
# The list of full, pushed lines, in reverse order
self._lines = []
# The stack of false-EOF checking predicates.
...
...
@@ -67,8 +67,8 @@ class BufferedSubFile(object):
def close(self):
# Don'
t
forget
any
trailing
partial
line
.
self
.
_lines
.
append
(
self
.
_partial
)
self
.
_partial
=
''
self
.
pushlines
(
''
.
join
(
self
.
_partial
).
splitlines
(
True
)
)
self
.
_partial
=
[]
self
.
_closed
=
True
def
readline
(
self
):
...
...
@@ -96,16 +96,26 @@ class BufferedSubFile(object):
def
push
(
self
,
data
):
"""Push some new data into this object."""
# Handle any previous leftovers
data
,
self
.
_partial
=
self
.
_partial
+
data
,
''
# Crack into lines, but preserve the linesep characters on the end of each
parts
=
data
.
splitlines
(
True
)
if
not
parts
or
not
parts
[
0
].
endswith
((
'
\
n
'
,
'
\
r
'
)):
# No new complete lines, so just accumulate partials
self
.
_partial
+=
parts
return
if
self
.
_partial
:
# If there are previous leftovers, complete them now
self
.
_partial
.
append
(
parts
[
0
])
parts
[
0
:
1
]
=
''
.
join
(
self
.
_partial
).
splitlines
(
True
)
del
self
.
_partial
[:]
# If the last element of the list does not end in a newline, then treat
# it as a partial line. We only check for '\n' here because a line
# ending with '\r' might be a line that was split in the middle of a
# '\r\n' sequence (see bugs 1555570 and 1721862).
if
parts
and
not
parts
[
-
1
].
endswith
(
'
\
n
'
):
self
.
_partial
=
parts
.
pop
()
if
not
parts
[
-
1
].
endswith
(
'
\
n
'
):
self
.
_partial
=
[
parts
.
pop
()]
self
.
pushlines
(
parts
)
def
pushlines
(
self
,
lines
):
...
...
Lib/test/test_email/test_email.py
View file @
1e133cab
...
...
@@ -10,6 +10,7 @@ import textwrap
from
io
import
StringIO
,
BytesIO
from
itertools
import
chain
from
random
import
choice
import
email
import
email.policy
...
...
@@ -3353,16 +3354,70 @@ Do you like this message?
bsf
.
push
(
il
)
nt
+=
n
n1
=
0
while
True
:
ol
=
bsf
.
readline
()
if
ol
==
NeedMoreData
:
break
for
ol
in
iter
(
bsf
.
readline
,
NeedMoreData
):
om
.
append
(
ol
)
n1
+=
1
self
.
assertEqual
(
n
,
n1
)
self
.
assertEqual
(
len
(
om
),
nt
)
self
.
assertEqual
(
''
.
join
([
il
for
il
,
n
in
imt
]),
''
.
join
(
om
))
def
test_push_random
(
self
):
from
email.feedparser
import
BufferedSubFile
,
NeedMoreData
n
=
10000
chunksize
=
5
chars
=
'abcd
\
t
\
r
\
n
'
s
=
''
.
join
(
choice
(
chars
)
for
i
in
range
(
n
))
+
'
\
n
'
target
=
s
.
splitlines
(
True
)
bsf
=
BufferedSubFile
()
lines
=
[]
for
i
in
range
(
0
,
len
(
s
),
chunksize
):
chunk
=
s
[
i
:
i
+
chunksize
]
bsf
.
push
(
chunk
)
lines
.
extend
(
iter
(
bsf
.
readline
,
NeedMoreData
))
self
.
assertEqual
(
lines
,
target
)
class
TestFeedParsers
(
TestEmailBase
):
def
parse
(
self
,
chunks
):
from
email.feedparser
import
FeedParser
feedparser
=
FeedParser
()
for
chunk
in
chunks
:
feedparser
.
feed
(
chunk
)
return
feedparser
.
close
()
def
test_newlines
(
self
):
m
=
self
.
parse
([
'a:
\
n
b:
\
r
c:
\
r
\
n
d:
\
n
'
])
self
.
assertEqual
(
m
.
keys
(),
[
'a'
,
'b'
,
'c'
,
'd'
])
m
=
self
.
parse
([
'a:
\
n
b:
\
r
c:
\
r
\
n
d:'
])
self
.
assertEqual
(
m
.
keys
(),
[
'a'
,
'b'
,
'c'
,
'd'
])
m
=
self
.
parse
([
'a:
\
r
b'
,
'c:
\
n
'
])
self
.
assertEqual
(
m
.
keys
(),
[
'a'
,
'bc'
])
m
=
self
.
parse
([
'a:
\
r
'
,
'b:
\
n
'
])
self
.
assertEqual
(
m
.
keys
(),
[
'a'
,
'b'
])
m
=
self
.
parse
([
'a:
\
r
'
,
'
\
n
b:
\
n
'
])
self
.
assertEqual
(
m
.
keys
(),
[
'a'
,
'b'
])
m
=
self
.
parse
([
'a:
\
x85
b:
\
u2028
c:
\
n
'
])
self
.
assertEqual
(
m
.
items
(),
[(
'a'
,
'
\
x85
'
),
(
'b'
,
'
\
u2028
'
),
(
'c'
,
''
)])
m
=
self
.
parse
([
'a:
\
r
'
,
'b:
\
x85
'
,
'c:
\
n
'
])
self
.
assertEqual
(
m
.
items
(),
[(
'a'
,
''
),
(
'b'
,
'
\
x85
'
),
(
'c'
,
''
)])
def
test_long_lines
(
self
):
M
,
N
=
1000
,
100000
m
=
self
.
parse
([
'a:b
\
n
\
n
'
]
+
[
'x'
*
M
]
*
N
)
self
.
assertEqual
(
m
.
items
(),
[(
'a'
,
'b'
)])
self
.
assertEqual
(
m
.
get_payload
(),
'x'
*
M
*
N
)
m
=
self
.
parse
([
'a:b
\
r
\
r
'
]
+
[
'x'
*
M
]
*
N
)
self
.
assertEqual
(
m
.
items
(),
[(
'a'
,
'b'
)])
self
.
assertEqual
(
m
.
get_payload
(),
'x'
*
M
*
N
)
m
=
self
.
parse
([
'a:b
\
r
\
r
'
]
+
[
'x'
*
M
+
'
\
x85
'
]
*
N
)
self
.
assertEqual
(
m
.
items
(),
[(
'a'
,
'b'
)])
self
.
assertEqual
(
m
.
get_payload
(),
(
'x'
*
M
+
'
\
x85
'
)
*
N
)
m
=
self
.
parse
([
'a:
\
r
'
,
'b: '
]
+
[
'x'
*
M
]
*
N
)
self
.
assertEqual
(
m
.
items
(),
[(
'a'
,
''
),
(
'b'
,
'x'
*
M
*
N
)])
class
TestParsers
(
TestEmailBase
):
...
...
Misc/NEWS
View file @
1e133cab
...
...
@@ -115,6 +115,9 @@ Core and Builtins
Library
-------
-
Issue
#
21448
:
Changed
FeedParser
feed
()
to
avoid
O
(
N
**
2
)
behavior
when
parsing
long
line
.
Original
patch
by
Raymond
Hettinger
.
-
Issue
#
17923
:
glob
()
patterns
ending
with
a
slash
no
longer
match
non
-
dirs
on
AIX
.
Based
on
patch
by
Delhallt
.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment