Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
320a1c0f
Commit
320a1c0f
authored
Aug 12, 2014
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #21448: Fixed FeedParser feed() to avoid O(N**2) behavior when parsing a long line.
Original patch by Raymond Hettinger.
parent
6f201707
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
80 additions
and
12 deletions
+80
-12
Lib/email/feedparser.py
Lib/email/feedparser.py
+18
-8
Lib/test/test_email/test_email.py
Lib/test/test_email/test_email.py
+59
-4
Misc/NEWS
Misc/NEWS
+3
-0
No files found.
Lib/email/feedparser.py
View file @
320a1c0f
...
@@ -50,8 +50,8 @@ class BufferedSubFile(object):
...
@@ -50,8 +50,8 @@ class BufferedSubFile(object):
simple abstraction -- it parses until EOF closes the current message.
simple abstraction -- it parses until EOF closes the current message.
"""
"""
def __init__(self):
def __init__(self):
# The last partial line pushed into this object.
# Chunks of the last partial line pushed into this object.
self._partial =
''
self._partial =
[]
# The list of full, pushed lines, in reverse order
# The list of full, pushed lines, in reverse order
self._lines = []
self._lines = []
# The stack of false-EOF checking predicates.
# The stack of false-EOF checking predicates.
...
@@ -67,8 +67,8 @@ class BufferedSubFile(object):
...
@@ -67,8 +67,8 @@ class BufferedSubFile(object):
def close(self):
def close(self):
# Don't forget any trailing partial line.
# Don't forget any trailing partial line.
self
.
_lines
.
append
(
self
.
_partial
)
self
.
pushlines
(
''
.
join
(
self
.
_partial
).
splitlines
(
True
)
)
self
.
_partial
=
''
self
.
_partial
=
[]
self
.
_closed
=
True
self
.
_closed
=
True
def
readline
(
self
):
def
readline
(
self
):
...
@@ -96,16 +96,26 @@ class BufferedSubFile(object):
...
@@ -96,16 +96,26 @@ class BufferedSubFile(object):
def
push
(
self
,
data
):
def
push
(
self
,
data
):
"""Push some new data into this object."""
"""Push some new data into this object."""
# Handle any previous leftovers
data
,
self
.
_partial
=
self
.
_partial
+
data
,
''
# Crack into lines, but preserve the linesep characters on the end of each
# Crack into lines, but preserve the linesep characters on the end of each
parts
=
data
.
splitlines
(
True
)
parts
=
data
.
splitlines
(
True
)
if
not
parts
or
not
parts
[
0
].
endswith
((
'
\
n
'
,
'
\
r
'
)):
# No new complete lines, so just accumulate partials
self
.
_partial
+=
parts
return
if
self
.
_partial
:
# If there are previous leftovers, complete them now
self
.
_partial
.
append
(
parts
[
0
])
parts
[
0
:
1
]
=
''
.
join
(
self
.
_partial
).
splitlines
(
True
)
del
self
.
_partial
[:]
# If the last element of the list does not end in a newline, then treat
# If the last element of the list does not end in a newline, then treat
# it as a partial line. We only check for '\n' here because a line
# it as a partial line. We only check for '\n' here because a line
# ending with '\r' might be a line that was split in the middle of a
# ending with '\r' might be a line that was split in the middle of a
# '\r\n' sequence (see bugs 1555570 and 1721862).
# '\r\n' sequence (see bugs 1555570 and 1721862).
if
parts
and
not
parts
[
-
1
].
endswith
(
'
\
n
'
):
if
not
parts
[
-
1
].
endswith
(
'
\
n
'
):
self
.
_partial
=
parts
.
pop
()
self
.
_partial
=
[
parts
.
pop
()]
self
.
pushlines
(
parts
)
self
.
pushlines
(
parts
)
def
pushlines
(
self
,
lines
):
def
pushlines
(
self
,
lines
):
...
...
Lib/test/test_email/test_email.py
View file @
320a1c0f
...
@@ -10,6 +10,7 @@ import textwrap
...
@@ -10,6 +10,7 @@ import textwrap
from
io
import
StringIO
,
BytesIO
from
io
import
StringIO
,
BytesIO
from
itertools
import
chain
from
itertools
import
chain
from
random
import
choice
import
email
import
email
import
email.policy
import
email.policy
...
@@ -3353,16 +3354,70 @@ Do you like this message?
...
@@ -3353,16 +3354,70 @@ Do you like this message?
bsf
.
push
(
il
)
bsf
.
push
(
il
)
nt
+=
n
nt
+=
n
n1
=
0
n1
=
0
while
True
:
for
ol
in
iter
(
bsf
.
readline
,
NeedMoreData
):
ol
=
bsf
.
readline
()
if
ol
==
NeedMoreData
:
break
om
.
append
(
ol
)
om
.
append
(
ol
)
n1
+=
1
n1
+=
1
self
.
assertEqual
(
n
,
n1
)
self
.
assertEqual
(
n
,
n1
)
self
.
assertEqual
(
len
(
om
),
nt
)
self
.
assertEqual
(
len
(
om
),
nt
)
self
.
assertEqual
(
''
.
join
([
il
for
il
,
n
in
imt
]),
''
.
join
(
om
))
self
.
assertEqual
(
''
.
join
([
il
for
il
,
n
in
imt
]),
''
.
join
(
om
))
def
test_push_random
(
self
):
from
email.feedparser
import
BufferedSubFile
,
NeedMoreData
n
=
10000
chunksize
=
5
chars
=
'abcd
\
t
\
r
\
n
'
s
=
''
.
join
(
choice
(
chars
)
for
i
in
range
(
n
))
+
'
\
n
'
target
=
s
.
splitlines
(
True
)
bsf
=
BufferedSubFile
()
lines
=
[]
for
i
in
range
(
0
,
len
(
s
),
chunksize
):
chunk
=
s
[
i
:
i
+
chunksize
]
bsf
.
push
(
chunk
)
lines
.
extend
(
iter
(
bsf
.
readline
,
NeedMoreData
))
self
.
assertEqual
(
lines
,
target
)
class
TestFeedParsers
(
TestEmailBase
):
def
parse
(
self
,
chunks
):
from
email.feedparser
import
FeedParser
feedparser
=
FeedParser
()
for
chunk
in
chunks
:
feedparser
.
feed
(
chunk
)
return
feedparser
.
close
()
def
test_newlines
(
self
):
m
=
self
.
parse
([
'a:
\
n
b:
\
r
c:
\
r
\
n
d:
\
n
'
])
self
.
assertEqual
(
m
.
keys
(),
[
'a'
,
'b'
,
'c'
,
'd'
])
m
=
self
.
parse
([
'a:
\
n
b:
\
r
c:
\
r
\
n
d:'
])
self
.
assertEqual
(
m
.
keys
(),
[
'a'
,
'b'
,
'c'
,
'd'
])
m
=
self
.
parse
([
'a:
\
r
b'
,
'c:
\
n
'
])
self
.
assertEqual
(
m
.
keys
(),
[
'a'
,
'bc'
])
m
=
self
.
parse
([
'a:
\
r
'
,
'b:
\
n
'
])
self
.
assertEqual
(
m
.
keys
(),
[
'a'
,
'b'
])
m
=
self
.
parse
([
'a:
\
r
'
,
'
\
n
b:
\
n
'
])
self
.
assertEqual
(
m
.
keys
(),
[
'a'
,
'b'
])
m
=
self
.
parse
([
'a:
\
x85
b:
\
u2028
c:
\
n
'
])
self
.
assertEqual
(
m
.
items
(),
[(
'a'
,
'
\
x85
'
),
(
'b'
,
'
\
u2028
'
),
(
'c'
,
''
)])
m
=
self
.
parse
([
'a:
\
r
'
,
'b:
\
x85
'
,
'c:
\
n
'
])
self
.
assertEqual
(
m
.
items
(),
[(
'a'
,
''
),
(
'b'
,
'
\
x85
'
),
(
'c'
,
''
)])
def
test_long_lines
(
self
):
M
,
N
=
1000
,
100000
m
=
self
.
parse
([
'a:b
\
n
\
n
'
]
+
[
'x'
*
M
]
*
N
)
self
.
assertEqual
(
m
.
items
(),
[(
'a'
,
'b'
)])
self
.
assertEqual
(
m
.
get_payload
(),
'x'
*
M
*
N
)
m
=
self
.
parse
([
'a:b
\
r
\
r
'
]
+
[
'x'
*
M
]
*
N
)
self
.
assertEqual
(
m
.
items
(),
[(
'a'
,
'b'
)])
self
.
assertEqual
(
m
.
get_payload
(),
'x'
*
M
*
N
)
m
=
self
.
parse
([
'a:b
\
r
\
r
'
]
+
[
'x'
*
M
+
'
\
x85
'
]
*
N
)
self
.
assertEqual
(
m
.
items
(),
[(
'a'
,
'b'
)])
self
.
assertEqual
(
m
.
get_payload
(),
(
'x'
*
M
+
'
\
x85
'
)
*
N
)
m
=
self
.
parse
([
'a:
\
r
'
,
'b: '
]
+
[
'x'
*
M
]
*
N
)
self
.
assertEqual
(
m
.
items
(),
[(
'a'
,
''
),
(
'b'
,
'x'
*
M
*
N
)])
class
TestParsers
(
TestEmailBase
):
class
TestParsers
(
TestEmailBase
):
...
...
Misc/NEWS
View file @
320a1c0f
...
@@ -27,6 +27,9 @@ Core and Builtins
...
@@ -27,6 +27,9 @@ Core and Builtins
Library
Library
-------
-------
- Issue #21448: Changed FeedParser feed() to avoid O(N**2) behavior when
parsing a long line. Original patch by Raymond Hettinger.
- Issue #17923: glob() patterns ending with a slash no longer match non-dirs on
- Issue #17923: glob() patterns ending with a slash no longer match non-dirs on
AIX. Based on patch by Delhallt.
AIX. Based on patch by Delhallt.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment