Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
dc1650ca
Commit
dc1650ca
authored
Sep 07, 2016
by
R David Murray
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
#22233: Only split headers on \r and/or \n, per email RFCs.
Original patch by Martin Panter, new policy fixes by me.
parent
6b46ec77
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
104 additions
and
19 deletions
+104
-19
Lib/email/feedparser.py
Lib/email/feedparser.py
+17
-16
Lib/email/policy.py
Lib/email/policy.py
+8
-1
Lib/test/test_email/test_email.py
Lib/test/test_email/test_email.py
+4
-2
Lib/test/test_email/test_parser.py
Lib/test/test_email/test_parser.py
+41
-0
Lib/test/test_httplib.py
Lib/test/test_httplib.py
+30
-0
Misc/NEWS
Misc/NEWS
+4
-0
No files found.
Lib/email/feedparser.py
View file @
dc1650ca
...
...
@@ -27,6 +27,7 @@ from email import errors
from
email
import
message
from
email._policybase
import
compat32
from
collections
import
deque
from
io
import
StringIO
NLCRE
=
re
.
compile
(
'
\
r
\
n
|
\
r
|
\
n
'
)
NLCRE_bol
=
re
.
compile
(
'(
\
r
\
n
|
\
r
|
\
n
)'
)
...
...
@@ -51,8 +52,9 @@ class BufferedSubFile(object):
simple abstraction -- it parses until EOF closes the current message.
"""
def __init__(self):
# Chunks of the last partial line pushed into this object.
self._partial = []
# Text stream of the last partial line pushed into this object.
# See issue 22233 for why this is a text stream and not a list.
self._partial = StringIO(newline='')
# A deque of full, pushed lines
self._lines = deque()
# The stack of false-EOF checking predicates.
...
...
@@ -68,8 +70,10 @@ class BufferedSubFile(object):
def close(self):
# Don'
t
forget
any
trailing
partial
line
.
self
.
pushlines
(
''
.
join
(
self
.
_partial
).
splitlines
(
True
))
self
.
_partial
=
[]
self
.
_partial
.
seek
(
0
)
self
.
pushlines
(
self
.
_partial
.
readlines
())
self
.
_partial
.
seek
(
0
)
self
.
_partial
.
truncate
()
self
.
_closed
=
True
def
readline
(
self
):
...
...
@@ -97,26 +101,23 @@ class BufferedSubFile(object):
def
push
(
self
,
data
):
"""Push some new data into this object."""
# Crack into lines, but preserve the linesep characters on the end of each
parts
=
data
.
splitlines
(
True
)
if
not
parts
or
not
parts
[
0
].
endswith
((
'
\
n
'
,
'
\
r
'
)):
# No new complete lines, so just accumulate partials
self
.
_partial
+=
parts
self
.
_partial
.
write
(
data
)
if
'
\
n
'
not
in
data
and
'
\
r
'
not
in
data
:
# No new complete lines, wait for more.
return
if
self
.
_partial
:
# If there are previous leftovers, complete them now
self
.
_partial
.
append
(
parts
[
0
]
)
parts
[
0
:
1
]
=
''
.
join
(
self
.
_partial
).
splitlines
(
True
)
del
self
.
_partial
[:]
# Crack into lines, preserving the linesep characters.
self
.
_partial
.
seek
(
0
)
parts
=
self
.
_partial
.
readlines
(
)
self
.
_partial
.
seek
(
0
)
self
.
_partial
.
truncate
()
# If the last element of the list does not end in a newline, then treat
# it as a partial line. We only check for '\n' here because a line
# ending with '\r' might be a line that was split in the middle of a
# '\r\n' sequence (see bugs 1555570 and 1721862).
if
not
parts
[
-
1
].
endswith
(
'
\
n
'
):
self
.
_partial
=
[
parts
.
pop
()]
self
.
_partial
.
write
(
parts
.
pop
())
self
.
pushlines
(
parts
)
def
pushlines
(
self
,
lines
):
...
...
Lib/email/policy.py
View file @
dc1650ca
...
...
@@ -2,6 +2,7 @@
code that adds all the email6 features.
"""
import
re
from
email._policybase
import
Policy
,
Compat32
,
compat32
,
_extend_docstrings
from
email.utils
import
_has_surrogates
from
email.headerregistry
import
HeaderRegistry
as
HeaderRegistry
...
...
@@ -18,6 +19,8 @@ __all__ = [
'HTTP'
,
]
linesep_splitter
=
re
.
compile
(
r'\n|\r'
)
@
_extend_docstrings
class
EmailPolicy
(
Policy
):
...
...
@@ -135,6 +138,8 @@ class EmailPolicy(Policy):
if
hasattr
(
value
,
'name'
)
and
value
.
name
.
lower
()
==
name
.
lower
():
return
(
name
,
value
)
if
isinstance
(
value
,
str
)
and
len
(
value
.
splitlines
())
>
1
:
# XXX this error message isn't quite right when we use splitlines
# (see issue 22233), but I'm not sure what should happen here.
raise
ValueError
(
"Header values may not contain linefeed "
"or carriage return characters"
)
return
(
name
,
self
.
header_factory
(
name
,
value
))
...
...
@@ -150,7 +155,9 @@ class EmailPolicy(Policy):
"""
if
hasattr
(
value
,
'name'
):
return
value
return
self
.
header_factory
(
name
,
''
.
join
(
value
.
splitlines
()))
# We can't use splitlines here because it splits on more than \r and \n.
value
=
''
.
join
(
linesep_splitter
.
split
(
value
))
return
self
.
header_factory
(
name
,
value
)
def
fold
(
self
,
name
,
value
):
"""+
...
...
Lib/test/test_email/test_email.py
View file @
dc1650ca
...
...
@@ -3444,10 +3444,12 @@ class TestFeedParsers(TestEmailBase):
self
.
assertEqual
(
m
.
keys
(),
[
'a'
,
'b'
])
m
=
self
.
parse
([
'a:
\
r
'
,
'
\
n
b:
\
n
'
])
self
.
assertEqual
(
m
.
keys
(),
[
'a'
,
'b'
])
# Only CR and LF should break header fields
m
=
self
.
parse
([
'a:
\
x85
b:
\
u2028
c:
\
n
'
])
self
.
assertEqual
(
m
.
items
(),
[(
'a'
,
'
\
x85
'
),
(
'b'
,
'
\
u2028
'
),
(
'c'
,
'
'
)])
self
.
assertEqual
(
m
.
items
(),
[(
'a'
,
'
\
x85
b:
\
u2028
c:
'
)])
m
=
self
.
parse
([
'a:
\
r
'
,
'b:
\
x85
'
,
'c:
\
n
'
])
self
.
assertEqual
(
m
.
items
(),
[(
'a'
,
''
),
(
'b'
,
'
\
x85
'
),
(
'c'
,
'
'
)])
self
.
assertEqual
(
m
.
items
(),
[(
'a'
,
''
),
(
'b'
,
'
\
x85
c:
'
)])
def
test_long_lines
(
self
):
# Expected peak memory use on 32-bit platform: 6*N*M bytes.
...
...
Lib/test/test_email/test_parser.py
View file @
dc1650ca
...
...
@@ -2,6 +2,7 @@ import io
import
email
import
unittest
from
email.message
import
Message
from
email.policy
import
default
from
test.test_email
import
TestEmailBase
...
...
@@ -32,5 +33,45 @@ class TestCustomMessage(TestEmailBase):
# XXX add tests for other functions that take Message arg.
class
TestParserBase
:
def
test_only_split_on_cr_lf
(
self
):
# The unicode line splitter splits on unicode linebreaks, which are
# more numerous than allowed by the email RFCs; make sure we are only
# splitting on those two.
msg
=
self
.
parser
(
"Next-Line: not
\
x85
broken
\
r
\
n
"
"Null: not
\
x00
broken
\
r
\
n
"
"Vertical-Tab: not
\
v
broken
\
r
\
n
"
"Form-Feed: not
\
f
broken
\
r
\
n
"
"File-Separator: not
\
x1C
broken
\
r
\
n
"
"Group-Separator: not
\
x1D
broken
\
r
\
n
"
"Record-Separator: not
\
x1E
broken
\
r
\
n
"
"Line-Separator: not
\
u2028
broken
\
r
\
n
"
"Paragraph-Separator: not
\
u2029
broken
\
r
\
n
"
"
\
r
\
n
"
,
policy
=
default
,
)
self
.
assertEqual
(
msg
.
items
(),
[
(
"Next-Line"
,
"not
\
x85
broken"
),
(
"Null"
,
"not
\
x00
broken"
),
(
"Vertical-Tab"
,
"not
\
v
broken"
),
(
"Form-Feed"
,
"not
\
f
broken"
),
(
"File-Separator"
,
"not
\
x1C
broken"
),
(
"Group-Separator"
,
"not
\
x1D
broken"
),
(
"Record-Separator"
,
"not
\
x1E
broken"
),
(
"Line-Separator"
,
"not
\
u2028
broken"
),
(
"Paragraph-Separator"
,
"not
\
u2029
broken"
),
])
self
.
assertEqual
(
msg
.
get_payload
(),
""
)
class
TestParser
(
TestParserBase
,
TestEmailBase
):
parser
=
staticmethod
(
email
.
message_from_string
)
class
TestBytesParser
(
TestParserBase
,
TestEmailBase
):
def
parser
(
self
,
s
,
*
args
,
**
kw
):
return
email
.
message_from_bytes
(
s
.
encode
(),
*
args
,
**
kw
)
if
__name__
==
'__main__'
:
unittest
.
main
()
Lib/test/test_httplib.py
View file @
dc1650ca
...
...
@@ -283,6 +283,36 @@ class HeaderTests(TestCase):
self
.
assertEqual
(
resp
.
getheader
(
'First'
),
'val'
)
self
.
assertEqual
(
resp
.
getheader
(
'Second'
),
'val'
)
def
test_parse_all_octets
(
self
):
# Ensure no valid header field octet breaks the parser
body
=
(
b'HTTP/1.1 200 OK
\
r
\
n
'
b"!#$%&'*+-.^_`|~: value
\
r
\
n
"
# Special token characters
b'VCHAR: '
+
bytes
(
range
(
0x21
,
0x7E
+
1
))
+
b'
\
r
\
n
'
b'obs-text: '
+
bytes
(
range
(
0x80
,
0xFF
+
1
))
+
b'
\
r
\
n
'
b'obs-fold: text
\
r
\
n
'
b' folded with space
\
r
\
n
'
b'
\
t
folded with tab
\
r
\
n
'
b'Content-Length: 0
\
r
\
n
'
b'
\
r
\
n
'
)
sock
=
FakeSocket
(
body
)
resp
=
client
.
HTTPResponse
(
sock
)
resp
.
begin
()
self
.
assertEqual
(
resp
.
getheader
(
'Content-Length'
),
'0'
)
self
.
assertEqual
(
resp
.
msg
[
'Content-Length'
],
'0'
)
self
.
assertEqual
(
resp
.
getheader
(
"!#$%&'*+-.^_`|~"
),
'value'
)
self
.
assertEqual
(
resp
.
msg
[
"!#$%&'*+-.^_`|~"
],
'value'
)
vchar
=
''
.
join
(
map
(
chr
,
range
(
0x21
,
0x7E
+
1
)))
self
.
assertEqual
(
resp
.
getheader
(
'VCHAR'
),
vchar
)
self
.
assertEqual
(
resp
.
msg
[
'VCHAR'
],
vchar
)
self
.
assertIsNotNone
(
resp
.
getheader
(
'obs-text'
))
self
.
assertIn
(
'obs-text'
,
resp
.
msg
)
for
folded
in
(
resp
.
getheader
(
'obs-fold'
),
resp
.
msg
[
'obs-fold'
]):
self
.
assertTrue
(
folded
.
startswith
(
'text'
))
self
.
assertIn
(
' folded with space'
,
folded
)
self
.
assertTrue
(
folded
.
endswith
(
'folded with tab'
))
def
test_invalid_headers
(
self
):
conn
=
client
.
HTTPConnection
(
'example.com'
)
conn
.
sock
=
FakeSocket
(
''
)
...
...
Misc/NEWS
View file @
dc1650ca
...
...
@@ -62,6 +62,10 @@ Core and Builtins
Library
-------
-
Issue
#
22233
:
Break
email
header
lines
*
only
*
on
the
RFC
specified
CR
and
LF
characters
,
not
on
arbitrary
unicode
line
breaks
.
This
also
fixes
a
bug
in
HTTP
header
parsing
.
-
Issue
27988
:
Fix
email
iter_attachments
incorrect
mutation
of
payload
list
.
-
Issue
#
27691
:
Fix
ssl
module
's parsing of GEN_RID subject alternative name
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment