Commit a112a8ae authored by Serhiy Storchaka's avatar Serhiy Storchaka

Issue #22928: Disabled HTTP header injections in http.client.

Original patch by Demian Brecht.
parent c775ad61
...@@ -70,6 +70,7 @@ import email.parser ...@@ -70,6 +70,7 @@ import email.parser
import email.message import email.message
import io import io
import os import os
import re
import socket import socket
import collections import collections
from urllib.parse import urlsplit from urllib.parse import urlsplit
...@@ -217,6 +218,34 @@ MAXAMOUNT = 1048576 ...@@ -217,6 +218,34 @@ MAXAMOUNT = 1048576
_MAXLINE = 65536 _MAXLINE = 65536
_MAXHEADERS = 100 _MAXHEADERS = 100
# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
#
# VCHAR = %x21-7E
# obs-text = %x80-FF
# header-field = field-name ":" OWS field-value OWS
# field-name = token
# field-value = *( field-content / obs-fold )
# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
# field-vchar = VCHAR / obs-text
#
# obs-fold = CRLF 1*( SP / HTAB )
# ; obsolete line folding
# ; see Section 3.2.4
# token = 1*tchar
#
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
# / DIGIT / ALPHA
# ; any VCHAR, except delimiters
#
# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
# the patterns for both name and value are more leniant than RFC
# definitions to allow for backwards compatibility
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search
class HTTPMessage(email.message.Message): class HTTPMessage(email.message.Message):
# XXX The only usage of this method is in # XXX The only usage of this method is in
...@@ -1060,12 +1089,20 @@ class HTTPConnection: ...@@ -1060,12 +1089,20 @@ class HTTPConnection:
if hasattr(header, 'encode'): if hasattr(header, 'encode'):
header = header.encode('ascii') header = header.encode('ascii')
if not _is_legal_header_name(header):
raise ValueError('Invalid header name %r' % (header,))
values = list(values) values = list(values)
for i, one_value in enumerate(values): for i, one_value in enumerate(values):
if hasattr(one_value, 'encode'): if hasattr(one_value, 'encode'):
values[i] = one_value.encode('latin-1') values[i] = one_value.encode('latin-1')
elif isinstance(one_value, int): elif isinstance(one_value, int):
values[i] = str(one_value).encode('ascii') values[i] = str(one_value).encode('ascii')
if _is_illegal_header_value(values[i]):
raise ValueError('Invalid header value %r' % (values[i],))
value = b'\r\n\t'.join(values) value = b'\r\n\t'.join(values)
header = header + b': ' + value header = header + b': ' + value
self._output(header) self._output(header)
......
...@@ -148,6 +148,33 @@ class HeaderTests(TestCase): ...@@ -148,6 +148,33 @@ class HeaderTests(TestCase):
conn.putheader('Content-length', 42) conn.putheader('Content-length', 42)
self.assertIn(b'Content-length: 42', conn._buffer) self.assertIn(b'Content-length: 42', conn._buffer)
conn.putheader('Foo', ' bar ')
self.assertIn(b'Foo: bar ', conn._buffer)
conn.putheader('Bar', '\tbaz\t')
self.assertIn(b'Bar: \tbaz\t', conn._buffer)
conn.putheader('Authorization', 'Bearer mytoken')
self.assertIn(b'Authorization: Bearer mytoken', conn._buffer)
conn.putheader('IterHeader', 'IterA', 'IterB')
self.assertIn(b'IterHeader: IterA\r\n\tIterB', conn._buffer)
conn.putheader('LatinHeader', b'\xFF')
self.assertIn(b'LatinHeader: \xFF', conn._buffer)
conn.putheader('Utf8Header', b'\xc3\x80')
self.assertIn(b'Utf8Header: \xc3\x80', conn._buffer)
conn.putheader('C1-Control', b'next\x85line')
self.assertIn(b'C1-Control: next\x85line', conn._buffer)
conn.putheader('Embedded-Fold-Space', 'is\r\n allowed')
self.assertIn(b'Embedded-Fold-Space: is\r\n allowed', conn._buffer)
conn.putheader('Embedded-Fold-Tab', 'is\r\n\tallowed')
self.assertIn(b'Embedded-Fold-Tab: is\r\n\tallowed', conn._buffer)
conn.putheader('Key Space', 'value')
self.assertIn(b'Key Space: value', conn._buffer)
conn.putheader('KeySpace ', 'value')
self.assertIn(b'KeySpace : value', conn._buffer)
conn.putheader(b'Nonbreak\xa0Space', 'value')
self.assertIn(b'Nonbreak\xa0Space: value', conn._buffer)
conn.putheader(b'\xa0NonbreakSpace', 'value')
self.assertIn(b'\xa0NonbreakSpace: value', conn._buffer)
def test_ipv6host_header(self): def test_ipv6host_header(self):
# Default host header on IPv6 transaction should wrapped by [] if # Default host header on IPv6 transaction should wrapped by [] if
# its actual IPv6 address # its actual IPv6 address
...@@ -177,6 +204,36 @@ class HeaderTests(TestCase): ...@@ -177,6 +204,36 @@ class HeaderTests(TestCase):
self.assertEqual(resp.getheader('First'), 'val') self.assertEqual(resp.getheader('First'), 'val')
self.assertEqual(resp.getheader('Second'), 'val') self.assertEqual(resp.getheader('Second'), 'val')
def test_invalid_headers(self):
conn = client.HTTPConnection('example.com')
conn.sock = FakeSocket('')
conn.putrequest('GET', '/')
# http://tools.ietf.org/html/rfc7230#section-3.2.4, whitespace is no
# longer allowed in header names
cases = (
(b'Invalid\r\nName', b'ValidValue'),
(b'Invalid\rName', b'ValidValue'),
(b'Invalid\nName', b'ValidValue'),
(b'\r\nInvalidName', b'ValidValue'),
(b'\rInvalidName', b'ValidValue'),
(b'\nInvalidName', b'ValidValue'),
(b' InvalidName', b'ValidValue'),
(b'\tInvalidName', b'ValidValue'),
(b'Invalid:Name', b'ValidValue'),
(b':InvalidName', b'ValidValue'),
(b'ValidName', b'Invalid\r\nValue'),
(b'ValidName', b'Invalid\rValue'),
(b'ValidName', b'Invalid\nValue'),
(b'ValidName', b'InvalidValue\r\n'),
(b'ValidName', b'InvalidValue\r'),
(b'ValidName', b'InvalidValue\n'),
)
for name, value in cases:
with self.subTest((name, value)):
with self.assertRaisesRegex(ValueError, 'Invalid header'):
conn.putheader(name, value)
class BasicTest(TestCase): class BasicTest(TestCase):
def test_status_lines(self): def test_status_lines(self):
......
...@@ -18,6 +18,9 @@ Core and Builtins ...@@ -18,6 +18,9 @@ Core and Builtins
Library Library
------- -------
- Issue #22928: Disabled HTTP header injections in http.client.
Original patch by Demian Brecht.
- Issue #23615: Modules bz2, tarfile and tokenize now can be reloaded with - Issue #23615: Modules bz2, tarfile and tokenize now can be reloaded with
imp.reload(). Patch by Thomas Kluyver. imp.reload(). Patch by Thomas Kluyver.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment