Commit 236654b8 authored by Jeremy Hylton

Fix some string encoding issues with entity bodies in HTTP requests.

RFC 2616 says that iso-8859-1 is the default charset for HTTP entity
bodies, but we encoded strings using ascii.  See
http://bugs.python.org/issue5314.  Changed docs and code to use
iso-8859-1.

Also fix some brokenness with passing a file as the body instead of a
string.

Add tests to show that some of this behavior actually works.
parent 98eb6c28
...@@ -351,14 +351,22 @@ HTTPConnection Objects ...@@ -351,14 +351,22 @@ HTTPConnection Objects
.. method:: HTTPConnection.request(method, url[, body[, headers]]) .. method:: HTTPConnection.request(method, url[, body[, headers]])
This will send a request to the server using the HTTP request method *method* This will send a request to the server using the HTTP request
and the selector *url*. If the *body* argument is present, it should be a method *method* and the selector *url*. If the *body* argument is
string of data to send after the headers are finished. Alternatively, it may present, it should be a string or bytes object of data to send after
be an open file object, in which case the contents of the file is sent; this the headers are finished. Strings are encoded as ISO-8859-1, the
file object should support ``fileno()`` and ``read()`` methods. The header default charset for HTTP. To use other encodings, pass a bytes
Content-Length is automatically set to the correct value. The *headers* object. The Content-Length header is set to the length of the
argument should be a mapping of extra HTTP headers to send with the request. string.
The *body* may also be an open file object, in which case the
contents of the file is sent; this file object should support
``fileno()`` and ``read()`` methods. The header Content-Length is
automatically set to the length of the file as reported by
stat.
The *headers* argument should be a mapping of extra HTTP
headers to send with the request.
.. method:: HTTPConnection.getresponse() .. method:: HTTPConnection.getresponse()
......
...@@ -243,7 +243,6 @@ def parse_headers(fp, _class=HTTPMessage): ...@@ -243,7 +243,6 @@ def parse_headers(fp, _class=HTTPMessage):
if line in (b'\r\n', b'\n', b''): if line in (b'\r\n', b'\n', b''):
break break
hstring = b''.join(headers).decode('iso-8859-1') hstring = b''.join(headers).decode('iso-8859-1')
return email.parser.Parser(_class=_class).parsestr(hstring) return email.parser.Parser(_class=_class).parsestr(hstring)
class HTTPResponse(io.RawIOBase): class HTTPResponse(io.RawIOBase):
...@@ -675,13 +674,22 @@ class HTTPConnection: ...@@ -675,13 +674,22 @@ class HTTPConnection:
if self.debuglevel > 0: if self.debuglevel > 0:
print("send:", repr(str)) print("send:", repr(str))
try: try:
blocksize=8192 blocksize = 8192
if hasattr(str,'read') : if hasattr(str, "read") :
if self.debuglevel > 0: print("sendIng a read()able") if self.debuglevel > 0:
data=str.read(blocksize) print("sendIng a read()able")
while data: encode = False
if "b" not in str.mode:
encode = True
if self.debuglevel > 0:
print("encoding file using iso-8859-1")
while 1:
data = str.read(blocksize)
if not data:
break
if encode:
data = data.encode("iso-8859-1")
self.sock.sendall(data) self.sock.sendall(data)
data=str.read(blocksize)
else: else:
self.sock.sendall(str) self.sock.sendall(str)
except socket.error as v: except socket.error as v:
...@@ -713,8 +721,8 @@ class HTTPConnection: ...@@ -713,8 +721,8 @@ class HTTPConnection:
message_body = None message_body = None
self.send(msg) self.send(msg)
if message_body is not None: if message_body is not None:
#message_body was not a string (i.e. it is a file) and # message_body was not a string (i.e. it is a file), and
#we must run the risk of Nagle # we must run the risk of Nagle.
self.send(message_body) self.send(message_body)
def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
...@@ -904,7 +912,9 @@ class HTTPConnection: ...@@ -904,7 +912,9 @@ class HTTPConnection:
for hdr, value in headers.items(): for hdr, value in headers.items():
self.putheader(hdr, value) self.putheader(hdr, value)
if isinstance(body, str): if isinstance(body, str):
body = body.encode('ascii') # RFC 2616 Section 3.7.1 says that text has a
# default charset of iso-8859-1.
body = body.encode('iso-8859-1')
self.endheaders(body) self.endheaders(body)
def getresponse(self): def getresponse(self):
......
...@@ -272,9 +272,80 @@ class HTTPSTimeoutTest(TestCase): ...@@ -272,9 +272,80 @@ class HTTPSTimeoutTest(TestCase):
h = httplib.HTTPSConnection(HOST, TimeoutTest.PORT, timeout=30) h = httplib.HTTPSConnection(HOST, TimeoutTest.PORT, timeout=30)
self.assertEqual(h.timeout, 30) self.assertEqual(h.timeout, 30)
class RequestBodyTest(TestCase):
    """Test cases where a request includes a message body."""

    def setUp(self):
        """Attach a FakeSocket to the connection so sent data can be inspected."""
        self.conn = httplib.HTTPConnection('example.com')
        self.sock = FakeSocket("")
        self.conn.sock = self.sock

    def tearDown(self):
        """Remove the scratch file that the file-body tests create."""
        # Runs after every test; harmless when the file was never created
        # (assumes support.unlink ignores a missing file -- TODO confirm).
        support.unlink(support.TESTFN)

    def get_headers_and_fp(self):
        """Return the parsed request headers and a stream positioned at the body.

        The data captured by the fake socket is wrapped in a BytesIO; the
        request line is consumed, then the headers are parsed, leaving the
        stream at the first byte of the entity body.
        """
        f = io.BytesIO(self.sock.data)
        f.readline()  # read the request line
        message = httplib.parse_headers(f)
        return message, f

    def test_manual_content_length(self):
        """A caller-supplied Content-Length must not be overridden."""
        # Set an incorrect content-length so that we can verify that
        # it will not be over-ridden by the library.
        self.conn.request("PUT", "/url", "body",
                          {"Content-Length": "42"})
        message, f = self.get_headers_and_fp()
        self.assertEqual("42", message.get("content-length"))
        self.assertEqual(4, len(f.read()))

    def test_ascii_body(self):
        """An ASCII str body is sent unchanged with a matching length."""
        self.conn.request("PUT", "/url", "body")
        message, f = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("4", message.get("content-length"))
        self.assertEqual(b'body', f.read())

    def test_latin1_body(self):
        """A str body with a non-ASCII Latin-1 character is sent as one byte per char."""
        self.conn.request("PUT", "/url", "body\xc1")
        message, f = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("5", message.get("content-length"))
        self.assertEqual(b'body\xc1', f.read())

    def test_bytes_body(self):
        """A bytes body is sent verbatim."""
        self.conn.request("PUT", "/url", b"body\xc1")
        message, f = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("5", message.get("content-length"))
        self.assertEqual(b'body\xc1', f.read())

    def test_file_body(self):
        """A file opened in text mode can be passed as the request body."""
        with open(support.TESTFN, "w") as out:
            out.write("body")
        # Keep the body file under its own name and close it deterministically;
        # previously the handle was rebound and never closed.
        with open(support.TESTFN) as body_file:
            self.conn.request("PUT", "/url", body_file)
        message, f = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("4", message.get("content-length"))
        self.assertEqual(b'body', f.read())

    def test_binary_file_body(self):
        """A file opened in binary mode is sent without re-encoding."""
        with open(support.TESTFN, "wb") as out:
            out.write(b"body\xc1")
        # Same handle hygiene as test_file_body.
        with open(support.TESTFN, "rb") as body_file:
            self.conn.request("PUT", "/url", body_file)
        message, f = self.get_headers_and_fp()
        self.assertEqual("text/plain", message.get_content_type())
        self.assertEqual(None, message.get_charset())
        self.assertEqual("5", message.get("content-length"))
        self.assertEqual(b'body\xc1', f.read())
def test_main(verbose=None): def test_main(verbose=None):
support.run_unittest(HeaderTests, OfflineTest, BasicTest, TimeoutTest, support.run_unittest(HeaderTests, OfflineTest, BasicTest, TimeoutTest,
HTTPSTimeoutTest) HTTPSTimeoutTest, RequestBodyTest)
if __name__ == '__main__': if __name__ == '__main__':
test_main() test_main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment