Commit 41a08e55 authored by Serhiy Storchaka

Issue #22165: SimpleHTTPRequestHandler now supports undecodable file names.

parents f9e227e5 cb5bc408
...@@ -747,7 +747,12 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): ...@@ -747,7 +747,12 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
return None return None
list.sort(key=lambda a: a.lower()) list.sort(key=lambda a: a.lower())
r = [] r = []
displaypath = html.escape(urllib.parse.unquote(self.path)) try:
displaypath = urllib.parse.unquote(self.path,
errors='surrogatepass')
except UnicodeDecodeError:
displaypath = urllib.parse.unquote(path)
displaypath = html.escape(displaypath)
enc = sys.getfilesystemencoding() enc = sys.getfilesystemencoding()
title = 'Directory listing for %s' % displaypath title = 'Directory listing for %s' % displaypath
r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" ' r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
...@@ -769,9 +774,11 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): ...@@ -769,9 +774,11 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
displayname = name + "@" displayname = name + "@"
# Note: a link to a directory displays with @ and links with / # Note: a link to a directory displays with @ and links with /
r.append('<li><a href="%s">%s</a></li>' r.append('<li><a href="%s">%s</a></li>'
% (urllib.parse.quote(linkname), html.escape(displayname))) % (urllib.parse.quote(linkname,
errors='surrogatepass'),
html.escape(displayname)))
r.append('</ul>\n<hr>\n</body>\n</html>\n') r.append('</ul>\n<hr>\n</body>\n</html>\n')
encoded = '\n'.join(r).encode(enc) encoded = '\n'.join(r).encode(enc, 'surrogateescape')
f = io.BytesIO() f = io.BytesIO()
f.write(encoded) f.write(encoded)
f.seek(0) f.seek(0)
...@@ -794,7 +801,11 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): ...@@ -794,7 +801,11 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
path = path.split('#',1)[0] path = path.split('#',1)[0]
# Don't forget explicit trailing slash when normalizing. Issue17324 # Don't forget explicit trailing slash when normalizing. Issue17324
trailing_slash = path.rstrip().endswith('/') trailing_slash = path.rstrip().endswith('/')
path = posixpath.normpath(urllib.parse.unquote(path)) try:
path = urllib.parse.unquote(path, errors='surrogatepass')
except UnicodeDecodeError:
path = urllib.parse.unquote(path)
path = posixpath.normpath(path)
words = path.split('/') words = path.split('/')
words = filter(None, words) words = filter(None, words)
path = os.getcwd() path = os.getcwd()
......
...@@ -14,6 +14,7 @@ import re ...@@ -14,6 +14,7 @@ import re
import base64 import base64
import shutil import shutil
import urllib.parse import urllib.parse
import html
import http.client import http.client
import tempfile import tempfile
from io import BytesIO from io import BytesIO
...@@ -266,6 +267,24 @@ class SimpleHTTPServerTestCase(BaseTestCase): ...@@ -266,6 +267,24 @@ class SimpleHTTPServerTestCase(BaseTestCase):
self.assertIsNotNone(response.reason) self.assertIsNotNone(response.reason)
if data: if data:
self.assertEqual(data, body) self.assertEqual(data, body)
return body
@unittest.skipUnless(support.TESTFN_UNDECODABLE,
                     'need support.TESTFN_UNDECODABLE')
def test_undecodable_filename(self):
    """Serve a file whose name cannot be decoded by the filesystem encoding.

    The test creates such a file in the served directory, then checks that
    the directory listing contains both a percent-quoted link to it and its
    HTML-escaped display name, and finally that requesting the quoted link
    returns the file's contents.
    """
    # Local import: only part of the module's import block is visible here.
    import sys

    # The directory listing is encoded with the filesystem encoding and the
    # 'surrogateescape' handler, so the expected bytes must be produced the
    # same way instead of assuming UTF-8.
    enc = sys.getfilesystemencoding()

    filename = os.fsdecode(support.TESTFN_UNDECODABLE) + '.txt'
    with open(os.path.join(self.tempdir, filename), 'wb') as f:
        f.write(support.TESTFN_UNDECODABLE)

    # The listing must reference the file by quoted link and escaped name.
    response = self.request(self.tempdir_name + '/')
    body = self.check_status_and_reason(response, 200)
    quotedname = urllib.parse.quote(filename, errors='surrogatepass')
    self.assertIn(('href="%s"' % quotedname)
                  .encode(enc, 'surrogateescape'), body)
    self.assertIn(('>%s<' % html.escape(filename))
                  .encode(enc, 'surrogateescape'), body)

    # Following the quoted link must return the bytes written above.
    response = self.request(self.tempdir_name + '/' + quotedname)
    self.check_status_and_reason(response, 200,
                                 data=support.TESTFN_UNDECODABLE)
def test_get(self): def test_get(self):
#constructs the path relative to the root directory of the HTTPServer #constructs the path relative to the root directory of the HTTPServer
......
...@@ -118,6 +118,8 @@ Core and Builtins ...@@ -118,6 +118,8 @@ Core and Builtins
Library Library
------- -------
- Issue #22165: SimpleHTTPRequestHandler now supports undecodable file names.
- Issue #15381: Optimized line reading in io.BytesIO. - Issue #15381: Optimized line reading in io.BytesIO.
- Issue #20729: Restored the use of lazy iterkeys()/itervalues()/iteritems() - Issue #20729: Restored the use of lazy iterkeys()/itervalues()/iteritems()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment