Commit 7c3e6848 authored by Lars Gustäbel

Issue #24838: Merge tarfile fix from 3.5.

parents 1d048926 0f450abe
...@@ -815,11 +815,11 @@ class TarInfo(object): ...@@ -815,11 +815,11 @@ class TarInfo(object):
""" """
info["magic"] = POSIX_MAGIC info["magic"] = POSIX_MAGIC
if len(info["linkname"]) > LENGTH_LINK: if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
raise ValueError("linkname is too long") raise ValueError("linkname is too long")
if len(info["name"]) > LENGTH_NAME: if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
info["prefix"], info["name"] = self._posix_split_name(info["name"]) info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)
return self._create_header(info, USTAR_FORMAT, encoding, errors) return self._create_header(info, USTAR_FORMAT, encoding, errors)
...@@ -829,10 +829,10 @@ class TarInfo(object): ...@@ -829,10 +829,10 @@ class TarInfo(object):
info["magic"] = GNU_MAGIC info["magic"] = GNU_MAGIC
buf = b"" buf = b""
if len(info["linkname"]) > LENGTH_LINK: if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors) buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
if len(info["name"]) > LENGTH_NAME: if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors) buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
return buf + self._create_header(info, GNU_FORMAT, encoding, errors) return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
...@@ -892,19 +892,20 @@ class TarInfo(object): ...@@ -892,19 +892,20 @@ class TarInfo(object):
""" """
return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8") return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
def _posix_split_name(self, name): def _posix_split_name(self, name, encoding, errors):
"""Split a name longer than 100 chars into a prefix """Split a name longer than 100 chars into a prefix
and a name part. and a name part.
""" """
prefix = name[:LENGTH_PREFIX + 1] components = name.split("/")
while prefix and prefix[-1] != "/": for i in range(1, len(components)):
prefix = prefix[:-1] prefix = "/".join(components[:i])
name = "/".join(components[i:])
name = name[len(prefix):] if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
prefix = prefix[:-1] len(name.encode(encoding, errors)) <= LENGTH_NAME:
break
if not prefix or len(name) > LENGTH_NAME: else:
raise ValueError("name is too long") raise ValueError("name is too long")
return prefix, name return prefix, name
@staticmethod @staticmethod
......
...@@ -1667,9 +1667,7 @@ class PaxWriteTest(GNUWriteTest): ...@@ -1667,9 +1667,7 @@ class PaxWriteTest(GNUWriteTest):
tar.close() tar.close()
class UstarUnicodeTest(unittest.TestCase): class UnicodeTest:
format = tarfile.USTAR_FORMAT
def test_iso8859_1_filename(self): def test_iso8859_1_filename(self):
self._test_unicode_filename("iso8859-1") self._test_unicode_filename("iso8859-1")
...@@ -1750,7 +1748,86 @@ class UstarUnicodeTest(unittest.TestCase): ...@@ -1750,7 +1748,86 @@ class UstarUnicodeTest(unittest.TestCase):
tar.close() tar.close()
class GNUUnicodeTest(UstarUnicodeTest): class UstarUnicodeTest(UnicodeTest, unittest.TestCase):
format = tarfile.USTAR_FORMAT
# Test whether the utf-8 encoded version of a filename exceeds the 100
# bytes name field limit (every occurrence of '\xff' will be expanded to 2
# bytes).
def test_unicode_name1(self):
self._test_ustar_name("0123456789" * 10)
self._test_ustar_name("0123456789" * 10 + "0", ValueError)
self._test_ustar_name("0123456789" * 9 + "01234567\xff")
self._test_ustar_name("0123456789" * 9 + "012345678\xff", ValueError)
def test_unicode_name2(self):
self._test_ustar_name("0123456789" * 9 + "012345\xff\xff")
self._test_ustar_name("0123456789" * 9 + "0123456\xff\xff", ValueError)
# Test whether the utf-8 encoded version of a filename exceeds the 155
# bytes prefix + '/' + 100 bytes name limit.
def test_unicode_longname1(self):
self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 10)
self._test_ustar_name("0123456789" * 15 + "0123/4" + "0123456789" * 10, ValueError)
self._test_ustar_name("0123456789" * 15 + "012\xff/" + "0123456789" * 10)
self._test_ustar_name("0123456789" * 15 + "0123\xff/" + "0123456789" * 10, ValueError)
def test_unicode_longname2(self):
self._test_ustar_name("0123456789" * 15 + "01\xff/2" + "0123456789" * 10, ValueError)
self._test_ustar_name("0123456789" * 15 + "01\xff\xff/" + "0123456789" * 10, ValueError)
def test_unicode_longname3(self):
self._test_ustar_name("0123456789" * 15 + "01\xff\xff/2" + "0123456789" * 10, ValueError)
self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 9 + "01234567\xff")
self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 9 + "012345678\xff", ValueError)
def test_unicode_longname4(self):
self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 9 + "012345\xff\xff")
self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 9 + "0123456\xff\xff", ValueError)
def _test_ustar_name(self, name, exc=None):
with tarfile.open(tmpname, "w", format=self.format, encoding="utf-8") as tar:
t = tarfile.TarInfo(name)
if exc is None:
tar.addfile(t)
else:
self.assertRaises(exc, tar.addfile, t)
if exc is None:
with tarfile.open(tmpname, "r") as tar:
for t in tar:
self.assertEqual(name, t.name)
break
# Test the same as above for the 100 bytes link field.
def test_unicode_link1(self):
self._test_ustar_link("0123456789" * 10)
self._test_ustar_link("0123456789" * 10 + "0", ValueError)
self._test_ustar_link("0123456789" * 9 + "01234567\xff")
self._test_ustar_link("0123456789" * 9 + "012345678\xff", ValueError)
def test_unicode_link2(self):
self._test_ustar_link("0123456789" * 9 + "012345\xff\xff")
self._test_ustar_link("0123456789" * 9 + "0123456\xff\xff", ValueError)
def _test_ustar_link(self, name, exc=None):
with tarfile.open(tmpname, "w", format=self.format, encoding="utf-8") as tar:
t = tarfile.TarInfo("foo")
t.linkname = name
if exc is None:
tar.addfile(t)
else:
self.assertRaises(exc, tar.addfile, t)
if exc is None:
with tarfile.open(tmpname, "r") as tar:
for t in tar:
self.assertEqual(name, t.linkname)
break
class GNUUnicodeTest(UnicodeTest, unittest.TestCase):
format = tarfile.GNU_FORMAT format = tarfile.GNU_FORMAT
...@@ -1768,7 +1845,7 @@ class GNUUnicodeTest(UstarUnicodeTest): ...@@ -1768,7 +1845,7 @@ class GNUUnicodeTest(UstarUnicodeTest):
self.fail("unable to read bad GNU tar pax header") self.fail("unable to read bad GNU tar pax header")
class PAXUnicodeTest(UstarUnicodeTest): class PAXUnicodeTest(UnicodeTest, unittest.TestCase):
format = tarfile.PAX_FORMAT format = tarfile.PAX_FORMAT
......
...@@ -245,6 +245,9 @@ Core and Builtins ...@@ -245,6 +245,9 @@ Core and Builtins
Library Library
------- -------
- Issue #24838: tarfile's ustar and gnu formats now correctly calculate name
and link field limits for multibyte character encodings like utf-8.
- Issue #26657: Fix directory traversal vulnerability with http.server on - Issue #26657: Fix directory traversal vulnerability with http.server on
Windows. This fixes a regression that was introduced in 3.3.4rc1 and Windows. This fixes a regression that was introduced in 3.3.4rc1 and
3.4.0rc1. Based on patch by Philipp Hagemeister. 3.4.0rc1. Based on patch by Philipp Hagemeister.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment