Commit 9fc720e5, authored by David K. Hess and committed by Steve Dower

bpo-4963: Fix for initialization and non-deterministic behavior issues in mimetypes (GH-3062)

parent 8bd2872a
...@@ -93,6 +93,10 @@ behavior of the module. ...@@ -93,6 +93,10 @@ behavior of the module.
Specifying an empty list for *files* will prevent the system defaults from Specifying an empty list for *files* will prevent the system defaults from
being applied: only the well-known values will be present from a built-in list. being applied: only the well-known values will be present from a built-in list.
If *files* is ``None`` the internal data structure is completely rebuilt to its
initial default value. This is a stable operation and will produce the same results
when called multiple times.
.. versionchanged:: 3.2 .. versionchanged:: 3.2
Previously, Windows registry settings were ignored. Previously, Windows registry settings were ignored.
......
...@@ -66,13 +66,13 @@ class MimeTypes: ...@@ -66,13 +66,13 @@ class MimeTypes:
def __init__(self, filenames=(), strict=True): def __init__(self, filenames=(), strict=True):
if not inited: if not inited:
init() init()
self.encodings_map = encodings_map.copy() self.encodings_map = _encodings_map_default.copy()
self.suffix_map = suffix_map.copy() self.suffix_map = _suffix_map_default.copy()
self.types_map = ({}, {}) # dict for (non-strict, strict) self.types_map = ({}, {}) # dict for (non-strict, strict)
self.types_map_inv = ({}, {}) self.types_map_inv = ({}, {})
for (ext, type) in types_map.items(): for (ext, type) in _types_map_default.items():
self.add_type(type, ext, True) self.add_type(type, ext, True)
for (ext, type) in common_types.items(): for (ext, type) in _common_types_default.items():
self.add_type(type, ext, False) self.add_type(type, ext, False)
for name in filenames: for name in filenames:
self.read(name, strict) self.read(name, strict)
...@@ -346,11 +346,19 @@ def init(files=None): ...@@ -346,11 +346,19 @@ def init(files=None):
global suffix_map, types_map, encodings_map, common_types global suffix_map, types_map, encodings_map, common_types
global inited, _db global inited, _db
inited = True # so that MimeTypes.__init__() doesn't call us again inited = True # so that MimeTypes.__init__() doesn't call us again
db = MimeTypes()
if files is None: if files is None or _db is None:
db = MimeTypes()
if _winreg: if _winreg:
db.read_windows_registry() db.read_windows_registry()
files = knownfiles
if files is None:
files = knownfiles
else:
files = knownfiles + list(files)
else:
db = _db
for file in files: for file in files:
if os.path.isfile(file): if os.path.isfile(file):
db.read(file) db.read(file)
...@@ -374,12 +382,12 @@ def read_mime_types(file): ...@@ -374,12 +382,12 @@ def read_mime_types(file):
def _default_mime_types(): def _default_mime_types():
global suffix_map global suffix_map, _suffix_map_default
global encodings_map global encodings_map, _encodings_map_default
global types_map global types_map, _types_map_default
global common_types global common_types, _common_types_default
suffix_map = { suffix_map = _suffix_map_default = {
'.svgz': '.svg.gz', '.svgz': '.svg.gz',
'.tgz': '.tar.gz', '.tgz': '.tar.gz',
'.taz': '.tar.gz', '.taz': '.tar.gz',
...@@ -388,7 +396,7 @@ def _default_mime_types(): ...@@ -388,7 +396,7 @@ def _default_mime_types():
'.txz': '.tar.xz', '.txz': '.tar.xz',
} }
encodings_map = { encodings_map = _encodings_map_default = {
'.gz': 'gzip', '.gz': 'gzip',
'.Z': 'compress', '.Z': 'compress',
'.bz2': 'bzip2', '.bz2': 'bzip2',
...@@ -399,152 +407,155 @@ def _default_mime_types(): ...@@ -399,152 +407,155 @@ def _default_mime_types():
# at http://www.iana.org/assignments/media-types # at http://www.iana.org/assignments/media-types
# or extensions, i.e. using the x- prefix # or extensions, i.e. using the x- prefix
# If you add to these, please keep them sorted! # If you add to these, please keep them sorted by mime type.
types_map = { # Make sure the entry with the preferred file extension for a particular mime type
# appears before any others of the same mimetype.
types_map = _types_map_default = {
'.js' : 'application/javascript',
'.mjs' : 'application/javascript',
'.json' : 'application/json',
'.doc' : 'application/msword',
'.dot' : 'application/msword',
'.wiz' : 'application/msword',
'.bin' : 'application/octet-stream',
'.a' : 'application/octet-stream', '.a' : 'application/octet-stream',
'.dll' : 'application/octet-stream',
'.exe' : 'application/octet-stream',
'.o' : 'application/octet-stream',
'.obj' : 'application/octet-stream',
'.so' : 'application/octet-stream',
'.oda' : 'application/oda',
'.pdf' : 'application/pdf',
'.p7c' : 'application/pkcs7-mime',
'.ps' : 'application/postscript',
'.ai' : 'application/postscript', '.ai' : 'application/postscript',
'.aif' : 'audio/x-aiff', '.eps' : 'application/postscript',
'.aifc' : 'audio/x-aiff', '.m3u' : 'application/vnd.apple.mpegurl',
'.aiff' : 'audio/x-aiff', '.m3u8' : 'application/vnd.apple.mpegurl',
'.au' : 'audio/basic', '.xls' : 'application/vnd.ms-excel',
'.avi' : 'video/x-msvideo', '.xlb' : 'application/vnd.ms-excel',
'.bat' : 'text/plain', '.ppt' : 'application/vnd.ms-powerpoint',
'.pot' : 'application/vnd.ms-powerpoint',
'.ppa' : 'application/vnd.ms-powerpoint',
'.pps' : 'application/vnd.ms-powerpoint',
'.pwz' : 'application/vnd.ms-powerpoint',
'.wasm' : 'application/wasm',
'.bcpio' : 'application/x-bcpio', '.bcpio' : 'application/x-bcpio',
'.bin' : 'application/octet-stream',
'.bmp' : 'image/bmp',
'.c' : 'text/plain',
'.cdf' : 'application/x-netcdf',
'.cpio' : 'application/x-cpio', '.cpio' : 'application/x-cpio',
'.csh' : 'application/x-csh', '.csh' : 'application/x-csh',
'.css' : 'text/css',
'.csv' : 'text/csv',
'.dll' : 'application/octet-stream',
'.doc' : 'application/msword',
'.dot' : 'application/msword',
'.dvi' : 'application/x-dvi', '.dvi' : 'application/x-dvi',
'.eml' : 'message/rfc822',
'.eps' : 'application/postscript',
'.etx' : 'text/x-setext',
'.exe' : 'application/octet-stream',
'.gif' : 'image/gif',
'.gtar' : 'application/x-gtar', '.gtar' : 'application/x-gtar',
'.h' : 'text/plain',
'.hdf' : 'application/x-hdf', '.hdf' : 'application/x-hdf',
'.htm' : 'text/html',
'.html' : 'text/html',
'.ico' : 'image/vnd.microsoft.icon',
'.ief' : 'image/ief',
'.jpe' : 'image/jpeg',
'.jpeg' : 'image/jpeg',
'.jpg' : 'image/jpeg',
'.js' : 'application/javascript',
'.json' : 'application/json',
'.ksh' : 'text/plain',
'.latex' : 'application/x-latex', '.latex' : 'application/x-latex',
'.m1v' : 'video/mpeg',
'.m3u' : 'application/vnd.apple.mpegurl',
'.m3u8' : 'application/vnd.apple.mpegurl',
'.man' : 'application/x-troff-man',
'.me' : 'application/x-troff-me',
'.mht' : 'message/rfc822',
'.mhtml' : 'message/rfc822',
'.mif' : 'application/x-mif', '.mif' : 'application/x-mif',
'.mjs' : 'application/javascript', '.cdf' : 'application/x-netcdf',
'.mov' : 'video/quicktime',
'.movie' : 'video/x-sgi-movie',
'.mp2' : 'audio/mpeg',
'.mp3' : 'audio/mpeg',
'.mp4' : 'video/mp4',
'.mpa' : 'video/mpeg',
'.mpe' : 'video/mpeg',
'.mpeg' : 'video/mpeg',
'.mpg' : 'video/mpeg',
'.ms' : 'application/x-troff-ms',
'.nc' : 'application/x-netcdf', '.nc' : 'application/x-netcdf',
'.nws' : 'message/rfc822',
'.o' : 'application/octet-stream',
'.obj' : 'application/octet-stream',
'.oda' : 'application/oda',
'.p12' : 'application/x-pkcs12', '.p12' : 'application/x-pkcs12',
'.p7c' : 'application/pkcs7-mime',
'.pbm' : 'image/x-portable-bitmap',
'.pdf' : 'application/pdf',
'.pfx' : 'application/x-pkcs12', '.pfx' : 'application/x-pkcs12',
'.pgm' : 'image/x-portable-graymap', '.ram' : 'application/x-pn-realaudio',
'.pl' : 'text/plain',
'.png' : 'image/png',
'.pnm' : 'image/x-portable-anymap',
'.pot' : 'application/vnd.ms-powerpoint',
'.ppa' : 'application/vnd.ms-powerpoint',
'.ppm' : 'image/x-portable-pixmap',
'.pps' : 'application/vnd.ms-powerpoint',
'.ppt' : 'application/vnd.ms-powerpoint',
'.ps' : 'application/postscript',
'.pwz' : 'application/vnd.ms-powerpoint',
'.py' : 'text/x-python',
'.pyc' : 'application/x-python-code', '.pyc' : 'application/x-python-code',
'.pyo' : 'application/x-python-code', '.pyo' : 'application/x-python-code',
'.qt' : 'video/quicktime',
'.ra' : 'audio/x-pn-realaudio',
'.ram' : 'application/x-pn-realaudio',
'.ras' : 'image/x-cmu-raster',
'.rdf' : 'application/xml',
'.rgb' : 'image/x-rgb',
'.roff' : 'application/x-troff',
'.rtx' : 'text/richtext',
'.sgm' : 'text/x-sgml',
'.sgml' : 'text/x-sgml',
'.sh' : 'application/x-sh', '.sh' : 'application/x-sh',
'.shar' : 'application/x-shar', '.shar' : 'application/x-shar',
'.snd' : 'audio/basic', '.swf' : 'application/x-shockwave-flash',
'.so' : 'application/octet-stream',
'.src' : 'application/x-wais-source',
'.sv4cpio': 'application/x-sv4cpio', '.sv4cpio': 'application/x-sv4cpio',
'.sv4crc' : 'application/x-sv4crc', '.sv4crc' : 'application/x-sv4crc',
'.svg' : 'image/svg+xml',
'.swf' : 'application/x-shockwave-flash',
'.t' : 'application/x-troff',
'.tar' : 'application/x-tar', '.tar' : 'application/x-tar',
'.tcl' : 'application/x-tcl', '.tcl' : 'application/x-tcl',
'.tex' : 'application/x-tex', '.tex' : 'application/x-tex',
'.texi' : 'application/x-texinfo', '.texi' : 'application/x-texinfo',
'.texinfo': 'application/x-texinfo', '.texinfo': 'application/x-texinfo',
'.tif' : 'image/tiff', '.roff' : 'application/x-troff',
'.tiff' : 'image/tiff', '.t' : 'application/x-troff',
'.tr' : 'application/x-troff', '.tr' : 'application/x-troff',
'.tsv' : 'text/tab-separated-values', '.man' : 'application/x-troff-man',
'.txt' : 'text/plain', '.me' : 'application/x-troff-me',
'.ms' : 'application/x-troff-ms',
'.ustar' : 'application/x-ustar', '.ustar' : 'application/x-ustar',
'.vcf' : 'text/x-vcard', '.src' : 'application/x-wais-source',
'.wasm' : 'application/wasm', '.xsl' : 'application/xml',
'.wav' : 'audio/x-wav', '.rdf' : 'application/xml',
'.webm' : 'video/webm',
'.wiz' : 'application/msword',
'.wsdl' : 'application/xml', '.wsdl' : 'application/xml',
'.xbm' : 'image/x-xbitmap',
'.xlb' : 'application/vnd.ms-excel',
'.xls' : 'application/vnd.ms-excel',
'.xml' : 'text/xml',
'.xpdl' : 'application/xml', '.xpdl' : 'application/xml',
'.zip' : 'application/zip',
'.au' : 'audio/basic',
'.snd' : 'audio/basic',
'.mp3' : 'audio/mpeg',
'.mp2' : 'audio/mpeg',
'.aif' : 'audio/x-aiff',
'.aifc' : 'audio/x-aiff',
'.aiff' : 'audio/x-aiff',
'.ra' : 'audio/x-pn-realaudio',
'.wav' : 'audio/x-wav',
'.bmp' : 'image/bmp',
'.gif' : 'image/gif',
'.ief' : 'image/ief',
'.jpg' : 'image/jpeg',
'.jpe' : 'image/jpeg',
'.jpeg' : 'image/jpeg',
'.png' : 'image/png',
'.svg' : 'image/svg+xml',
'.tiff' : 'image/tiff',
'.tif' : 'image/tiff',
'.ico' : 'image/vnd.microsoft.icon',
'.ras' : 'image/x-cmu-raster',
'.bmp' : 'image/x-ms-bmp',
'.pnm' : 'image/x-portable-anymap',
'.pbm' : 'image/x-portable-bitmap',
'.pgm' : 'image/x-portable-graymap',
'.ppm' : 'image/x-portable-pixmap',
'.rgb' : 'image/x-rgb',
'.xbm' : 'image/x-xbitmap',
'.xpm' : 'image/x-xpixmap', '.xpm' : 'image/x-xpixmap',
'.xsl' : 'application/xml',
'.xwd' : 'image/x-xwindowdump', '.xwd' : 'image/x-xwindowdump',
'.zip' : 'application/zip', '.eml' : 'message/rfc822',
'.mht' : 'message/rfc822',
'.mhtml' : 'message/rfc822',
'.nws' : 'message/rfc822',
'.css' : 'text/css',
'.csv' : 'text/csv',
'.html' : 'text/html',
'.htm' : 'text/html',
'.txt' : 'text/plain',
'.bat' : 'text/plain',
'.c' : 'text/plain',
'.h' : 'text/plain',
'.ksh' : 'text/plain',
'.pl' : 'text/plain',
'.rtx' : 'text/richtext',
'.tsv' : 'text/tab-separated-values',
'.py' : 'text/x-python',
'.etx' : 'text/x-setext',
'.sgm' : 'text/x-sgml',
'.sgml' : 'text/x-sgml',
'.vcf' : 'text/x-vcard',
'.xml' : 'text/xml',
'.mp4' : 'video/mp4',
'.mpeg' : 'video/mpeg',
'.m1v' : 'video/mpeg',
'.mpa' : 'video/mpeg',
'.mpe' : 'video/mpeg',
'.mpg' : 'video/mpeg',
'.mov' : 'video/quicktime',
'.qt' : 'video/quicktime',
'.webm' : 'video/webm',
'.avi' : 'video/x-msvideo',
'.movie' : 'video/x-sgi-movie',
} }
# These are non-standard types, commonly found in the wild. They will # These are non-standard types, commonly found in the wild. They will
# only match if strict=0 flag is given to the API methods. # only match if strict=0 flag is given to the API methods.
# Please sort these too # Please sort these too
common_types = { common_types = _common_types_default = {
'.jpg' : 'image/jpg', '.rtf' : 'application/rtf',
'.mid' : 'audio/midi',
'.midi': 'audio/midi', '.midi': 'audio/midi',
'.mid' : 'audio/midi',
'.jpg' : 'image/jpg',
'.pict': 'image/pict',
'.pct' : 'image/pict', '.pct' : 'image/pict',
'.pic' : 'image/pict', '.pic' : 'image/pict',
'.pict': 'image/pict', '.xul' : 'text/xul',
'.rtf' : 'application/rtf',
'.xul' : 'text/xul'
} }
......
...@@ -79,6 +79,57 @@ class MimeTypesTestCase(unittest.TestCase): ...@@ -79,6 +79,57 @@ class MimeTypesTestCase(unittest.TestCase):
strict=True) strict=True)
self.assertEqual(exts, ['.g3', '.g\xb3']) self.assertEqual(exts, ['.g3', '.g\xb3'])
def test_init_reinitializes(self):
# Issue 4936: make sure an init starts clean
# First, put some poison into the types table
mimetypes.add_type('foo/bar', '.foobar')
self.assertEqual(mimetypes.guess_extension('foo/bar'), '.foobar')
# Reinitialize
mimetypes.init()
# Poison should be gone.
self.assertEqual(mimetypes.guess_extension('foo/bar'), None)
def test_preferred_extension(self):
def check_extensions():
self.assertEqual(mimetypes.guess_extension('application/octet-stream'), '.bin')
self.assertEqual(mimetypes.guess_extension('application/postscript'), '.ps')
self.assertEqual(mimetypes.guess_extension('application/vnd.apple.mpegurl'), '.m3u')
self.assertEqual(mimetypes.guess_extension('application/vnd.ms-excel'), '.xls')
self.assertEqual(mimetypes.guess_extension('application/vnd.ms-powerpoint'), '.ppt')
self.assertEqual(mimetypes.guess_extension('application/x-texinfo'), '.texi')
self.assertEqual(mimetypes.guess_extension('application/x-troff'), '.roff')
self.assertEqual(mimetypes.guess_extension('application/xml'), '.xsl')
self.assertEqual(mimetypes.guess_extension('audio/mpeg'), '.mp3')
self.assertEqual(mimetypes.guess_extension('image/jpeg'), '.jpg')
self.assertEqual(mimetypes.guess_extension('image/tiff'), '.tiff')
self.assertEqual(mimetypes.guess_extension('message/rfc822'), '.eml')
self.assertEqual(mimetypes.guess_extension('text/html'), '.html')
self.assertEqual(mimetypes.guess_extension('text/plain'), '.txt')
self.assertEqual(mimetypes.guess_extension('video/mpeg'), '.mpeg')
self.assertEqual(mimetypes.guess_extension('video/quicktime'), '.mov')
check_extensions()
mimetypes.init()
check_extensions()
def test_init_stability(self):
mimetypes.init()
suffix_map = mimetypes.suffix_map
encodings_map = mimetypes.encodings_map
types_map = mimetypes.types_map
common_types = mimetypes.common_types
mimetypes.init()
self.assertIsNot(suffix_map, mimetypes.suffix_map)
self.assertIsNot(encodings_map, mimetypes.encodings_map)
self.assertIsNot(types_map, mimetypes.types_map)
self.assertIsNot(common_types, mimetypes.common_types)
self.assertEqual(suffix_map, mimetypes.suffix_map)
self.assertEqual(encodings_map, mimetypes.encodings_map)
self.assertEqual(types_map, mimetypes.types_map)
self.assertEqual(common_types, mimetypes.common_types)
def test_path_like_ob(self): def test_path_like_ob(self):
filename = "LICENSE.txt" filename = "LICENSE.txt"
filepath = pathlib.Path(filename) filepath = pathlib.Path(filename)
......
Fixed non-deterministic behavior related to mimetypes extension mapping and
module reinitialization.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment