Skip to content

Update mimetypes from 3.13.5 #5879

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 88 additions & 23 deletions Lib/mimetypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,20 @@
import sys
import posixpath
import urllib.parse

try:
from _winapi import _mimetypes_read_windows_registry
except ImportError:
_mimetypes_read_windows_registry = None

try:
import winreg as _winreg
except ImportError:
_winreg = None

# Public API of the module.  Each name is listed exactly once; the stale
# export line without "guess_file_type" is dropped so that "guess_type",
# "guess_all_extensions" and "guess_extension" are no longer duplicated.
__all__ = [
    "knownfiles", "inited", "MimeTypes",
    "guess_type", "guess_file_type", "guess_all_extensions", "guess_extension",
    "add_type", "init", "read_mime_types",
    "suffix_map", "encodings_map", "types_map", "common_types"
]
Expand Down Expand Up @@ -89,6 +95,8 @@ def add_type(self, type, ext, strict=True):
list of standard types, else to the list of non-standard
types.
"""
if not type:
return
self.types_map[strict][ext] = type
exts = self.types_map_inv[strict].setdefault(type, [])
if ext not in exts:
Expand All @@ -113,8 +121,14 @@ def guess_type(self, url, strict=True):
Optional `strict' argument when False adds a bunch of commonly found,
but non-standard types.
"""
# TODO: Deprecate accepting file paths (in particular path-like objects).
url = os.fspath(url)
scheme, url = urllib.parse._splittype(url)
p = urllib.parse.urlparse(url)
if p.scheme and len(p.scheme) > 1:
scheme = p.scheme
url = p.path
else:
return self.guess_file_type(url, strict=strict)
if scheme == 'data':
# syntax of data URLs:
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
Expand All @@ -134,26 +148,36 @@ def guess_type(self, url, strict=True):
if '=' in type or '/' not in type:
type = 'text/plain'
return type, None # never compressed, so encoding is None
base, ext = posixpath.splitext(url)
while ext in self.suffix_map:
base, ext = posixpath.splitext(base + self.suffix_map[ext])
return self._guess_file_type(url, strict, posixpath.splitext)

def guess_file_type(self, path, *, strict=True):
    """Guess the type of a file based on its path.

    Similar to guess_type(), but takes file path instead of URL.

    `path' is passed through os.fsdecode(), so bytes and path-like
    objects are accepted as well as str.  Any drive prefix reported by
    os.path.splitdrive() is removed before the extension is examined.

    The keyword-only `strict' flag is forwarded unchanged to
    self._guess_file_type(), whose result is returned as is.
    """
    path = os.fsdecode(path)
    # Only the part after the drive takes part in extension matching.
    path = os.path.splitdrive(path)[1]
    # File paths use the platform's splitext, not the POSIX one used for URLs.
    return self._guess_file_type(path, strict, os.path.splitext)

def _guess_file_type(self, path, strict, splitext):
    """Guess the type and encoding of `path' from its extension(s).

    `splitext' is the function used to split off an extension (the
    visible callers pass posixpath.splitext or os.path.splitext); it is
    used for every split so that one consistent rule applies.

    Return a tuple (type, encoding).  `type' is looked up in the
    standard map self.types_map[True] and, only when `strict' is false,
    in the non-standard map self.types_map[False]; it is None when the
    extension is unknown.  `encoding' comes from self.encodings_map and
    is None when the last extension is not an encoding suffix.
    """
    base, ext = splitext(path)
    # Expand aliases such as ".tgz" -> ".tar.gz"; the lookup ignores case.
    while (ext_lower := ext.lower()) in self.suffix_map:
        base, ext = splitext(base + self.suffix_map[ext_lower])
    # encodings_map is case sensitive
    if ext in self.encodings_map:
        encoding = self.encodings_map[ext]
        # Strip the encoding suffix exactly once to expose the type suffix.
        base, ext = splitext(base)
    else:
        encoding = None
    # Type lookups are case-insensitive, so normalise once up front.
    ext = ext.lower()
    types_map = self.types_map[True]
    if ext in types_map:
        return types_map[ext], encoding
    elif strict:
        return None, encoding
    types_map = self.types_map[False]
    if ext in types_map:
        return types_map[ext], encoding
    else:
        return None, encoding

Expand All @@ -169,7 +193,7 @@ def guess_all_extensions(self, type, strict=True):
but non-standard types.
"""
type = type.lower()
extensions = self.types_map_inv[True].get(type, [])
extensions = list(self.types_map_inv[True].get(type, []))
if not strict:
for ext in self.types_map_inv[False].get(type, []):
if ext not in extensions:
Expand Down Expand Up @@ -213,10 +237,7 @@ def readfp(self, fp, strict=True):
list of standard types, else to the list of non-standard
types.
"""
while 1:
line = fp.readline()
if not line:
break
while line := fp.readline():
words = line.split()
for i in range(len(words)):
if words[i][0] == '#':
Expand All @@ -237,10 +258,21 @@ def read_windows_registry(self, strict=True):
types.
"""

# Windows only
if not _winreg:
if not _mimetypes_read_windows_registry and not _winreg:
return

add_type = self.add_type
if strict:
add_type = lambda type, ext: self.add_type(type, ext, True)

# Accelerated function if it is available
if _mimetypes_read_windows_registry:
_mimetypes_read_windows_registry(add_type)
elif _winreg:
self._read_windows_registry(add_type)

@classmethod
def _read_windows_registry(cls, add_type):
def enum_types(mimedb):
i = 0
while True:
Expand All @@ -265,7 +297,7 @@ def enum_types(mimedb):
subkey, 'Content Type')
if datatype != _winreg.REG_SZ:
continue
self.add_type(mimetype, subkeyname, strict)
add_type(mimetype, subkeyname)
except OSError:
continue

Expand All @@ -292,6 +324,16 @@ def guess_type(url, strict=True):
return _db.guess_type(url, strict)


def guess_file_type(path, *, strict=True):
    """Guess the type of a file based on its path.

    Similar to guess_type(), but takes file path instead of URL.

    Delegates to the guess_file_type() method of the module-level
    database `_db', calling init() first when `_db' is still None.
    The keyword-only `strict' flag is forwarded unchanged.
    """
    if _db is None:
        init()
    return _db.guess_file_type(path, strict=strict)


def guess_all_extensions(type, strict=True):
"""Guess the extensions for a file based on its MIME type.

Expand Down Expand Up @@ -349,8 +391,8 @@ def init(files=None):

if files is None or _db is None:
db = MimeTypes()
if _winreg:
db.read_windows_registry()
# Quick return if not supported
db.read_windows_registry()

if files is None:
files = knownfiles
Expand Down Expand Up @@ -401,6 +443,7 @@ def _default_mime_types():
'.Z': 'compress',
'.bz2': 'bzip2',
'.xz': 'xz',
'.br': 'br',
}

# Before adding new types, make sure they are either registered with IANA,
Expand All @@ -411,13 +454,15 @@ def _default_mime_types():
# Make sure the entry with the preferred file extension for a particular mime type
# appears before any others of the same mimetype.
types_map = _types_map_default = {
'.js' : 'application/javascript',
'.mjs' : 'application/javascript',
'.js' : 'text/javascript',
'.mjs' : 'text/javascript',
'.json' : 'application/json',
'.webmanifest': 'application/manifest+json',
'.doc' : 'application/msword',
'.dot' : 'application/msword',
'.wiz' : 'application/msword',
'.nq' : 'application/n-quads',
'.nt' : 'application/n-triples',
'.bin' : 'application/octet-stream',
'.a' : 'application/octet-stream',
'.dll' : 'application/octet-stream',
Expand All @@ -431,6 +476,7 @@ def _default_mime_types():
'.ps' : 'application/postscript',
'.ai' : 'application/postscript',
'.eps' : 'application/postscript',
'.trig' : 'application/trig',
'.m3u' : 'application/vnd.apple.mpegurl',
'.m3u8' : 'application/vnd.apple.mpegurl',
'.xls' : 'application/vnd.ms-excel',
Expand Down Expand Up @@ -480,28 +526,40 @@ def _default_mime_types():
'.wsdl' : 'application/xml',
'.xpdl' : 'application/xml',
'.zip' : 'application/zip',
'.3gp' : 'audio/3gpp',
'.3gpp' : 'audio/3gpp',
'.3g2' : 'audio/3gpp2',
'.3gpp2' : 'audio/3gpp2',
'.aac' : 'audio/aac',
'.adts' : 'audio/aac',
'.loas' : 'audio/aac',
'.ass' : 'audio/aac',
'.au' : 'audio/basic',
'.snd' : 'audio/basic',
'.mp3' : 'audio/mpeg',
'.mp2' : 'audio/mpeg',
'.opus' : 'audio/opus',
'.aif' : 'audio/x-aiff',
'.aifc' : 'audio/x-aiff',
'.aiff' : 'audio/x-aiff',
'.ra' : 'audio/x-pn-realaudio',
'.wav' : 'audio/x-wav',
'.avif' : 'image/avif',
'.bmp' : 'image/bmp',
'.gif' : 'image/gif',
'.ief' : 'image/ief',
'.jpg' : 'image/jpeg',
'.jpe' : 'image/jpeg',
'.jpeg' : 'image/jpeg',
'.heic' : 'image/heic',
'.heif' : 'image/heif',
'.png' : 'image/png',
'.svg' : 'image/svg+xml',
'.tiff' : 'image/tiff',
'.tif' : 'image/tiff',
'.ico' : 'image/vnd.microsoft.icon',
'.webp' : 'image/webp',
'.ras' : 'image/x-cmu-raster',
'.bmp' : 'image/x-ms-bmp',
'.pnm' : 'image/x-portable-anymap',
'.pbm' : 'image/x-portable-bitmap',
'.pgm' : 'image/x-portable-graymap',
Expand All @@ -518,15 +576,22 @@ def _default_mime_types():
'.csv' : 'text/csv',
'.html' : 'text/html',
'.htm' : 'text/html',
'.md' : 'text/markdown',
'.markdown': 'text/markdown',
'.n3' : 'text/n3',
'.txt' : 'text/plain',
'.bat' : 'text/plain',
'.c' : 'text/plain',
'.h' : 'text/plain',
'.ksh' : 'text/plain',
'.pl' : 'text/plain',
'.srt' : 'text/plain',
'.rtx' : 'text/richtext',
'.rtf' : 'text/rtf',
'.tsv' : 'text/tab-separated-values',
'.vtt' : 'text/vtt',
'.py' : 'text/x-python',
'.rst' : 'text/x-rst',
'.etx' : 'text/x-setext',
'.sgm' : 'text/x-sgml',
'.sgml' : 'text/x-sgml',
Expand Down
Loading
Loading