Commit f24fecd4 authored by Brett Cannon's avatar Brett Cannon

Issue #18076: Introduce importlib.util.decode_source().

The helper function makes it easier to implement
importlib.abc.InspectLoader.get_source() by making that function
require just the raw bytes for source code and handling all other
details.
parent f4375ef4
...@@ -916,6 +916,14 @@ an :term:`importer`. ...@@ -916,6 +916,14 @@ an :term:`importer`.
.. versionadded:: 3.4 .. versionadded:: 3.4
.. function:: decode_source(source_bytes)
Decode the given bytes representing source code and return it as a string
with universal newlines (as required by
:meth:`importlib.abc.InspectLoader.get_source`).
.. versionadded:: 3.4
.. function:: resolve_name(name, package) .. function:: resolve_name(name, package)
Resolve a relative module name to an absolute one. Resolve a relative module name to an absolute one.
......
...@@ -723,6 +723,18 @@ def _code_to_bytecode(code, mtime=0, source_size=0): ...@@ -723,6 +723,18 @@ def _code_to_bytecode(code, mtime=0, source_size=0):
return data return data
def decode_source(source_bytes):
    """Decode bytes representing source code and return the string.

    The encoding is determined by tokenize.detect_encoding() from the
    leading bytes of *source_bytes*.  Universal newline support is used in
    the decoding, so CRLF and lone CR line endings are returned as LF.

    Errors raised by tokenize.detect_encoding() or by bytes.decode() are
    not caught here and propagate to the caller.

    """
    import tokenize  # To avoid bootstrap issues.
    source_bytes_readline = _io.BytesIO(source_bytes).readline
    # detect_encoding() returns (encoding_name, lines_read); only the name
    # is needed because the full byte string is decoded in one go below.
    encoding, _ = tokenize.detect_encoding(source_bytes_readline)
    newline_decoder = _io.IncrementalNewlineDecoder(None, True)
    # All input is supplied in a single call, so mark it as final.  Without
    # final=True the decoder holds back a trailing lone CR (waiting for a
    # possible LF) and that last line ending would be silently dropped.
    return newline_decoder.decode(source_bytes.decode(encoding), final=True)
# Loaders ##################################################################### # Loaders #####################################################################
class BuiltinImporter: class BuiltinImporter:
...@@ -965,11 +977,7 @@ class SourceLoader(_LoaderBasics): ...@@ -965,11 +977,7 @@ class SourceLoader(_LoaderBasics):
except OSError as exc: except OSError as exc:
raise ImportError("source not available through get_data()", raise ImportError("source not available through get_data()",
name=fullname) from exc name=fullname) from exc
import tokenize return decode_source(source_bytes)
readsource = _io.BytesIO(source_bytes).readline
encoding = tokenize.detect_encoding(readsource)
newline_decoder = _io.IncrementalNewlineDecoder(None, True)
return newline_decoder.decode(source_bytes.decode(encoding[0]))
def source_to_code(self, data, path, *, _optimize=-1): def source_to_code(self, data, path, *, _optimize=-1):
"""Return the code object compiled from source. """Return the code object compiled from source.
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
from ._bootstrap import MAGIC_NUMBER from ._bootstrap import MAGIC_NUMBER
from ._bootstrap import cache_from_source from ._bootstrap import cache_from_source
from ._bootstrap import decode_source
from ._bootstrap import module_to_load from ._bootstrap import module_to_load
from ._bootstrap import set_loader from ._bootstrap import set_loader
from ._bootstrap import set_package from ._bootstrap import set_package
......
...@@ -9,6 +9,27 @@ import unittest ...@@ -9,6 +9,27 @@ import unittest
import warnings import warnings
class DecodeSourceBytesTests(unittest.TestCase):

    """Tests for util.decode_source()."""

    # Contains a non-ASCII character so that the UTF-8 and Latin-1 encodings
    # of the text differ.
    source = "string ='ü'"

    def test_ut8_default(self):
        """Bytes without an encoding declaration are decoded as UTF-8."""
        source_bytes = self.source.encode('utf-8')
        self.assertEqual(util.decode_source(source_bytes), self.source)

    def test_specified_encoding(self):
        """A coding declaration in the source selects the decoding used."""
        source = '# coding=latin-1\n' + self.source
        source_bytes = source.encode('latin-1')
        # Sanity check that the two encodings really differ; uses a unittest
        # assertion rather than a bare assert so it still runs under -O.
        self.assertNotEqual(source_bytes, source.encode('utf-8'))
        self.assertEqual(util.decode_source(source_bytes), source)

    def test_universal_newlines(self):
        """CRLF line endings come back as plain LF."""
        source = '\r\n'.join([self.source, self.source])
        source_bytes = source.encode('utf-8')
        self.assertEqual(util.decode_source(source_bytes),
                         '\n'.join([self.source, self.source]))
class ModuleToLoadTests(unittest.TestCase): class ModuleToLoadTests(unittest.TestCase):
module_name = 'ModuleManagerTest_module' module_name = 'ModuleManagerTest_module'
......
...@@ -123,6 +123,8 @@ Core and Builtins ...@@ -123,6 +123,8 @@ Core and Builtins
Library Library
------- -------
- Issue #18076: Introduce importlib.util.decode_source().
- importlib.abc.SourceLoader.get_source() no longer changes SyntaxError or - importlib.abc.SourceLoader.get_source() no longer changes SyntaxError or
UnicodeDecodeError into ImportError. UnicodeDecodeError into ImportError.
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment