Commit f24fecd4 authored by Brett Cannon's avatar Brett Cannon

Issue #18076: Introduce importlib.util.decode_source().

The helper function makes it easier to implement
importlib.abc.InspectLoader.get_source() by making that function
require just the raw bytes for source code and handling all other
details.
parent f4375ef4
...@@ -916,6 +916,14 @@ an :term:`importer`. ...@@ -916,6 +916,14 @@ an :term:`importer`.
.. versionadded:: 3.4 .. versionadded:: 3.4
.. function:: decode_source(source_bytes)
Decode the given bytes representing source code and return it as a string
with universal newlines (as required by
:meth:`importlib.abc.InspectLoader.get_source`).
.. versionadded:: 3.4
.. function:: resolve_name(name, package) .. function:: resolve_name(name, package)
Resolve a relative module name to an absolute one. Resolve a relative module name to an absolute one.
......
...@@ -723,6 +723,18 @@ def _code_to_bytecode(code, mtime=0, source_size=0): ...@@ -723,6 +723,18 @@ def _code_to_bytecode(code, mtime=0, source_size=0):
return data return data
def decode_source(source_bytes):
    """Turn the raw bytes of a source file into a string.

    The encoding is whatever tokenize.detect_encoding() reports for the
    bytes, and the decoded text is passed through a universal-newlines
    decoder before being returned.

    """
    # Imported locally rather than at module level to avoid bootstrap issues.
    import tokenize
    line_reader = _io.BytesIO(source_bytes).readline
    # detect_encoding() returns a tuple; only its first item (the encoding
    # name) is needed here.
    codec_name, _ = tokenize.detect_encoding(line_reader)
    text = source_bytes.decode(codec_name)
    return _io.IncrementalNewlineDecoder(None, True).decode(text)
# Loaders ##################################################################### # Loaders #####################################################################
class BuiltinImporter: class BuiltinImporter:
...@@ -965,11 +977,7 @@ class SourceLoader(_LoaderBasics): ...@@ -965,11 +977,7 @@ class SourceLoader(_LoaderBasics):
except OSError as exc: except OSError as exc:
raise ImportError("source not available through get_data()", raise ImportError("source not available through get_data()",
name=fullname) from exc name=fullname) from exc
import tokenize return decode_source(source_bytes)
readsource = _io.BytesIO(source_bytes).readline
encoding = tokenize.detect_encoding(readsource)
newline_decoder = _io.IncrementalNewlineDecoder(None, True)
return newline_decoder.decode(source_bytes.decode(encoding[0]))
def source_to_code(self, data, path, *, _optimize=-1): def source_to_code(self, data, path, *, _optimize=-1):
"""Return the code object compiled from source. """Return the code object compiled from source.
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
from ._bootstrap import MAGIC_NUMBER from ._bootstrap import MAGIC_NUMBER
from ._bootstrap import cache_from_source from ._bootstrap import cache_from_source
from ._bootstrap import decode_source
from ._bootstrap import module_to_load from ._bootstrap import module_to_load
from ._bootstrap import set_loader from ._bootstrap import set_loader
from ._bootstrap import set_package from ._bootstrap import set_package
......
...@@ -9,6 +9,27 @@ import unittest ...@@ -9,6 +9,27 @@ import unittest
import warnings import warnings
class DecodeSourceBytesTests(unittest.TestCase):

    """Tests for util.decode_source()."""

    # Contains a non-ASCII character so that its UTF-8 and Latin-1 encodings
    # are different byte sequences.
    source = "string ='ü'"

    def test_ut8_default(self):
        # UTF-8 encoded bytes with no encoding declaration round-trip.
        source_bytes = self.source.encode('utf-8')
        self.assertEqual(util.decode_source(source_bytes), self.source)

    def test_specified_encoding(self):
        # A coding declaration on the first line selects the encoding.
        source = '# coding=latin-1\n' + self.source
        source_bytes = source.encode('latin-1')
        # Precondition: the bytes must differ from the UTF-8 encoding, else
        # the test could pass without the declaration being honoured.  A
        # unittest assertion is used so the check is not stripped under -O.
        self.assertNotEqual(source_bytes, source.encode('utf-8'))
        self.assertEqual(util.decode_source(source_bytes), source)

    def test_universal_newlines(self):
        # '\r\n' line endings in the input come back as '\n'.
        source = '\r\n'.join([self.source, self.source])
        source_bytes = source.encode('utf-8')
        self.assertEqual(util.decode_source(source_bytes),
                         '\n'.join([self.source, self.source]))
class ModuleToLoadTests(unittest.TestCase): class ModuleToLoadTests(unittest.TestCase):
module_name = 'ModuleManagerTest_module' module_name = 'ModuleManagerTest_module'
......
...@@ -123,6 +123,8 @@ Core and Builtins ...@@ -123,6 +123,8 @@ Core and Builtins
Library Library
------- -------
- Issue #18076: Introduce importlib.util.decode_source().
- importlib.abc.SourceLoader.get_source() no longer changes SyntaxError or - importlib.abc.SourceLoader.get_source() no longer changes SyntaxError or
UnicodeDecodeError into ImportError. UnicodeDecodeError into ImportError.
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment