Merge

112c6f16 · Antoine Pitrou · bd8e7e88 · ac6feb68 · 112c6f16 · 112c6f16
Commit 112c6f16 authored Jul 13, 2012 by Antoine Pitrou
7 changed files
--- a/Doc/c-api/import.rst
+++ b/Doc/c-api/import.rst
@@ -163,9 +163,14 @@ Importing Modules
 .. c:function:: PyObject* PyImport_ExecCodeModuleWithPathnames(char *name, PyObject *co, char *pathname, char *cpathname)

   Like :c:func:`PyImport_ExecCodeModuleObject`, but *name*, *pathname* and
-   *cpathname* are UTF-8 encoded strings.
+   *cpathname* are UTF-8 encoded strings. Attempts are also made to figure out
+   what the value for *pathname* should be from *cpathname* if the former is
+   set to ``NULL``.

   .. versionadded:: 3.2
+   .. versionchanged:: 3.3
+      Uses :func:`imp.source_from_cache()` in calculating the source path if
+      only the bytecode path is provided.


 .. c:function:: long PyImport_GetMagicNumber()

--- a/Lib/imp.py
+++ b/Lib/imp.py
@@ -13,7 +13,7 @@ from _imp import (lock_held, acquire_lock, release_lock,

 # Directly exposed by this module
 from importlib._bootstrap import new_module
-from importlib._bootstrap import cache_from_source
+from importlib._bootstrap import cache_from_source, source_from_cache


 from importlib import _bootstrap
@@ -58,29 +58,6 @@ def get_suffixes():
    return extensions + source + bytecode


-def source_from_cache(path):
-    """Given the path to a .pyc./.pyo file, return the path to its .py file.
-
-    The .pyc/.pyo file does not need to exist; this simply returns the path to
-    the .py file calculated to correspond to the .pyc/.pyo file.  If path does
-    not conform to PEP 3147 format, ValueError will be raised. If
-    sys.implementation.cache_tag is None then NotImplementedError is raised.
-
-    """
-    if sys.implementation.cache_tag is None:
-        raise NotImplementedError('sys.implementation.cache_tag is None')
-    head, pycache_filename = os.path.split(path)
-    head, pycache = os.path.split(head)
-    if pycache != _bootstrap._PYCACHE:
-        raise ValueError('{} not bottom-level directory in '
-                         '{!r}'.format(_bootstrap._PYCACHE, path))
-    if pycache_filename.count('.') != 2:
-        raise ValueError('expected only 2 dots in '
-                         '{!r}'.format(pycache_filename))
-    base_filename = pycache_filename.partition('.')[0]
-    return os.path.join(head, base_filename + machinery.SOURCE_SUFFIXES[0])
-
-
 class NullImporter:

    """Null import object."""

--- a/Lib/importlib/_bootstrap.py
+++ b/Lib/importlib/_bootstrap.py
@@ -428,6 +428,50 @@ def cache_from_source(path, debug_override=None):
    return _path_join(head, _PYCACHE, filename)


+def source_from_cache(path):
+    """Given the path to a .pyc/.pyo file, return the path to its .py file.
+
+    The .pyc/.pyo file does not need to exist; this simply returns the path to
+    the .py file calculated to correspond to the .pyc/.pyo file.  If path does
+    not conform to PEP 3147 format, ValueError will be raised. If
+    sys.implementation.cache_tag is None then NotImplementedError is raised.
+
+    """
+    if sys.implementation.cache_tag is None:
+        raise NotImplementedError('sys.implementation.cache_tag is None')
+    head, pycache_filename = _path_split(path)
+    head, pycache = _path_split(head)
+    if pycache != _PYCACHE:
+        raise ValueError('{} not bottom-level directory in '
+                         '{!r}'.format(_PYCACHE, path))
+    if pycache_filename.count('.') != 2:
+        raise ValueError('expected only 2 dots in '
+                         '{!r}'.format(pycache_filename))
+    base_filename = pycache_filename.partition('.')[0]
+    return _path_join(head, base_filename + SOURCE_SUFFIXES[0])
+
+
+def _get_sourcefile(bytecode_path):
+    """Convert a bytecode file path to a source path (if possible).
+
+    Exists purely for backwards-compatibility with
+    PyImport_ExecCodeModuleWithPathnames() in the C API.  Returns None for an
+    empty path; otherwise the source path if it is a file, else bytecode_path.
+    """
+    if len(bytecode_path) == 0:
+        return None
+    rest, _, extension = bytecode_path.rpartition('.')
+    if not rest or extension[:-1].lower() != 'py':
+        return bytecode_path
+    # Try the PEP 3147 layout first, then fall back to the legacy layout in
+    # which dropping the final character turns 'x.pyc'/'x.pyo' into 'x.py'.
+    try:
+        source_path = source_from_cache(bytecode_path)
+    except (NotImplementedError, ValueError):
+        source_path = bytecode_path[:-1]
+    return source_path if _path_isfile(source_path) else bytecode_path
+
+
 def _verbose_message(message, *args):
    """Print the message to stderr if -v/PYTHONVERBOSE is turned on."""
    if sys.flags.verbose:

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -92,6 +92,15 @@ Library
 - Issue 10924: Fixed mksalt() to use a RNG that is suitable for cryptographic
  purpose.

+C API
+-----
+
+- Issues #15169, #14599: Strip out the C implementation of
+  imp.source_from_cache() used by PyImport_ExecCodeModuleWithPathnames() and
+  use the Python code instead. As a result, PyImport_ExecCodeModuleObject() no
+  longer tries to infer the source path from the bytecode path the way
+  PyImport_ExecCodeModuleWithPathnames() does.
+
 Extension Modules
 -----------------


--- a/Python/import.c
+++ b/Python/import.c
@@ -630,8 +630,6 @@ remove_module(PyObject *name)
                      "sys.modules failed");
 }

-static PyObject * get_sourcefile(PyObject *filename);
-static PyObject *make_source_pathname(PyObject *pathname);

 /* Execute a code object in a module and return the module object
 * WITH INCREMENTED REFERENCE COUNT.  If an error occurs, name is
@@ -668,18 +666,37 @@ PyImport_ExecCodeModuleWithPathnames(char *name, PyObject *co, char *pathname,
    if (nameobj == NULL)
        return NULL;

-    if (pathname != NULL) {
-        pathobj = PyUnicode_DecodeFSDefault(pathname);
-        if (pathobj == NULL)
-            goto error;
-    } else
-        pathobj = NULL;
    if (cpathname != NULL) {
        cpathobj = PyUnicode_DecodeFSDefault(cpathname);
        if (cpathobj == NULL)
            goto error;
-    } else
+    }
+    else
        cpathobj = NULL;
+
+    if (pathname != NULL) {
+        pathobj = PyUnicode_DecodeFSDefault(pathname);
+        if (pathobj == NULL)
+            goto error;
+    }
+    else if (cpathobj != NULL) {
+        PyInterpreterState *interp = PyThreadState_GET()->interp;
+        _Py_IDENTIFIER(_get_sourcefile);
+
+        if (interp == NULL) {
+            Py_FatalError("PyImport_ExecCodeModuleWithPathnames: "
+                          "no interpreter!");
+        }
+
+        pathobj = _PyObject_CallMethodObjIdArgs(interp->importlib,
+                                                &PyId__get_sourcefile, cpathobj,
+                                                NULL);
+        if (pathobj == NULL)
+            PyErr_Clear();
+    }
+    else
+        pathobj = NULL;
+
    m = PyImport_ExecCodeModuleObject(nameobj, co, pathobj, cpathobj);
 error:
    Py_DECREF(nameobj);
@@ -706,18 +723,13 @@ PyImport_ExecCodeModuleObject(PyObject *name, PyObject *co, PyObject *pathname,
                                 PyEval_GetBuiltins()) != 0)
            goto error;
    }
-    /* Remember the filename as the __file__ attribute */
    if (pathname != NULL) {
-        v = get_sourcefile(pathname);
-        if (v == NULL)
-            PyErr_Clear();
+        v = pathname;
    }
-    else
-        v = NULL;
-    if (v == NULL) {
+    else {
        v = ((PyCodeObject *)co)->co_filename;
-        Py_INCREF(v);
    }
+    Py_INCREF(v);
    if (PyDict_SetItemString(d, "__file__", v) != 0)
        PyErr_Clear(); /* Not important enough to report */
    Py_DECREF(v);
@@ -752,100 +764,6 @@ PyImport_ExecCodeModuleObject(PyObject *name, PyObject *co, PyObject *pathname,
 }


-/* Like rightmost_sep, but operate on unicode objects. */
-static Py_ssize_t
-rightmost_sep_obj(PyObject* o, Py_ssize_t start, Py_ssize_t end)
-{
-    Py_ssize_t found, i;
-    Py_UCS4 c;
-    for (found = -1, i = start; i < end; i++) {
-        c = PyUnicode_READ_CHAR(o, i);
-        if (c == SEP
-#ifdef ALTSEP
-            || c == ALTSEP
-#endif
-            )
-        {
-            found = i;
-        }
-    }
-    return found;
-}
-
-
-/* Given a pathname to a Python byte compiled file, return the path to the
-   source file, if the path matches the PEP 3147 format.  This does not check
-   for any file existence, however, if the pyc file name does not match PEP
-   3147 style, NULL is returned.  buf must be at least as big as pathname;
-   the resulting path will always be shorter.
-
-   (...)/__pycache__/foo.<tag>.pyc -> (...)/foo.py */
-
-static PyObject*
-make_source_pathname(PyObject *path)
-{
-    Py_ssize_t left, right, dot0, dot1, len;
-    Py_ssize_t i, j;
-    PyObject *result;
-    int kind;
-    void *data;
-
-    len = PyUnicode_GET_LENGTH(path);
-    if (len > MAXPATHLEN)
-        return NULL;
-
-    /* Look back two slashes from the end.  In between these two slashes
-       must be the string __pycache__ or this is not a PEP 3147 style
-       path.  It's possible for there to be only one slash.
-    */
-    right = rightmost_sep_obj(path, 0, len);
-    if (right == -1)
-        return NULL;
-    left = rightmost_sep_obj(path, 0, right);
-    if (left == -1)
-        left = 0;
-    else
-        left++;
-    if (right-left !=  sizeof(CACHEDIR)-1)
-        return NULL;
-    for (i = 0; i < sizeof(CACHEDIR)-1; i++)
-        if (PyUnicode_READ_CHAR(path, left+i) != CACHEDIR[i])
-            return NULL;
-
-    /* Now verify that the path component to the right of the last slash
-       has two dots in it.
-    */
-    dot0 = PyUnicode_FindChar(path, '.', right+1, len, 1);
-    if (dot0 < 0)
-        return NULL;
-    dot1 = PyUnicode_FindChar(path, '.', dot0+1, len, 1);
-    if (dot1 < 0)
-        return NULL;
-    /* Too many dots? */
-    if (PyUnicode_FindChar(path, '.', dot1+1, len, 1) != -1)
-        return NULL;
-
-    /* This is a PEP 3147 path.  Start by copying everything from the
-       start of pathname up to and including the leftmost slash.  Then
-       copy the file's basename, removing the magic tag and adding a .py
-       suffix.
-    */
-    result = PyUnicode_New(left + (dot0-right) + 2,
-                           PyUnicode_MAX_CHAR_VALUE(path));
-    if (!result)
-        return NULL;
-    kind = PyUnicode_KIND(result);
-    data = PyUnicode_DATA(result);
-    PyUnicode_CopyCharacters(result, 0, path, 0, (i = left));
-    PyUnicode_CopyCharacters(result, left, path, right+1,
-                             (j = dot0-right));
-    PyUnicode_WRITE(kind, data, i+j,   'p');
-    PyUnicode_WRITE(kind, data, i+j+1, 'y');
-    assert(_PyUnicode_CheckConsistency(result, 1));
-    return result;
-}
-
-
 static void
 update_code_filenames(PyCodeObject *co, PyObject *oldname, PyObject *newname)
 {
@@ -911,61 +829,6 @@ imp_fix_co_filename(PyObject *self, PyObject *args)
 }


-/* Get source file -> unicode or None
- * Returns the path to the py file if available, else the given path
- */
-static PyObject *
-get_sourcefile(PyObject *filename)
-{
-    Py_ssize_t len;
-    PyObject *py;
-    struct stat statbuf;
-    int err;
-    void *data;
-    unsigned int kind;
-
-    len = PyUnicode_GET_LENGTH(filename);
-    if (len == 0)
-        Py_RETURN_NONE;
-
-    /* don't match *.pyc or *.pyo? */
-    data = PyUnicode_DATA(filename);
-    kind = PyUnicode_KIND(filename);
-    if (len < 5
-        || PyUnicode_READ(kind, data, len-4) != '.'
-        || (PyUnicode_READ(kind, data, len-3) != 'p'
-            && PyUnicode_READ(kind, data, len-3) != 'P')
-        || (PyUnicode_READ(kind, data, len-2) != 'y'
-            && PyUnicode_READ(kind, data, len-2) != 'Y'))
-        goto unchanged;
-
-    /* Start by trying to turn PEP 3147 path into source path.  If that
-     * fails, just chop off the trailing character, i.e. legacy pyc path
-     * to py.
-     */
-    py = make_source_pathname(filename);
-    if (py == NULL) {
-        PyErr_Clear();
-        py = PyUnicode_Substring(filename, 0, len - 1);
-    }
-    if (py == NULL)
-        goto error;
-
-    err = _Py_stat(py, &statbuf);
-    if (err == -2)
-        goto error;
-    if (err == 0 && S_ISREG(statbuf.st_mode))
-        return py;
-    Py_DECREF(py);
-    goto unchanged;
-
-error:
-    PyErr_Clear();
-unchanged:
-    Py_INCREF(filename);
-    return filename;
-}
-
 /* Forward */
 static struct _frozen * find_frozen(PyObject *);


--- a/Python/importlib.h
+++ b/Python/importlib.h
--- a/Tools/scripts/highlight.py
+++ b/Tools/scripts/highlight.py
@@ -4,12 +4,16 @@
 __author__ = 'Raymond Hettinger'

 import keyword, tokenize, cgi, re, functools
+try:
+    import builtins
+except ImportError:
+    import __builtin__ as builtins

 #### Analyze Python Source #################################

 def is_builtin(s):
    'Return True if s is the name of a builtin'
-    return hasattr(__builtins__, s)
+    return hasattr(builtins, s)

 def combine_range(lines, start, end):
    'Join content from a range of lines between start and end'
@@ -21,9 +25,7 @@ def combine_range(lines, start, end):

 def analyze_python(source):
    '''Generate and classify chunks of Python for syntax highlighting.
-       Yields tuples in the form: (leadin_text, category, categorized_text).
-       The final tuple has empty strings for the category and categorized text.
-
+       Yields tuples in the form: (category, categorized_text).
    '''
    lines = source.splitlines(True)
    lines.append('')
@@ -37,7 +39,7 @@ def analyze_python(source):
        kind = ''
        if tok_type == tokenize.COMMENT:
            kind = 'comment'
-        elif tok_type == tokenize.OP and tok_str[:1] not in '{}[](),.:;':
+        elif tok_type == tokenize.OP and tok_str[:1] not in '{}[](),.:;@':
            kind = 'operator'
        elif tok_type == tokenize.STRING:
            kind = 'string'
@@ -53,22 +55,20 @@ def analyze_python(source):
            elif is_builtin(tok_str) and prev_tok_str != '.':
                kind = 'builtin'
        if kind:
-            line_upto_token, written = combine_range(lines, written, (srow, scol))
-            line_thru_token, written = combine_range(lines, written, (erow, ecol))
-            yield line_upto_token, kind, line_thru_token
+            text, written = combine_range(lines, written, (srow, scol))
+            yield '', text
+            text, written = combine_range(lines, written, (erow, ecol))
+            yield kind, text
    line_upto_token, written = combine_range(lines, written, (erow, ecol))
-    yield line_upto_token, '', ''
+    yield '', line_upto_token

 #### Raw Output  ###########################################

 def raw_highlight(classified_text):
    'Straight text display of text classifications'
    result = []
-    for line_upto_token, kind, line_thru_token in classified_text:
-        if line_upto_token:
-            result.append('          plain:  %r\n' % line_upto_token)
-        if line_thru_token:
-            result.append('%15s:  %r\n' % (kind, line_thru_token))
+    for kind, text in classified_text:
+        result.append('%15s:  %r\n' % (kind or 'plain', text))
    return ''.join(result)

 #### ANSI Output ###########################################
@@ -88,9 +88,9 @@ def ansi_highlight(classified_text, colors=default_ansi):
    'Add syntax highlighting to source code using ANSI escape sequences'
    # http://en.wikipedia.org/wiki/ANSI_escape_code
    result = []
-    for line_upto_token, kind, line_thru_token in classified_text:
+    for kind, text in classified_text:
        opener, closer = colors.get(kind, ('', ''))
-        result += [line_upto_token, opener, line_thru_token, closer]
+        result += [opener, text, closer]
    return ''.join(result)

 #### HTML Output ###########################################
@@ -98,16 +98,13 @@ def ansi_highlight(classified_text, colors=default_ansi):
 def html_highlight(classified_text,opener='<pre class="python">\n', closer='</pre>\n'):
    'Convert classified text to an HTML fragment'
    result = [opener]
-    for line_upto_token, kind, line_thru_token in classified_text:
+    for kind, text in classified_text:
+        if kind:
+            result.append('<span class="%s">' % kind)
+        result.append(cgi.escape(text))
        if kind:
-            result += [cgi.escape(line_upto_token),
-                       '<span class="%s">' % kind,
-                       cgi.escape(line_thru_token),
-                       '</span>']
-        else:
-            result += [cgi.escape(line_upto_token),
-                       cgi.escape(line_thru_token)]
-    result += [closer]
+            result.append('</span>')
+    result.append(closer)
    return ''.join(result)

 default_css = {
@@ -188,15 +185,12 @@ def latex_highlight(classified_text, title = 'python',
                    document = default_latex_document):
    'Create a complete LaTeX document with colorized source code'
    result = []
-    for line_upto_token, kind, line_thru_token in classified_text:
+    for kind, text in classified_text:
+        if kind:
+            result.append(r'{\color{%s}' % colors[kind])
+        result.append(latex_escape(text))
        if kind:
-            result += [latex_escape(line_upto_token),
-                       r'{\color{%s}' % colors[kind],
-                       latex_escape(line_thru_token),
-                       '}']
-        else:
-            result += [latex_escape(line_upto_token),
-                       latex_escape(line_thru_token)]
+            result.append('}')
    return default_latex_document % dict(title=title, body=''.join(result))