Commit c9abda0c authored by Victor Stinner

Issue #3080: get_sourcefile(), make_source_pathname(), load_package()

Use Unicode for module name and path in get_sourcefile(),
make_source_pathname() and load_package() functions.
parent d68c2cf2
...@@ -113,6 +113,8 @@ typedef unsigned short mode_t; ...@@ -113,6 +113,8 @@ typedef unsigned short mode_t;
#define MAGIC (3180 | ((long)'\r'<<16) | ((long)'\n'<<24)) #define MAGIC (3180 | ((long)'\r'<<16) | ((long)'\n'<<24))
#define TAG "cpython-32" #define TAG "cpython-32"
#define CACHEDIR "__pycache__" #define CACHEDIR "__pycache__"
static const Py_UNICODE CACHEDIR_UNICODE[] = {
'_', '_', 'p', 'y', 'c', 'a', 'c', 'h', 'e', '_', '_', '\0'};
/* Current magic word and string tag as globals. */ /* Current magic word and string tag as globals. */
static long pyc_magic = MAGIC; static long pyc_magic = MAGIC;
static const char *pyc_tag = TAG; static const char *pyc_tag = TAG;
...@@ -741,8 +743,8 @@ remove_module(PyObject *name) ...@@ -741,8 +743,8 @@ remove_module(PyObject *name)
"sys.modules failed"); "sys.modules failed");
} }
static PyObject * get_sourcefile(char *file); static PyObject * get_sourcefile(PyObject *filename);
static char *make_source_pathname(char *pathname, char *buf); static PyObject *make_source_pathname(PyObject *pathname);
static char *make_compiled_pathname(char *pathname, char *buf, size_t buflen, static char *make_compiled_pathname(char *pathname, char *buf, size_t buflen,
int debug); int debug);
...@@ -807,7 +809,6 @@ PyImport_ExecCodeModuleObject(PyObject *name, PyObject *co, PyObject *pathname, ...@@ -807,7 +809,6 @@ PyImport_ExecCodeModuleObject(PyObject *name, PyObject *co, PyObject *pathname,
{ {
PyObject *modules = PyImport_GetModuleDict(); PyObject *modules = PyImport_GetModuleDict();
PyObject *m, *d, *v; PyObject *m, *d, *v;
PyObject *pathbytes;
m = PyImport_AddModuleObject(name); m = PyImport_AddModuleObject(name);
if (m == NULL) if (m == NULL)
...@@ -822,12 +823,7 @@ PyImport_ExecCodeModuleObject(PyObject *name, PyObject *co, PyObject *pathname, ...@@ -822,12 +823,7 @@ PyImport_ExecCodeModuleObject(PyObject *name, PyObject *co, PyObject *pathname,
} }
/* Remember the filename as the __file__ attribute */ /* Remember the filename as the __file__ attribute */
if (pathname != NULL) { if (pathname != NULL) {
pathbytes = PyUnicode_EncodeFSDefault(pathname); v = get_sourcefile(pathname);
if (pathbytes != NULL) {
v = get_sourcefile(PyBytes_AS_STRING(pathbytes));
Py_DECREF(pathbytes);
} else
v = NULL;
if (v == NULL) if (v == NULL)
PyErr_Clear(); PyErr_Clear();
} }
...@@ -892,6 +888,27 @@ rightmost_sep(char *s) ...@@ -892,6 +888,27 @@ rightmost_sep(char *s)
} }
/* Py_UNICODE counterpart of strrchr(s, '/'): scan the NUL-terminated
   string s and return a pointer to its last path separator, or NULL
   when it contains none.  SEP always counts as a separator; ALTSEP
   counts as well on builds that define it.
*/
static Py_UNICODE*
rightmost_sep_unicode(Py_UNICODE *s)
{
    Py_UNICODE *last = NULL;
    Py_UNICODE *p;

    /* Walk the whole string so a later separator overrides an earlier one. */
    for (p = s; *p != 0; p++) {
#ifdef ALTSEP
        if (*p == SEP || *p == ALTSEP)
            last = p;
#else
        if (*p == SEP)
            last = p;
#endif
    }
    return last;
}
/* Given a pathname for a Python source file, fill a buffer with the /* Given a pathname for a Python source file, fill a buffer with the
pathname for the corresponding compiled file. Return the pathname pathname for the corresponding compiled file. Return the pathname
for the compiled file, or NULL if there's no space in the buffer. for the compiled file, or NULL if there's no space in the buffer.
...@@ -1005,42 +1022,50 @@ make_compiled_pathname(char *pathname, char *buf, size_t buflen, int debug) ...@@ -1005,42 +1022,50 @@ make_compiled_pathname(char *pathname, char *buf, size_t buflen, int debug)
source file, if the path matches the PEP 3147 format. This does not check source file, if the path matches the PEP 3147 format. This does not check
for any file existence, however, if the pyc file name does not match PEP for any file existence, however, if the pyc file name does not match PEP
3147 style, NULL is returned. buf must be at least as big as pathname; 3147 style, NULL is returned. buf must be at least as big as pathname;
the resulting path will always be shorter. */ the resulting path will always be shorter.
static char * (...)/__pycache__/foo.<tag>.pyc -> (...)/foo.py */
make_source_pathname(char *pathname, char *buf)
static PyObject*
make_source_pathname(PyObject *pathobj)
{ {
/* __pycache__/foo.<tag>.pyc -> foo.py */ Py_UNICODE buf[MAXPATHLEN];
Py_UNICODE *pathname;
Py_UNICODE *left, *right, *dot0, *dot1, sep;
size_t i, j; size_t i, j;
char *left, *right, *dot0, *dot1, sep;
if (PyUnicode_GET_SIZE(pathobj) > MAXPATHLEN)
return NULL;
pathname = PyUnicode_AS_UNICODE(pathobj);
/* Look back two slashes from the end. In between these two slashes /* Look back two slashes from the end. In between these two slashes
must be the string __pycache__ or this is not a PEP 3147 style must be the string __pycache__ or this is not a PEP 3147 style
path. It's possible for there to be only one slash. path. It's possible for there to be only one slash.
*/ */
if ((right = rightmost_sep(pathname)) == NULL) right = rightmost_sep_unicode(pathname);
if (right == NULL)
return NULL; return NULL;
sep = *right; sep = *right;
*right = '\0'; *right = '\0';
left = rightmost_sep(pathname); left = rightmost_sep_unicode(pathname);
*right = sep; *right = sep;
if (left == NULL) if (left == NULL)
left = pathname; left = pathname;
else else
left++; left++;
if (right-left != strlen(CACHEDIR) || if (right-left != Py_UNICODE_strlen(CACHEDIR_UNICODE) ||
strncmp(left, CACHEDIR, right-left) != 0) Py_UNICODE_strncmp(left, CACHEDIR_UNICODE, right-left) != 0)
return NULL; return NULL;
/* Now verify that the path component to the right of the last slash /* Now verify that the path component to the right of the last slash
has two dots in it. has two dots in it.
*/ */
if ((dot0 = strchr(right + 1, '.')) == NULL) if ((dot0 = Py_UNICODE_strchr(right + 1, '.')) == NULL)
return NULL; return NULL;
if ((dot1 = strchr(dot0 + 1, '.')) == NULL) if ((dot1 = Py_UNICODE_strchr(dot0 + 1, '.')) == NULL)
return NULL; return NULL;
/* Too many dots? */ /* Too many dots? */
if (strchr(dot1 + 1, '.') != NULL) if (Py_UNICODE_strchr(dot1 + 1, '.') != NULL)
return NULL; return NULL;
/* This is a PEP 3147 path. Start by copying everything from the /* This is a PEP 3147 path. Start by copying everything from the
...@@ -1048,10 +1073,11 @@ make_source_pathname(char *pathname, char *buf) ...@@ -1048,10 +1073,11 @@ make_source_pathname(char *pathname, char *buf)
copy the file's basename, removing the magic tag and adding a .py copy the file's basename, removing the magic tag and adding a .py
suffix. suffix.
*/ */
strncpy(buf, pathname, (i=left-pathname)); Py_UNICODE_strncpy(buf, pathname, (i=left-pathname));
strncpy(buf+i, right+1, (j=dot0-right)); Py_UNICODE_strncpy(buf+i, right+1, (j=dot0-right));
strcpy(buf+i+j, "py"); buf[i+j] = 'p';
return buf; buf[i+j+1] = 'y';
return PyUnicode_FromUnicode(buf, i+j+2);
} }
/* Given a pathname for a Python source file, its time of last /* Given a pathname for a Python source file, its time of last
...@@ -1390,40 +1416,47 @@ load_source_module(char *name, char *pathname, FILE *fp) ...@@ -1390,40 +1416,47 @@ load_source_module(char *name, char *pathname, FILE *fp)
* Returns the path to the py file if available, else the given path * Returns the path to the py file if available, else the given path
*/ */
static PyObject * static PyObject *
get_sourcefile(char *file) get_sourcefile(PyObject *filename)
{ {
char py[MAXPATHLEN + 1];
Py_ssize_t len; Py_ssize_t len;
PyObject *u; Py_UNICODE *fileuni;
PyObject *py;
struct stat statbuf; struct stat statbuf;
if (!file || !*file) { len = PyUnicode_GET_SIZE(filename);
if (len == 0)
Py_RETURN_NONE; Py_RETURN_NONE;
}
len = strlen(file); /* don't match *.pyc or *.pyo? */
/* match '*.py?' */ fileuni = PyUnicode_AS_UNICODE(filename);
if (len > MAXPATHLEN || PyOS_strnicmp(&file[len-4], ".py", 3) != 0) { if (len < 5
return PyUnicode_DecodeFSDefault(file); || fileuni[len-4] != '.'
} || (fileuni[len-3] != 'p' && fileuni[len-3] != 'P')
|| (fileuni[len-2] != 'y' && fileuni[len-2] != 'Y'))
goto unchanged;
/* Start by trying to turn PEP 3147 path into source path. If that /* Start by trying to turn PEP 3147 path into source path. If that
* fails, just chop off the trailing character, i.e. legacy pyc path * fails, just chop off the trailing character, i.e. legacy pyc path
* to py. * to py.
*/ */
if (make_source_pathname(file, py) == NULL) { py = make_source_pathname(filename);
strncpy(py, file, len-1); if (py == NULL) {
py[len-1] = '\0'; PyErr_Clear();
py = PyUnicode_FromUnicode(fileuni, len - 1);
} }
if (py == NULL)
goto error;
if (stat(py, &statbuf) == 0 && if (_Py_stat(py, &statbuf) == 0 && S_ISREG(statbuf.st_mode))
S_ISREG(statbuf.st_mode)) { return py;
u = PyUnicode_DecodeFSDefault(py); Py_DECREF(py);
} goto unchanged;
else {
u = PyUnicode_DecodeFSDefault(file); error:
} PyErr_Clear();
return u; unchanged:
Py_INCREF(filename);
return filename;
} }
/* Forward */ /* Forward */
...@@ -1436,54 +1469,56 @@ static struct _frozen * find_frozen(PyObject *); ...@@ -1436,54 +1469,56 @@ static struct _frozen * find_frozen(PyObject *);
REFERENCE COUNT */ REFERENCE COUNT */
static PyObject * static PyObject *
load_package(char *name, char *pathname) load_package(PyObject *name, PyObject *pathname)
{ {
PyObject *m, *d; PyObject *m, *d;
PyObject *file = NULL; PyObject *file = NULL, *path_list = NULL;
PyObject *path = NULL;
int err; int err;
char buf[MAXPATHLEN+1]; char buf[MAXPATHLEN+1];
FILE *fp; FILE *fp = NULL;
struct filedescr *fdp; struct filedescr *fdp;
char *namestr;
m = PyImport_AddModule(name); m = PyImport_AddModuleObject(name);
if (m == NULL) if (m == NULL)
return NULL; return NULL;
if (Py_VerboseFlag) if (Py_VerboseFlag)
PySys_WriteStderr("import %s # directory %s\n", PySys_FormatStderr("import %U # directory %U\n",
name, pathname); name, pathname);
d = PyModule_GetDict(m);
file = get_sourcefile(pathname); file = get_sourcefile(pathname);
if (file == NULL) if (file == NULL)
goto error; return NULL;
path = Py_BuildValue("[O]", file); path_list = Py_BuildValue("[O]", file);
if (path == NULL) if (path_list == NULL) {
goto error; Py_DECREF(file);
return NULL;
}
d = PyModule_GetDict(m);
err = PyDict_SetItemString(d, "__file__", file); err = PyDict_SetItemString(d, "__file__", file);
Py_DECREF(file);
if (err == 0) if (err == 0)
err = PyDict_SetItemString(d, "__path__", path); err = PyDict_SetItemString(d, "__path__", path_list);
if (err != 0) if (err != 0) {
Py_DECREF(path_list);
return NULL;
}
namestr = _PyUnicode_AsString(name);
if (namestr == NULL)
goto error; goto error;
fdp = find_module(name, "__init__", path, buf, sizeof(buf), &fp, NULL); fdp = find_module(namestr, "__init__", path_list, buf, sizeof(buf), &fp, NULL);
Py_DECREF(path_list);
if (fdp == NULL) { if (fdp == NULL) {
if (PyErr_ExceptionMatches(PyExc_ImportError)) { if (PyErr_ExceptionMatches(PyExc_ImportError)) {
PyErr_Clear(); PyErr_Clear();
Py_INCREF(m); Py_INCREF(m);
return m;
} }
else else
m = NULL; return NULL;
goto cleanup;
} }
m = load_module(name, fp, buf, fdp->type, NULL); m = load_module(namestr, fp, buf, fdp->type, NULL);
if (fp != NULL) if (fp != NULL)
fclose(fp); fclose(fp);
goto cleanup;
error:
m = NULL;
cleanup:
Py_XDECREF(path);
Py_XDECREF(file);
return m; return m;
} }
...@@ -2282,9 +2317,21 @@ load_module(char *name, FILE *fp, char *pathname, int type, PyObject *loader) ...@@ -2282,9 +2317,21 @@ load_module(char *name, FILE *fp, char *pathname, int type, PyObject *loader)
} }
#endif #endif
case PKG_DIRECTORY: case PKG_DIRECTORY: {
m = load_package(name, pathname); PyObject *nameobj, *pathobj;
nameobj = PyUnicode_FromString(name);
if (nameobj == NULL)
return NULL;
pathobj = PyUnicode_DecodeFSDefault(pathname);
if (pathobj == NULL) {
Py_DECREF(nameobj);
return NULL;
}
m = load_package(nameobj, pathobj);
Py_DECREF(nameobj);
Py_DECREF(pathobj);
break; break;
}
case C_BUILTIN: case C_BUILTIN:
case PY_FROZEN: { case PY_FROZEN: {
...@@ -3637,13 +3684,12 @@ imp_load_module(PyObject *self, PyObject *args) ...@@ -3637,13 +3684,12 @@ imp_load_module(PyObject *self, PyObject *args)
static PyObject * static PyObject *
imp_load_package(PyObject *self, PyObject *args) imp_load_package(PyObject *self, PyObject *args)
{ {
char *name; PyObject *name, *pathname;
PyObject *pathname;
PyObject * ret; PyObject * ret;
if (!PyArg_ParseTuple(args, "sO&:load_package", if (!PyArg_ParseTuple(args, "UO&:load_package",
&name, PyUnicode_FSConverter, &pathname)) &name, PyUnicode_FSDecoder, &pathname))
return NULL; return NULL;
ret = load_package(name, PyBytes_AS_STRING(pathname)); ret = load_package(name, pathname);
Py_DECREF(pathname); Py_DECREF(pathname);
return ret; return ret;
} }
...@@ -3716,25 +3762,22 @@ static PyObject * ...@@ -3716,25 +3762,22 @@ static PyObject *
imp_source_from_cache(PyObject *self, PyObject *args, PyObject *kws) imp_source_from_cache(PyObject *self, PyObject *args, PyObject *kws)
{ {
static char *kwlist[] = {"path", NULL}; static char *kwlist[] = {"path", NULL};
PyObject *pathname, *source;
PyObject *pathname_obj;
char *pathname;
char buf[MAXPATHLEN+1];
if (!PyArg_ParseTupleAndKeywords( if (!PyArg_ParseTupleAndKeywords(
args, kws, "O&", kwlist, args, kws, "O&", kwlist,
PyUnicode_FSConverter, &pathname_obj)) PyUnicode_FSDecoder, &pathname))
return NULL; return NULL;
pathname = PyBytes_AS_STRING(pathname_obj); source = make_source_pathname(pathname);
if (make_source_pathname(pathname, buf) == NULL) { if (source == NULL) {
PyErr_Format(PyExc_ValueError, "Not a PEP 3147 pyc path: %s", PyErr_Format(PyExc_ValueError, "Not a PEP 3147 pyc path: %R",
pathname); pathname);
Py_DECREF(pathname_obj); Py_DECREF(pathname);
return NULL; return NULL;
} }
Py_DECREF(pathname_obj); Py_DECREF(pathname);
return PyUnicode_FromString(buf); return source;
} }
PyDoc_STRVAR(doc_source_from_cache, PyDoc_STRVAR(doc_source_from_cache,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment