bpo-45324: Capture data in FrozenImporter.find_spec() to use in exec_module(). (gh-28633)

Before this change we end up duplicating effort and throwing away data in FrozenImporter.find_spec().  Now we do the work once in find_spec() and the only thing we do in FrozenImporter.exec_module() is turn the raw frozen data into a code object and then exec it.

We've added _imp.find_frozen(), add an arg to _imp.get_frozen_object(), and updated FrozenImporter.  We've also moved some code around to reduce duplication, get a little more consistency in outcomes, and be more efficient.

Note that this change is mostly necessary if we want to set __file__ on frozen stdlib modules. (See https://bugs.python.org/issue21736.)

https://bugs.python.org/issue45324
This commit is contained in:
Eric Snow 2021-10-05 10:01:27 -06:00 committed by GitHub
parent b9bb74871b
commit c3d9ac8b34
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 331 additions and 120 deletions

View File

@ -826,10 +826,15 @@ def module_repr(m):
@classmethod @classmethod
def find_spec(cls, fullname, path=None, target=None): def find_spec(cls, fullname, path=None, target=None):
if _imp.is_frozen(fullname): info = _call_with_frames_removed(_imp.find_frozen, fullname)
return spec_from_loader(fullname, cls, origin=cls._ORIGIN) if info is None:
else:
return None return None
data, ispkg = info
spec = spec_from_loader(fullname, cls,
origin=cls._ORIGIN,
is_package=ispkg)
spec.loader_state = data
return spec
@classmethod @classmethod
def find_module(cls, fullname, path=None): def find_module(cls, fullname, path=None):
@ -849,11 +854,22 @@ def create_module(spec):
@staticmethod @staticmethod
def exec_module(module): def exec_module(module):
name = module.__spec__.name spec = module.__spec__
if not _imp.is_frozen(name): name = spec.name
raise ImportError('{!r} is not a frozen module'.format(name), try:
name=name) data = spec.loader_state
code = _call_with_frames_removed(_imp.get_frozen_object, name) except AttributeError:
if not _imp.is_frozen(name):
raise ImportError('{!r} is not a frozen module'.format(name),
name=name)
data = None
else:
# We clear the extra data we got from the finder, to save memory.
# Note that if this method is called again (e.g. by
# importlib.reload()) then _imp.get_frozen_object() will notice
# no data was provided and will look it up.
spec.loader_state = None
code = _call_with_frames_removed(_imp.get_frozen_object, name, data)
exec(code, module.__dict__) exec(code, module.__dict__)
@classmethod @classmethod

View File

@ -1,13 +1,15 @@
from .. import abc from .. import abc
import os.path
from .. import util from .. import util
machinery = util.import_importlib('importlib.machinery') machinery = util.import_importlib('importlib.machinery')
import _imp
import marshal
import os.path
import unittest import unittest
import warnings import warnings
from test.support import import_helper from test.support import import_helper, REPO_ROOT
class FindSpecTests(abc.FinderTests): class FindSpecTests(abc.FinderTests):
@ -19,39 +21,67 @@ def find(self, name, **kwargs):
with import_helper.frozen_modules(): with import_helper.frozen_modules():
return finder.find_spec(name, **kwargs) return finder.find_spec(name, **kwargs)
def check(self, spec, name): def check_basic(self, spec, name, ispkg=False):
self.assertEqual(spec.name, name) self.assertEqual(spec.name, name)
self.assertIs(spec.loader, self.machinery.FrozenImporter) self.assertIs(spec.loader, self.machinery.FrozenImporter)
self.assertEqual(spec.origin, 'frozen') self.assertEqual(spec.origin, 'frozen')
self.assertFalse(spec.has_location) self.assertFalse(spec.has_location)
if ispkg:
self.assertIsNotNone(spec.submodule_search_locations)
else:
self.assertIsNone(spec.submodule_search_locations)
self.assertIsNotNone(spec.loader_state)
def check_search_location(self, spec, source=None):
# Frozen packages do not have any path entries.
# (See https://bugs.python.org/issue21736.)
expected = []
self.assertListEqual(spec.submodule_search_locations, expected)
def check_data(self, spec, source=None, ispkg=None):
with import_helper.frozen_modules():
expected = _imp.get_frozen_object(spec.name)
data = spec.loader_state
# We can't compare the marshaled data directly because
# marshal.dumps() would mark "expected" as a ref, which slightly
# changes the output. (See https://bugs.python.org/issue34093.)
code = marshal.loads(data)
self.assertEqual(code, expected)
def test_module(self): def test_module(self):
names = [ modules = {
'__hello__', '__hello__': None,
'__hello_alias__', '__phello__.__init__': None,
'__hello_only__', '__phello__.spam': None,
'__phello__.__init__', '__phello__.ham.__init__': None,
'__phello__.spam', '__phello__.ham.eggs': None,
'__phello__.ham.__init__', '__hello_alias__': '__hello__',
'__phello__.ham.eggs', }
] for name, source in modules.items():
for name in names:
with self.subTest(name): with self.subTest(name):
spec = self.find(name) spec = self.find(name)
self.check(spec, name) self.check_basic(spec, name)
self.assertEqual(spec.submodule_search_locations, None) self.check_data(spec, source)
def test_package(self): def test_package(self):
names = [ modules = {
'__phello__', '__phello__': None,
'__phello__.ham', '__phello__.ham': None,
'__phello_alias__', '__phello_alias__': '__hello__',
] }
for name in names: for name, source in modules.items():
with self.subTest(name): with self.subTest(name):
spec = self.find(name) spec = self.find(name)
self.check(spec, name) self.check_basic(spec, name, ispkg=True)
self.assertEqual(spec.submodule_search_locations, []) self.check_search_location(spec, source)
self.check_data(spec, source, ispkg=True)
def test_frozen_only(self):
name = '__hello_only__'
source = os.path.join(REPO_ROOT, 'Tools', 'freeze', 'flag.py')
spec = self.find(name)
self.check_basic(spec, name)
self.check_data(spec, source)
# These are covered by test_module() and test_package(). # These are covered by test_module() and test_package().
test_module_in_package = None test_module_in_package = None

View File

@ -4,7 +4,9 @@
machinery = util.import_importlib('importlib.machinery') machinery = util.import_importlib('importlib.machinery')
from test.support import captured_stdout, import_helper from test.support import captured_stdout, import_helper
import _imp
import contextlib import contextlib
import marshal
import types import types
import unittest import unittest
import warnings import warnings
@ -33,11 +35,14 @@ class ExecModuleTests(abc.LoaderTests):
def exec_module(self, name): def exec_module(self, name):
with import_helper.frozen_modules(): with import_helper.frozen_modules():
is_package = self.machinery.FrozenImporter.is_package(name) is_package = self.machinery.FrozenImporter.is_package(name)
code = _imp.get_frozen_object(name)
data = marshal.dumps(code)
spec = self.machinery.ModuleSpec( spec = self.machinery.ModuleSpec(
name, name,
self.machinery.FrozenImporter, self.machinery.FrozenImporter,
origin='frozen', origin='frozen',
is_package=is_package, is_package=is_package,
loader_state=data,
) )
module = types.ModuleType(name) module = types.ModuleType(name)
module.__spec__ = spec module.__spec__ = spec
@ -61,6 +66,7 @@ def test_module(self):
self.assertEqual(getattr(module, attr), value) self.assertEqual(getattr(module, attr), value)
self.assertEqual(output, 'Hello world!\n') self.assertEqual(output, 'Hello world!\n')
self.assertTrue(hasattr(module, '__spec__')) self.assertTrue(hasattr(module, '__spec__'))
self.assertIsNone(module.__spec__.loader_state)
def test_package(self): def test_package(self):
name = '__phello__' name = '__phello__'
@ -73,6 +79,7 @@ def test_package(self):
name=name, attr=attr, given=attr_value, name=name, attr=attr, given=attr_value,
expected=value)) expected=value))
self.assertEqual(output, 'Hello world!\n') self.assertEqual(output, 'Hello world!\n')
self.assertIsNone(module.__spec__.loader_state)
def test_lacking_parent(self): def test_lacking_parent(self):
name = '__phello__.spam' name = '__phello__.spam'

View File

@ -0,0 +1,3 @@
In FrozenImporter.find_spec(), we now preserve the information needed in
exec_module() to load the module. This change mostly impacts internal
details, rather than changing the importer's behavior.

View File

@ -169,33 +169,80 @@ exit:
return return_value; return return_value;
} }
PyDoc_STRVAR(_imp_get_frozen_object__doc__, PyDoc_STRVAR(_imp_find_frozen__doc__,
"get_frozen_object($module, name, /)\n" "find_frozen($module, name, /)\n"
"--\n" "--\n"
"\n" "\n"
"Create a code object for a frozen module."); "Return info about the corresponding frozen module (if there is one) or None.\n"
"\n"
"The returned info (a 2-tuple):\n"
"\n"
" * data the raw marshalled bytes\n"
" * is_package whether or not it is a package");
#define _IMP_GET_FROZEN_OBJECT_METHODDEF \ #define _IMP_FIND_FROZEN_METHODDEF \
{"get_frozen_object", (PyCFunction)_imp_get_frozen_object, METH_O, _imp_get_frozen_object__doc__}, {"find_frozen", (PyCFunction)_imp_find_frozen, METH_O, _imp_find_frozen__doc__},
static PyObject * static PyObject *
_imp_get_frozen_object_impl(PyObject *module, PyObject *name); _imp_find_frozen_impl(PyObject *module, PyObject *name);
static PyObject * static PyObject *
_imp_get_frozen_object(PyObject *module, PyObject *arg) _imp_find_frozen(PyObject *module, PyObject *arg)
{ {
PyObject *return_value = NULL; PyObject *return_value = NULL;
PyObject *name; PyObject *name;
if (!PyUnicode_Check(arg)) { if (!PyUnicode_Check(arg)) {
_PyArg_BadArgument("get_frozen_object", "argument", "str", arg); _PyArg_BadArgument("find_frozen", "argument", "str", arg);
goto exit; goto exit;
} }
if (PyUnicode_READY(arg) == -1) { if (PyUnicode_READY(arg) == -1) {
goto exit; goto exit;
} }
name = arg; name = arg;
return_value = _imp_get_frozen_object_impl(module, name); return_value = _imp_find_frozen_impl(module, name);
exit:
return return_value;
}
PyDoc_STRVAR(_imp_get_frozen_object__doc__,
"get_frozen_object($module, name, data=None, /)\n"
"--\n"
"\n"
"Create a code object for a frozen module.");
#define _IMP_GET_FROZEN_OBJECT_METHODDEF \
{"get_frozen_object", (PyCFunction)(void(*)(void))_imp_get_frozen_object, METH_FASTCALL, _imp_get_frozen_object__doc__},
static PyObject *
_imp_get_frozen_object_impl(PyObject *module, PyObject *name,
PyObject *dataobj);
static PyObject *
_imp_get_frozen_object(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
PyObject *name;
PyObject *dataobj = Py_None;
if (!_PyArg_CheckPositional("get_frozen_object", nargs, 1, 2)) {
goto exit;
}
if (!PyUnicode_Check(args[0])) {
_PyArg_BadArgument("get_frozen_object", "argument 1", "str", args[0]);
goto exit;
}
if (PyUnicode_READY(args[0]) == -1) {
goto exit;
}
name = args[0];
if (nargs < 2) {
goto skip_optional;
}
dataobj = args[1];
skip_optional:
return_value = _imp_get_frozen_object_impl(module, name, dataobj);
exit: exit:
return return_value; return return_value;
@ -498,4 +545,4 @@ exit:
#ifndef _IMP_EXEC_DYNAMIC_METHODDEF #ifndef _IMP_EXEC_DYNAMIC_METHODDEF
#define _IMP_EXEC_DYNAMIC_METHODDEF #define _IMP_EXEC_DYNAMIC_METHODDEF
#endif /* !defined(_IMP_EXEC_DYNAMIC_METHODDEF) */ #endif /* !defined(_IMP_EXEC_DYNAMIC_METHODDEF) */
/*[clinic end generated code: output=96038c277119d6e3 input=a9049054013a1b77]*/ /*[clinic end generated code: output=a31e1c00653359ff input=a9049054013a1b77]*/

View File

@ -887,10 +887,6 @@ _imp__fix_co_filename_impl(PyObject *module, PyCodeObject *code,
} }
/* Forward */
static const struct _frozen * find_frozen(PyObject *);
/* Helper to test for built-in module */ /* Helper to test for built-in module */
static int static int
@ -1119,75 +1115,125 @@ list_frozen_module_names()
return names; return names;
} }
static const struct _frozen * typedef enum {
find_frozen(PyObject *modname) FROZEN_OKAY,
FROZEN_BAD_NAME, // The given module name wasn't valid.
FROZEN_NOT_FOUND, // It wasn't in PyImport_FrozenModules.
FROZEN_DISABLED, // -X frozen_modules=off (and not essential)
FROZEN_EXCLUDED, // The PyImport_FrozenModules entry has NULL "code".
FROZEN_INVALID, // The PyImport_FrozenModules entry is bogus.
} frozen_status;
static inline void
set_frozen_error(frozen_status status, PyObject *modname)
{ {
if (modname == NULL) { const char *err = NULL;
return NULL; switch (status) {
case FROZEN_BAD_NAME:
case FROZEN_NOT_FOUND:
case FROZEN_DISABLED:
err = "No such frozen object named %R";
break;
case FROZEN_EXCLUDED:
err = "Excluded frozen object named %R";
break;
case FROZEN_INVALID:
err = "Frozen object named %R is invalid";
break;
case FROZEN_OKAY:
// There was no error.
break;
default:
Py_UNREACHABLE();
} }
const char *name = PyUnicode_AsUTF8(modname); if (err != NULL) {
PyObject *msg = PyUnicode_FromFormat(err, modname);
if (msg == NULL) {
PyErr_Clear();
}
PyErr_SetImportError(msg, modname, NULL);
Py_XDECREF(msg);
}
}
struct frozen_info {
PyObject *nameobj;
const char *data;
Py_ssize_t size;
bool is_package;
};
static frozen_status
find_frozen(PyObject *nameobj, struct frozen_info *info)
{
if (info != NULL) {
info->nameobj = NULL;
info->data = NULL;
info->size = 0;
info->is_package = false;
}
if (nameobj == NULL || nameobj == Py_None) {
return FROZEN_BAD_NAME;
}
const char *name = PyUnicode_AsUTF8(nameobj);
if (name == NULL) { if (name == NULL) {
// Note that this function previously used
// _PyUnicode_EqualToASCIIString(). We clear the error here
// (instead of propagating it) to match the earlier behavior
// more closely.
PyErr_Clear(); PyErr_Clear();
return NULL; return FROZEN_BAD_NAME;
} }
if (!use_frozen() && !is_essential_frozen_module(name)) { if (!use_frozen() && !is_essential_frozen_module(name)) {
return NULL; return FROZEN_DISABLED;
} }
const struct _frozen *p; const struct _frozen *p;
for (p = PyImport_FrozenModules; ; p++) { for (p = PyImport_FrozenModules; ; p++) {
if (p->name == NULL) { if (p->name == NULL) {
return NULL; // We hit the end-of-list sentinel value.
return FROZEN_NOT_FOUND;
} }
if (strcmp(name, p->name) == 0) { if (strcmp(name, p->name) == 0) {
break; break;
} }
} }
return p; if (info != NULL) {
} info->nameobj = nameobj; // borrowed
info->data = (const char *)p->code;
static PyObject * info->size = p->size < 0 ? -(p->size) : p->size;
get_frozen_object(PyObject *name) info->is_package = p->size < 0 ? true : false;
{
const struct _frozen *p = find_frozen(name);
int size;
if (p == NULL) {
PyErr_Format(PyExc_ImportError,
"No such frozen object named %R",
name);
return NULL;
} }
if (p->code == NULL) { if (p->code == NULL) {
PyErr_Format(PyExc_ImportError, /* It is frozen but marked as un-importable. */
"Excluded frozen object named %R", return FROZEN_EXCLUDED;
name);
return NULL;
} }
size = p->size; if (p->code[0] == '\0' || p->size == 0) {
if (size < 0) return FROZEN_INVALID;
size = -size; }
return PyMarshal_ReadObjectFromString((const char *)p->code, size); return FROZEN_OKAY;
} }
static PyObject * static PyObject *
is_frozen_package(PyObject *name) unmarshal_frozen_code(struct frozen_info *info)
{ {
const struct _frozen *p = find_frozen(name); PyObject *co = PyMarshal_ReadObjectFromString(info->data, info->size);
int size; if (co == NULL) {
set_frozen_error(FROZEN_INVALID, info->nameobj);
if (p == NULL) {
PyErr_Format(PyExc_ImportError,
"No such frozen object named %R",
name);
return NULL; return NULL;
} }
if (!PyCode_Check(co)) {
size = p->size; // We stick with TypeError for backward compatibility.
PyErr_Format(PyExc_TypeError,
if (size < 0) "frozen object %R is not a code object",
Py_RETURN_TRUE; info->nameobj);
else Py_DECREF(co);
Py_RETURN_FALSE; return NULL;
}
return co;
} }
@ -1200,35 +1246,25 @@ int
PyImport_ImportFrozenModuleObject(PyObject *name) PyImport_ImportFrozenModuleObject(PyObject *name)
{ {
PyThreadState *tstate = _PyThreadState_GET(); PyThreadState *tstate = _PyThreadState_GET();
const struct _frozen *p;
PyObject *co, *m, *d; PyObject *co, *m, *d;
int ispackage;
int size;
p = find_frozen(name); struct frozen_info info;
frozen_status status = find_frozen(name, &info);
if (p == NULL) if (status == FROZEN_NOT_FOUND || status == FROZEN_DISABLED) {
return 0; return 0;
if (p->code == NULL) { }
_PyErr_Format(tstate, PyExc_ImportError, else if (status == FROZEN_BAD_NAME) {
"Excluded frozen object named %R", return 0;
name); }
else if (status != FROZEN_OKAY) {
set_frozen_error(status, name);
return -1; return -1;
} }
size = p->size; co = unmarshal_frozen_code(&info);
ispackage = (size < 0); if (co == NULL) {
if (ispackage)
size = -size;
co = PyMarshal_ReadObjectFromString((const char *)p->code, size);
if (co == NULL)
return -1; return -1;
if (!PyCode_Check(co)) {
_PyErr_Format(tstate, PyExc_TypeError,
"frozen object %R is not a code object",
name);
goto err_return;
} }
if (ispackage) { if (info.is_package) {
/* Set __path__ to the empty list */ /* Set __path__ to the empty list */
PyObject *l; PyObject *l;
int err; int err;
@ -1966,20 +2002,83 @@ _imp_init_frozen_impl(PyObject *module, PyObject *name)
return import_add_module(tstate, name); return import_add_module(tstate, name);
} }
/*[clinic input]
_imp.find_frozen
name: unicode
/
Return info about the corresponding frozen module (if there is one) or None.
The returned info (a 2-tuple):
* data the raw marshalled bytes
* is_package whether or not it is a package
[clinic start generated code]*/
static PyObject *
_imp_find_frozen_impl(PyObject *module, PyObject *name)
/*[clinic end generated code: output=3fd17da90d417e4e input=4e52b3ac95f6d7ab]*/
{
struct frozen_info info;
frozen_status status = find_frozen(name, &info);
if (status == FROZEN_NOT_FOUND || status == FROZEN_DISABLED) {
Py_RETURN_NONE;
}
else if (status == FROZEN_BAD_NAME) {
Py_RETURN_NONE;
}
else if (status != FROZEN_OKAY) {
set_frozen_error(status, name);
return NULL;
}
PyObject *data = PyBytes_FromStringAndSize(info.data, info.size);
if (data == NULL) {
return NULL;
}
PyObject *result = PyTuple_Pack(2, data,
info.is_package ? Py_True : Py_False);
Py_DECREF(data);
return result;
}
/*[clinic input] /*[clinic input]
_imp.get_frozen_object _imp.get_frozen_object
name: unicode name: unicode
data as dataobj: object = None
/ /
Create a code object for a frozen module. Create a code object for a frozen module.
[clinic start generated code]*/ [clinic start generated code]*/
static PyObject * static PyObject *
_imp_get_frozen_object_impl(PyObject *module, PyObject *name) _imp_get_frozen_object_impl(PyObject *module, PyObject *name,
/*[clinic end generated code: output=2568cc5b7aa0da63 input=ed689bc05358fdbd]*/ PyObject *dataobj)
/*[clinic end generated code: output=54368a673a35e745 input=034bdb88f6460b7b]*/
{ {
return get_frozen_object(name); struct frozen_info info;
if (PyBytes_Check(dataobj)) {
info.nameobj = name;
info.data = PyBytes_AS_STRING(dataobj);
info.size = PyBytes_Size(dataobj);
if (info.size == 0) {
set_frozen_error(FROZEN_INVALID, name);
return NULL;
}
}
else if (dataobj != Py_None) {
_PyArg_BadArgument("get_frozen_object", "argument 2", "bytes", dataobj);
return NULL;
}
else {
frozen_status status = find_frozen(name, &info);
if (status != FROZEN_OKAY) {
set_frozen_error(status, name);
return NULL;
}
}
return unmarshal_frozen_code(&info);
} }
/*[clinic input] /*[clinic input]
@ -1995,7 +2094,13 @@ static PyObject *
_imp_is_frozen_package_impl(PyObject *module, PyObject *name) _imp_is_frozen_package_impl(PyObject *module, PyObject *name)
/*[clinic end generated code: output=e70cbdb45784a1c9 input=81b6cdecd080fbb8]*/ /*[clinic end generated code: output=e70cbdb45784a1c9 input=81b6cdecd080fbb8]*/
{ {
return is_frozen_package(name); struct frozen_info info;
frozen_status status = find_frozen(name, &info);
if (status != FROZEN_OKAY && status != FROZEN_EXCLUDED) {
set_frozen_error(status, name);
return NULL;
}
return PyBool_FromLong(info.is_package);
} }
/*[clinic input] /*[clinic input]
@ -2027,10 +2132,12 @@ static PyObject *
_imp_is_frozen_impl(PyObject *module, PyObject *name) _imp_is_frozen_impl(PyObject *module, PyObject *name)
/*[clinic end generated code: output=01f408f5ec0f2577 input=7301dbca1897d66b]*/ /*[clinic end generated code: output=01f408f5ec0f2577 input=7301dbca1897d66b]*/
{ {
const struct _frozen *p; struct frozen_info info;
frozen_status status = find_frozen(name, &info);
p = find_frozen(name); if (status != FROZEN_OKAY) {
return PyBool_FromLong((long) (p == NULL ? 0 : p->size)); Py_RETURN_FALSE;
}
Py_RETURN_TRUE;
} }
/*[clinic input] /*[clinic input]
@ -2221,6 +2328,7 @@ static PyMethodDef imp_methods[] = {
_IMP_LOCK_HELD_METHODDEF _IMP_LOCK_HELD_METHODDEF
_IMP_ACQUIRE_LOCK_METHODDEF _IMP_ACQUIRE_LOCK_METHODDEF
_IMP_RELEASE_LOCK_METHODDEF _IMP_RELEASE_LOCK_METHODDEF
_IMP_FIND_FROZEN_METHODDEF
_IMP_GET_FROZEN_OBJECT_METHODDEF _IMP_GET_FROZEN_OBJECT_METHODDEF
_IMP_IS_FROZEN_PACKAGE_METHODDEF _IMP_IS_FROZEN_PACKAGE_METHODDEF
_IMP_CREATE_BUILTIN_METHODDEF _IMP_CREATE_BUILTIN_METHODDEF