Python3源码分析
本文环境python3.5.2。
参考书籍《Python源码剖析》
python官网
Python3模块初始化与加载
Python的模块分为内建的模块,函数与用户定义的模块,首先分析Python内建模块。
Python3的系统内建模块初始化
上文介绍了Python的线程对象和解释器对象。在初始化的时候,会执行_Py_InitializeEx_Private函数,该函数会先创建解释器对象和线程对象,然后再设置系统默认的module,
/*
 * Excerpt of the interpreter start-up routine (the ... lines mark elided
 * source). The lines kept here show the module table being created, the
 * builtins and sys modules being initialized and registered, and the
 * module table being stored in the sys dict under the key modules.
 */
void
_Py_InitializeEx_Private(int install_sigs, int install_importlib)
{
...
interp->modules = PyDict_New(); // the module table of the interpreter starts as an empty dict
...
bimod = _PyBuiltin_Init(); // build the builtins module
...
_PyImport_FixupBuiltin(bimod, "builtins"); // register bimod in the module table under the builtins name
interp->builtins = PyModule_GetDict(bimod);
...
/* initialize builtin exceptions */
_PyExc_Init(bimod); // add the built-in exception types to the builtins dict
sysmod = _PySys_Init(); // initialize the sys module
...
interp->sysdict = PyModule_GetDict(sysmod); // keep the attribute dict of sys on the interpreter
...
_PyImport_FixupBuiltin(sysmod, "sys"); // register sysmod in the module table under the sys name
PySys_SetPath(Py_GetPath()); // set the module search path
PyDict_SetItemString(interp->sysdict, "modules",
interp->modules); // expose the module table as the modules entry of the sys dict
...
}
此时可以看出,首先会初始化interp->modules为一个字典,然后调用_PyBuiltin_Init初始化内建函数,
/*
 * Create the builtins module and fill its attribute dict with the built-in
 * singleton objects and types, plus the __debug__ flag.
 * Returns the new module, or NULL if readying a type, creating the module,
 * or inserting into the dict fails.
 */
PyObject *
_PyBuiltin_Init(void)
{
PyObject *mod, *dict, *debug;
if (PyType_Ready(&PyFilter_Type) < 0 ||
PyType_Ready(&PyMap_Type) < 0 ||
PyType_Ready(&PyZip_Type) < 0) // give up if any of these types fails to get ready
return NULL;
mod = PyModule_Create(&builtinsmodule); // create the module from its definition
if (mod == NULL)
return NULL;
dict = PyModule_GetDict(mod); // fetch the attribute dict of the new module
#ifdef Py_TRACE_REFS
/* "builtins" exposes a number of statically allocated objects
* that, before this code was added in 2.3, never showed up in
* the list of "all objects" maintained by Py_TRACE_REFS. As a
* result, programs leaking references to None and False (etc)
* couldn't be diagnosed by examining sys.getobjects(0).
*/
#define ADD_TO_ALL(OBJECT) _Py_AddToAllObjects((PyObject *)(OBJECT), 0)
#else
#define ADD_TO_ALL(OBJECT) (void)0
#endif
/* Store OBJECT in dict under NAME; on failure return NULL from the
 * enclosing function. Afterwards apply ADD_TO_ALL to the object. */
#define SETBUILTIN(NAME, OBJECT) \
if (PyDict_SetItemString(dict, NAME, (PyObject *)OBJECT) < 0) \
return NULL; \
ADD_TO_ALL(OBJECT)
SETBUILTIN("None", Py_None); // store the object in the module dict under this name
SETBUILTIN("Ellipsis", Py_Ellipsis);
SETBUILTIN("NotImplemented", Py_NotImplemented);
SETBUILTIN("False", Py_False);
SETBUILTIN("True", Py_True);
SETBUILTIN("bool", &PyBool_Type);
SETBUILTIN("memoryview", &PyMemoryView_Type);
SETBUILTIN("bytearray", &PyByteArray_Type);
SETBUILTIN("bytes", &PyBytes_Type);
SETBUILTIN("classmethod", &PyClassMethod_Type);
SETBUILTIN("complex", &PyComplex_Type);
SETBUILTIN("dict", &PyDict_Type);
SETBUILTIN("enumerate", &PyEnum_Type);
SETBUILTIN("filter", &PyFilter_Type);
SETBUILTIN("float", &PyFloat_Type);
SETBUILTIN("frozenset", &PyFrozenSet_Type);
SETBUILTIN("property", &PyProperty_Type);
SETBUILTIN("int", &PyLong_Type);
SETBUILTIN("list", &PyList_Type);
SETBUILTIN("map", &PyMap_Type);
SETBUILTIN("object", &PyBaseObject_Type);
SETBUILTIN("range", &PyRange_Type);
SETBUILTIN("reversed", &PyReversed_Type);
SETBUILTIN("set", &PySet_Type);
SETBUILTIN("slice", &PySlice_Type);
SETBUILTIN("staticmethod", &PyStaticMethod_Type);
SETBUILTIN("str", &PyUnicode_Type);
SETBUILTIN("super", &PySuper_Type);
SETBUILTIN("tuple", &PyTuple_Type);
SETBUILTIN("type", &PyType_Type);
SETBUILTIN("zip", &PyZip_Type);
debug = PyBool_FromLong(Py_OptimizeFlag == 0); // the flag is true only when Py_OptimizeFlag is 0
if (PyDict_SetItemString(dict, "__debug__", debug) < 0) {
Py_XDECREF(debug);
return NULL;
}
Py_XDECREF(debug);
return mod;
/* Preprocessor directives, so they still take effect after the return. */
#undef ADD_TO_ALL
#undef SETBUILTIN
}
由该函数可以看出,Python的内建对象(如None、True、False)以及常用的内建类型(如str、int、list等)都是通过该函数注册到builtins模块中的。此时查看PyModule_Create函数和builtinsmodule定义,
/* Static description of a module, consumed by PyModule_Create2. */
typedef struct PyModuleDef{
PyModuleDef_Base m_base;
const char* m_name; /* module name, passed to PyModule_New */
const char* m_doc; /* docstring, applied with PyModule_SetDocString when non-NULL */
Py_ssize_t m_size; /* byte size of the per-module state; memory is allocated only when > 0 */
PyMethodDef *m_methods; /* method table, added with PyModule_AddFunctions when non-NULL */
struct PyModuleDef_Slot* m_slots;
traverseproc m_traverse;
inquiry m_clear;
freefunc m_free;
}PyModuleDef;
...
/* Module definition that _PyBuiltin_Init passes to PyModule_Create. */
static struct PyModuleDef builtinsmodule = {
PyModuleDef_HEAD_INIT,
"builtins", /* m_name */
builtin_doc, /* m_doc */
-1, /* multiple "initialization" just copies the module dict. */
builtin_methods, /* m_methods */
NULL, /* m_slots */
NULL, /* m_traverse */
NULL, /* m_clear */
NULL /* m_free */
};
此时可以看出,builtinsmodule的类型如上文所示,其对应的方法表就是builtin_methods。PyModuleDef分别保存了模块名称(m_name)、模块的说明文档(m_doc)、模块状态所占内存的大小(m_size)等信息,模块的相关方法则从m_methods中获取。此处builtin_methods定义的相关方法如下,
/*
 * Method table of the builtins module (excerpt; the ... lines mark elided
 * entries). Each fully written entry lists a name, a C function, METH_*
 * flags and a doc string; the table ends with a {NULL, NULL} entry.
 */
static PyMethodDef builtin_methods[] = {
{"__build_class__", (PyCFunction)builtin___build_class__,
METH_VARARGS | METH_KEYWORDS, build_class_doc},
{"__import__", (PyCFunction)builtin___import__, METH_VARARGS | METH_KEYWORDS, import_doc},
BUILTIN_ABS_METHODDEF
BUILTIN_ALL_METHODDEF
BUILTIN_ANY_METHODDEF
...
{"iter", builtin_iter, METH_VARARGS, iter_doc},
...
{"max", (PyCFunction)builtin_max, METH_VARARGS | METH_KEYWORDS, max_doc},
{"min", (PyCFunction)builtin_min, METH_VARARGS | METH_KEYWORDS, min_doc},
{"next", (PyCFunction)builtin_next, METH_VARARGS, next_doc},
...
{"print", (PyCFunction)builtin_print, METH_VARARGS | METH_KEYWORDS, print_doc},
...
{"round", (PyCFunction)builtin_round, METH_VARARGS | METH_KEYWORDS, round_doc},
...
{"vars", builtin_vars, METH_VARARGS, vars_doc},
{NULL, NULL},
};
由此可知相关的内建函数也导入到了内建模块中,此时回过头来看PyModule_Create是怎样运行的呢?
/* Shorthand for PyModule_Create2 that supplies PYTHON_API_VERSION as the API version. */
#define PyModule_Create(module) \
PyModule_Create2(module, PYTHON_API_VERSION)
此时继续查看PyModule_Create2函数的执行流程,
/*
 * Build a module object from a PyModuleDef (excerpt; the ... line marks
 * elided source). Returns the new module, or NULL on failure.
 * module_api_version is not used in the lines shown here.
 */
PyObject *
PyModule_Create2(struct PyModuleDef* module, int module_api_version)
{
const char* name;
PyModuleObject *m;
PyInterpreterState *interp = PyThreadState_Get()->interp; // interpreter of the current thread state
if (interp->modules == NULL) // the module table of the interpreter must already exist
Py_FatalError("Python import machinery not initialized");
if (!PyModuleDef_Init(module)) // give up if the definition cannot be initialized
return NULL;
name = module->m_name; // module name taken from the definition
...
if ((m = (PyModuleObject*)PyModule_New(name)) == NULL) // create the bare module object
return NULL;
if (module->m_size > 0) { // positive m_size: allocate zero-filled per-module state
m->md_state = PyMem_MALLOC(module->m_size);
if (!m->md_state) {
PyErr_NoMemory();
Py_DECREF(m);
return NULL;
}
memset(m->md_state, 0, module->m_size);
}
if (module->m_methods != NULL) { // add the method table, if there is one
if (PyModule_AddFunctions((PyObject *) m, module->m_methods) != 0) {
Py_DECREF(m);
return NULL;
}
}
if (module->m_doc != NULL) { // set the docstring, if there is one
if (PyModule_SetDocString((PyObject *) m, module->m_doc) != 0) {
Py_DECREF(m);
return NULL;
}
}
m->md_def = module; // remember which definition produced this module
return (PyObject*)m; // hand back the new module
}
其中比较重要的就是PyModule_New函数,
/* Object layout of a module instance. */
typedef struct {
PyObject_HEAD
PyObject *md_dict; // attribute dict of the module
struct PyModuleDef *md_def; // definition the module was created from (NULL until PyModule_Create2 sets it)
void *md_state; // per-module state, allocated by PyModule_Create2 when m_size > 0
PyObject *md_weaklist;
PyObject *md_name; /* for logging purposes after md_dict is cleared */ // module name
} PyModuleObject;
...
/*
 * Allocate a GC-managed PyModuleObject, give it a fresh attribute dict and
 * let module_init_dict populate that dict for the given name.
 * Returns the tracked module on success, or NULL on failure (the partially
 * built object is released first).
 */
PyObject *
PyModule_NewObject(PyObject *name)
{
    PyModuleObject *mod_obj = PyObject_GC_New(PyModuleObject, &PyModule_Type);

    if (mod_obj == NULL) {
        return NULL;
    }

    /* Clear every optional field before the dict is attached. */
    mod_obj->md_name = NULL;
    mod_obj->md_weaklist = NULL;
    mod_obj->md_state = NULL;
    mod_obj->md_def = NULL;
    mod_obj->md_dict = PyDict_New();

    if (module_init_dict(mod_obj, mod_obj->md_dict, name, NULL) == 0) {
        /* Fully initialized: hand the object over to the GC and return it. */
        PyObject_GC_Track(mod_obj);
        return (PyObject *)mod_obj;
    }

    /* Dict setup failed: drop the half-built module. */
    Py_DECREF(mod_obj);
    return NULL;
}
/*
 * Create a module object from a C string name.
 * The name is converted to a unicode object, handed to PyModule_NewObject,
 * and the temporary name reference is released afterwards.
 * Returns the new module, or NULL if the conversion or the creation fails.
 */
PyObject *
PyModule_New(const char *name)
{
    PyObject *result = NULL;
    PyObject *name_obj = PyUnicode_FromString(name);

    if (name_obj != NULL) {
        result = PyModule_NewObject(name_obj);
        /* Release the temporary name whether or not creation succeeded. */
        Py_DECREF(name_obj);
    }
    return result;
}
由此可知生成的module是一个PyModuleObject类型,此时返回_PyBuiltin_Init函数中继续执行
dict = PyModule_GetDict(mod);
此时返回mod对应的属性字典(PyModule_Create2中已经通过PyModule_AddFunctions把builtin_methods中的内建函数添加了进去),然后继续向dict中添加内建对象和内建类型,
SETBUILTIN("None", Py_None);
此时就将相关的内建对象和类型设置到了module对应的属性字典dict中。初始化完成后返回_Py_InitializeEx_Private函数继续执行,接着就执行到:
_PyImport_FixupBuiltin(bimod, "builtins");
该函数对应的代码如下,
/*
 * Register a built-in module under the given C string name.
 * The name is interned as a unicode object and passed to
 * _PyImport_FixupExtensionObject as both the module name and the filename.
 * Returns the result of that call, or -1 if the name cannot be created.
 */
int
_PyImport_FixupBuiltin(PyObject *mod, const char *name)
{
    int status = -1;
    PyObject *interned = PyUnicode_InternFromString(name);

    if (interned != NULL) {
        status = _PyImport_FixupExtensionObject(mod, interned, interned);
        /* The temporary name object is no longer needed here. */
        Py_DECREF(interned);
    }
    return status;
}
调用了_PyImport_FixupExtensionObject函数,将传入的mod登记到静态字典extensions中,
/* See _PyImport_FixupExtensionObject() below */
static PyObject *extensions = NULL;
/* Magic for extension modules (built-in as well as dynamically
loaded). To prevent initializing an extension module more than
once, we keep a static dictionary 'extensions' keyed by the tuple
(module name, module name) (for built-in modules) or by
(filename, module name) (for dynamically loaded modules), containing these
modules. A copy of the module's dictionary is stored by calling
_PyImport_FixupExtensionObject() immediately after the module initialization
function succeeds. A copy can be retrieved from there by calling
_PyImport_FindExtensionObject().
Modules which do support multiple initialization set their m_size
field to a non-negative number (indicating the size of the
module-specific state). They are still recorded in the extensions
dictionary, to avoid loading shared libraries twice.
*/ // a copy of each imported module is recorded so that shared libraries are not loaded twice
/*
 * Register mod in the module table under name and record its definition in
 * the static extensions dict under the key (filename, name).
 * Returns 0 on success and -1 on failure.
 */
int
_PyImport_FixupExtensionObject(PyObject *mod, PyObject *name,
PyObject *filename)
{
PyObject *modules, *dict, *key;
struct PyModuleDef *def;
int res;
if (extensions == NULL) { // the static extensions dict is created on first use
extensions = PyDict_New(); // start with an empty dict
if (extensions == NULL) // creation failed: report the error
return -1;
}
if (mod == NULL || !PyModule_Check(mod)) { // mod must be a non-NULL module object
PyErr_BadInternalCall();
return -1;
}
def = PyModule_GetDef(mod); // fetch the PyModuleDef behind mod
if (!def) {
PyErr_BadInternalCall();
return -1;
}
modules = PyImport_GetModuleDict(); // module table of the current interpreter
if (PyDict_SetItem(modules, name, mod) < 0) // modules[name] = mod
return -1;
if (_PyState_AddModule(mod, def) < 0) {
PyDict_DelItem(modules, name); // undo the registration made just above
return -1;
}
if (def->m_size == -1) { // builtins is defined with m_size == -1
if (def->m_base.m_copy) {
/* Somebody already imported the module,
likely under a different name.
XXX this should really not happen. */
Py_CLEAR(def->m_base.m_copy);
}
dict = PyModule_GetDict(mod); // attribute dict of mod
if (dict == NULL)
return -1;
def->m_base.m_copy = PyDict_Copy(dict); // keep a copy of the dict in m_copy of the definition
if (def->m_base.m_copy == NULL)
return -1;
}
key = PyTuple_Pack(2, filename, name); // key is the tuple (filename, name)
if (key == NULL)
return -1;
res = PyDict_SetItem(extensions, key, (PyObject *)def); // extensions[key] = def
Py_DECREF(key);
if (res < 0)
return -1;
return 0;
}
其中,extensions作为全局静态变量存在,保存已经导入的扩展模块的定义,从而避免已经导入的模块被二次初始化。此刻返回_Py_InitializeEx_Private函数继续执行,
interp->builtins = PyModule_GetDict(bimod);
...
/* initialize builtin exceptions */
_PyExc_Init(bimod);
获取bimod的属性字典,该字典就是_PyBuiltin_Init初始化时填充的dict,其中包括None、False、True等内建对象;这里是把该字典的引用保存到解释器对象的builtins字段中(并没有复制字典),然后初始化内建的异常类型。查看_PyExc_Init函数,
/*
 * Ready the built-in exception types and insert them into the attribute
 * dict of the given builtins module (excerpt; the ... lines mark elided
 * source).
 */
void
_PyExc_Init(PyObject *bltinmod)
{
PyObject *bdict;
PRE_INIT(BaseException) // make sure the type is ready
PRE_INIT(Exception)
...
bdict = PyModule_GetDict(bltinmod); // attribute dict of the module passed in
if (bdict == NULL)
Py_FatalError("exceptions bootstrapping error.");
POST_INIT(BaseException) // store BaseException in bdict
POST_INIT(Exception)
...
}
查看PRE_INIT和POST_INIT宏,
/* If the _PyExc_TYPE type object is not yet flagged Py_TPFLAGS_READY, ready
 * it with PyType_Ready (calling Py_FatalError on failure) and take a
 * reference to PyExc_TYPE. Expands to a complete if-block, so call sites
 * use it without a trailing semicolon. */
#define PRE_INIT(TYPE) \
if (!(_PyExc_ ## TYPE.tp_flags & Py_TPFLAGS_READY)) { \
if (PyType_Ready(&_PyExc_ ## TYPE) < 0) \
Py_FatalError("exceptions bootstrapping error."); \
Py_INCREF(PyExc_ ## TYPE); \
}
/* Insert PyExc_TYPE into bdict under the stringified TYPE name; calls
 * Py_FatalError if the insertion reports an error. Requires a variable
 * named bdict in the calling scope. */
#define POST_INIT(TYPE) \
if (PyDict_SetItemString(bdict, # TYPE, PyExc_ ## TYPE)) \
Py_FatalError("Module dictionary insertion problem.");
由宏展开可知,先检查类型是否已经初始化,然后将对应的类型添加到bdict字典中,至此就将内建的类型初始化完成。
总结
根据Python的初始化流程,以上分析了内建builtins模块的初始化过程,该过程可用如下图所示,
该过程主要初始化了Python内部定义的对象(如None、False)、内建类型、内建函数以及基础的异常类型。