Python3.5源码分析-内建模块builtins初始化

Python3源码分析
本文环境python3.5.2。
参考书籍《Python源码剖析》
python官网
Python3模块初始化与加载

Python的模块分为内建模块(包含内建函数)与用户定义的模块,本文首先分析Python的内建模块。

Python3的系统内建模块初始化

上文介绍了Python的线程对象和解释器对象。在初始化的时候,会执行_Py_InitializeEx_Private函数,该函数会先创建解释器对象和线程对象,然后再设置系统默认的module,

/*
 * Excerpt of the interpreter bootstrap routine ("..." marks elided code).
 * The steps shown here create the interpreter's module table, build the
 * "builtins" and "sys" modules, register both through
 * _PyImport_FixupBuiltin(), and store the module table in the sys
 * dictionary under the key "modules".
 */
void
_Py_InitializeEx_Private(int install_sigs, int install_importlib)
{
    ...
    interp->modules = PyDict_New();                                 // the interpreter's module table is a dict
    ...

    bimod = _PyBuiltin_Init();                                      // create and populate the builtins module
    ... 
    _PyImport_FixupBuiltin(bimod, "builtins");                      // register the module under the name "builtins"
    interp->builtins = PyModule_GetDict(bimod);                     // keep the builtins module's dict on the interpreter
    ...

    /* initialize builtin exceptions */
    _PyExc_Init(bimod);                                             // add the built-in exception types

    sysmod = _PySys_Init();                                         // create the sys module
    ...
    interp->sysdict = PyModule_GetDict(sysmod);                     // keep the sys module's dict on the interpreter
    ...
    _PyImport_FixupBuiltin(sysmod, "sys");                          // register the module under the name "sys"
    PySys_SetPath(Py_GetPath());                                    // set the module search path
    PyDict_SetItemString(interp->sysdict, "modules",
                         interp->modules);                          // store the module table as sys "modules"

    ...
}

此时可以看出,首先会初始化interp->modules为一个字典,然后调用_PyBuiltin_Init初始化内建函数,

PyObject *
_PyBuiltin_Init(void)
{
    PyObject *mod, *dict, *debug;

    if (PyType_Ready(&PyFilter_Type) < 0 || 
        PyType_Ready(&PyMap_Type) < 0 ||
        PyType_Ready(&PyZip_Type) < 0)                      // 检查类型是否初始化成功
        return NULL;

    mod = PyModule_Create(&builtinsmodule);                 // 创建module
    if (mod == NULL)
        return NULL;
    dict = PyModule_GetDict(mod);                           // 创建module对应的属性字典

#ifdef Py_TRACE_REFS
    /* "builtins" exposes a number of statically allocated objects
     * that, before this code was added in 2.3, never showed up in
     * the list of "all objects" maintained by Py_TRACE_REFS.  As a
     * result, programs leaking references to None and False (etc)
     * couldn't be diagnosed by examining sys.getobjects(0).
     */
#define ADD_TO_ALL(OBJECT) _Py_AddToAllObjects((PyObject *)(OBJECT), 0)
#else
#define ADD_TO_ALL(OBJECT) (void)0
#endif

#define SETBUILTIN(NAME, OBJECT) \
    if (PyDict_SetItemString(dict, NAME, (PyObject *)OBJECT) < 0)       \
        return NULL;                                                    \
    ADD_TO_ALL(OBJECT)

    SETBUILTIN("None",                  Py_None);                   // 添加到module中
    SETBUILTIN("Ellipsis",              Py_Ellipsis);
    SETBUILTIN("NotImplemented",        Py_NotImplemented);
    SETBUILTIN("False",                 Py_False);
    SETBUILTIN("True",                  Py_True);
    SETBUILTIN("bool",                  &PyBool_Type);
    SETBUILTIN("memoryview",        &PyMemoryView_Type);
    SETBUILTIN("bytearray",             &PyByteArray_Type);
    SETBUILTIN("bytes",                 &PyBytes_Type);
    SETBUILTIN("classmethod",           &PyClassMethod_Type);
    SETBUILTIN("complex",               &PyComplex_Type);
    SETBUILTIN("dict",                  &PyDict_Type);
    SETBUILTIN("enumerate",             &PyEnum_Type);
    SETBUILTIN("filter",                &PyFilter_Type);
    SETBUILTIN("float",                 &PyFloat_Type);
    SETBUILTIN("frozenset",             &PyFrozenSet_Type);
    SETBUILTIN("property",              &PyProperty_Type);
    SETBUILTIN("int",                   &PyLong_Type);
    SETBUILTIN("list",                  &PyList_Type);
    SETBUILTIN("map",                   &PyMap_Type);
    SETBUILTIN("object",                &PyBaseObject_Type);
    SETBUILTIN("range",                 &PyRange_Type);
    SETBUILTIN("reversed",              &PyReversed_Type);
    SETBUILTIN("set",                   &PySet_Type);
    SETBUILTIN("slice",                 &PySlice_Type);
    SETBUILTIN("staticmethod",          &PyStaticMethod_Type);
    SETBUILTIN("str",                   &PyUnicode_Type);
    SETBUILTIN("super",                 &PySuper_Type);
    SETBUILTIN("tuple",                 &PyTuple_Type);
    SETBUILTIN("type",                  &PyType_Type);
    SETBUILTIN("zip",                   &PyZip_Type);
    debug = PyBool_FromLong(Py_OptimizeFlag == 0);
    if (PyDict_SetItemString(dict, "__debug__", debug) < 0) {
        Py_XDECREF(debug);
        return NULL;
    }
    Py_XDECREF(debug);

    return mod;
#undef ADD_TO_ALL
#undef SETBUILTIN
}

由该函数可以看出,Python的内建名称(如None、True、False)以及常用的str、int、list等内建类型,都是通过该函数添加到builtins模块的属性字典中的。此时查看PyModule_Create函数和builtinsmodule定义,

  /* Static description of a module, consumed by PyModule_Create2(). */
  typedef struct PyModuleDef{
    PyModuleDef_Base m_base;            /* header; its m_copy member holds a copy of the module dict when m_size is -1 */
    const char* m_name;                 /* module name, used to create the module object */
    const char* m_doc;                  /* docstring; applied with PyModule_SetDocString when non-NULL */
    Py_ssize_t m_size;                  /* size in bytes of the per-module state; state is allocated only when > 0 */
    PyMethodDef *m_methods;             /* function table; added with PyModule_AddFunctions when non-NULL */
    struct PyModuleDef_Slot* m_slots;   /* NOTE(review): not used by the code shown in this article */
    traverseproc m_traverse;            /* NOTE(review): presumably a GC traversal hook - confirm */
    inquiry m_clear;                    /* NOTE(review): presumably a GC clear hook - confirm */
    freefunc m_free;                    /* NOTE(review): presumably a deallocation hook - confirm */
  }PyModuleDef;
  ...
  /* Definition record for the "builtins" module; _PyBuiltin_Init() passes it to PyModule_Create(). */
  static struct PyModuleDef builtinsmodule = {
        PyModuleDef_HEAD_INIT,          /* m_base */
        "builtins",                     /* m_name */
        builtin_doc,                    /* m_doc */
        -1, /* multiple "initialization" just copies the module dict. */
        builtin_methods,                /* m_methods */
        NULL,                           /* m_slots */
        NULL,                           /* m_traverse */
        NULL,                           /* m_clear */
        NULL                            /* m_free */
    };

此时,可以看出builtinsmodule的类型如上文所示,其定义的相关方法就是builtin_methods。PyModuleDef分别保存了模块名称(m_name)、模块的说明文档(m_doc)、模块私有状态的大小(m_size)等信息,模块的相关方法则从m_methods中获取,此处builtin_methods即为builtins模块的相关方法,

/*
 * Function table of the "builtins" module (excerpt; "..." marks elided
 * entries).  The explicit entries list the Python-visible name, the C
 * implementation, the calling-convention flags and the docstring; the
 * BUILTIN_*_METHODDEF macros supply entries defined elsewhere.
 * builtinsmodule refers to this table through its m_methods field.
 */
static PyMethodDef builtin_methods[] = {
    {"__build_class__", (PyCFunction)builtin___build_class__,
     METH_VARARGS | METH_KEYWORDS, build_class_doc},
    {"__import__",      (PyCFunction)builtin___import__, METH_VARARGS | METH_KEYWORDS, import_doc},
    BUILTIN_ABS_METHODDEF
    BUILTIN_ALL_METHODDEF
    BUILTIN_ANY_METHODDEF
    ...
    {"iter",            builtin_iter,       METH_VARARGS, iter_doc},
    ...
    {"max",             (PyCFunction)builtin_max,        METH_VARARGS | METH_KEYWORDS, max_doc},
    {"min",             (PyCFunction)builtin_min,        METH_VARARGS | METH_KEYWORDS, min_doc},
    {"next",            (PyCFunction)builtin_next,       METH_VARARGS, next_doc},
    ...
    {"print",           (PyCFunction)builtin_print,      METH_VARARGS | METH_KEYWORDS, print_doc},
    ...
    {"round",           (PyCFunction)builtin_round,      METH_VARARGS | METH_KEYWORDS, round_doc},
    ...
    {"vars",            builtin_vars,       METH_VARARGS, vars_doc},
    {NULL,              NULL},
};

由此可知相关的内建函数也导入到了内建模块中,此时回过头来看PyModule_Create是怎样运行的呢?

/* Shorthand for PyModule_Create2() that supplies PYTHON_API_VERSION as
 * the module_api_version argument. */
#define PyModule_Create(module) \
    PyModule_Create2(module, PYTHON_API_VERSION)

此时继续查看PyModule_Create2函数的执行流程,

/*
 * Build a module object from a PyModuleDef (excerpt; "..." marks elided
 * code).  The visible steps: create the module by name, allocate
 * zero-filled per-module state when m_size > 0, add the functions from
 * m_methods, set the docstring from m_doc, and record the definition in
 * md_def.
 *
 * Returns the new module, or NULL on failure (the partially built module
 * is released first).  Aborts with a fatal error if the interpreter's
 * module table does not exist yet.
 */
PyObject *
PyModule_Create2(struct PyModuleDef* module, int module_api_version)
{
    const char* name;
    PyModuleObject *m;
    PyInterpreterState *interp = PyThreadState_Get()->interp;                   // interpreter of the current thread
    if (interp->modules == NULL)                                                // the interpreter's module table must already exist
        Py_FatalError("Python import machinery not initialized");
    if (!PyModuleDef_Init(module))                                              // prepare the definition; NOTE(review): presumably readies PyModuleDef_Type if needed - confirm
        return NULL;
    name = module->m_name;                                                      // module name
    ...

    if ((m = (PyModuleObject*)PyModule_New(name)) == NULL)                          // create the module object
        return NULL;

    if (module->m_size > 0) {                                                       // allocate zero-filled per-module state only for a positive size
        m->md_state = PyMem_MALLOC(module->m_size);
        if (!m->md_state) {
            PyErr_NoMemory();
            Py_DECREF(m);
            return NULL;
        }
        memset(m->md_state, 0, module->m_size);
    }

    if (module->m_methods != NULL) {                                                // add the module's functions, if it declares any
        if (PyModule_AddFunctions((PyObject *) m, module->m_methods) != 0) {
            Py_DECREF(m);
            return NULL;
        }
    } 
    if (module->m_doc != NULL) {                                                    // set the docstring, if one is given
        if (PyModule_SetDocString((PyObject *) m, module->m_doc) != 0) {
            Py_DECREF(m);
            return NULL;
        }
    }
    m->md_def = module;                                                             // remember the definition the module was built from
    return (PyObject*)m;                                                            // hand the module back to the caller
}

其中比较重要的就是PyModule_New函数(它内部调用PyModule_NewObject完成模块对象的创建),

/* In-memory layout of a module object. */
typedef struct {
    PyObject_HEAD
    PyObject *md_dict;                                                       // the module's attribute dictionary
    struct PyModuleDef *md_def;                                              // definition the module was created from (NULL until set)
    void *md_state;                                                          // per-module state buffer; allocated by PyModule_Create2 when m_size > 0
    PyObject *md_weaklist;                                                   // NOTE(review): presumably the weak-reference list - confirm
    PyObject *md_name;  /* for logging purposes after md_dict is cleared */  // module name
} PyModuleObject;

...

/*
 * Allocate a new module object for `name`.
 *
 * All bookkeeping fields start out NULL, a fresh dict becomes md_dict,
 * and module_init_dict() is called with that dict and `name`.  The
 * object is handed to the garbage collector only after that call
 * succeeds.
 *
 * Returns the module, or NULL if allocation or dict initialisation fails.
 */
PyObject *
PyModule_NewObject(PyObject *name)
{
    PyModuleObject *self = PyObject_GC_New(PyModuleObject, &PyModule_Type);

    if (self == NULL) {
        return NULL;
    }

    self->md_name = NULL;
    self->md_weaklist = NULL;
    self->md_state = NULL;
    self->md_def = NULL;
    self->md_dict = PyDict_New();

    if (module_init_dict(self, self->md_dict, name, NULL) == 0) {
        /* Fully initialised: start GC tracking and return it. */
        PyObject_GC_Track(self);
        return (PyObject *)self;
    }

    /* Initialisation failed: drop the half-built object. */
    Py_DECREF(self);
    return NULL;
}

/*
 * Create a module object from a C string name.
 *
 * The name is converted to a unicode object, passed to
 * PyModule_NewObject(), and the temporary name reference is released.
 * Returns the module, or NULL if either step fails.
 */
PyObject *
PyModule_New(const char *name)
{
    PyObject *result = NULL;
    PyObject *name_str = PyUnicode_FromString(name);

    if (name_str != NULL) {
        result = PyModule_NewObject(name_str);
        Py_DECREF(name_str);
    }
    return result;
}

由此可知生成的module是一个PyModuleObject类型,此时返回_PyBuiltin_Init函数中继续执行

    dict = PyModule_GetDict(mod);

此时就将mod对应的属性字典返回(前面PyModule_Create2已经把builtin_methods中的函数添加到了该模块),然后向dict中添加内建常量和内建类型,

SETBUILTIN("None",                  Py_None);

此时就将相关内建对象设置到dict中,相对应的类型也就设置到了module对应的字典中。初始化完成后返回_Py_InitializeEx_Private函数继续执行,接着就执行到:

_PyImport_FixupBuiltin(bimod, "builtins");

该函数对应的代码如下,

/*
 * Register a built-in module under `name`.
 *
 * The C string is interned as a unicode object and used both as the
 * module name and as the filename argument of
 * _PyImport_FixupExtensionObject().
 *
 * Returns that call's result, or -1 if the name cannot be created.
 */
int
_PyImport_FixupBuiltin(PyObject *mod, const char *name)
{
    int status = -1;
    PyObject *interned = PyUnicode_InternFromString(name);

    if (interned != NULL) {
        status = _PyImport_FixupExtensionObject(mod, interned, interned);
        Py_DECREF(interned);
    }
    return status;
}

调用了_PyImport_FixupExtensionObject函数,将传入的mod登记到全局的extensions字典中,

/* See _PyImport_FixupExtensionObject() below.
   File-scope dict, created lazily on the first call to that function;
   it maps (filename, name) tuples to the module's PyModuleDef. */
static PyObject *extensions = NULL;

/* Magic for extension modules (built-in as well as dynamically
   loaded).  To prevent initializing an extension module more than
   once, we keep a static dictionary 'extensions' keyed by the tuple
   (module name, module name)  (for built-in modules) or by
   (filename, module name) (for dynamically loaded modules), containing these
   modules.  A copy of the module's dictionary is stored by calling
   _PyImport_FixupExtensionObject() immediately after the module initialization
   function succeeds.  A copy can be retrieved from there by calling
   _PyImport_FindExtensionObject().

   Modules which do support multiple initialization set their m_size
   field to a non-negative number (indicating the size of the
   module-specific state). They are still recorded in the extensions
   dictionary, to avoid loading shared libraries twice.

   In short: the recorded entry acts as a backup of the imported module
   so that a shared library is not loaded a second time.

   Returns 0 on success and -1 on failure.
*/
int
_PyImport_FixupExtensionObject(PyObject *mod, PyObject *name,
                               PyObject *filename)
{
    PyObject *sys_modules, *mod_dict, *ext_key;
    struct PyModuleDef *def;
    int status;

    /* Create the file-scope registry on first use. */
    if (extensions == NULL) {
        extensions = PyDict_New();
        if (extensions == NULL) {
            return -1;
        }
    }

    /* Only a real module object that carries a definition can be recorded. */
    if (mod == NULL || !PyModule_Check(mod)) {
        PyErr_BadInternalCall();
        return -1;
    }
    def = PyModule_GetDef(mod);
    if (def == NULL) {
        PyErr_BadInternalCall();
        return -1;
    }

    /* Enter the module into the interpreter's module table, then into
       the per-interpreter module state; undo the first step if the
       second one fails. */
    sys_modules = PyImport_GetModuleDict();
    if (PyDict_SetItem(sys_modules, name, mod) < 0) {
        return -1;
    }
    if (_PyState_AddModule(mod, def) < 0) {
        PyDict_DelItem(sys_modules, name);
        return -1;
    }

    /* m_size == -1 (the case for builtins): snapshot the module dict
       into the definition. */
    if (def->m_size == -1) {
        /* A previous snapshot means somebody already imported the module,
           likely under a different name.
           XXX this should really not happen.
           Py_CLEAR does nothing for a NULL slot, so no guard is needed. */
        Py_CLEAR(def->m_base.m_copy);

        mod_dict = PyModule_GetDict(mod);
        if (mod_dict == NULL) {
            return -1;
        }
        def->m_base.m_copy = PyDict_Copy(mod_dict);
        if (def->m_base.m_copy == NULL) {
            return -1;
        }
    }

    /* Record the definition under the (filename, name) key. */
    ext_key = PyTuple_Pack(2, filename, name);
    if (ext_key == NULL) {
        return -1;
    }
    status = PyDict_SetItem(extensions, ext_key, (PyObject *)def);
    Py_DECREF(ext_key);

    return (status < 0) ? -1 : 0;
}

其中,extensions作为全局静态变量存在,保存所有已导入的扩展模块的定义,从而避免已经导入的模块被二次初始化。此刻返回_Py_InitializeEx_Private函数继续执行,

interp->builtins = PyModule_GetDict(bimod);
...
/* initialize builtin exceptions */
_PyExc_Init(bimod);

获取bimod的属性字典,该字典就是_PyBuiltin_Init初始化时填入的dict,其中包括None、False、True等内建对象;将该字典的引用保存到解释器对象的builtins字段中(并不是复制一份),然后初始化内建的异常类型,查看_PyExc_Init函数,

/*
 * Initialise the built-in exception types and add them to the builtins
 * module (excerpt; "..." marks elided entries).
 *
 * First every exception type is readied with PRE_INIT, then the module's
 * dictionary is fetched and each type is inserted into it with POST_INIT.
 * Any failure ends in Py_FatalError().
 */
void
_PyExc_Init(PyObject *bltinmod)
{
    PyObject *bdict;

    PRE_INIT(BaseException)                 // ready the type if it has not been readied yet
    PRE_INIT(Exception)
    ...
    bdict = PyModule_GetDict(bltinmod);     // attribute dict of the module passed in
    if (bdict == NULL)
        Py_FatalError("exceptions bootstrapping error.");

    POST_INIT(BaseException)                // insert BaseException into bdict
    POST_INIT(Exception)
    ...
}

查看PRE_INIT和POST_INIT宏,

/* If _PyExc_<TYPE> does not yet carry Py_TPFLAGS_READY, ready it (fatal
 * error on failure) and take a reference to PyExc_<TYPE>.  Expands to a
 * complete if-statement, so call sites write no trailing semicolon. */
#define PRE_INIT(TYPE) \
if (!(_PyExc_ ## TYPE.tp_flags & Py_TPFLAGS_READY)) { \
    if (PyType_Ready(&_PyExc_ ## TYPE) < 0) \
        Py_FatalError("exceptions bootstrapping error."); \
    Py_INCREF(PyExc_ ## TYPE); \
}

/* Store PyExc_<TYPE> in the dict `bdict` under the stringified TYPE name;
 * fatal error if the insertion fails.  Requires a variable named bdict
 * in scope at the expansion site; the macro supplies its own semicolon. */
#define POST_INIT(TYPE) \
    if (PyDict_SetItemString(bdict, # TYPE, PyExc_ ## TYPE)) \
        Py_FatalError("Module dictionary insertion problem.");

由宏展开可知,PRE_INIT先检查类型是否已经初始化(未初始化则调用PyType_Ready),POST_INIT再将对应的类型添加到bdict字典中,至此内建的异常类型初始化完成。

总结

根据Python的初始化流程,至此完成了Python内建builtins模块的初始化,可用下图描述初始化过程,

builtins初始化后内存布局

主要初始化了Python内部定义的内建对象和类型(如None、False)、内建函数以及基础的异常类型等。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值