两个重要的结构体
/* Header for ordinary objects: embeds a PyObject as the member ob_base. */
#define PyObject_HEAD PyObject ob_base;
/* Header for objects with a variable part: embeds a PyVarObject as the
 * member ob_base. */
#define PyObject_VAR_HEAD PyVarObject ob_base;
// Expands to the two link pointers (next and previous) used to build a
// doubly linked list; these are the fields used when an object is placed
// on the refchain list.
#define _PyObject_HEAD_EXTRA \
struct _object *_ob_next; \
struct _object *_ob_prev;
// Fields common to every object: list links, reference count and type pointer.
typedef struct _object {
_PyObject_HEAD_EXTRA // link pointers for the doubly linked list
Py_ssize_t ob_refcnt; // reference count
struct _typeobject *ob_type; // the object's type
} PyObject;
// Common header for objects that have a variable part: a PyObject followed
// by an item count.
typedef struct {
PyObject ob_base; // embedded PyObject
Py_ssize_t ob_size; /* Number of items in variable part, i.e. the element count */
} PyVarObject;
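这两个结构体PyObject和PyVarObject是基石,它们保存着其他数据类型的公共部分,例如:每个类型的对象在创建时都有PyObject中的那4部分数据(_ob_next、_ob_prev、ob_refcnt、ob_type);list/tuple等由多个元素组成的对象(以及int)在创建时都有PyVarObject中的那5部分数据(在前4部分的基础上增加ob_size)。注意:set和dict使用的是PyObject_HEAD,并不包含ob_size。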
常见类型结构体
平时我们在创建一个对象时,本质上就是实例化一个相关类型的结构体,在内部保存值和引用计数器等。
float类型
// float object: the common object header followed by the C double it stores.
typedef struct {
PyObject_HEAD
double ob_fval; // the stored floating-point value
} PyFloatObject;
int类型
// int object: the variable-size header followed by an array of `digit`.
struct _longobject {
PyObject_VAR_HEAD
// NOTE(review): declared with a single element; presumably the real number
// of digits is carried by ob_size in the header -- confirm in longintrepr.h.
digit ob_digit[1];
};
/* Long (arbitrary precision) integer object interface */
typedef struct _longobject PyLongObject; /* Revealed in longintrepr.h */
str类型
// Base layout of a str object: the common object header, the length in code
// points, the cached hash, a packed `state` bit-field and a wchar_t pointer.
typedef struct {
PyObject_HEAD
Py_ssize_t length; /* Number of code points in the string */
Py_hash_t hash; /* Hash value; -1 if not set */
struct {
unsigned int interned:2;
/* Character size:
- PyUnicode_WCHAR_KIND (0):
* character type = wchar_t (16 or 32 bits, depending on the
platform)
- PyUnicode_1BYTE_KIND (1):
* character type = Py_UCS1 (8 bits, unsigned)
* all characters are in the range U+0000-U+00FF (latin1)
* if ascii is set, all characters are in the range U+0000-U+007F
(ASCII), otherwise at least one character is in the range
U+0080-U+00FF
- PyUnicode_2BYTE_KIND (2):
* character type = Py_UCS2 (16 bits, unsigned)
* all characters are in the range U+0000-U+FFFF (BMP)
* at least one character is in the range U+0100-U+FFFF
- PyUnicode_4BYTE_KIND (4):
* character type = Py_UCS4 (32 bits, unsigned)
* all characters are in the range U+0000-U+10FFFF
* at least one character is in the range U+10000-U+10FFFF
*/
unsigned int kind:3;
unsigned int compact:1;
unsigned int ascii:1;
unsigned int ready:1;
/* Unnamed 24-bit field: with the 8 named bits above, the bit-fields
   total 32 bits. */
unsigned int :24;
} state;
wchar_t *wstr; /* wchar_t representation (null-terminated) */
} PyASCIIObject;
// Extends PyASCIIObject (embedded first as _base) with a UTF-8
// representation and the length of the wstr representation.
typedef struct {
PyASCIIObject _base;
Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the
* terminating \0. */
char *utf8; /* UTF-8 representation (null-terminated) */
Py_ssize_t wstr_length; /* Number of code points in wstr, possible
* surrogates count as two code points. */
} PyCompactUnicodeObject;
// Extends PyCompactUnicodeObject (embedded first as _base) with a pointer to
// the character buffer; the union offers the same pointer typed for each
// character width (Py_UCS1, Py_UCS2, Py_UCS4) or as an untyped void *.
typedef struct {
PyCompactUnicodeObject _base;
union {
void *any;
Py_UCS1 *latin1;
Py_UCS2 *ucs2;
Py_UCS4 *ucs4;
} data; /* Canonical, smallest-form Unicode buffer */
} PyUnicodeObject;
list类型
// list object: the variable-size header plus a separately referenced array
// of PyObject pointers.
typedef struct {
PyObject_VAR_HEAD
// Pointer to an array of PyObject * (the elements).
PyObject **ob_item;
// NOTE(review): presumably the number of slots allocated for ob_item,
// as opposed to ob_size in the header -- confirm in listobject.h.
Py_ssize_t allocated;
} PyListObject;
tuple类型
// tuple object: the variable-size header followed by an inline array of
// PyObject pointers.
typedef struct {
PyObject_VAR_HEAD
// NOTE(review): declared with a single element; presumably the real
// element count is carried by ob_size in the header -- confirm.
PyObject *ob_item[1];
} PyTupleObject;
dict类型
// dict object: uses the plain PyObject_HEAD (no ob_size) plus its own fields.
typedef struct {
PyObject_HEAD
// NOTE(review): presumably the number of items stored in the dict -- confirm.
Py_ssize_t ma_used;
// Pointer to a PyDictKeysObject (defined elsewhere).
PyDictKeysObject *ma_keys;
// Pointer to an array of PyObject * values.
PyObject **ma_values;
} PyDictObject;
通过常见结构体可以基本了解到本质上每个对象内部会存储的数据。
扩展:在结构体部分你应该发现了str类型比较繁琐,那是因为python字符串在处理时需要考虑到编码的问题,在内部规定(见源码结构体):
字符串中所有字符都在latin1范围内(U+0000-U+00FF,ascii是其子集),则每个字符用1个字节表示,即:latin1
字符串包含中文等,则每个字符用2个字节表示,即:ucs2
字符串包含emoji等,则每个字符用4个字节表示,即:ucs4
Float类型
创建
val = 3.14
类似于这样创建一个float对象时,会执行C源码中的如下代码:
// Objects/floatobject.c
// Head of the linked list used to cache float objects for reuse.
static PyFloatObject *free_list = NULL;
// Number of float objects currently held in free_list.
static int numfree = 0;
/* Create a float object holding fval.
 *
 * Reuses a cached object from free_list when one is available; otherwise
 * allocates a new PyFloatObject. Returns the new object as a PyObject *,
 * or the result of PyErr_NoMemory() when the allocation fails.
 */
PyObject *
PyFloat_FromDouble(double fval)
{
    PyFloatObject *op;

    if (free_list == NULL) {
        /* Cache is empty: allocate room for one float object. */
        op = (PyFloatObject*) PyObject_MALLOC(sizeof(PyFloatObject));
        if (op == NULL) {
            return PyErr_NoMemory();
        }
    }
    else {
        /* Pop the head of the cache. A cached object's type slot holds the
           link to the next cached object, so it becomes the new head. */
        op = free_list;
        free_list = (PyFloatObject *) Py_TYPE(op);
        numfree--;
    }

    /* Inline PyObject_New: set the type and register the new reference
       (reference count set to 1, object added to refchain). */
    (void)PyObject_INIT(op, &PyFloat_Type);

    /* Store the value, e.g. op->ob_fval = 3.14. */
    op->ob_fval = fval;
    return (PyObject *) op;
}
// Include/objimpl.h
// Initializes an object in place: stores typeobj as the type of op, passes
// op to _Py_NewReference, and evaluates to op.
#define PyObject_INIT(op, typeobj) \
( Py_TYPE(op) = (typeobj), _Py_NewReference((PyObject *)(op)), (op) )
// Objects/object.c
// Sentinel node of a circular doubly linked list that tracks all objects.
// Its first two members (the next/prev links) initially point at itself,
// i.e. the list starts out empty.
static PyObject refchain = {&refchain, &refchain};
/* Insert op at the front of the refchain list (right after the sentinel).
 *
 * The insertion happens when force is non-zero, or when op's _ob_prev link
 * is NULL; otherwise the call does nothing.
 */
void
_Py_AddToAllObjects(PyObject *op, int force)
{
    if (!force && op->_ob_prev != NULL) {
        return;
    }

    PyObject *old_first = refchain._ob_next;

    op->_ob_next = old_first;
    op->_ob_prev = &refchain;
    old_first->_ob_prev = op;
    refchain._ob_next = op;
}
/* Register a newly created object: set its reference count to 1 and add it
 * to the refchain list. */
void
_Py_NewReference(PyObject *op)
{
_Py_INC_REFTOTAL;
// Initialize the reference count to 1.
op->ob_refcnt = 1;
// Add the object to the refchain doubly linked list (force = 1).
_Py_AddToAllObjects(op, 1);
_Py_INC_TPALLOCS(op);
}
引用
val = 3.14
data = val
在项目中如果出现这种引用关系,会将原对象的引用计数器+1。
C源码执行流程如下:
// Include/object.h
/* Increment the reference count of op by one. */
static inline void _Py_INCREF(PyObject *op)
{
_Py_INC_REFTOTAL;
// Reference count of the object + 1.
op->ob_refcnt++;
}
/* Public entry point: casts its argument with _PyObject_CAST and calls
 * _Py_INCREF. */
#define Py_INCREF(op) _Py_INCREF(_PyObject_CAST(op))
销毁
val = 3.14
del val
在项目中如果出现这种删除的语句,则内部会将引用计数器-1,如果引用计数器减为0,则进行缓存或垃圾回收。
C源码执行流程如下:
// Include/object.h
/* Decrement the reference count of op by one.
 *
 * When the count reaches zero, op is handed to _Py_Dealloc (which caches or
 * frees it). When Py_REF_DEBUG is defined, a count that has gone negative is
 * reported through _Py_NegativeRefcount together with filename and lineno.
 */
static inline void _Py_DECREF(const char *filename, int lineno,
                              PyObject *op)
{
    (void)filename; /* may be unused, shut up -Wunused-parameter */
    (void)lineno; /* may be unused, shut up -Wunused-parameter */
    _Py_DEC_REFTOTAL;

    op->ob_refcnt -= 1;
    if (op->ob_refcnt == 0) {
        /* Last reference dropped: deallocate (cache or free). */
        _Py_Dealloc(op);
        return;
    }
#ifdef Py_REF_DEBUG
    if (op->ob_refcnt < 0) {
        _Py_NegativeRefcount(filename, lineno, op);
    }
#endif
}
/* Public entry point: supplies the call site's file and line, casts its
 * argument with _PyObject_CAST and calls _Py_DECREF. */
#define Py_DECREF(op) _Py_DECREF(__FILE__, __LINE__, _PyObject_CAST(op))
// Objects/object.c
/* Deallocate op: unlink it from refchain, then call its type's tp_dealloc. */
void
_Py_Dealloc(PyObject *op)
{
// Look up the tp_dealloc function of the object's type (float_dealloc for
// a float). It is read before the object is unlinked below.
destructor dealloc = Py_TYPE(op)->tp_dealloc;
// Remove the object from the refchain doubly linked list.
_Py_ForgetReference(op);
// Call the type's tp_dealloc, which caches or frees the object.
(*dealloc)(op);
}
/* Unlink op from the refchain list (part of the body is elided in this
 * excerpt). */
void
_Py_ForgetReference(PyObject *op)
{
...
// Remove the object from the refchain list: make its neighbours point at
// each other, then clear the object's own links.
op->_ob_next->_ob_prev = op->_ob_prev;
op->_ob_prev->_ob_next = op->_ob_next;
op->_ob_next = op->_ob_prev = NULL;
_Py_INC_TPFREES(op);
}
// Objects/floatobject.c
// Upper bound on the number of float objects kept in free_list.
#define PyFloat_MAXFREELIST 100
// Number of float objects currently held in free_list.
static int numfree = 0;
// Head of the linked list of cached float objects.
static PyFloatObject *free_list = NULL;
// Type object for float: maps type slots to the functions that implement
// them (the remaining slots are elided in this excerpt).
PyTypeObject PyFloat_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
"float",
sizeof(PyFloatObject),
0,
// The tp_dealloc slot points at float_dealloc.
(destructor)float_dealloc, /* tp_dealloc */
0, /* tp_print */
...
};
/* Deallocator for float objects.
 *
 * Objects that are not exactly float are released through their type's
 * tp_free. Exact floats are pushed onto free_list for reuse, unless the
 * cache already holds PyFloat_MAXFREELIST objects, in which case the memory
 * is freed.
 */
static void
float_dealloc(PyFloatObject *op)
{
    if (!PyFloat_CheckExact(op)) {
        /* Not exactly a float: delegate to the type's tp_free. */
        Py_TYPE(op)->tp_free((PyObject *)op);
        return;
    }

    if (numfree >= PyFloat_MAXFREELIST) {
        /* Cache is full: free the object. */
        PyObject_FREE(op);
        return;
    }

    /* Push onto the cache; the type slot is reused as the "next" link. */
    numfree++;
    Py_TYPE(op) = (struct _typeobject *)free_list;
    free_list = op;
}