python整型数据源码分析_大师兄的Python源码学习笔记(三）: 整数对象

最新推荐文章于 2024-03-30 14:45:26 发布

徐聪瓜要努力

最新推荐文章于 2024-03-30 14:45:26 发布

阅读量436

点赞数

文章标签： python整型数据源码分析

本文链接：https://blog.csdn.net/weixin_30716611/article/details/114437155

版权

本文详细介绍了Python3中整数对象PyLongObject的内部实现，包括其不可变特性、结构定义、创建过程、小整数对象优化及数值操作等。通过源码分析，揭示了整数对象的内存管理和运算机制。

摘要由CSDN通过智能技术生成

一、关于整数对象

整数对象是Python中最简单的对象。

Python2中包含PyIntObject和PyLongObject两种类型。

而Python3取消了PyIntObject，仅保留PyLongObject。

以下是代码的头两行，可以感受作者在修改过程中的艰辛:

/* Long (arbitrary precision) integer object implementation */

/* XXX The functional organization of this file is terrible */

1.1 PyLongObject

除了定长对象和变长对象，根据对象维护数据的可变性，可将对象分为可变对象(mutable)和不可变对象(immutable)。

PyLongObject就是一个不可变对象。

Include/longobject.h

/* Long (arbitrary precision) integer object interface */

typedef struct _longobject PyLongObject; /* Revealed in longintrepr.h */

Include/longintrepr.h

/* Long integer representation.

The absolute value of a number is equal to

SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)

Negative numbers are represented with ob_size < 0;

zero is represented by ob_size == 0.

In a normalized number, ob_digit[abs(ob_size)-1] (the most significant

digit) is never zero. Also, in all cases, for all valid i,

0 <= ob_digit[i] <= MASK.

The allocation function takes care of allocating extra memory

so that ob_digit[0] ... ob_digit[abs(ob_size)-1] are actually available.

CAUTION: Generic code manipulating subtypes of PyVarObject has to

aware that ints abuse ob_size's sign bit.

struct _longobject {

PyObject_VAR_HEAD

digit ob_digit[1];

};

Python3中的整数对象并不是对C中原生类型long的简单包装(那是Python2中PyIntObject的做法)，而是用变长的digit数组ob_digit保存任意精度整数的绝对值，并用ob_size的符号表示正负。

Objects/longobject.c

/* Type object for Python's built-in "int".
   This is a positional PyTypeObject initializer: the trailing comment on each
   entry names the slot that the value fills, and an entry of 0 leaves that
   slot unset in this table.  The per-item size is sizeof(digit) and the basic
   size is the offset of ob_digit, so instances are sized by their digit count. */
PyTypeObject PyLong_Type = {

PyVarObject_HEAD_INIT(&PyType_Type, 0)

"int", /* tp_name */

offsetof(PyLongObject, ob_digit), /* tp_basicsize */

sizeof(digit), /* tp_itemsize */

long_dealloc, /* tp_dealloc */

0, /* tp_print */

0, /* tp_getattr */

0, /* tp_setattr */

0, /* tp_reserved */

long_to_decimal_string, /* tp_repr */

&long_as_number, /* tp_as_number */

0, /* tp_as_sequence */

0, /* tp_as_mapping */

(hashfunc)long_hash, /* tp_hash */

0, /* tp_call */

long_to_decimal_string, /* tp_str */

PyObject_GenericGetAttr, /* tp_getattro */

0, /* tp_setattro */

0, /* tp_as_buffer */

Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |

Py_TPFLAGS_LONG_SUBCLASS, /* tp_flags */

long_doc, /* tp_doc */

0, /* tp_traverse */

0, /* tp_clear */

long_richcompare, /* tp_richcompare */

0, /* tp_weaklistoffset */

0, /* tp_iter */

0, /* tp_iternext */

long_methods, /* tp_methods */

0, /* tp_members */

long_getset, /* tp_getset */

0, /* tp_base */

0, /* tp_dict */

0, /* tp_descr_get */

0, /* tp_descr_set */

0, /* tp_dictoffset */

0, /* tp_init */

0, /* tp_alloc */

long_new, /* tp_new */

PyObject_Del, /* tp_free */

};

PyLong_Type保存了PyLongObject相关的丰富元信息，其中：

long_dealloc # PyLongObject对象的析构操作

PyObject_Del # 释放操作

long_to_decimal_string # 转换为十进制字符串对象(Python3中为str，即PyUnicodeObject)，同时用于tp_repr和tp_str

(hashfunc)long_hash # 获得哈希值

long_richcompare # 比较操作

&long_as_number # 数值操作集合

long_methods # 成员函数集合

long_new # 创建整数对象

以整数比大小为例，可以看出实际上是先比较两者的ob_size(同时包含符号和位数信息)，ob_size相同时再从最高位开始逐个比较ob_digit:

/* Three-way comparison of two int objects.
 *
 * Returns -1, 0 or 1 when a is less than, equal to, or greater than b.
 * ob_size carries both the sign and the digit count, so differing sizes
 * decide the result immediately; otherwise the digits are scanned from the
 * most significant one down to the first difference.
 */
static int
long_compare(PyLongObject *a, PyLongObject *b)
{
    Py_ssize_t sign;

    if (Py_SIZE(a) != Py_SIZE(b)) {
        sign = Py_SIZE(a) - Py_SIZE(b);
    }
    else {
        Py_ssize_t i = Py_ABS(Py_SIZE(a));

        /* Skip the leading digits that are equal in both operands. */
        while (--i >= 0 && a->ob_digit[i] == b->ob_digit[i])
            ;
        if (i < 0)
            sign = 0;           /* every digit matched */
        else {
            sign = (sdigit)a->ob_digit[i] - (sdigit)b->ob_digit[i];
            /* For negative operands a larger magnitude means a smaller value. */
            if (Py_SIZE(a) < 0)
                sign = -sign;
        }
    }
    return sign < 0 ? -1 : sign > 0 ? 1 : 0;
}

/* Rich-comparison slot for int.
 *
 * CHECK_BINOP runs first on both operands.  An object compared with itself
 * is treated as equal without looking at its digits; otherwise the three-way
 * result of long_compare() is handed to Py_RETURN_RICHCOMPARE together with
 * the requested operator `op`.
 */
static PyObject *
long_richcompare(PyObject *self, PyObject *other, int op)
{
    int cmp = 0;

    CHECK_BINOP(self, other);
    if (self != other) {
        cmp = long_compare((PyLongObject *)self, (PyLongObject *)other);
    }
    Py_RETURN_RICHCOMPARE(cmp, 0, op);
}

二、创建整数对象

在PyLong_Type中，创建整数对象的入口函数为long_new:

Objects/clinic/longobject.c.h

static PyObject *
long_new_impl(PyTypeObject *type, PyObject *x, PyObject *obase);

/* tp_new entry point for int.
 *
 * Parses up to two optional arguments (format "|OO:int"): a positional-only
 * first argument and a second one that may be passed as the keyword "base".
 * Returns NULL when parsing fails; otherwise forwards to long_new_impl()
 * and returns its result.
 */
static PyObject *
long_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
    static const char * const _keywords[] = {"", "base", NULL};
    static _PyArg_Parser _parser = {"|OO:int", _keywords, 0};
    PyObject *x = NULL;
    PyObject *obase = NULL;

    if (!_PyArg_ParseTupleAndKeywordsFast(args, kwargs, &_parser,
                                          &x, &obase)) {
        return NULL;
    }
    return long_new_impl(type, x, obase);
}

Objects/longobject.c

/* Implementation of int(x, base).
 *
 *   - type is a subclass of int:   delegate to long_subtype_new().
 *   - x == NULL, obase == NULL:    return PyLong_FromLong(0L).
 *   - x == NULL, obase != NULL:    TypeError.
 *   - obase == NULL:               return PyNumber_Long(x).
 *   - otherwise base must be 0 or within 2..36 (ValueError if not), and x
 *     must be a str, bytes or bytearray (TypeError if not).
 *
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *
long_new_impl(PyTypeObject *type, PyObject *x, PyObject *obase)
/*[clinic end generated code: output=e47cfe777ab0f24c input=81c98f418af9eb6f]*/
{
    Py_ssize_t base;

    if (type != &PyLong_Type)
        return long_subtype_new(type, x, obase); /* Wimp out */
    if (x == NULL) {
        if (obase != NULL) {
            PyErr_SetString(PyExc_TypeError,
                            "int() missing string argument");
            return NULL;
        }
        return PyLong_FromLong(0L);
    }
    if (obase == NULL)
        return PyNumber_Long(x);

    base = PyNumber_AsSsize_t(obase, NULL);
    /* -1 is also a possible converted value, so only treat it as a
       failure when an exception is actually pending. */
    if (base == -1 && PyErr_Occurred())
        return NULL;
    if ((base != 0 && base < 2) || base > 36) {
        PyErr_SetString(PyExc_ValueError,
                        "int() base must be >= 2 and <= 36, or 0");
        return NULL;
    }

    if (PyUnicode_Check(x))
        return PyLong_FromUnicodeObject(x, (int)base);
    else if (PyByteArray_Check(x) || PyBytes_Check(x)) {
        char *string;
        if (PyByteArray_Check(x))
            string = PyByteArray_AS_STRING(x);
        else
            string = PyBytes_AS_STRING(x);
        return _PyLong_FromBytes(string, Py_SIZE(x), (int)base);
    }
    else {
        PyErr_SetString(PyExc_TypeError,
                        "int() can't convert non-string with explicit base");
        return NULL;
    }
}

从以上代码可以看出有如下几种情况

如果x(对象)为NULL：当obase(底数) != NULL 时抛出TypeError；当obase也为NULL时调用 PyLong_FromLong(0L)，返回0

如果obase(底数) 为 NULL 调用 PyNumber_Long

如果x(对象) 和 obase(底数) 都!= NULL

当为PyUnicode时调用 PyLong_FromUnicodeObject，最终调用 PyLong_FromString

当为PyByteArray/PyBytes时调用_PyLong_FromBytes，最终调用 PyLong_FromString

三、小整数对象

在实际编程中，比较小的整数会非常频繁的使用，比如遍历。

由于在Python中，所有对象基于系统堆，如果不停地在堆上申请空间和free,会大大降低系统运行效率。

为此，Python对小整数使用了对象池，直接将小整数对象放到缓存中。

小整数的范围是[-5, 257)，即-5 至 256(包含-5，不包含257)。

Objects/longobject.c

#ifndef NSMALLPOSINTS

#define NSMALLPOSINTS 257

#endif

#ifndef NSMALLNEGINTS

#define NSMALLNEGINTS 5

#endif

#if NSMALLNEGINTS + NSMALLPOSINTS > 0

/* Small integers are preallocated in this array so that they

can be shared.

The integers that are preallocated are those in the range

-NSMALLNEGINTS (inclusive) to NSMALLPOSINTS (not inclusive).

static PyLongObject small_ints[NSMALLNEGINTS + NSMALLPOSINTS];

#ifdef COUNT_ALLOCS

Py_ssize_t quick_int_allocs, quick_neg_int_allocs;

#endif

/* Return a new reference to the preallocated int object for `ival`.
 *
 * `ival` must satisfy -NSMALLNEGINTS <= ival < NSMALLPOSINTS (asserted).
 * The value is looked up at index ival + NSMALLNEGINTS in small_ints.
 * When COUNT_ALLOCS is defined, the matching hit counter is bumped.
 */
static PyObject *
get_small_int(sdigit ival)
{
    PyObject *cached;

    assert(-NSMALLNEGINTS <= ival && ival < NSMALLPOSINTS);
    cached = (PyObject *)(small_ints + (ival + NSMALLNEGINTS));
    Py_INCREF(cached);
#ifdef COUNT_ALLOCS
    if (ival < 0)
        quick_neg_int_allocs++;
    else
        quick_int_allocs++;
#endif
    return cached;
}

#define CHECK_SMALL_INT(ival) \

do if (-NSMALLNEGINTS <= ival && ival < NSMALLPOSINTS) { \

return get_small_int((sdigit)ival); \

} while(0)

宏 CHECK_SMALL_INT 会检查传入的数是否在小整数范围内，如果是直接返回。

小整数初始化过程:

Objects/longobject.c

/* Initialize the int machinery.
 *
 * Fills the small_ints cache (when one is compiled in), creates the shared
 * _PyLong_Zero and _PyLong_One objects, and initializes Int_InfoType if its
 * tp_name is still NULL.
 *
 * Returns 1 on success and 0 on failure.
 */
int
_PyLong_Init(void)
{
#if NSMALLNEGINTS + NSMALLPOSINTS > 0
    int ival, size;
    PyLongObject *v = small_ints;

    for (ival = -NSMALLNEGINTS; ival <  NSMALLPOSINTS; ival++, v++) {
        /* ob_size encodes the sign: -1, 0 or 1 for a single-digit value. */
        size = (ival < 0) ? -1 : ((ival == 0) ? 0 : 1);
        if (Py_TYPE(v) == &PyLong_Type) {
            /* The element is already initialized, most likely
             * the Python interpreter was initialized before.
             */
            Py_ssize_t refcnt;
            PyObject* op = (PyObject*)v;

            refcnt = Py_REFCNT(op) < 0 ? 0 : Py_REFCNT(op);
            _Py_NewReference(op);
            /* _Py_NewReference sets the ref count to 1 but
             * the ref count might be larger. Set the refcnt
             * to the original refcnt + 1 */
            Py_REFCNT(op) = refcnt + 1;
            assert(Py_SIZE(op) == size);
            assert(v->ob_digit[0] == (digit)abs(ival));
        }
        else {
            (void)PyObject_INIT(v, &PyLong_Type);
        }
        Py_SIZE(v) = size;
        v->ob_digit[0] = (digit)abs(ival);
    }
#endif
    _PyLong_Zero = PyLong_FromLong(0);
    if (_PyLong_Zero == NULL)
        return 0;
    _PyLong_One = PyLong_FromLong(1);
    if (_PyLong_One == NULL)
        return 0;

    /* initialize int_info */
    if (Int_InfoType.tp_name == NULL) {
        if (PyStructSequence_InitType2(&Int_InfoType, &int_info_desc) < 0)
            return 0;
    }

    return 1;
}

四、整数的存储结构

为了更方便的理解整数的存储结构，可以修改long_to_decimal_string_internal函数:

Objects/longobject.c

/* Excerpt of long_to_decimal_string_internal() with a temporary debug dump
 * added at the top so the internal layout of an int can be observed whenever
 * it is converted to its decimal string.  Only the start of the function is
 * shown here.
 */
static int
long_to_decimal_string_internal(PyObject *aa,
                                PyObject **p_output,
                                _PyUnicodeWriter *writer,
                                _PyBytesWriter *bytes_writer,
                                char **bytes_str)
{
    PyLongObject *scratch, *a;
    PyObject *str = NULL;
    Py_ssize_t size, strlen, size_a, i, j;
    digit *pout, *pin, rem, tenpow;
    int negative;
    int d;
    enum PyUnicode_Kind kind;

    a = (PyLongObject *)aa;

    /* Print ob_size and every ob_digit.
       Py_SIZE() yields a Py_ssize_t, so it is printed with %zd rather than
       %d.  The digit count is the absolute value of ob_size because
       negative ints store a negative ob_size; looping up to Py_SIZE(a)
       would print no digits for them. */
    printf("ob_size = %zd\n", Py_SIZE(a));
    for (Py_ssize_t index = 0; index < Py_ABS(Py_SIZE(a)); ++index) {
        /* digit is an unsigned type; widen it explicitly to match %lu. */
        printf("ob_digit[%zd] = %lu\n", index,
               (unsigned long)a->ob_digit[index]);
    }

    /* ... (the remainder of the original function is unchanged) ... */
}

重新编译后，打印:

# $29248 \cdot (2^{15})^0 + 32029 \cdot (2^{15})^1 + 114 \cdot (2^{15})^2 = 123456123456$

>>> print(123456123456)

ob_size = 3

ob_digit[0] = 29248

ob_digit[1] = 32029

ob_digit[2] = 114

123456123456

其中的指数15由PyLong_SHIFT决定：上面输出的每个ob_digit都小于2^15，说明该编译环境的PyLong_SHIFT为15。通常64 位系统中，PyLong_SHIFT 为 30，32位为15。

五、整数对象的数值操作

整数对象的数值操作定义在PyNumberMethods结构体long_as_number中：

Objects/longobject.c

/* Number-protocol slot table for int, referenced from PyLong_Type through
   tp_as_number.  This is a positional initializer: the trailing comment on
   each entry names the PyNumberMethods slot it fills.  All in-place slots
   are 0 in this table. */
static PyNumberMethods long_as_number = {

(binaryfunc)long_add, /*nb_add*/

(binaryfunc)long_sub, /*nb_subtract*/

(binaryfunc)long_mul, /*nb_multiply*/

long_mod, /*nb_remainder*/

long_divmod, /*nb_divmod*/

long_pow, /*nb_power*/

(unaryfunc)long_neg, /*nb_negative*/

(unaryfunc)long_long, /*nb_positive*/

(unaryfunc)long_abs, /*nb_absolute*/

(inquiry)long_bool, /*nb_bool*/

(unaryfunc)long_invert, /*nb_invert*/

long_lshift, /*nb_lshift*/

(binaryfunc)long_rshift, /*nb_rshift*/

long_and, /*nb_and*/

long_xor, /*nb_xor*/

long_or, /*nb_or*/

long_long, /*nb_int*/

0, /*nb_reserved*/

long_float, /*nb_float*/

0, /* nb_inplace_add */

0, /* nb_inplace_subtract */

0, /* nb_inplace_multiply */

0, /* nb_inplace_remainder */

0, /* nb_inplace_power */

0, /* nb_inplace_lshift */

0, /* nb_inplace_rshift */

0, /* nb_inplace_and */

0, /* nb_inplace_xor */

0, /* nb_inplace_or */

long_div, /* nb_floor_divide */

long_true_divide, /* nb_true_divide */

0, /* nb_inplace_floor_divide */

0, /* nb_inplace_true_divide */

long_long, /* nb_index */

};

1. 整数相加

Objects/longobject.c

/* nb_add slot: return a + b as a new reference, or NULL on failure.
 *
 * When both operands have at most one digit, the sum is computed directly
 * from their MEDIUM_VALUEs.  Otherwise the call is dispatched on the signs
 * of a and b to x_add() or x_sub():
 *
 *     a < 0, b < 0   ->  -(x_add(a, b))
 *     a < 0, b >= 0  ->  x_sub(b, a)
 *     a >= 0, b < 0  ->  x_sub(a, b)
 *     a >= 0, b >= 0 ->  x_add(a, b)
 */
static PyObject *
long_add(PyLongObject *a, PyLongObject *b)
{
    PyLongObject *z;

    CHECK_BINOP(a, b);

    if (Py_ABS(Py_SIZE(a)) <= 1 && Py_ABS(Py_SIZE(b)) <= 1) {
        return PyLong_FromLong(MEDIUM_VALUE(a) + MEDIUM_VALUE(b));
    }
    if (Py_SIZE(a) < 0) {
        if (Py_SIZE(b) < 0) {
            z = x_add(a, b);
            if (z != NULL) {
                /* x_add received at least one multiple-digit int,
                   and thus z must be a multiple-digit int.
                   That also means z is not an element of
                   small_ints, so negating it in-place is safe. */
                assert(Py_REFCNT(z) == 1);
                Py_SIZE(z) = -(Py_SIZE(z));
            }
        }
        else
            z = x_sub(b, a);
    }
    else {
        if (Py_SIZE(b) < 0)
            z = x_sub(a, b);
        else
            z = x_add(a, b);
    }
    return (PyObject *)z;
}

2. 整数相乘

Objects/longobject.c

/* nb_multiply slot: return a * b as a new reference, or NULL on failure.
 *
 * Operands of at most one digit each are multiplied directly in a
 * stwodigits.  Everything else goes through k_mul(), after which the result
 * is negated when exactly one of the inputs is negative.
 */
static PyObject *
long_mul(PyLongObject *a, PyLongObject *b)
{
    PyLongObject *product;

    CHECK_BINOP(a, b);

    /* Single-digit operands: multiply without building intermediates. */
    if (Py_ABS(Py_SIZE(a)) <= 1 && Py_ABS(Py_SIZE(b)) <= 1) {
        stwodigits v = (stwodigits)(MEDIUM_VALUE(a)) * MEDIUM_VALUE(b);
        return PyLong_FromLongLong((long long)v);
    }

    product = k_mul(a, b);
    if (product == NULL) {
        return NULL;
    }

    /* The XOR of the two sizes is negative exactly when the signs differ. */
    if ((Py_SIZE(a) ^ Py_SIZE(b)) < 0) {
        /* _PyLong_Negate may leave NULL in `product`; that NULL is then
           returned to the caller as the failure indicator. */
        _PyLong_Negate(&product);
    }
    return (PyObject *)product;
}

3. 整数求幂

Objects/longobject.c

/* pow(v, w, x)
 *
 * nb_power slot for int.  Computes v**w, reduced modulo x when x is an int;
 * x == Py_None means "no modulus".  Any other x yields NotImplemented.
 *
 *   - negative exponent with a modulus:    ValueError
 *   - negative exponent without a modulus: delegated to float's nb_power
 *   - modulus == 0:                        ValueError
 *   - modulus < 0:  the computation uses -modulus and the modulus is
 *                   subtracted from a non-zero result at the end
 *
 * Exponents of at most FIVEARY_CUTOFF digits use bit-by-bit left-to-right
 * exponentiation; larger ones use a 32-entry table and consume five exponent
 * bits at a time.
 *
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *
long_pow(PyObject *v, PyObject *w, PyObject *x)
{
    PyLongObject *a, *b, *c; /* a,b,c = v,w,x */
    int negativeOutput = 0;  /* if x<0 return negative output */

    PyLongObject *z = NULL;  /* accumulated result */
    Py_ssize_t i, j, k;      /* counters */
    PyLongObject *temp = NULL;

    /* 5-ary values.  If the exponent is large enough, table is
     * precomputed so that table[i] == a**i % c for i in range(32).
     */
    PyLongObject *table[32] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                               0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};

    /* a, b, c = v, w, x */
    CHECK_BINOP(v, w);
    a = (PyLongObject*)v; Py_INCREF(a);
    b = (PyLongObject*)w; Py_INCREF(b);
    if (PyLong_Check(x)) {
        c = (PyLongObject *)x;
        Py_INCREF(x);
    }
    else if (x == Py_None)
        c = NULL;
    else {
        Py_DECREF(a);
        Py_DECREF(b);
        Py_RETURN_NOTIMPLEMENTED;
    }

    if (Py_SIZE(b) < 0) {  /* if exponent is negative */
        if (c) {
            PyErr_SetString(PyExc_ValueError, "pow() 2nd argument "
                            "cannot be negative when 3rd argument specified");
            goto Error;
        }
        else {
            /* else return a float.  This works because we know
               that this calls float_pow() which converts its
               arguments to double. */
            Py_DECREF(a);
            Py_DECREF(b);
            return PyFloat_Type.tp_as_number->nb_power(v, w, x);
        }
    }

    if (c) {
        /* if modulus == 0:
               raise ValueError() */
        if (Py_SIZE(c) == 0) {
            PyErr_SetString(PyExc_ValueError,
                            "pow() 3rd argument cannot be 0");
            goto Error;
        }

        /* if modulus < 0:
               negativeOutput = True
               modulus = -modulus */
        if (Py_SIZE(c) < 0) {
            negativeOutput = 1;
            temp = (PyLongObject *)_PyLong_Copy(c);
            if (temp == NULL)
                goto Error;
            Py_DECREF(c);
            c = temp;
            temp = NULL;
            _PyLong_Negate(&c);
            if (c == NULL)
                goto Error;
        }

        /* if modulus == 1:
               return 0 */
        if ((Py_SIZE(c) == 1) && (c->ob_digit[0] == 1)) {
            z = (PyLongObject *)PyLong_FromLong(0L);
            goto Done;
        }

        /* Reduce base by modulus in some cases:
           1. If base < 0.  Forcing the base non-negative makes things easier.
           2. If base is obviously larger than the modulus.  The "small
              exponent" case later can multiply directly by base repeatedly,
              while the "large exponent" case multiplies directly by base 31
              times.  It can be unboundedly faster to multiply by
              base % modulus instead.
           We could _always_ do this reduction, but l_divmod() isn't cheap,
           so we only do it when it buys something. */
        if (Py_SIZE(a) < 0 || Py_SIZE(a) > Py_SIZE(c)) {
            if (l_divmod(a, c, NULL, &temp) < 0)
                goto Error;
            Py_DECREF(a);
            a = temp;
            temp = NULL;
        }
    }

    /* At this point a, b, and c are guaranteed non-negative UNLESS
       c is NULL, in which case a may be negative. */

    z = (PyLongObject *)PyLong_FromLong(1L);
    if (z == NULL)
        goto Error;

    /* Perform a modular reduction, X = X % c, but leave X alone if c
     * is NULL.
     */
#define REDUCE(X)                                       \
    do {                                                \
        if (c != NULL) {                                \
            if (l_divmod(X, c, NULL, &temp) < 0)        \
                goto Error;                             \
            Py_XDECREF(X);                              \
            X = temp;                                   \
            temp = NULL;                                \
        }                                               \
    } while(0)

    /* Multiply two values, then reduce the result:
       result = X*Y % c.  If c is NULL, skip the mod. */
#define MULT(X, Y, result)                      \
    do {                                        \
        temp = (PyLongObject *)long_mul(X, Y);  \
        if (temp == NULL)                       \
            goto Error;                         \
        Py_XDECREF(result);                     \
        result = temp;                          \
        temp = NULL;                            \
        REDUCE(result);                         \
    } while(0)

    if (Py_SIZE(b) <= FIVEARY_CUTOFF) {
        /* Left-to-right binary exponentiation (HAC Algorithm 14.79) */
        /* http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf    */
        for (i = Py_SIZE(b) - 1; i >= 0; --i) {
            digit bi = b->ob_digit[i];

            /* Walk the bits of this digit from most to least significant. */
            for (j = (digit)1 << (PyLong_SHIFT-1); j != 0; j >>= 1) {
                MULT(z, z, z);
                if (bi & j)
                    MULT(z, a, z);
            }
        }
    }
    else {
        /* Left-to-right 5-ary exponentiation (HAC Algorithm 14.82) */
        Py_INCREF(z);           /* still holds 1L */
        table[0] = z;
        for (i = 1; i < 32; ++i)
            MULT(table[i-1], a, table[i]);

        for (i = Py_SIZE(b) - 1; i >= 0; --i) {
            const digit bi = b->ob_digit[i];

            /* Consume the digit five bits at a time, high bits first. */
            for (j = PyLong_SHIFT - 5; j >= 0; j -= 5) {
                const int index = (bi >> j) & 0x1f;
                for (k = 0; k < 5; ++k)
                    MULT(z, z, z);
                if (index)
                    MULT(z, table[index], z);
            }
        }
    }

    /* The modulus was negated above; shift a non-zero result back so that
       it is reported relative to the caller's negative modulus. */
    if (negativeOutput && (Py_SIZE(z) != 0)) {
        temp = (PyLongObject *)long_sub(z, c);
        if (temp == NULL)
            goto Error;
        Py_DECREF(z);
        z = temp;
        temp = NULL;
    }
    goto Done;

  Error:
    Py_CLEAR(z);
    /* fall through */
  Done:
    /* The table is only populated on the 5-ary path. */
    if (Py_SIZE(b) > FIVEARY_CUTOFF) {
        for (i = 0; i < 32; ++i)
            Py_XDECREF(table[i]);
    }
    Py_DECREF(a);
    Py_DECREF(b);
    Py_XDECREF(c);
    Py_XDECREF(temp);
    return (PyObject *)z;
}

徐聪瓜要努力

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫