python字符串计算加法_深入探究Python:字符串加法

从 Python 3 开始,字符串统一使用 Unicode 编码。

1.字符串加法

#源码

a="dddddddddd"

b="sssssssssss"

a = a+b

#字节码

0 LOAD_NAME 0 (a)

2 LOAD_NAME 1 (b)

4 BINARY_ADD

6 STORE_NAME 0 (a)

8 LOAD_CONST 0 (None)

10 RETURN_VALUE

【BINARY_ADD】#源码有删减

PyObject *right = POP();#出栈right

PyObject *left = TOP();#指向栈顶left

PyObject *sum;#新对象sum

#针对字符串加法运算的优化

if (PyUnicode_CheckExact(left) &&PyUnicode_CheckExact(right)) {

sum = unicode_concatenate(tstate, left, right, f, next_instr);

}

else {

'''

}

SET_TOP(sum);#重新设置栈顶

//源码有删减

/*
 * Concatenate two exact str objects on behalf of BINARY_ADD.
 *
 * v, w       left and right operands; the reference to v is consumed.
 * f          the executing frame (used to reach locals / cells / names).
 * next_instr pointer to the instruction following the add; NEXTOPARG()
 *            decodes it into opcode/oparg.
 *
 * Returns the concatenated string, or NULL with an exception set.
 *
 * The STORE_FAST and STORE_DEREF bodies were elided in the original excerpt
 * and are restored from CPython 3.8 Python/ceval.c.
 */
static PyObject *
unicode_concatenate(PyThreadState *tstate, PyObject *v, PyObject *w,
                    PyFrameObject *f, const _Py_CODEUNIT *next_instr)
{
    PyObject *res;
    if (Py_REFCNT(v) == 2) {
        /* Per the CPython source: in the common `a = a + b` case there are
         * two references to v, one on the value stack and one still held by
         * the variable about to be overwritten.  Dropping the variable's
         * reference now brings the count to 1 so PyUnicode_Append can resize
         * v in place instead of copying it. */
        int opcode, oparg;   /* next instruction and its argument */
        NEXTOPARG();         /* peek at the instruction that follows the add */
        switch (opcode) {
        case STORE_FAST:
        {
            PyObject **fastlocals = f->f_localsplus;
            if (GETLOCAL(oparg) == v)
                SETLOCAL(oparg, NULL);
            break;
        }
        case STORE_DEREF:
        {
            PyObject **freevars = (f->f_localsplus +
                                   f->f_code->co_nlocals);
            PyObject *c = freevars[oparg];
            if (PyCell_GET(c) == v) {
                PyCell_SET(c, NULL);
                Py_DECREF(v);
            }
            break;
        }
        case STORE_NAME:   /* the case hit by the module-level example */
        {
            PyObject *names = f->f_code->co_names;    /* names tuple of the code object */
            PyObject *name = GETITEM(names, oparg);   /* name being stored to */
            PyObject *locals = f->f_locals;           /* locals mapping of the frame */
            if (locals && PyDict_CheckExact(locals)) {
                /* NOTE: this `w` shadows the parameter `w` inside this block only. */
                PyObject *w = PyDict_GetItemWithError(locals, name);
                /* Bail out if the name is bound to v but cannot be deleted,
                 * or if the lookup itself raised an exception. */
                if ((w == v && PyDict_DelItem(locals, name) != 0) ||
                    (w == NULL && _PyErr_Occurred(tstate)))
                {
                    Py_DECREF(v);
                    return NULL;
                }
            }
            break;
        }
        }
    }
    res = v;
    PyUnicode_Append(&res, w);   /* the actual concatenation; sets res to NULL on error */
    return res;
}

//源码有删减

/*
 * Append `right` to the string referenced by `*p_left`.
 *
 * p_left  in/out: on success *p_left refers to the concatenated string (the
 *         same object resized in place, or a new object); on failure *p_left
 *         is cleared to NULL and an exception is set.
 * right   string to append; its reference is not consumed.
 *
 * The argument validation, the load of `left`, the OverflowError and the
 * release of the old left object were elided in the original excerpt; they
 * are restored from CPython 3.8 Objects/unicodeobject.c because without them
 * `left` is read uninitialised and the slow path leaks the old object.
 */
void
PyUnicode_Append(PyObject **p_left, PyObject *right)
{
    PyObject *left, *res;
    Py_UCS4 maxchar, maxchar2;
    Py_ssize_t left_len, right_len, new_len;

    if (p_left == NULL) {
        if (!PyErr_Occurred())
            PyErr_BadInternalCall();
        return;
    }
    left = *p_left;
    if (right == NULL || left == NULL
        || !PyUnicode_Check(left) || !PyUnicode_Check(right)) {
        if (!PyErr_Occurred())
            PyErr_BadInternalCall();
        goto error;
    }

    if (PyUnicode_READY(left) == -1)
        goto error;
    if (PyUnicode_READY(right) == -1)
        goto error;

    /* Shortcuts */
    if (left == unicode_empty) {   /* '' + right: the result is right itself */
        Py_DECREF(left);
        Py_INCREF(right);
        *p_left = right;
        return;
    }
    if (right == unicode_empty)    /* left + '': nothing to do */
        return;

    /* Length of the result, guarded against Py_ssize_t overflow.
     * PY_SSIZE_T_MAX is the largest value a Py_ssize_t can hold. */
    left_len = PyUnicode_GET_LENGTH(left);
    right_len = PyUnicode_GET_LENGTH(right);
    if (left_len > PY_SSIZE_T_MAX - right_len) {
        PyErr_SetString(PyExc_OverflowError,
                        "strings are too large to concat");
        goto error;
    }
    new_len = left_len + right_len;

    if (unicode_modifiable(left)
        && PyUnicode_CheckExact(right)
        /* right's character width (1, 2 or 4 bytes) must fit in left's */
        && PyUnicode_KIND(right) <= PyUnicode_KIND(left)
        /* Exclude only "ASCII left += non-ASCII right".  Per the CPython
           source comment, converting ASCII to Latin-1 changes the structure
           size, so all characters would have to be moved anyway. */
        && !(PyUnicode_IS_ASCII(left) && !PyUnicode_IS_ASCII(right)))
    {
        /* Append in place: grow left to new_len ... */
        if (unicode_resize(p_left, new_len) != 0)
            goto error;

        /* ... and copy right into the newly added tail. */
        _PyUnicode_FastCopyCharacters(*p_left, left_len, right, 0, right_len);
    }
    else {
        /* left cannot be resized in place, or right does not fit left's
           representation: build a new string wide enough for both. */
        maxchar = PyUnicode_MAX_CHAR_VALUE(left);
        maxchar2 = PyUnicode_MAX_CHAR_VALUE(right);
        maxchar = Py_MAX(maxchar, maxchar2);

        res = PyUnicode_New(new_len, maxchar);
        if (res == NULL)
            goto error;
        _PyUnicode_FastCopyCharacters(res, 0, left, 0, left_len);
        _PyUnicode_FastCopyCharacters(res, left_len, right, 0, right_len);
        /* Release the reference to the old left object; *p_left now
           refers to the new string. */
        Py_DECREF(left);
        *p_left = res;
    }
    return;

error:
    Py_CLEAR(*p_left);
}

//源码有删减

/*
 * Resize the string referenced by `*p_unicode` to `length` characters.
 *
 * On success returns 0; *p_unicode may then refer to a different object
 * (the shared empty string, a fresh copy, or the reallocated object).
 * On failure returns -1.
 *
 * Fix over the original excerpt: in the non-modifiable path the copy returned
 * by resize_copy() was never stored into *p_unicode, so the new object leaked
 * and the caller kept the un-resized string.  Py_SETREF restores the
 * behaviour of CPython 3.8 Objects/unicodeobject.c.
 */
static int
unicode_resize(PyObject **p_unicode, Py_ssize_t length)
{
    PyObject *unicode;
    Py_ssize_t old_length;

    unicode = *p_unicode;

    /* Current length of the string (legacy wchar_t strings keep it separately). */
    if (_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND)
        old_length = PyUnicode_WSTR_LENGTH(unicode);
    else
        old_length = PyUnicode_GET_LENGTH(unicode);

    /* Already the requested size: nothing to do. */
    if (old_length == length)
        return 0;

    /* Resizing to zero: switch to the shared empty-string object. */
    if (length == 0) {
        _Py_INCREF_UNICODE_EMPTY();
        if (!unicode_empty)
            return -1;
        Py_SETREF(*p_unicode, unicode_empty);
        return 0;
    }

    /* The object must not be modified in place: build a resized copy and
       make *p_unicode refer to it, releasing the old reference. */
    if (!unicode_modifiable(unicode)) {
        PyObject *copy = resize_copy(unicode, length);
        if (copy == NULL)
            return -1;
        Py_SETREF(*p_unicode, copy);
        return 0;
    }

    /* Compact object: resize_compact reallocates the object itself, so the
       object address may change. */
    if (PyUnicode_IS_COMPACT(unicode)) {
        PyObject *new_unicode = resize_compact(unicode, length);
        if (new_unicode == NULL)
            return -1;
        *p_unicode = new_unicode;
        return 0;
    }

    /* Non-compact object: only its buffers are reallocated. */
    return resize_inplace(unicode, length);
}

//有删减

/* PyUnicode_WRITE as defined in CPython's unicodeobject.h, reproduced here for
 * reference.  It stores `value` at position `index` of the character buffer
 * `data`, using the element width selected by `kind`.  Guarded so that it
 * does not clash with the definition from the real header. */
#ifndef PyUnicode_WRITE
#define PyUnicode_WRITE(kind, data, index, value) \
    do { \
        switch ((kind)) { \
        case PyUnicode_1BYTE_KIND: { \
            ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
            break; \
        } \
        case PyUnicode_2BYTE_KIND: { \
            ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
            break; \
        } \
        default: { \
            assert((kind) == PyUnicode_4BYTE_KIND); \
            ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
        } \
        } \
    } while (0)
#endif

/*
 * Resize a compact string object to `length` characters by reallocating the
 * object itself (header and characters live in one allocation).
 *
 * Returns the possibly moved object, or NULL with MemoryError set; on failure
 * the original object is left allocated.
 *
 * Fix over the original excerpt: the failure path called _Py_NewReference()
 * without the matching _Py_ForgetReference() before the realloc, and the
 * success path never re-registered the moved object.  The pairing is restored
 * as in CPython 3.8 Objects/unicodeobject.c (it matters for reference-tracing
 * debug builds).
 */
static PyObject*
resize_compact(PyObject *unicode, Py_ssize_t length)
{
    Py_ssize_t char_size;
    Py_ssize_t struct_size;
    Py_ssize_t new_size;
    int share_wstr;
    PyObject *new_unicode;

    char_size = PyUnicode_KIND(unicode);   /* bytes per character: 1, 2 or 4 */
    /* Header size depends on whether this is an ASCII object. */
    if (PyUnicode_IS_ASCII(unicode))
        struct_size = sizeof(PyASCIIObject);
    else
        struct_size = sizeof(PyCompactUnicodeObject);
    share_wstr = _PyUnicode_SHARE_WSTR(unicode);

    /* Reject lengths whose byte size would overflow Py_ssize_t. */
    if (length > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1)) {
        PyErr_NoMemory();
        return NULL;
    }
    /* Header + characters + one terminating null character. */
    new_size = (struct_size + (length + 1) * char_size);

    /* Drop a separately allocated UTF-8 buffer. */
    if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
        PyObject_DEL(_PyUnicode_UTF8(unicode));
        _PyUnicode_UTF8(unicode) = NULL;
        _PyUnicode_UTF8_LENGTH(unicode) = 0;
    }
    /* Unregister the object before it may move. */
    _Py_DEC_REFTOTAL;
    _Py_ForgetReference(unicode);

    new_unicode = (PyObject *)PyObject_REALLOC(unicode, new_size);
    if (new_unicode == NULL) {
        /* Realloc failed: the old block is still valid, register it again. */
        _Py_NewReference(unicode);
        PyErr_NoMemory();
        return NULL;
    }
    unicode = new_unicode;
    _Py_NewReference(unicode);

    _PyUnicode_LENGTH(unicode) = length;
    if (share_wstr) {
        /* wstr aliases the character data: re-point it at the new block. */
        _PyUnicode_WSTR(unicode) = PyUnicode_DATA(unicode);
        if (!PyUnicode_IS_ASCII(unicode))
            _PyUnicode_WSTR_LENGTH(unicode) = length;
    }
    else if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) {
        /* Separately allocated wstr buffer: free it. */
        PyObject_DEL(_PyUnicode_WSTR(unicode));
        _PyUnicode_WSTR(unicode) = NULL;
        if (!PyUnicode_IS_ASCII(unicode))
            _PyUnicode_WSTR_LENGTH(unicode) = 0;
    }
    /* Write the terminating null character at index `length`. */
    PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode),
                    length, 0);
    return unicode;
}

/*
 * Resize a string object in place by reallocating its buffers; the object
 * itself does not move.  In this file it is reached from unicode_resize()
 * for objects that are not compact.
 *
 * Returns 0 on success, -1 with MemoryError set on failure.
 *
 * Fix over the original excerpt: the result of PyObject_REALLOC() for the
 * data buffer was not checked, so an allocation failure stored NULL into the
 * object and then wrote through it.  The check is restored as in CPython 3.8
 * Objects/unicodeobject.c.
 */
static int
resize_inplace(PyObject *unicode, Py_ssize_t length)
{
    wchar_t *wstr;
    Py_ssize_t new_size;

    /* Ready string: resize the canonical character buffer. */
    if (PyUnicode_IS_READY(unicode)) {
        Py_ssize_t char_size;
        int share_wstr, share_utf8;
        void *data;

        data = _PyUnicode_DATA_ANY(unicode);            /* character buffer */
        char_size = PyUnicode_KIND(unicode);            /* bytes per character */
        share_wstr = _PyUnicode_SHARE_WSTR(unicode);    /* wstr aliases data? */
        share_utf8 = _PyUnicode_SHARE_UTF8(unicode);    /* utf8 aliases data? */

        /* Reject lengths whose byte size would overflow Py_ssize_t. */
        if (length > (PY_SSIZE_T_MAX / char_size - 1)) {
            PyErr_NoMemory();
            return -1;
        }
        /* Characters plus one terminating null character. */
        new_size = (length + 1) * char_size;

        /* A UTF-8 buffer that is separate from data is freed here. */
        if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode))
        {
            PyObject_DEL(_PyUnicode_UTF8(unicode));
            _PyUnicode_UTF8(unicode) = NULL;
            _PyUnicode_UTF8_LENGTH(unicode) = 0;
        }

        data = (PyObject *)PyObject_REALLOC(data, new_size);
        if (data == NULL) {
            /* The object still holds the old, valid buffer. */
            PyErr_NoMemory();
            return -1;
        }
        _PyUnicode_DATA_ANY(unicode) = data;
        /* Re-point the aliases at the (possibly moved) buffer. */
        if (share_wstr) {
            _PyUnicode_WSTR(unicode) = data;
            _PyUnicode_WSTR_LENGTH(unicode) = length;
        }
        if (share_utf8) {
            _PyUnicode_UTF8(unicode) = data;
            _PyUnicode_UTF8_LENGTH(unicode) = length;
        }
        _PyUnicode_LENGTH(unicode) = length;
        /* Write the terminating null character at index `length`. */
        PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0);
        /* Done unless a separate wstr buffer also has to be resized. */
        if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) {
            assert(_PyUnicode_CheckConsistency(unicode, 0));
            return 0;
        }
    }

    /* Resize the separate wchar_t buffer. */

    /* check for integer overflow */
    if (length > PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) - 1) {
        PyErr_NoMemory();
        return -1;
    }
    new_size = sizeof(wchar_t) * (length + 1);
    wstr = _PyUnicode_WSTR(unicode);
    wstr = PyObject_REALLOC(wstr, new_size);
    if (!wstr) {
        PyErr_NoMemory();
        return -1;
    }
    _PyUnicode_WSTR(unicode) = wstr;
    _PyUnicode_WSTR(unicode)[length] = 0;   /* null terminator */
    _PyUnicode_WSTR_LENGTH(unicode) = length;
    return 0;
}

/*
 * Build a new string object of `length` characters holding the first
 * min(length, current length) characters of `unicode`.
 *
 * Returns a new object, or NULL if allocation failed.
 *
 * Fix over the original excerpt: the result of PyUnicode_New() was not
 * checked before being passed to _PyUnicode_FastCopyCharacters().  The check
 * is restored as in CPython 3.8 Objects/unicodeobject.c.
 */
static PyObject*
resize_copy(PyObject *unicode, Py_ssize_t length)
{
    Py_ssize_t copy_length;

    if (_PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND) {
        /* Canonical representation: allocate with the same maximum
           character value and copy the characters over. */
        PyObject *copy;

        copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode));
        if (copy == NULL)
            return NULL;

        copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode));
        _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length);
        return copy;
    }
    else {
        /* Legacy wchar_t representation: copy the wstr buffer. */
        PyObject *w;

        w = (PyObject*)_PyUnicode_New(length);
        if (w == NULL)
            return NULL;
        copy_length = _PyUnicode_WSTR_LENGTH(unicode);
        copy_length = Py_MIN(copy_length, length);
        memcpy(_PyUnicode_WSTR(w), _PyUnicode_WSTR(unicode),
               copy_length * sizeof(wchar_t));
        return w;
    }
}

标签:PyUnicode,return,Python,Py,length,PyObject,探究,unicode,加法

来源: https://blog.csdn.net/qq_33913982/article/details/104757198

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值