python中字符串实现原理_Python原理与实现:字符串子串

# Bind a ten-character ASCII string to the name a.
a='dddddddddd'

# Take the slice with start 1 and stop 6 (no step) and rebind a to the result.
a=a[1:6]

0 LOAD_CONST 0 ('dddddddddd')

2 STORE_NAME 0 (a)

4 LOAD_NAME 0 (a)

6 LOAD_CONST 1 (1)

8 LOAD_CONST 2 (6)

10 BUILD_SLICE 2

12 BINARY_SUBSCR

14 STORE_NAME 0 (a)

16 LOAD_CONST 3 (None)

18 RETURN_VALUE

【BUILD_SLICE】

// Body of the BUILD_SLICE opcode handler: builds a slice object from the
// operands on the value stack and stores it with SET_TOP().
// NOTE(review): POP(), TOP(), SET_TOP() and DISPATCH() are evaluation-loop
// macros that are not shown here; from their names POP() presumably removes
// the top stack entry while TOP() only reads it - confirm against ceval.c.
PyObject *start, *stop, *step, *slice;

// A step operand is taken from the stack only when oparg is 3; otherwise
// the slice is built with a NULL step.
if (oparg == 3)

step = POP();

else

step = NULL;

stop = POP();

start = TOP();

slice = PySlice_New(start, stop, step);

// Release the operand references taken from the stack. step may be NULL,
// hence Py_XDECREF for it.
Py_DECREF(start);

Py_DECREF(stop);

Py_XDECREF(step);

// The result is stored before the NULL check, so on failure NULL has
// already been written with SET_TOP() when control jumps to error.
SET_TOP(slice);

if (slice == NULL)

goto error;

DISPATCH();

【BINARY_SUBSCR】

// Body of the BINARY_SUBSCR opcode handler: evaluates container[sub] via
// PyObject_GetItem() and stores the result with SET_TOP().
// NOTE(review): POP(), TOP(), SET_TOP() and DISPATCH() are evaluation-loop
// macros that are not shown here; from their names POP() presumably removes
// the top stack entry while TOP() only reads it - confirm against ceval.c.
PyObject *sub = POP();

PyObject *container = TOP();

PyObject *res = PyObject_GetItem(container, sub);

// Both operand references are released regardless of whether the lookup
// succeeded.
Py_DECREF(container);

Py_DECREF(sub);

// The result is stored before the NULL check, so on failure NULL has
// already been written with SET_TOP() when control jumps to error.
SET_TOP(res);

if (res == NULL)

goto error;

DISPATCH();

// Abridged excerpt.
//
// Evaluate o[key] by dispatching on the slots of o's type.
//
// The mapping slot (tp_as_mapping->mp_subscript) is tried first; if it is
// present its result is returned directly. The sequence-slot and type-object
// branches are abridged in this excerpt. If no branch returns, a
// "not subscriptable" error is produced through type_error().
//
// Returns the looked-up object, or NULL with an exception set.
PyObject *
PyObject_GetItem(PyObject *o, PyObject *key)
{
    PyMappingMethods *m;
    PySequenceMethods *ms;

    // Strings take this path: mapping protocol, mp_subscript slot.
    m = Py_TYPE(o)->tp_as_mapping;
    if (m && m->mp_subscript) {
        // For str objects this call ends up in unicode_subscript.
        PyObject *item = m->mp_subscript(o, key);
        // Exactly one of "got a result" / "an exception is pending" must hold.
        assert((item != NULL) ^ (PyErr_Occurred() != NULL));
        return item;
    }

    // Sequence protocol, sq_item slot.
    ms = Py_TYPE(o)->tp_as_sequence;
    if (ms && ms->sq_item) {
        /* ... body omitted from this excerpt ... */
    }

    if (PyType_Check(o)) {
        /* ... body omitted from this excerpt ... */
    }

    return type_error("'%.200s' object is not subscriptable", o);
}

// Abridged excerpt.
//
// Subscript handler for str objects: evaluates self[item].
//
// item may be an index-like object (single character lookup) or a slice
// object. Any other kind of item raises TypeError.
//
// Returns a string object, or NULL with an exception set.
static PyObject *
unicode_subscript(PyObject *self, PyObject *item)
{
    // Single index.
    if (PyIndex_Check(item)) {
        Py_ssize_t index = PyNumber_AsSsize_t(item, PyExc_IndexError);
        if (index == -1 && PyErr_Occurred()) {
            return NULL;
        }
        // A negative index is offset once by the string length.
        if (index < 0) {
            index += PyUnicode_GET_LENGTH(self);
        }
        return unicode_getitem(self, index);
    }

    // Neither an index nor a slice.
    if (!PySlice_Check(item)) {
        PyErr_SetString(PyExc_TypeError, "string indices must be integers");
        return NULL;
    }

    // Slice: extract start/stop/step, then adjust them to the string length.
    Py_ssize_t start, stop, step;
    if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
        return NULL;
    }
    Py_ssize_t count = PySlice_AdjustIndices(PyUnicode_GET_LENGTH(self),
                                             &start, &stop, step);

    // Empty result.
    if (count <= 0) {
        _Py_RETURN_UNICODE_EMPTY();
    }

    if (step == 1) {
        // The slice covers the whole string: nothing to copy.
        if (start == 0 && count == PyUnicode_GET_LENGTH(self)) {
            return unicode_result_unchanged(self);
        }
        // Contiguous range: delegate to the substring routine.
        return PyUnicode_Substring(self, start, start + count);
    }

    // General case (step != 1): characters are picked one at a time.
    int src_kind = PyUnicode_KIND(self);
    void *src_data = PyUnicode_DATA(self);
    Py_UCS4 max_char;
    size_t src_pos;
    Py_ssize_t n;

    if (PyUnicode_IS_ASCII(self)) {
        max_char = 127;
    }
    else {
        // Find the largest code point among the selected characters so the
        // result can be created with a matching maxchar; stop scanning as
        // soon as the limit for the source kind is reached.
        Py_UCS4 kind_limit = kind_maxchar_limit(src_kind);
        max_char = 0;
        src_pos = start;
        for (n = 0; n < count; n++) {
            Py_UCS4 candidate = PyUnicode_READ(src_kind, src_data, src_pos);
            if (candidate > max_char) {
                max_char = candidate;
                if (max_char >= kind_limit) {
                    break;
                }
            }
            src_pos += step;
        }
    }

    PyObject *result = PyUnicode_New(count, max_char);
    if (result == NULL) {
        return NULL;
    }

    // Copy the selected characters into the new object.
    int dest_kind = PyUnicode_KIND(result);
    void *dest_data = PyUnicode_DATA(result);
    src_pos = start;
    for (n = 0; n < count; n++) {
        Py_UCS4 ch = PyUnicode_READ(src_kind, src_data, src_pos);
        PyUnicode_WRITE(dest_kind, dest_data, n, ch);
        src_pos += step;
    }
    assert(_PyUnicode_CheckConsistency(result, 1));
    return result;
}

// Single-index case.
// Abridged excerpt.
//
// Return the character of self at position index as a string object.
//
// index must already be non-negative: the caller offsets a negative index by
// the string length once, so a value that is still negative, or one at or
// past the end, is out of range.
//
// Returns the result of unicode_char() for the code point read, or NULL with
// IndexError set when index is out of range.
static PyObject *
unicode_getitem(PyObject *self, Py_ssize_t index)
{
    void *data;
    enum PyUnicode_Kind kind;
    Py_UCS4 ch;

    // Guard the raw buffer read below against out-of-range positions.
    if (index < 0 || index >= PyUnicode_GET_LENGTH(self)) {
        PyErr_SetString(PyExc_IndexError, "string index out of range");
        return NULL;
    }

    kind = PyUnicode_KIND(self);   // storage kind of the string
    data = PyUnicode_DATA(self);   // character buffer
    ch = PyUnicode_READ(kind, data, index);
    return unicode_char(ch);
}

/*
 * Read the element at position `index` of the character buffer `data` and
 * yield it as a Py_UCS4.
 *
 * `kind` selects the element type used to index the buffer: Py_UCS1 for
 * PyUnicode_1BYTE_KIND, Py_UCS2 for PyUnicode_2BYTE_KIND, and Py_UCS4 for
 * any other value.
 *
 * `kind` may be evaluated twice, so pass an expression without side effects.
 * No bounds checking is performed.
 *
 * Comments inside the definition must be block comments placed before the
 * line-continuation backslash: the backslash has to be the last character
 * on each continued line.
 */
#define PyUnicode_READ(kind, data, index) \
    ((Py_UCS4) \
    ((kind) == PyUnicode_1BYTE_KIND ? \
        ((const Py_UCS1 *)(data))[(index)] : /* UCS1 */ \
        ((kind) == PyUnicode_2BYTE_KIND ? \
            ((const Py_UCS2 *)(data))[(index)] : /* UCS2 */ \
            ((const Py_UCS4 *)(data))[(index)] /* UCS4 */ \
        ) \
    ))

// Contiguous (step == 1) case.
// Abridged excerpt.
//
// Return the characters of self in the half-open range [start, end).
//
// end is clamped to the string length. When the range covers the whole
// string, self is returned through unicode_result_unchanged(). A negative
// start or end raises IndexError. A range that starts at or past the end of
// the string, or whose end precedes its start, yields the empty string.
//
// Returns a string object, or NULL with an exception set.
PyObject *
PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
{
    const Py_ssize_t total = PyUnicode_GET_LENGTH(self);

    // Clamp the end position to the string length.
    if (end > total) {
        end = total;
    }

    // Whole string requested: nothing to copy.
    if (start == 0 && end == total) {
        return unicode_result_unchanged(self);
    }

    // Negative positions are an error.
    if (start < 0 || end < 0) {
        PyErr_SetString(PyExc_IndexError, "string index out of range");
        return NULL;
    }

    // Nothing selected: this is not an error, the result is the empty string.
    if (end < start || start >= total) {
        _Py_RETURN_UNICODE_EMPTY();
    }

    const Py_ssize_t count = end - start;
    unsigned char *bytes = PyUnicode_1BYTE_DATA(self);

    // ASCII strings: the start position is used directly as a byte offset.
    if (PyUnicode_IS_ASCII(self)) {
        return _PyUnicode_FromASCII((char *)(bytes + start), count);
    }

    // Other strings: the byte offset is the kind value times the start
    // position.
    int width = PyUnicode_KIND(self);
    return PyUnicode_FromKindAndData(width, bytes + width * start, count);
}

// ASCII case.
//
// Build a string object from size bytes of ASCII data starting at buffer.
//
// A single byte is resolved through get_latin1_char(). Otherwise a new
// object is created with maxchar 127 and the bytes are copied into it.
//
// Returns the string object, or NULL if PyUnicode_New() fails.
PyObject *
_PyUnicode_FromASCII(const char *buffer, Py_ssize_t size)
{
    const unsigned char *src = (const unsigned char *)buffer;

    // One character only.
    if (size == 1) {
        return get_latin1_char(src[0]);
    }

    // Allocate a new object and copy the bytes across.
    PyObject *copy = PyUnicode_New(size, 127);
    if (copy != NULL) {
        memcpy(PyUnicode_1BYTE_DATA(copy), src, size);
        assert(_PyUnicode_CheckConsistency(copy, 1));
    }
    return copy;
}

// Non-ASCII case.
//
// Build a string object from size elements stored at buffer, where kind
// selects the constructor that interprets the buffer (1-, 2- or 4-byte kind).
//
// Returns whatever the selected constructor returns, or NULL with
// SystemError set when kind is not one of the three known values.
PyObject *
PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size)
{
    if (kind == PyUnicode_1BYTE_KIND) {
        return _PyUnicode_FromUCS1(buffer, size);
    }
    if (kind == PyUnicode_2BYTE_KIND) {
        return _PyUnicode_FromUCS2(buffer, size);
    }
    if (kind == PyUnicode_4BYTE_KIND) {
        return _PyUnicode_FromUCS4(buffer, size);
    }

    PyErr_SetString(PyExc_SystemError, "invalid kind");
    return NULL;
}

// UCS1
//
// Build a string object from size Py_UCS1 elements starting at u.
//
// An empty input yields the empty string and a single element is resolved
// through get_latin1_char(). Otherwise a new object is created with the
// largest element value as maxchar and the data is copied into it.
//
// Returns the string object, or NULL if PyUnicode_New() fails.
static PyObject *
_PyUnicode_FromUCS1(const Py_UCS1 *u, Py_ssize_t size)
{
    PyObject *res;
    unsigned char max_char;

    // Empty string.
    if (size == 0) {
        _Py_RETURN_UNICODE_EMPTY();
    }
    assert(size > 0);

    // One character only.
    if (size == 1) {
        return get_latin1_char(u[0]);
    }

    // Largest element value in the input.
    max_char = ucs1lib_find_max_char(u, u + size);
    res = PyUnicode_New(size, max_char);
    // Allocation can fail; do not copy into a NULL object.
    if (res == NULL) {
        return NULL;
    }

    // Copy the data into the new object.
    memcpy(PyUnicode_1BYTE_DATA(res), u, size);
    return res;
}

// UCS2
//
// Build a string object from size Py_UCS2 elements starting at u.
//
// An empty input yields the empty string and a single element is resolved
// through unicode_char(). Otherwise a new object is created with the largest
// element value as maxchar. When that value is at least 256 the elements are
// copied as they are; when it is below 256 they are written into the
// 1-byte buffer of the result through _PyUnicode_CONVERT_BYTES.
//
// Returns the string object, or NULL if PyUnicode_New() fails.
static PyObject *
_PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size)
{
    PyObject *res;
    Py_UCS2 max_char;

    // Empty string.
    if (size == 0) {
        _Py_RETURN_UNICODE_EMPTY();
    }
    assert(size > 0);

    // One character only.
    if (size == 1) {
        return unicode_char(u[0]);
    }

    // Largest element value in the input.
    max_char = ucs2lib_find_max_char(u, u + size);
    res = PyUnicode_New(size, max_char);
    // Allocation can fail; do not write into a NULL object.
    if (res == NULL) {
        return NULL;
    }

    if (max_char >= 256) {
        // General case: copy the 2-byte elements into the new object.
        memcpy(PyUnicode_2BYTE_DATA(res), u, sizeof(Py_UCS2) * size);
    }
    else {
        // Every element fits in one byte: convert to the narrower storage.
        _PyUnicode_CONVERT_BYTES(
            Py_UCS2, Py_UCS1, u, u + size, PyUnicode_1BYTE_DATA(res));
    }
    return res;
}

// UCS4
//
// Build a string object from size Py_UCS4 elements starting at u.
//
// An empty input yields the empty string and a single element is resolved
// through unicode_char(). Otherwise a new object is created with the largest
// element value as maxchar. Elements are written into the 1-byte buffer when
// that value is below 256, into the 2-byte buffer when it is below 0x10000,
// and copied as they are otherwise.
//
// Returns the string object, or NULL if PyUnicode_New() fails.
static PyObject *
_PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
{
    PyObject *res;
    Py_UCS4 max_char;

    // Empty string.
    if (size == 0) {
        _Py_RETURN_UNICODE_EMPTY();
    }

    // One character only.
    if (size == 1) {
        return unicode_char(u[0]);
    }

    // Largest element value in the input.
    max_char = ucs4lib_find_max_char(u, u + size);
    res = PyUnicode_New(size, max_char);
    // Allocation can fail; do not write into a NULL object.
    if (res == NULL) {
        return NULL;
    }

    if (max_char < 256) {
        // Every element fits in one byte: convert to 1-byte storage.
        _PyUnicode_CONVERT_BYTES(Py_UCS4, Py_UCS1, u, u + size,
                                 PyUnicode_1BYTE_DATA(res));
    }
    else if (max_char < 0x10000) {
        // Every element fits in two bytes: convert to 2-byte storage.
        _PyUnicode_CONVERT_BYTES(Py_UCS4, Py_UCS2, u, u + size,
                                 PyUnicode_2BYTE_DATA(res));
    }
    else {
        // General case: copy the 4-byte elements into the new object.
        memcpy(PyUnicode_4BYTE_DATA(res), u, sizeof(Py_UCS4) * size);
    }
    return res;
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值