python3词法分析(五)token存储

一、初始化

初始化时只分配了能存放一个token的空间:一个长度为1的Token指针数组(p->tokens),以及它指向的一个Token结构体。

/*
 * Create a new Parser. Only the token-storage setup is shown here;
 * each `...` marks code omitted from this excerpt.
 */
Parser *
_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
                    int feature_version, int *errcode, PyArena *arena)
{
    Parser *p = PyMem_Malloc(sizeof(Parser));
    ...
    
    // Token table: an array of Token pointers with room for exactly one pointer.
    p->tokens = PyMem_Malloc(sizeof(Token *));
 	...
    // The single slot points at one zero-initialized Token.
    p->tokens[0] = PyMem_Calloc(1, sizeof(Token));
    ...
    
    // fill: number of tokens stored so far; size: number of slots in the table.
    p->fill = 0;
    p->size = 1;

    ...
    
    return p;
}

请添加图片描述

二、填充

每次调用PyTokenizer_Get获取到一个token后,将其填充到已分配的空间p->tokens[p->fill]中;如果空间已满(p->fill == p->size),则先进行扩容。

/*
 * Fetch the next token from the tokenizer and store it in the next free
 * slot of p->tokens, growing the table first when it is full.
 *
 * `...` marks code omitted from this excerpt. In the part shown, the
 * function returns -1 when the token table cannot be resized, and
 * otherwise returns the result of initialize_token() for the new token.
 */
int
_PyPegen_fill_token(Parser *p)
{
    const char *start;
    const char *end;
    int type = PyTokenizer_Get(p->tok, &start, &end);

    ...

    // Check if we are at the limit of the token array capacity and resize if needed
    if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
        return -1;
    }

    // p->fill indexes the first unused slot; initialize_token() advances it.
    Token *t = p->tokens[p->fill];
    return initialize_token(p, t, start, end, type);
}
/*
 * Populate *token from the text range [start, end) and the tokenizer state
 * in p->tok, then advance p->fill.
 *
 * The token text is copied into a bytes object that is registered with
 * p->arena. Returns -1 if the bytes object cannot be created or cannot be
 * added to the arena. For ERRORTOKEN the result of raise_decode_error() or
 * tokenizer_error() is returned; otherwise 0.
 */
static int
initialize_token(Parser *p, Token *token, const char *start, const char *end, int token_type) {
    assert(token != NULL);

    // NAME tokens may actually be keywords; everything else keeps its type.
    if (token_type == NAME) {
        token->type = _get_keyword_or_name_type(p, start, (int)(end - start));
    }
    else {
        token->type = token_type;
    }

    token->bytes = PyBytes_FromStringAndSize(start, end - start);
    if (token->bytes == NULL) {
        return -1;
    }
    if (_PyArena_AddPyObject(p->arena, token->bytes) < 0) {
        Py_DECREF(token->bytes);
        return -1;
    }

    token->level = p->tok->level;

    // STRING tokens take their starting position from the tokenizer's
    // multi-line bookkeeping instead of the current line.
    const char *first_line_start;
    int first_lineno;
    if (token_type == STRING) {
        first_line_start = p->tok->multi_line_start;
        first_lineno = p->tok->first_lineno;
    }
    else {
        first_line_start = p->tok->line_start;
        first_lineno = p->tok->lineno;
    }
    int last_lineno = p->tok->lineno;

    // An offset of -1 means the pointer is NULL or lies before the line start.
    int start_col = -1;
    if (start != NULL && start >= first_line_start) {
        start_col = (int)(start - first_line_start);
    }
    int end_col = -1;
    if (end != NULL && end >= p->tok->line_start) {
        end_col = (int)(end - p->tok->line_start);
    }

    // While the tokenizer is still on the parser's starting line, both
    // columns are shifted by the parser's starting column offset.
    int shift = 0;
    if (p->tok->lineno == p->starting_lineno) {
        shift = p->starting_col_offset;
    }

    token->lineno = first_lineno;
    token->col_offset = shift + start_col;
    token->end_lineno = last_lineno;
    token->end_col_offset = shift + end_col;

    p->fill += 1;

    if (token_type != ERRORTOKEN) {
        return 0;
    }
    if (p->tok->done == E_DECODE) {
        return raise_decode_error(p);
    }
    return tokenizer_error(p);
}

2.1 token结构

/* One lexed token as stored in the parser's token table. */
typedef struct {
    int type;            /* token type; NAME tokens are resolved via the keyword lookup */
    PyObject *bytes;     /* bytes object holding the token's text */
    int level;           /* copy of the tokenizer's `level` when the token was stored */
    int lineno;          /* line on which the token starts */
    int col_offset;      /* start column, or -1 based when it could not be computed */
    int end_lineno;      /* line on which the token ends */
    int end_col_offset;  /* end column, or -1 based when it could not be computed */
    Memo *memo;          /* NOTE(review): not used in the code shown here; presumably memoization data, confirm */
} Token;

请添加图片描述

三、扩容

按照原先大小的2倍进行扩容:先用PyMem_Realloc把指针数组扩大一倍,再为新增的每个槽位分配一个清零的Token。

/*
 * Double the capacity of p->tokens and allocate a zeroed Token for every
 * newly added slot.
 *
 * Returns 0 on success. On allocation failure sets a memory error and
 * returns -1; if a Token allocation fails part-way, p->size is set to the
 * index of the failed slot so cleanup knows how many slots are populated.
 */
static int
_resize_tokens_array(Parser *p) {
    const int old_capacity = p->size;
    const int new_capacity = old_capacity * 2;

    Token **grown = PyMem_Realloc(p->tokens, new_capacity * sizeof(Token *));
    if (grown == NULL) {
        // p->tokens is left untouched, so the existing table remains valid.
        PyErr_NoMemory();
        return -1;
    }
    p->tokens = grown;

    int slot = old_capacity;
    while (slot < new_capacity) {
        Token *fresh = PyMem_Calloc(1, sizeof(Token));
        p->tokens[slot] = fresh;
        if (fresh == NULL) {
            // Needed, in order to cleanup correctly after parser fails
            p->size = slot;
            PyErr_NoMemory();
            return -1;
        }
        slot++;
    }

    p->size = new_capacity;
    return 0;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值