一、初始化
初始化时只分配了一个指针数组（Token **），其中仅包含一个指向单个 token 的槽位（并非二维数组）。
// Excerpt from CPython's pegen.c (elisions marked "..." in the article).
// Allocates a new Parser whose token buffer starts with capacity for
// exactly ONE token; the buffer grows on demand in _resize_tokens_array().
Parser *
_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
int feature_version, int *errcode, PyArena *arena)
{
Parser *p = PyMem_Malloc(sizeof(Parser));
...
// p->tokens is an array of Token pointers; initially room for one entry.
p->tokens = PyMem_Malloc(sizeof(Token *));
...
// Slot 0 gets a zero-initialized Token up front.
p->tokens[0] = PyMem_Calloc(1, sizeof(Token));
...
p->fill = 0;  // number of tokens filled so far
p->size = 1;  // current capacity of p->tokens
...
return p;
}

二、填充
当调用 PyTokenizer_Get 获取到一个 token 时，将其填充到已分配的 token 数组槽位中。
// Excerpt (elided with "..."; the closing brace is trimmed in the article).
// Fetches the next token from the tokenizer and stores it into the next
// free slot of p->tokens, doubling the array first when it is full.
int
_PyPegen_fill_token(Parser *p)
{
const char *start;
const char *end;
int type = PyTokenizer_Get(p->tok, &start, &end);
...
// Check if we are at the limit of the token array capacity and resize if needed
if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
return -1;
}
// Slot p->fill was pre-allocated (by Parser_New or _resize_tokens_array).
Token *t = p->tokens[p->fill];
return initialize_token(p, t, start, end, type);
// Populates a pre-allocated Token with the text and source location of the
// token just returned by the tokenizer, then advances p->fill.
// Returns 0 on success, -1 on error (with a Python exception set).
static int
initialize_token(Parser *p, Token *token, const char *start, const char *end, int token_type) {
assert(token != NULL);
// NAME tokens are re-classified here so keywords get their own token type.
token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : token_type;
token->bytes = PyBytes_FromStringAndSize(start, end - start);
if (token->bytes == NULL) {
return -1;
}
// Hand ownership of the bytes object to the arena; on failure drop our ref.
if (_PyArena_AddPyObject(p->arena, token->bytes) < 0) {
Py_DECREF(token->bytes);
return -1;
}
token->level = p->tok->level;
// Multi-line strings start on an earlier line, so their start position
// comes from the tokenizer's multi-line bookkeeping, not the current line.
const char *line_start = token_type == STRING ? p->tok->multi_line_start : p->tok->line_start;
int lineno = token_type == STRING ? p->tok->first_lineno : p->tok->lineno;
int end_lineno = p->tok->lineno;
// -1 signals "column unknown" when the pointers don't lie on the line buffer.
int col_offset = (start != NULL && start >= line_start) ? (int)(start - line_start) : -1;
int end_col_offset = (end != NULL && end >= p->tok->line_start) ? (int)(end - p->tok->line_start) : -1;
token->lineno = lineno;
// On the very first input line, shift columns by the caller-supplied offset
// (presumably for embedded/partial-source parsing — confirm against callers).
token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + col_offset : col_offset;
token->end_lineno = end_lineno;
token->end_col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + end_col_offset : end_col_offset;
// The token is committed before error reporting so diagnostics can see it.
p->fill += 1;
if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) {
return raise_decode_error(p);
}
return (token_type == ERRORTOKEN ? tokenizer_error(p) : 0);
}
2.1 token结构
// A single lexical token as stored in Parser.tokens.
typedef struct {
int type;         // token type (e.g. NAME, STRING, ERRORTOKEN)
PyObject *bytes;  // token text; ownership transferred to the arena
int level;        // nesting level copied from the tokenizer (p->tok->level)
int lineno, col_offset, end_lineno, end_col_offset;  // source span
Memo *memo;       // NOTE(review): presumably the packrat memoization chain — confirm
} Token;

三、扩容
按照原先大小的2倍进行扩容
// Doubles the capacity of p->tokens and pre-allocates a zeroed Token for
// every newly added slot. Returns 0 on success; on allocation failure sets
// MemoryError, leaves p->size equal to the count of valid slots, and
// returns -1.
static int
_resize_tokens_array(Parser *p) {
    int grown = p->size * 2;
    Token **resized = PyMem_Realloc(p->tokens, grown * sizeof(Token *));
    if (resized == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    p->tokens = resized;

    int idx = p->size;
    while (idx < grown) {
        Token *slot = PyMem_Calloc(1, sizeof(Token));
        if (slot == NULL) {
            // Record exactly how many slots hold live Tokens so parser
            // teardown frees the right number after this failure.
            p->size = idx;
            PyErr_NoMemory();
            return -1;
        }
        p->tokens[idx] = slot;
        idx++;
    }
    p->size = grown;
    return 0;
}