python3词法分析(四)保留字以及关键字

词法分析时识别出来的标识符并没有区分是关键字还是普通的变量名,都识别为NAME,那是如何区分的呢?

一、定义关键词

Parser/parser.c

// Number of entries in reserved_keywords (indices 0 through 8).
static const int n_keyword_lists = 9;
// Reserved keywords grouped by spelling length: reserved_keywords[len] points
// to the list of keywords that are exactly `len` characters long.  Each entry
// pairs the keyword text with its integer token type, and every list ends
// with a {NULL, -1} sentinel entry.
static KeywordToken *reserved_keywords[] = {
    (KeywordToken[]) {{NULL, -1}},  // length 0: sentinel only, no keywords
    (KeywordToken[]) {{NULL, -1}},  // length 1: sentinel only, no keywords
    (KeywordToken[]) {  // length 2
        {"if", 510},
        {"in", 518},
        {"as", 520},
        {"is", 530},
        {"or", 531},
        {NULL, -1},
    },
    (KeywordToken[]) {  // length 3
        {"del", 503},
        {"try", 511},
        {"for", 517},
        {"def", 526},
        {"not", 529},
        {"and", 532},
        {NULL, -1},
    },
    (KeywordToken[]) {  // length 4
        {"pass", 502},
        {"from", 514},
        {"elif", 515},
        {"else", 516},
        {"with", 519},
        {"None", 523},
        {"True", 524},
        {NULL, -1},
    },
    (KeywordToken[]) {  // length 5
        {"raise", 501},
        {"yield", 504},
        {"break", 506},
        {"while", 512},
        {"False", 525},
        {"class", 527},
        {NULL, -1},
    },
    (KeywordToken[]) {  // length 6
        {"return", 500},
        {"assert", 505},
        {"global", 508},
        {"import", 513},
        {"except", 521},
        {"lambda", 528},
        {NULL, -1},
    },
    (KeywordToken[]) {  // length 7
        {"finally", 522},
        {NULL, -1},
    },
    (KeywordToken[]) {  // length 8
        {"continue", 507},
        {"nonlocal", 509},
        {NULL, -1},
    },
};
// NULL-terminated list of the soft keyword spellings; unlike the reserved
// keywords these are stored as plain strings with no token type attached.
static char *soft_keywords[] = { "_", "case", "match", NULL };

1.1 关键词如何生成的?

通过脚本Tools/peg_generator/pegen/c_generator.py从Grammar/python.gram 中提取并生成关键词列表

  • Makefile
# Regenerate Parser/parser.c: make sure the Parser directory exists, run the
# pegen module quietly (-q) in its "c" mode on Grammar/python.gram and
# Grammar/Tokens, writing the result to Parser/parser.new.c, and then pass
# parser.c and parser.new.c to $(UPDATE_FILE).
# NOTE(review): $(UPDATE_FILE) presumably installs the new file over parser.c
# -- confirm against its definition, which is not shown here.
.PHONY: regen-pegen
regen-pegen:
	@$(MKDIR_P) $(srcdir)/Parser
	PYTHONPATH=$(srcdir)/Tools/peg_generator $(PYTHON_FOR_REGEN) -m pegen -q c \
		$(srcdir)/Grammar/python.gram \
		$(srcdir)/Grammar/Tokens \
		-o $(srcdir)/Parser/parser.new.c
	$(UPDATE_FILE) $(srcdir)/Parser/parser.c $(srcdir)/Parser/parser.new.c
  • Tools/peg_generator/pegen/__main__.py
# Top-level command-line parser for pegen. The -q/-v options are defined here,
# on the main parser, rather than on a target-language subcommand.
argparser = argparse.ArgumentParser(
    prog="pegen", description="Experimental PEG-like parser generator"
)
argparser.add_argument("-q", "--quiet", action="store_true", help="Don't print the parsed grammar")
argparser.add_argument(
    "-v",
    "--verbose",
    action="count",
    default=0,
    help="Print timing stats; repeat for more debug output",
)
# Container for the per-target-language subcommands.
subparsers = argparser.add_subparsers(help="target language for the generated code")

# "c" subcommand: takes a grammar file and a tokens file as positional
# arguments and stores generate_c_code as the "func" default, which is how the
# chosen subcommand's handler is found after parsing.
c_parser = subparsers.add_parser("c", help="Generate C code for inclusion into CPython")
c_parser.set_defaults(func=generate_c_code)
c_parser.add_argument("grammar_filename", help="Grammar description")
c_parser.add_argument("tokens_filename", help="Tokens description")
# Output path of the generated parser; defaults to "parse.c" when -o is omitted.
c_parser.add_argument(
    "-o", "--output", metavar="OUT", default="parse.c", help="Where to write the generated parser"
)
# Optional boolean switches; all of them are off unless given.
c_parser.add_argument(
    "--compile-extension",
    action="store_true",
    help="Compile generated C code into an extension module",
)
c_parser.add_argument(
    "--optimized", action="store_true", help="Compile the extension in optimized mode"
)
c_parser.add_argument(
    "--skip-actions", action="store_true", help="Suppress code emission for rule actions",
)
  • Tools/peg_generator/pegen/__main__.py : main()
def main() -> None:
    # Command-line entry point (excerpt): parse the arguments, then call the
    # generator function selected by the chosen subcommand.
    # This helper is imported locally, inside the function.
    from pegen.testutil import print_memstats

    args = argparser.parse_args()
    # "func" is only present in the namespace when a subcommand supplied it as
    # a default, so its absence means no target language was chosen.
    if "func" not in args:
        argparser.error("Must specify the target language mode ('c' or 'python')")

    # Timestamp taken immediately before the generator runs.
    t0 = time.time()
    # The selected handler receives the parsed arguments and returns four
    # values, unpacked here as grammar, parser, tokenizer and generator.
    grammar, parser, tokenizer, gen = args.func(args)
    ...
  • Tools/peg_generator/pegen/__main__.py : generate_c_code()

  • Tools/peg_generator/pegen/build.py : build_c_parser_and_generator()

  • Tools/peg_generator/pegen/build.py : build_c_generator()

  • Tools/peg_generator/pegen/c_generator.py : generate()

二、解析器初始化时赋值

Parser/parser.c

// Excerpt of _PyPegen_parse: the visible part begins by storing the
// file-level keyword tables on the Parser, which makes them reachable later
// through p->keywords, p->n_keyword_lists and p->soft_keywords.
void *
_PyPegen_parse(Parser *p)
{
    // Initialize keywords
    p->keywords = reserved_keywords;        // reserved keyword lists
    p->n_keyword_lists = n_keyword_lists;   // number of entries in p->keywords
    p->soft_keywords = soft_keywords;       // NULL-terminated soft keyword strings
...
}

三、查找是否为关键词

Parser/pegen.c
根据识别出来的标识符去关键词列表中查询;为了便于快速查找,关键词按长度分组存放,查询时只需比较与标识符长度相同的那一组。

// Decide whether an identifier is a reserved keyword or an ordinary name.
//
// `name` points at the identifier text and `name_len` is its length, which
// must be positive. The keyword lists on the parser are indexed by spelling
// length, so only the list at index `name_len` is searched. Returns the
// matching keyword's token type, or NAME when there is no match.
static int
_get_keyword_or_name_type(Parser *p, const char *name, int name_len)
{
    assert(name_len > 0);

    // The length is beyond the last list, so no keyword can match.
    if (name_len >= p->n_keyword_lists) {
        return NAME;
    }

    // A missing list, or one that starts with the sentinel, holds no keywords.
    KeywordToken *bucket = p->keywords[name_len];
    if (bucket == NULL || bucket->type == -1) {
        return NAME;
    }

    // Walk the list until the sentinel entry (type == -1). Every keyword in
    // this list has exactly name_len characters, so a bounded comparison
    // over name_len bytes is a full match.
    KeywordToken *entry = bucket;
    while (entry->type != -1) {
        if (strncmp(entry->str, name, name_len) == 0) {
            return entry->type;
        }
        entry++;
    }
    return NAME;
}

四、关键词结构

// One entry of a reserved-keyword list.
typedef struct {
    char *str;  // keyword spelling; NULL in the sentinel entry that ends a list
    int type;   // token type for this keyword; -1 in the sentinel entry
} KeywordToken;

reserved_keywords 是一个指针数组,数组下标就表示关键词的长度:长度相同的关键词存放在同一个下标所指向的数组中,每个数组以 {NULL, -1} 作为结束标记。

请添加图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值