microPython的源码解析之 lexer.c

最新推荐文章于 2024-04-15 11:51:35 发布

openwin_top

最新推荐文章于 2024-04-15 11:51:35 发布

阅读量1.6k

点赞数 42

分类专栏： micropython 文章标签： c语言开发语言 python micropython c

本文链接：https://blog.csdn.net/zhangzhechun/article/details/137254893

版权

MicroPython 是一种适用于微控制器和其他受限环境的 Python 编程语言的实现。它旨在提供与 Python 3 语言的紧密兼容，同时考虑到内存和计算资源的限制。MicroPython 库是这门语言的核心组成部分，提供了一系列的模块和函数，使得开发者能够在硬件上执行各种任务。
下面将通过系列文章逐一解读 MicroPython，帮助读者掌握 MicroPython 的整个核心逻辑，并透过这个 Python 的最小内核理解 Python 解析器的核心实现，学习世界上最优秀的源码设计之一。

microPython Python最小内核源码解析

这段代码是MicroPython词法分析器的一部分，它负责将输入的源代码文本分解成一系列的标记（tokens）。词法分析器是编译器或解释器的第一个阶段，它读取源代码并将其分解成更小的、有意义的单元，以便后续的解析和编译阶段可以处理。代码中包含了许多辅助函数，用于处理各种语言结构，如字符串字面量、标识符、关键字、操作符等。此外，还提供了创建和释放词法分析器实例的函数，以及从不同来源（如内存、文件等）创建词法分析器的辅助函数。

#include <stdio.h>
#include <string.h>
#include <assert.h>

// 引入MicroPython的读取器、词法分析器和运行时环境的头文件
#include "py/reader.h"
#include "py/lexer.h"
#include "py/runtime.h"

#if MICROPY_ENABLE_COMPILER

// Width of a tab stop, in columns.
#define TAB_SIZE (8)

// End-of-input sentinel used by the lexer: the reader's EOF value cast to unichar.
#define MP_LEXER_EOF ((unichar)MP_READER_EOF)
// Current character of the lexer, i.e. its chr0 field.
#define CUR_CHAR(lex) ((lex)->chr0)

// Return true when the current character is the lexer's EOF sentinel,
// i.e. there is no more input to tokenise.
static bool is_end(mp_lexer_t *lex) {
    const bool at_eof = (lex->chr0 == MP_LEXER_EOF);
    return at_eof;
}

// Return true when the current character is a line feed ('\n').
static bool is_physical_newline(mp_lexer_t *lex) {
    if (lex->chr0 != '\n') {
        return false;
    }
    return true;
}

// Return true when the current character equals c.
static bool is_char(mp_lexer_t *lex, byte c) {
    if (lex->chr0 == c) {
        return true;
    }
    return false;
}

// Return true when the current character equals either c1 or c2.
static bool is_char_or(mp_lexer_t *lex, byte c1, byte c2) {
    if (lex->chr0 == c1) {
        return true;
    }
    return lex->chr0 == c2;
}

// Return true when the current character equals any one of c1, c2 or c3.
static bool is_char_or3(mp_lexer_t *lex, byte c1, byte c2, byte c3) {
    if (lex->chr0 == c1) {
        return true;
    }
    if (lex->chr0 == c2) {
        return true;
    }
    return lex->chr0 == c3;
}

// Only compiled when f-string support is enabled.
#if MICROPY_PY_FSTRINGS
// Return true when the current character equals any one of c1, c2, c3 or c4.
static bool is_char_or4(mp_lexer_t *lex, byte c1, byte c2, byte c3, byte c4) {
    // De Morgan form: "matches one of them" == "not different from all of them".
    return !(lex->chr0 != c1
        && lex->chr0 != c2
        && lex->chr0 != c3
        && lex->chr0 != c4);
}
#endif

// Return true when the next character (one past the current one) equals c.
static bool is_char_following(mp_lexer_t *lex, byte c) {
    const bool next_matches = (lex->chr1 == c);
    return next_matches;
}

// Return true when the next character (one past the current one) equals
// either c1 or c2.
static bool is_char_following_or(mp_lexer_t *lex, byte c1, byte c2) {
    return !(lex->chr1 != c1 && lex->chr1 != c2);
}

// Return true when the character two positions past the current one (chr2)
// equals either c1 or c2.  Only chr2 is inspected; chr1 is not examined.
static bool is_char_following_following_or(mp_lexer_t *lex, byte c1, byte c2) {
    if (lex->chr2 == c1) {
        return true;
    }
    return lex->chr2 == c2;
}

// Return true when the current character is c1 and the next character is c2.
static bool is_char_and(mp_lexer_t *lex, byte c1, byte c2) {
    if (lex->chr0 != c1) {
        return false;
    }
    return lex->chr1 == c2;
}

// Return true when unichar_isspace() classifies the current character as
// whitespace.
static bool is_whitespace(mp_lexer_t *lex) {
    const bool space = unichar_isspace(lex->chr0);
    return space;
}

// Return true when unichar_isalpha() classifies the current character as a
// letter.
static bool is_letter(mp_lexer_t *lex) {
    return unichar_isalpha(lex->chr0) ? true : false;
}

// Return true when unichar_isdigit() classifies the current character as a
// digit.
static bool is_digit(mp_lexer_t *lex) {
    const bool digit = unichar_isdigit(lex->chr0);
    return digit;
}

// Return true when unichar_isdigit() classifies the next character (one past
// the current one) as a digit.
static bool is_following_digit(mp_lexer_t *lex) {
    return unichar_isdigit(lex->chr1) ? true : false;
}

// Return true when the next character is a numeric base marker: 'b', 'o' or
// 'x' in either case.
static bool is_following_base_char(mp_lexer_t *lex) {
    // OR-ing with 0x20 sets the ASCII lower-case bit, so 'B'/'O'/'X' compare
    // equal to 'b'/'o'/'x'.
    switch (lex->chr1 | 0x20) {
        case 'b':
        case 'o':
        case 'x':
            return true;
        default:
            return false;
    }
}

// Return true when the next character is an octal digit ('0' through '7').
static bool is_following_odigit(mp_lexer_t *lex) {
    if (lex->chr1 < '0') {
        return false;
    }
    return lex->chr1 <= '7';
}

// Return true when the lookahead (chr0..chr2) starts a string or bytes
// literal: a bare quote, a one-letter prefix followed by a quote, or a
// two-letter prefix followed by a quote.
static bool is_string_or_bytes(mp_lexer_t *lex) {
    // A quote with no prefix.
    if (is_char_or(lex, '\'', '\"')) {
        return true;
    }

    #if MICROPY_PY_FSTRINGS
    // One-letter prefix (r, u, b or f) immediately followed by a quote.
    if (is_char_or4(lex, 'r', 'u', 'b', 'f') && is_char_following_or(lex, '\'', '\"')) {
        return true;
    }
    // Two-letter prefix "rf" or "fr" followed by a quote.
    if ((is_char_and(lex, 'r', 'f') || is_char_and(lex, 'f', 'r'))
        && is_char_following_following_or(lex, '\'', '\"')) {
        return true;
    }
    #else
    // One-letter prefix (r, u or b) immediately followed by a quote.
    if (is_char_or3(lex, 'r', 'u', 'b') && is_char_following_or(lex, '\'', '\"')) {
        return true;
    }
    #endif

    // Two-letter prefix "rb" or "br" followed by a quote.
    if (!is_char_and(lex, 'r', 'b') && !is_char_and(lex, 'b', 'r')) {
        return false;
    }
    return is_char_following_following_or(lex, '\'', '\"');
}

// Return true when the current character may start an identifier: a letter,
// an underscore, or any value of 0x80 and above (i.e. outside 7-bit ASCII).
static bool is_head_of_identifier(mp_lexer_t *lex) {
    if (is_letter(lex)) {
        return true;
    }
    if (lex->chr0 == '_') {
        return true;
    }
    return lex->chr0 >= 0x80;
}

// Return true when the current character may continue an identifier:
// anything that may start one, or a digit.
static bool is_tail_of_identifier(mp_lexer_t *lex) {
    if (is_head_of_identifier(lex)) {
        return true;
    }
    return is_digit(lex);
}

// Advance the lexer by one character.
//
// Steps, in order:
//   1. Update line/column according to the character being consumed (chr0).
//   2. Shift the three-character lookahead queue: chr0 <- chr1 <- chr2.
//   3. Refill chr2, either from the pending f-string argument buffer (when
//      f-strings are enabled and injection is active) or from the reader.
//   4. Normalise a CR, or a CR LF pair, in the lookahead to a single '\n'.
//   5. Synthesise a trailing '\n' if the input ends without one.
static void next_char(mp_lexer_t *lex) {
    if (lex->chr0 == '\n') {
        // Consuming a newline: move to the first column of the next line.
        ++lex->line;
        lex->column = 1;
    } else if (lex->chr0 == '\t') {
        // Consuming a tab: jump to the column following the next tab stop
        // (columns are 1-based, tab stops are TAB_SIZE columns apart).
        lex->column = (((lex->column - 1 + TAB_SIZE) / TAB_SIZE) * TAB_SIZE) + 1;
    } else {
        // Any other character occupies one column.
        ++lex->column;
    }

    // Shift the input queue forward by one position.
    lex->chr0 = lex->chr1;
    lex->chr1 = lex->chr2;

    // Refill chr2 from either the f-string argument buffer or the reader.
    // The "} else" / "#endif" / "{" layout below is deliberate: with
    // f-strings disabled only the braced reader block remains, and with them
    // enabled it becomes the else-branch of the injection check.
    #if MICROPY_PY_FSTRINGS
    if (lex->fstring_args_idx) {
        // A non-zero index means f-string argument bytes are being injected.
        if (lex->fstring_args_idx < lex->fstring_args.len) {
            lex->chr2 = lex->fstring_args.buf[lex->fstring_args_idx++];
        } else {
            // No more f-string argument bytes; feed a NUL marker.
            lex->chr2 = '\0';
        }

        if (lex->chr0 == '\0') {
            // The NUL marker reached the head of the queue: all f-string data
            // has been consumed, so restore the saved input queue.
            lex->chr0 = lex->chr0_saved;
            lex->chr1 = lex->chr1_saved;
            lex->chr2 = lex->chr2_saved;
            // Stop consuming f-string argument data.
            vstr_reset(&lex->fstring_args);
            lex->fstring_args_idx = 0;
        }
    } else
    #endif
    {
        lex->chr2 = lex->reader.readbyte(lex->reader.data);
    }

    if (lex->chr1 == '\r') {
        // CR is a newline; convert it to LF.
        lex->chr1 = '\n';
        if (lex->chr2 == '\n') {
            // CR LF is a single newline; discard the extra LF.
            lex->chr2 = lex->reader.readbyte(lex->reader.data);
        }
    }

    // Insert a newline at end of input if the last real character is not one.
    if (lex->chr2 == MP_LEXER_EOF && lex->chr1 != MP_LEXER_EOF && lex->chr1 != '\n') {
        lex->chr2 = '\n';
    }
}

// Push an indentation level onto the lexer's indent stack, growing the
// backing array by MICROPY_ALLOC_LEXEL_INDENT_INC entries when it is full.
static void indent_push(mp_lexer_t *lex, size_t indent) {
    const bool stack_full = lex->num_indent_level >= lex->alloc_indent_level;
    if (stack_full) {
        // Reallocate first; the recorded capacity is updated only afterwards.
        lex->indent_level = m_renew(uint16_t, lex->indent_level, lex->alloc_indent_level, lex->alloc_indent_level + MICROPY_ALLOC_LEXEL_INDENT_INC);
        lex->alloc_indent_level += MICROPY_ALLOC_LEXEL_INDENT_INC;
    }

    // Store at the current top, then bump the count.
    lex->indent_level[lex->num_indent_level] = indent;
    lex->num_indent_level++;
}

// Return the most recently pushed indentation level without removing it.
// The stack is not checked for emptiness here.
static size_t indent_top(mp_lexer_t *lex) {
    const size_t top = lex->indent_level[lex->num_indent_level - 1];
    return top;
}

// Drop the most recently pushed indentation level by decrementing the count.
// The stack is not checked for emptiness here.
static void indent_pop(mp_lexer_t *lex) {
    --lex->num_indent_level;
}

// Compact encoding of the operator/delimiter tokens, used to match them
// quickly.  Each row's trailing comment lists the tokens that row encodes.
// NOTE(review): the decoder is not part of this excerpt; presumably the 'e'
// and 'c' letters are control markers telling it whether a token ends at or
// continues with the following character -- confirm against the decoder.
static const char *const tok_enc =
    "()[]{},;~"   // single-character tokens
    ":e="         // : :=
    "<e=c<e="     // < <= << <<=
    ">e=c>e="     // > >= >> >>=
    "*e=c*e="     // * *= ** **=
    "+e="         // + +=
    "-e=e>"       // - -= ->
    "&e="         // & &=
    "|e="         // | |=
    "/e=c/e="     // / /= // //=
    "%e="         // % %=
    "^e="         // ^ ^=
    "@e="         // @ @=
    "=e="         // = ==
    "!.";         // start of the special cases: != . ...

// Token kinds for the operators encoded in tok_enc.  Entries are listed in
// the same order as the tokens named in tok_enc's row comments; the final
// special-case row ("!.") has no entries here.
static const uint8_t tok_enc_kind[] = {
   
    // ( ) [ ] { } , ; ~
    MP_TOKEN_DEL_PAREN_OPEN, MP_TOKEN_DEL_PAREN_CLOSE,
    MP_TOKEN_DEL_BRACKET_OPEN, MP_TOKEN_DEL_BRACKET_CLOSE,
    MP_TOKEN_DEL_BRACE_OPEN, MP_TOKEN_DEL_BRACE_CLOSE,
    MP_TOKEN_DEL_COMMA, MP_TOKEN_DEL_SEMICOLON, MP_TOKEN_OP_TILDE,

    // One line per tok_enc row, from ": :=" down to "= ==".
    MP_TOKEN_DEL_COLON, MP_TOKEN_OP_ASSIGN,
    MP_TOKEN_OP_LESS, MP_TOKEN_OP_LESS_EQUAL, MP_TOKEN_OP_DBL_LESS, MP_TOKEN_DEL_DBL_LESS_EQUAL,
    MP_TOKEN_OP_MORE, MP_TOKEN_OP_MORE_EQUAL, MP_TOKEN_OP_DBL_MORE, MP_TOKEN_DEL_DBL_MORE_EQUAL,
    MP_TOKEN_OP_STAR, MP_TOKEN_DEL_STAR_EQUAL, MP_TOKEN_OP_DBL_STAR, MP_TOKEN_DEL_DBL_STAR_EQUAL,
    MP_TOKEN_OP_PLUS, MP_TOKEN_DEL_PLUS_EQUAL,
    MP_TOKEN_OP_MINUS, MP_TOKEN_DEL_MINUS_EQUAL, MP_TOKEN_DEL_MINUS_MORE,
    MP_TOKEN_OP_AMPERSAND, MP_TOKEN_DEL_AMPERSAND_EQUAL,
    MP_TOKEN_OP_PIPE, MP_TOKEN_DEL_PIPE_EQUAL,
    MP_TOKEN_OP_SLASH, MP_TOKEN_DEL_SLASH_EQUAL, MP_TOKEN_OP_DBL_SLASH, MP_TOKEN_DEL_DBL_SLASH_EQUAL,
    MP_TOKEN_OP_PERCENT, MP_TOKEN_DEL_PERCENT_EQUAL,
    MP_TOKEN_OP_CARET, MP_TOKEN_DEL_CARET_EQUAL,
    MP_TOKEN_OP_AT, MP_TOKEN_DEL_AT_EQUAL,
    MP_TOKEN_DEL_EQUAL, MP_TOKEN_OP_DBL_EQUAL,
};

// 关键字列表，必须与lexer.h中的枚举顺序相同，并按strcmp排序
static const char *const tok_kw[] = {
   
    "False", "None"

最低0.47元/天解锁文章

openwin_top

关注

42
点赞
踩
12

收藏

觉得还不错? 一键收藏
打赏
0
评论
microPython的源码解析之 lexer.c

MicroPython 是一种适用于微控制器和其他受限环境的 Python 编程语言的实现。它旨在提供与 Python 3 语言的紧密兼容，同时考虑到内存和计算资源的限制。MicroPython 库是这门语言的核心组成部分，提供了一系列的模块和函数，使得开发者能够在硬件上执行各种任务。
复制链接

扫一扫