一个简单的解释器

实现一个简单的解释器

功能

  • 支持整形变量定义,int a
  • 支持赋值语句a = expression
  • 表达式可以使用变量
  • 支持AST的打印

文法

BNF规则描述文法

program::= statement+

statement::= intDeclaration | expressionStatement | assignStatement

intDeclaration::= INT ID ('=' expression)? ';'

expressionStatement::= expression ';'

assignStatement::= ID '=' expression ';'

expression::= expression1 ('+' expression1 | '-' expression1)* 

expression1::= expression2 ('*' expression2 | '/' expression2)*

expression2::= ID | NUM | '(' expression ')'

设计

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-fgk1TZow-1647876869933)(C:\Users\20580\AppData\Roaming\Typora\typora-user-images\image-20220318170659566.png)]

实现

词法分析模块

词法分析的过程是将字符流识别出来变成一个个的词素Token,这个Token的定义,组织为

/* 词法单元模块 */
using Name = std::string;

// Token的枚举
enum TokenID{
    INT,    // int
    ID,     // id
    NUM,    // number
    OTHER,  // Other Char + - * / = ;
    UNDEF   // un define
};

// Token的定义,Name为Token的字符值,id为Token的编号
struct Token{
    Name value;
    TokenID id;
    Token():id(UNDEF),value(""){};
};

在项目中,笔者写了三个接口,分别为:

read: 从字符流中读取一个Token

peek: 从字符流中前看一个Token

lookAhead:从字符流中前看n个Token

他的源代码为

// 读取一个Token
Token read(){
    Token token;
    while (*p != '\0'){
        char character = *p;
        if(char_set.find(character) != char_set.cend()){
            p++;
            token.id = OTHER;
            token.value.push_back(character);
            return token;
        }else{
            if(character == ' ' || character == '\r' || character == '\n' || character == '\t'){
                p++;
                continue;
            }
            // 识别数字
            if(isDig(character)){
                token.id = NUM;
                readDig(token);
                return token;
            }
            // 识别int 和 id
            readIdOrInt(token);
            return token;
        }
    }
    token.id = UNDEF;
    return token;
}

// 预读n个token
std::vector<Token> lookAhead(int n){
    char *peek = p;
    std::vector<Token> tokens = {};
    for (int i = 0; i < n; ++i){
        if(*p == '\0')
            break;
        tokens.push_back(read());
    }
    p = peek;
    return tokens;
}

// 预读一个token
Token peek(){
    char *peek = p;  // p指向字符流的指针
    Token t = read();
    p = peek;
    return t;
}

语法分析模块生成AST

根据上面的文法描述,从顶层文法一层一层往下递归,来实现整个语法分析的过程,并且递归的构造AST,根据AST结点类型的不同,对于AST的组织也是仅仅使用了一个简单的定义,并且AST的结点类型使用一个枚举类型来声明,AST的定义为

/*语法分析,AST生成模块*/
enum ASTNodeType{
    IntDeclaration,
    AssignStatement,
    ExpressionStatement,
    AddNode,
    SubNode,
    MulNode,
    DivNode,
    ELENode,
};


class SimpleASTNode{
public:
    SimpleASTNode(ASTNodeType type):ast_type(type),childs{}, token(){}
    void addChild(SimpleASTNode *node){
        childs.push_back(node);
    }

    // eval AST 执行AST, 打印结果
    int eval(){
        switch (ast_type) {
            case IntDeclaration:{
                printf("Decl var: %s\n", token.value.c_str());
                var_map[token.value] = 0;
                if(!childs.empty()){
                    childs[0]->eval();
                }
                break;
            }
            case AssignStatement:{
                if(var_map.find(token.value) == var_map.cend()){
                    printf("[ERROR]:Var %s is not define\n", token.value.c_str());
                    throw std::exception();
                }else{
                    int value = childs[0]->eval();
                    var_map[token.value] = value;
                    printf("Assign %s = %d\n", token.value.c_str(), value);
                }
                break;
            };
            case ExpressionStatement:{
                int value = childs[0]->eval();
                printf("expression res: %d\n", value);
                break;
            }
            case AddNode:{
                return childs[0]->eval() + childs[1]->eval();
            };
            case SubNode:{
                return childs[0]->eval() - childs[1]->eval();
            };
            case MulNode:{
                return childs[0]->eval() * childs[1]->eval();
            };
            case DivNode:{
                return childs[0]->eval() / childs[1]->eval();
            };
            case ELENode:{
                if(token.id == NUM)
                    return atoi(token.value.c_str());
                else if(token.id == ID){
                    if(var_map.find(token.value) == var_map.cend()){
                        printf("[ERROR]:Var %s is not define\n", token.value.c_str());
                        throw std::exception();
                    }
                    return var_map[token.value];
                }else{
                    printf("[ERROR]:Unknown error!\n");
                    throw std::exception();
                }
            };
            default:{
                std::cerr<<"[ERROR]:Unknown AST type"<<std::endl;
                return -1;
            }
        }
        return 0;
    }

    //printAST 打印AST
    void printAST(std::string tab){
        switch(ast_type){
            case IntDeclaration:{
                printf("%sIntDeclarationNode:%s\n", tab.c_str(), token.value.c_str());
                if(!childs.empty()){
                    tab.append("    ");
                    childs[0]->printAST(tab);
                }
                return;
            }
            case AssignStatement:{
                printf("%sAssignNode:%s\n", tab.c_str(), token.value.c_str());
                tab.append("    ");
                childs[0]->printAST(tab);
                return;
            }
            case ExpressionStatement:{
                printf("%sExpressionNode:\n", tab.c_str());
                tab.append("    ");
                childs[0]->printAST(tab);
                return;
            }
            case AddNode:{
                printf("%sAddNode:\n", tab.c_str());
                tab.append("    ");
                childs[0]->printAST(tab);
                childs[1]->printAST(tab);
                return;
            }
            case SubNode:{
                printf("%sSubNode:\n", tab.c_str());
                tab.append("    ");
                childs[0]->printAST(tab);
                childs[1]->printAST(tab);
                return;
            }
            case MulNode:{
                printf("%sMulNode:\n", tab.c_str());
                tab.append("    ");
                childs[0]->printAST(tab);
                childs[1]->printAST(tab);
                return;
            }
            case DivNode:{
                printf("%sDivNode:\n", tab.c_str());
                tab.append("    ");
                childs[0]->printAST(tab);
                childs[1]->printAST(tab);
                return;
            }
            case ELENode:{
                if(token.id == ID){
                    printf("%sIDNode:%s\n", tab.c_str(), token.value.c_str());
                }else if(token.id == NUM){
                    printf("%sNUMNode:%s\n", tab.c_str(), token.value.c_str());
                }
                return;
            }
        }
    }

    void setToken(Token tok){
        token.id = tok.id;
        token.value = tok.value;
    }
private:
    ASTNodeType ast_type;
    Token token;
    std::vector<SimpleASTNode*> childs;
};

基于上面的AST构造,依照文法, 从顶层往下层构造AST,每一个非终结符使用一个函数构造

// 表达式求值
SimpleASTNode* expression();
SimpleASTNode* expression1();
SimpleASTNode* expression2();

/**
 * 文法描述,采用扩展BNF写法
 * program::= statement+
 * statement::= intDeclaration | expressionStatement | assignStatement
 * intDeclaration::= INT ID ('=' expression)? ';'
 * expressionStatement::= expression ';'
 * assignStatement::= ID '=' expression ';'
 * expression::= expression1 ('+' expression1 | '-' expression1)*
 * expression1::= expression2 ('*' expression2 | '/' expression2)*
 * expression2::= ID | NUM | '(' expression ')'
 */

SimpleASTNode* expression2(){
    SimpleASTNode *node = nullptr;
    char *loc = p;
    Token token = peek();
    if(token.id == ID || token.id == NUM){
        read();
        if(token.id == ID){
            Token t = peek();
            if(t.value == "="){
                p = loc;
                return nullptr;
            }
        }
        node = new SimpleASTNode(ELENode);
        node->setToken(token);
        return node;
    }else if(token.value == "("){
        read();
        node = expression();
        if(node == nullptr){
            printf("[ERROR]:Undefined Statement\n");
            throw std::exception();
        }
        token = peek();
        if(token.value != ")"){
            printf("[ERROR]:Undefined Statement\n");
            throw std::exception();
        }
        read();
    }
    return node;
}

SimpleASTNode* expression1(){
    //expression1::= expression2 (* expression2 | / expression2)*
    SimpleASTNode *node = expression2();
    if(node == nullptr)
        return node;
    Token t = peek();
    SimpleASTNode *p = node;
    SimpleASTNode *root;
    while (t.id == OTHER && (t.value == "*" || t.value == "/")){
        if(t.value == "*"){
            root = new SimpleASTNode(MulNode);
        }else{
            root = new SimpleASTNode(DivNode);
        }
        root->addChild(p);
        read();
        node = expression2();
        if(node == nullptr){
            printf("[ERROR]:Undefined Statement\n");
            throw std::exception();
        }
        root->addChild(node);
        p = root;
        t = peek();
    }
    return p;
}

SimpleASTNode* expression(){
    // expression::= expression1 (+ expression1 | - expression1)*
    SimpleASTNode *node = expression1();
    if(node == nullptr)
        return node;
    Token t = peek();
    SimpleASTNode *p = node;
    SimpleASTNode *root;
    if(t.value == "="){
        printf("[ERROR]:Undefined Statement\n");
        throw std::exception();
    }
    while (t.id == OTHER && (t.value == "+" || t.value == "-")){

        if(t.value == "+"){
            root = new SimpleASTNode(AddNode);
        }else{
            root = new SimpleASTNode(SubNode);
        }
        root->addChild(p);
        read();
        node = expression1();
        if(node == nullptr){
            printf("[ERROR]:Undefined Statement\n");
            throw std::exception();
        }
        root->addChild(node);
        p = root;
        t = peek();
    }
    return p;
}

//
SimpleASTNode* intDeclaration(){
    SimpleASTNode *node = nullptr;
    Token token = peek();
    if(token.id == INT){
        read(); // 消耗一个字符
        node = new SimpleASTNode(IntDeclaration);
        token = read();
        if(token.id != ID){
            printf("[ERROR]:Undefined Statement\n");
            throw std::exception();
        }else{
            node->setToken(token);
            Token tokens = token;
            token = peek();
            if(token.id == OTHER && token.value == "="){
                // 添加子节点
                read();
                SimpleASTNode *childs = new SimpleASTNode(AssignStatement);
                childs->setToken(tokens);
                SimpleASTNode *child = expression();
                if(child != nullptr){
                    node->addChild(childs);
                    childs->addChild(child);

                }else{
                    printf("[ERROR]:Undefined Statement\n");
                    throw std::exception();
                }
            }else if(token.value != "" && token.value != ";"){
                printf("[ERROR]:Undefined Statement\n");
                throw std::exception();
            }
            token = read();
            if(token.value != ";"){
                printf("[ERROR]:Statement '%s' not have ';'\n", statement);
                throw std::exception();
            }
        }
    }
    return node;
}

SimpleASTNode* expressionStatement(){
    char *loc = p;
    SimpleASTNode *expr = new SimpleASTNode(ExpressionStatement);
    SimpleASTNode *node = expression();
    if(node != nullptr){
        Token token = read();
        if(token.value != ";"){
            printf("[ERROR]:Statement '%s' not have ';'\n", statement);
            throw std::exception();
        }
        expr->addChild(node);
        return expr;
    }else{
        p = loc;
    }
    return node;
}

SimpleASTNode* assignmentStatement(){
    SimpleASTNode *node = nullptr;
    Token token = peek();
    if(token.id == ID){
        read();
        node = new SimpleASTNode(AssignStatement);
        node->setToken(token);
        token = peek();
        if(token.value == "="){
            read();
            SimpleASTNode *child = expression();
            if(p == nullptr){
                printf("[ERROR]:Undefined Statement\n");
                throw std::exception();
            }else{
                node->addChild(child);
                token = read();
                if(token.value != ";"){
                    printf("[ERROR]:Statement '%s' not have ';'\n", statement);
                    throw std::exception();
                }
            }
        }else{
            printf("[ERROR]:Undefined Statement\n");
            throw std::exception();
        }
    }
    return node;
}

主程序

/*主函数*/
int main(int argc, char *argv[]) {
    bool verbose = false;
    if(argc >= 2){
        verbose = true;
    }
    statement = (char*)calloc(256,sizeof(char));
    printf("======== Simple Script Hello! ========\n");
    while(true){
        printf("@>>>");
        std::cin.getline(statement, 256);
        p = statement;
        if(strcmp(statement, "exit") == 0){
            printf("======== Simple Script Bye! ========\n");
            break;
        }
        SimpleASTNode *root = nullptr;
        try{
            root = intDeclaration();
            if(root != nullptr){
                root->eval();
                if(verbose){
                    printf("AST:\n");
                    root->printAST("    ");
                }
                continue;
            }
            root = expressionStatement();
            if(root != nullptr){
                root->eval();
                if(verbose){
                    printf("AST:\n");
                    root->printAST("    ");
                }
                continue;
            }
            root = assignmentStatement();
            if(root != nullptr){
                root->eval();
                if(verbose){
                    printf("AST:\n");
                    root->printAST("    ");
                };
                continue;
            }
        }catch(...){
            continue;
        }
    }
    return 0;
}

最后结果

image-20220321232939644

源码

arqqqq/SimpleScript: A simple script executor (github.com)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值