bison flex 实现tiny语言的编译器

bison flex 实现tiny语言的编译器

项目地址:tiny-compiler
完成了词法分析,语法分析,中间代码生成,虚拟机执行,没有进行类型检查、错误处理和中间代码优化。

词法分析

%{
#include <iostream>
#include "tiny-parser.tab.h"
using namespace std;
/* lexeme of identifier or reserved word */

extern FILE *fin; /* we read from this file */

extern int current_lineno ;

#undef YY_INPUT
#define YY_INPUT(buf,result,max_size) \
	if ( (result = fread( (char*)buf, sizeof(char), max_size, fin)) < 0) \
		YY_FATAL_ERROR( "read() in flex scanner failed");
%}

digit       [0-9]
number      {digit}+
letter      [a-zA-Z]
identifier  {letter}+
whitespace  [ \t]+

%%

"if"            {return IF_T;}
"then"          {return THEN;}
"else"          {return ELSE;}
"end"           {return END;}
"repeat"        {return REPEAT;}
"until"         {return UNTIL;}
"read"          {return READ;}
"write"         {return WRITE;}
":="            {return ASSIGN;}
"="             {return EQ;}
"<"             {return LT;}
"+"             {return PLUS;}
"-"             {return MINUS;}
"*"             {return TIMES;}
"/"             {return OVER;}
"("             {return LPAREN;}
")"             {return RPAREN;}
";"             {return SEMI;}
{number}        {return NUM;}
{identifier}    {return ID;}
\n             {current_lineno++;}
{whitespace}    {/* skip whitespace */}
\{[^\}]*\}          { 
                    if(yytext[0] == '\n') {
                        current_lineno++;
                    }
                }
"$"             {return EOF;}
"}"             {return ERROR;}
.               {return ERROR;}

%%

这里需要注意两个点,

  1. 定义了current_lineno变量,用于记录当前行号。每次碰到\n都是需要加一的
  2. YY_INPUT 宏,用于指定从哪里读入数据,具体的File * fin在main函数中设置。

语法分析

代码

%{
#include "TreeNode.h"
#define YYSTYPE TreeNode *
#include <string>

using namespace std;

extern int current_lineno ;
extern char *yytext;

int yylex();
int yyerror(const char *s);

static char saved_name[256]; /* for use in assignments */
static int saved_lineNo;  /* ditto */
extern TreeNode * root; /* stores syntax tree for later return */

%}

%token IF_T THEN ELSE END REPEAT UNTIL READ WRITE
%token ID NUM 
%token ASSIGN EQ LT PLUS MINUS TIMES OVER LPAREN RPAREN SEMI
%token ERROR 

%% /* Grammar for TINY */

program     : stmt_seq {root = $1; }
            ;
stmt_seq    : stmt_seq SEMI stmt {$1->children.push_back( $3);
                                $$ = $1;
                                   }
            | stmt  { $$=new_stmt_node();
                     $$->children.push_back( $1);
             }
            ;
stmt        : if_stmt { $$ = $1; }
            | repeat_stmt { $$ = $1; }
            | assign_stmt { $$ = $1; }
            | read_stmt { $$ = $1; }
            | write_stmt { $$ = $1; }
            | error  { $$ = NULL; }
            ;
if_stmt     : IF_T exp THEN stmt_seq END     
                 { 
                  $$ = new_if_stmt(current_lineno, $2, $4);
                 }
            | IF_T exp THEN stmt_seq ELSE stmt_seq END
                 { $$ = new_if_stmt(current_lineno, $2, $4, $6);
                 }
            ;
repeat_stmt : REPEAT stmt_seq UNTIL exp
                 { $$ = new_repeat_stmt(current_lineno, $2, $4);
                 }
            ;
assign_stmt : ID { $1=new_id(current_lineno, yytext);
               }
              ASSIGN exp
                 { 
                    $$ = new_assign_stmt(current_lineno, $1, $4);                
                 }
            ;
read_stmt   : READ ID
                 {
                  $$=new_read_stmt(current_lineno, new_id(current_lineno, yytext));
                 }
            ;
write_stmt  : WRITE exp
                 { 
                  $$=new_write_stmt(current_lineno, $2);
                 }
            ;
exp         : simple_exp LT simple_exp 
                 { 
                  $$ = new_exp(current_lineno, $1,LT,$3);
                 }
            | simple_exp EQ simple_exp
                 { 
                  $$=new_exp(current_lineno, $1,EQ,$3);
                 }
            | simple_exp { $$ =$1; }
            ;
simple_exp  : simple_exp PLUS term 
                 { 
                  $$=new_simple_exp(current_lineno, $1, PLUS, $3);
                 }
            | simple_exp MINUS term
                 { 
                  $$=new_simple_exp(current_lineno, $1, MINUS, $3);
                 } 
            | term { $$ = $1; }
            ;
term        : term TIMES factor 
                 { 
                  $$=new_term(current_lineno, $1, TIMES, $3);
                 }
            | term OVER factor
                 { 
                  $$=new_term(current_lineno, $1, OVER, $3);
                 }
            | factor { $$ = $1; }
            ;
factor      : LPAREN exp RPAREN
                 { $$ = $2; }
            | NUM
                 { 
                  $$=new_num(current_lineno, atoi(yytext));
                 }
            | ID {
                  $$=new_id(current_lineno, yytext);
                 }
            | error { $$ = NULL; }
            ;

%%

int yyerror(const char *s) {
     fprintf(stderr, "Error: %s at line %d near %s\n", s, current_lineno, yytext);
     return 0;
}

tiny语言的表达式还挺难写的

  1. 这里需要先定义token,生成tiny-parser.tab.h文件,然后让tiny-lexer.l包含,这样可以解决重定义token的问题。
  2. 对于每一个表达式的值,如果没有指定的话,就是int型,指定就是你自己定义的类型
    1. 一定要是指针类型,因为局部变量在过了这个函数之后,他就会自动销毁
    2. 不要用c++的智能指针,bison是c语言写的,听说,不支持那么高级的语法
    3. 示例,就是这个宏
#define YYSTYPE TreeNode *
  1. 注意这里是**$4不是$3**,前面的SDT也算是一个$,这个bug特别难找,莫名其妙的空指针错误
assign_stmt : ID { $1=new_id(current_lineno, yytext);
               }
              ASSIGN exp
                 { 
                    $$ = new_assign_stmt(current_lineno, $1, $4);                
                 }
            ;
  1. 要声明yylex()函数,这个是从词法解析里定义的入口函数。

接口

#ifndef TREE_NODE_H
#define TREE_NODE_H
#include <string.h>
#include <vector>
#include "tiny-parser.tab.h"

using namespace std;

extern int lineno ;

enum NodeKind{StmtK=1,ExpK} ;
enum StmtKind{Stmt_K=3,IfK,RepeatK,AssignK,ReadK,WriteK} ;
enum ExpKind {OpK=9,ConstK,IdK} ;
/* ExpType is used for type checking */
enum ExpType{Void_type=9,Integer,Boolean} ;


#define Token_Kind int

class TreeNode{
private:
    void show_info(int depth,bool is_last_child);
public:
    NodeKind kind;
    int lineno;
    union { StmtKind stmt; ExpKind exp;} production_kind;
    union { Token_Kind op;
            int val;   
            char *name; } attr;
    ExpType type; /* for type checking of exps */
    vector<TreeNode *> children;


    void copy_name(const char *name){
        this->attr.name=new char(strlen(name));
        strcpy(this->attr.name,name);
    };
    void show_all_info();


    TreeNode(int line){
        this->lineno = line;
    }

    template<typename... Args>
    TreeNode(int line,TreeNode * first_child, Args... args){
        this->lineno = line;
        this->children.push_back(first_child);
        (this->children.push_back(args),...);
    };
    ~TreeNode(){
        if(this->production_kind.exp==IdK){
            delete this->attr.name;
        }
    }

    virtual void generate_code()=0;
};


TreeNode * new_stmt_node();

TreeNode * new_if_stmt(int line,TreeNode * cond, TreeNode * then_stmt, TreeNode * else_stmt);
TreeNode * new_if_stmt(int line,TreeNode * cond, TreeNode * then_stmt);

TreeNode * new_repeat_stmt(int line, TreeNode * body,TreeNode * cond);

TreeNode * new_assign_stmt(int line,TreeNode * var, TreeNode * exp);

TreeNode * new_read_stmt(int line,TreeNode * var);

TreeNode * new_write_stmt(int line,TreeNode * exp);

TreeNode * new_exp(int line,TreeNode * left, Token_Kind op, TreeNode * right);
TreeNode * new_exp(int line,TreeNode * simple_exp);

TreeNode* new_simple_exp(int line,TreeNode * left, Token_Kind op, TreeNode * right);

TreeNode * new_term(int line,TreeNode * left, Token_Kind op, TreeNode * right);
TreeNode * new_term(int line,TreeNode * factor);


TreeNode * new_num(int line,int val);
TreeNode * new_id(int line,const char *name);

#endif

就是使用TreeNode*节点,记录当前的需要的信息,比方说 表达式的符号,变量的id,当前节点的行号,每次进行到规约的时候,就执行这个,把节点增加到根节点里去。

接口实现

#ifndef TREE_NODE_EXTERN_H
#define TREE_NODE_EXTERN_H
#include <iostream>
#include "TreeNode.h"
using namespace std;

extern FILE *fout;

class StmtNode : public TreeNode {

public:
    StmtNode(int line):TreeNode(line){

    };

    void generate_code() override{
    
        for(auto &child:this->children){
            child->generate_code();
        }
    
    }
};



class IfNode : public TreeNode {
public:
    static int if_count ;
    IfNode(int line, TreeNode* condition, TreeNode* true_stmt, TreeNode* false_stmt):TreeNode(line){};

    IfNode(int line, TreeNode* condition, TreeNode* true_stmt):TreeNode(line,condition,true_stmt){};

    void generate_code() override{
        if_count++;
        this->children[0]->generate_code();
        if(this->children.size()==3){
            //if else
            fprintf(fout,"if_else_%d:\n",if_count);
            this->children[1]->generate_code();
            fprintf(fout,"jmp if_end_%d\n",if_count);

            fprintf(fout,"label if_else_%d\n",if_count);
            this->children[2]->generate_code();

            fprintf(fout,"label if_end_%d\n",if_count);
        }else{
            //if
            fprintf(fout,"if_end_%d:\n",if_count);
            this->children[1]->generate_code();
            fprintf(fout,"label if_end_%d\n",if_count);
        }
          fflush(fout);
    }
};
int IfNode::if_count = 0;

class RepeatNode : public TreeNode {
public:
    static int repeat_count ;
    RepeatNode(int line, TreeNode* condition, TreeNode* stmt):TreeNode(line,condition,stmt){};

    void generate_code() override{
        repeat_count++;
        fprintf(fout,"label repeat_%d\n",repeat_count);
        this->children[0]->generate_code();
        this->children[1]->generate_code();
        fprintf(fout,"repeat_%d\n",repeat_count);
        fflush(fout);
    }
};
int RepeatNode::repeat_count = 0;

class AssignNode : public TreeNode {
public:
    AssignNode(int line, TreeNode* left, TreeNode* right):TreeNode(line,left,right){};

    void generate_code() override{
        this->children[1]->generate_code();
        fprintf(fout,"pop %s\n",this->children[0]->attr.name);
          fflush(fout);
    }
};

class ReadNode : public TreeNode {
public:
    ReadNode(int line, TreeNode* left):TreeNode(line,left){};

    void generate_code() override{
        fprintf(fout,"read %s\n",children[0]->attr.name);
        fflush(fout);
    }
};

class WriteNode : public TreeNode {
public:
    WriteNode(int line, TreeNode* right):TreeNode(line,right){};

    void generate_code() override{
        fprintf(fout,"write %s\n",children[0]->attr.name);
    }
};

class ExpNode : public TreeNode {
public:
    ExpNode(int line, TreeNode* left, TreeNode* right):TreeNode(line,left,right){};
    ExpNode(int line, TreeNode* simple_exp):TreeNode(line,simple_exp){};
    ExpNode(int line):TreeNode(line){};
    void generate_code() override{
        if(this->children.size()==1){
            this->children[0]->generate_code();
        }
        else if(this->children.size()==2){
            //this->show_all_info();
            //cout<<endl;
            this->children[0]->generate_code();
            this->children[1]->generate_code();
            fprintf(fout,"sub\n");
            switch (this->attr.op)
            {
            case EQ:
                fprintf(fout,"jnz ");
                break;
            case LT:
                fprintf(fout,"jeg ");
                break;
            default:
                cerr<<"wrong operator in exp node"<<endl;
                break;
            }
        }
        fflush(fout);
    
    }
};

class SimpleExpNode : public TreeNode {
public:
    SimpleExpNode(int line, TreeNode* left, TreeNode* right,Token_Kind op):TreeNode(line,left,right){
        this->attr.op = op;
        this->kind = ExpK;
        this->type = Integer;
        this->production_kind.exp=OpK;
    };
    SimpleExpNode(int line, TreeNode* factor):TreeNode(line,factor){
        this->kind = ExpK;
        this->type = Integer;
        this->production_kind.exp=OpK;
    };

    void generate_code() override{
        if(this->children.size()==1){
            this->children[0]->generate_code();
        }else{
            switch (this->attr.op)
            {
            case PLUS:
                this->children[0]->generate_code();
                this->children[1]->generate_code();
                fprintf(fout,"add\n");
                break;
            case MINUS:
                this->children[0]->generate_code();
                this->children[1]->generate_code();
                fprintf(fout,"sub\n");
                break;
            default:
                cerr<<"wrong operator in simple exp node"<<endl;
                break;
            }
        }
        fflush(fout);
    }
};

class TermNode : public TreeNode {
public:
    TermNode(int line, TreeNode* left, TreeNode* right):TreeNode(line,left,right){};
    TermNode(int line, TreeNode* factor):TreeNode(line,factor){};
    void generate_code() override{
        if(this->children.size()==0){
            this->children[0]->generate_code();
        }else{
            this->children[0]->generate_code();
            this->children[1]->generate_code();
            switch (this->attr.op)
            {
            case TIMES:
                fprintf(fout,"mul\n");
                break;
            case OVER:
                fprintf(fout,"div\n");
                break;
            default:
                cerr<<"wrong operator in term node"<<endl;
                break;
            }
        }
        fflush(fout);
    }
};

class IdNode : public TreeNode {
public:
    IdNode(int line,const char *name):TreeNode(line){
        this->kind = ExpK;
        this->type = Integer;
        this->attr.name = new char[strlen(name)+1];
        strcpy(this->attr.name,name);
        this->production_kind.exp=IdK;
    }
    void generate_code() override{
        fprintf(fout,"push %s\n",this->attr.name);
        fflush(fout);
    }
};

class NumNode : public TreeNode {
    public:
    NumNode(int line,int value):TreeNode(line){
        this->kind = ExpK;
        this->type = Integer;
        this->attr.val = value;
        this->production_kind.exp=ConstK;
    }
    void generate_code() override{
        fprintf(fout,"push $%d\n",this->attr.val);
        fflush(fout);
    }
};
#endif

对于每一个不同类型的节点,都有子类和他对应,生成不同的汇编代码。

本来应该是像后面的那样,更加简洁一点的,但我懒得改了,哈哈,先做个垃圾出来再说。

抽象语法树的生成

program     : stmt_seq {root = $1; }

这里执行成功了之后,就可以对root调用get_info的接口了,输出所有节点的信息
在这里插入图片描述
这是最后的结果,这个就是树的前序遍历,没啥好说的,最后一行可以不用| 的。


void TreeNode::show_info(int depth,bool is_last_child){
        // 输出当前节点的信息
    for (int i = 0; i < depth; i++) {
        if (i == depth-1) {
            if (is_last_child) {
                cout << "└── ";
            } else {
                cout << "├── ";
            }
        } else {
            cout << "│   ";
        }
    }

    cout<<"line: "<<this->lineno<<", type: "<<getExpTypeName(type);
    cout<<", ";
    if(this->kind==StmtK){
        cout<<"stmt: "<<getStmtKindName(production_kind.stmt)<<endl;
    }else{
        cout<<"exp: "<<getExpKindName(production_kind.exp);
        switch (production_kind.exp)
        {
        case OpK:
            cout<<", op: "<<Token_Kind_to_string(attr.op)<<endl;
            break;
        case ConstK:
            cout<<", val: "<<attr.val<<endl;
            break;
        case IdK:
            cout<<", name: "<<attr.name<<endl;
            break;
        default:
            cout<<endl;
            break;
        }
    }
    
  // 递归输出子节点
    for (size_t i = 0; i < children.size(); i++) {
        if (children[i] != nullptr) {
            children[i]->show_info(depth + 1, (i == (children.size() - 1)));
        } else {
            if (i == children.size() - 1) {
                for (int j = 0; j < depth + 1; j++) {
                    if (j == depth) {
                        cout << "└── ";
                    } else {
                        cout << "│   ";
                    }
                }
            } else {
                for (int j = 0; j < depth + 1; j++) {
                    if (j == depth) {
                        cout << "├── ";
                    } else {
                        cout << "│   ";
                    }
                }
            }
        }
    }
};

虚拟机的实现

见VM文件夹的虚拟机指令,模拟的是一个栈型计算机# bison flex 实现tiny语言的编译器
在这里插入图片描述

这里用label代替的行号,因为我懒得回填行号了,大不了先遍历一遍,记录所有的label

#include <iostream>
#include <fstream>
#include <stack>
#include <string>
#include <unordered_map>
#include <vector>
#include <functional>

using namespace std;

class VirtualMachine {
public:
    VirtualMachine(const string& filename) {
        // 从文件中读取指令
        ifstream file(filename);
        if (file.is_open()) {
            string line;
            while (getline(file, line)) {
                instructions.push_back(line);
            }
            file.close();
        } else {
            cout << "Unable to open file" << endl;
            exit(1);
        }

        // 初始化指令映射表
        instructionMap.emplace("halt", [this]() { this->halt(); });
        instructionMap.emplace("add", [this]() { this->add(); });
        instructionMap.emplace("sub", [this]() { this->sub(); });
        instructionMap.emplace("mul", [this]() { this->mul(); });
        instructionMap.emplace("div", [this]() { this->div(); });

        instructionMapTwo.emplace("read", [this](string & tokens) { this->read(tokens); });
        instructionMapTwo.emplace("write", [this](string& tokens) { this->write(tokens); });
        instructionMapTwo.emplace("push", [this](string& tokens) { this->push(tokens); });
        instructionMapTwo.emplace("pop", [this](string& tokens) { this->pop(tokens); });
        instructionMapTwo.emplace("label", [this](string& tokens) { this->label(tokens); });
        instructionMapTwo.emplace("jmp", [this](string& tokens) { this->jmp(tokens); });
        instructionMapTwo.emplace("jnz", [this](string &tokens) { this->jnz(tokens); });
        instructionMapTwo.emplace("jeg", [this](string &tokens) { this->jeg(tokens); });
        instructionMapTwo.emplace("jel", [this](string &tokens) { this->jel(tokens); });
    }

    void run() {
        // 初始化label
        for(int i=0;i<instructions.size();i++){
            if(instructions[i].find("label")!=string::npos){
                labels[instructions[i].substr(6)]=i;
            }
        }
        // 运行虚拟机
        while (pc < instructions.size()) {
            
            execute(instructions[pc]);
            pc++;
        }
    }

private:
    stack<int> stack_;
    unordered_map<string, function<void()>> instructionMap;
    unordered_map<string,function<void(string &)>> instructionMapTwo;
    unordered_map<string, int> labels;
    unordered_map<string,int> variables;
    vector<string> instructions;
    int pc = 0; // 程序计数器

    void halt() {
        // 停止运行
        exit(0);
    }

    void read(string & tokens) {
        // 从输入流中读取一个值并存入寄存器
        cout << "Enter a value for variable " << tokens << ": ";
        int value;
        cin >> value;
        variables[tokens] = value;
    }

    void write(string &tokens) {
        // 将寄存器的值输出到输出流
        cout <<tokens << " = " << variables[tokens] << endl;
    }

    void push(string& tokens) {
        // 将寄存器或常数压入栈顶
        if (tokens.front() == '$') {
            int reg = stoi(tokens.substr(1));
            stack_.push(reg);
        } else if(variables.count(tokens)==1){
            stack_.push(variables[tokens]);
        }else{
            cerr<<"Undefined variable: "<<tokens<<endl;
        }
    }

    void pop(string& tokens) {
        // 将栈顶的值弹出并存入寄存器
        int reg = stack_.top();
        variables[tokens] = reg;
        stack_.pop();
    }

    void label(string& tokens) {
        // 啥也不用作,前面记录过了
  
    }

    void jmp(string &tokens) {
        // 跳转到标签
        if(labels.count(tokens)==0){
            cerr << "Undefined label: " << tokens << endl;
            exit(1);
        }
        pc = labels[tokens];
    }

    void jnz(string &tokens) {
        // 如果栈顶的值为0,则跳转到标签
        if (stack_.top() != 0) {
            if(labels.count(tokens)==0){
                cerr << "Undefined label: " << tokens << endl;
                exit(1);
            }
            pc = labels[tokens];
        }
        stack_.pop();
    }

    void jeg(string &tokens){
        // 如果栈顶的值大于等于0,则跳转到标签
        if (stack_.top() >= 0) {
            if(labels.count(tokens)==0){
                cerr << "Undefined label: " << tokens << endl;
            }
            pc=labels[tokens];
        }
        stack_.pop();
    }

    void jel(string &tokens){
        // 如果栈顶的值小于等于0,则跳转到标签
        if (stack_.top() <= 0) {
            if(labels.count(tokens)==0){
                cerr << "Undefined label: " << tokens << endl;
            }
            pc=labels[tokens];
        }
        stack_.pop();
    }

    void add() {
        // 栈顶两个元素相加
        int a = stack_.top();
        stack_.pop();
        int b = stack_.top();
        stack_.pop();
        stack_.push(b + a);
    }

    void sub() {
        // 栈顶两个元素相减
        int a = stack_.top();
        stack_.pop();
        int b = stack_.top();
        stack_.pop();
        stack_.push(b - a);
    }

    void mul() {
        // 栈顶两个元素相乘
        int a = stack_.top();
        stack_.pop();
        int b = stack_.top();
        stack_.pop();
        stack_.push(a * b);
    }

    void div() {
        // 栈顶两个元素相除
        int a = stack_.top();
        stack_.pop();
        int b = stack_.top();
        stack_.pop();
        stack_.push(b / a);
    }

    void execute(string& tokens) {
        if(tokens.front()=='#'){
            return ; // 注释
        }
        // 执行指令
        if (instructionMap.count(tokens)) {
            instructionMap[tokens]();
        } else {
            auto posi= tokens.find(' ');
            if(posi != string::npos){
                string instr= tokens.substr(0,posi);
                string arg= tokens.substr(posi+1);
                if(instructionMapTwo.count(instr)){
                    instructionMapTwo[instr](arg);
                }else{

                    cout << "Invalid instruction: " << tokens << endl;
                }
            }else{
                cout << "Invalid instruction: " << tokens << endl;
            }
        }
    }
};

int main(int argc, char* argv[]) {
    if (argc != 2) {
        cout << "Usage: " << argv[0] << " <input_file>" << endl;
        return 1;
    }

    VirtualMachine vm(argv[1]);
    vm.run();
    return 0;
}

这里写的非常好的地方在于,他把指令放到了一个map里,不用if判断了,耦合很低。

感想

  1. 最难的还是flex和bison的使用,资料和demo太少了,yyerror yylval yytexts啥的宏都要自己查官网文档。
  2. 虚拟机的实现很快,寄存器型的会难一些,需要把if while 这些翻译的逻辑搞懂。
  • 14
    点赞
  • 20
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值