简单的C语言编译器--语法分析器

  语法分析算是最难的一部分了。总而言之,语法分析就是先设计一系列语法,然后再用设计好的语法去归约词法分析中的结果。最后将归约过程打印出来,或者生成抽象语法树。

1. 设计文法

以下是我的文法(引入的M和N是方便以后的语义分析):

1.1、基本框架
Program -> Type main() Block
Type -> int | bool
Block -> { Stmts return Num ; }
Decl -> Type Array ;
Array -> Identifier [ Num ] | Identifier [ Identifier ] | Identifier
Stmts -> Stmts M Stmt | Stmt
1.2、标识符和常数
Letter -> A | B | C | D | E | F | G | H | I | J | K | L | M | N 
    | O | P | Q | R | S | T | U | V | W | X | Y | Z | a | b
    | c | d | e | f | g | h | i | j | k | l | m | n | o | p
    | q | r | s | t | u | v | w | x | y | z | _
Digit -> 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
Non_digit -> 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
Num -> Num Digit | Non_digit
Identifier -> Identifier Digit | Identifier Letter | Letter
Bool_value -> true | false
1.3、运算符
Self_op -> ++ | --
HLogic_op -> &&
LLogic_op -> ||
HMath_op -> * | / | %
LMath_op -> + | - 
Judge_op -> == | != | >= | <= | > | <
1.4、语句块框架
Stmt -> Assignment ;
    | Decl
    | if ( Bool ) M Stmt
    | if ( Bool ) M Stmt N else M Stmt
    | while M ( Bool ) M Stmt
    | for ( Fora ; M Forb ; Forc ) M Stmt
    | { Stmts }
Fora -> Assignment | ε
Forb -> Bool | ε
Forc -> ForAssignment | ε
1.5、赋值语句
Assignment -> LArray = Bool ; | LArray Self_op ; | Self_op LArray ; 

ForAssignment -> LArray = Bool ; | LArray Self_op ; | Self_op LArray ; 
1.6、条件语句
Factor -> ( Bool ) | Array | Num | Bool_value | ! ( Bool )
HExpr -> HExpr HMath_op Factor | Factor
LExpr -> LExpr LMath_op HExpr | HExpr
Rel -> Rel Judge_op LExpr | LExpr
HRel -> HRel HLogic_op M Rel | Rel
Bool -> Bool LLogic_op M HRel | HRel
1.7、辅助
M -> ε 
N -> ε  

注意,标识符和常数那块是为了词法分析构造DFA,实际语法分析的时候将所有标识符当做Identifier,把所有的常数当成Num即可。

2. 代码部分

  代码的关键是用getTable.h/cpp实现语法分析表,然后用状态栈分析测试的语法是否能够被归约成功,并且得到归约的顺序,即语法分析树。myParser.h/cpp便是利用得到的分析表进行栈分析,但是由于是制导翻译,所以分析这块和语义翻译一起做,所以下一节再介绍。

getTable.h

#ifndef MYPARSER_H
#define MYPARSER_H

#include <fstream>
#include <unordered_map>
#include <unordered_set>
#include <string>
#include <vector>
#include <cstddef>
#include <functional>


struct Project {
    int pro_num;  //表示第pro_num个产生式,0表示拓展的产生式
    int dot_position;  //当前点的位置,位置的取值比产生式右端长度大1
    std::unordered_set<std::string> successors;  //后继符
    bool operator==(Project &right);
};

struct Grammer {
    std::string left;
    std::vector<std::string> right;
};  

/*
 * hash函数返回哈希值
 * eq则为set提供相等操作
 * 这样就可以把Project作为set的关键词
 */


size_t hasher(const Project &p);
bool eq(const Project &p1, const Project &p2);


class GetTable {
    public:
        typedef std::unordered_set<Project, decltype(hasher) *, decltype(eq) *> Status; 
        typedef std::unordered_map<int, std::unordered_map<std::string, std::string>> Atable;
        typedef std::unordered_map<int, std::unordered_map<std::string, int>> Gtable;
        GetTable();
        void get_grammer(std::ifstream &in); //从文本中读取文法规则
        void get_first();   //获得first集合
        void get_follow(); //获得follow集合
        bool inVT(std::string s); //判断在V还是T里,1为V
        void judge_first(std::vector<std::string> s , std::unordered_set<std::string> & result); //判断串s的first集合是否包含ε
        void get_closure(Status &p); //求项目闭包
        bool judge_repeat(Status s1, Status s2); //判断两个状态是否重复
        bool judge_conflict(Status & s, std::unordered_set<std::string> &result); //判断一个状态是否有移进-归约冲突
        void get_status(); //获得状态集合
        void print(std::ofstream &o1, std::ofstream &o2); //打印归约产生式子
    private:
        std::unordered_map<std::string,std::unordered_set<int>> index; //存储各非终结符的定义的序列号集合
        std::vector<Grammer> G;  //存储文法
        std::vector<std::string> V, T; //存储非终结符和终结符
        std::unordered_map<std::string, std::unordered_set<std::string>> first, follow;  //存储first和follow集合
        std::unordered_map<int, Status> statuses; //规范状态集
        Atable action;
        Gtable goTo;

};

#endif

getTable.cpp

#include <iostream>
#include "getTable.h"


bool Project::operator==(Project &right) {
    if(this->pro_num == right.pro_num && this->dot_position == right.dot_position && this->successors.size() == right.successors.size()) {
        for(auto t : right.successors) {
            if(this->successors.find(t) == this->successors.end())
                return false;
        }

        return true;
    }

    return false;
}


size_t hasher(const Project &p) {
    return std::hash<int>() (p.pro_num*p.dot_position);
}


bool eq(const Project &p1, const Project &p2) {
    if(p1.pro_num == p2.pro_num && p1.dot_position == p2.dot_position) 
        return true;
    return false;
}


GetTable::GetTable(){
    //初始化非终结符和终结符
    V = {"S","Program","Type","Block","Stmts","Decl","Stmt","ForAssignment","Assignment","Bool", "Rel", "LExpr","HExpr","Factor","Self_op","HLogic_op","LLogic_op","HMath_op","LMath_op","Judge_op","Bool_value","Array","Fora","Forb","Forc","HRel","LArray","M","N"};

    T = {"(" ,")","main","int","bool","return",";","{","}","if","else","while","for","Identifier","Num","[","]","true","false","==","!=",">=","<=",">","<","+","-","*","/","%","||","&&","++","--","!","-",";","=","ε"};

    std::unordered_set<std::string> empty_set;
    std::unordered_set<int> empty_set_int;

    //初始化first和follow集
    for(auto i : V) { 
        first.insert({i, empty_set});
        follow.insert({i, empty_set});
        index.insert({i,empty_set_int});
    }


    follow["Program"].insert("#");
}


void GetTable::get_grammer(std::ifstream &in) {
    std::string tmp;
    Grammer gtmp;
    gtmp.left = "S";
    gtmp.right = {"Program"};
    G.push_back(gtmp);

    while(in >> tmp) {
        gtmp.left = tmp;
        gtmp.right.clear();
        in >> tmp >> tmp;
        while(tmp != "#") {
            gtmp.right.push_back(tmp);  
            in >> tmp;
        }
        G.push_back(gtmp);
    }
    
    for(int i = 0; i < static_cast<int>(G.size()); i++) 
        index[G[i].left].insert(i); 
}

bool GetTable::inVT(std::string s) {
    for(auto i : V) {
        if(i == s)
            return true;
    }

    return false;
}


void GetTable::get_first() {
    bool change = true; //表示若改动了一处,则需要重新便利
    bool is_empty; //表示产生式右端为空串
    int t; 
    //循环,直到没有改动为止,即change = false
    while(change) {
        change = false;
        //循环每个文法
        for(auto &g : G) {
            is_empty = true;
            t = 0;
            while(is_empty && t < static_cast<int>(g.right.size())) {
                is_empty = false;
                if(!inVT(g.right[t])) {
                    if(first[g.left].find(g.right[t]) == first[g.left].end()) {
                        first[g.left].insert(g.right[t]);
                        change = true;
                    }
                    continue;
                }  
                for(auto i : first[g.right[t]]) {
                    if(first[g.left].find(i) == first[g.left].end()) {
                        first[g.left].insert(i);
                        change = true;
                    }
                }

                if(first[g.right[t]].find("ε") != first[g.right[t]].end()) {
                    is_empty = true;
                    t++;
                }
            }
            if(t == static_cast<int>(g.right.size()) && first[g.left].find("ε") == first[g.left].end()) {
                first[g.left].insert("ε");
                change = true;
            }
        }
    }

    first.erase("S");
    
}

void GetTable::judge_first(std::vector<std::string> s, std::unordered_set<std::string> &result) {
    int count = 0;
    for(auto i : s) {
        if(!inVT(i)) {
            result.insert(i);
            break;
        }
        if(first[i].find("ε") == first[i].end()) {
            result.insert(first[i].begin(),first[i].end());
            break;
        } 

        result.insert(first[i].begin(), first[i].end());
        result.erase("ε");
        count++;
    
    }

    if(count == static_cast<int>(s.size()))
        result.insert("ε");
}

void GetTable::get_follow() {
    //与求first集类似,设置change
    bool change = true;
    while(change) {
        change = false;
        //循环每个文法
        std::vector<std::string>::iterator search_next; //向后扫描指针
        for(auto &g : G) {
            for(auto it = g.right.begin(); it != g.right.end(); it++) {
                if(!inVT(*it)) 
                    continue;
                auto original = follow[*it].size();
                search_next = it + 1;
                if(search_next != g.right.end()) {
                    std::vector<std::string> tmp(search_next,g.right.end());    
                    std::unordered_set<std::string> stmp;
                    judge_first(tmp,stmp);
                    follow[*it].insert(stmp.begin(),stmp.end());
                    if(stmp.find("ε") != stmp.end()) {
                        follow[*it].erase("ε");
                        follow[*it].insert(follow[g.left].begin(),follow[g.left].end());
                    } 
                    if(original < follow[*it].size())
                        change = true;
                } else {
                    follow[*it].insert(follow[g.left].begin(), follow[g.left].end());
                    if(original < follow[*it].size())
                        change = true;
                }
            }
        }
    }

    follow.erase("S");
}

void GetTable::get_closure(Status &p) {
    bool change = true;
    Status pptmp;
    while(change) {
        change = false;
        pptmp = p;
        for(auto &pro : pptmp) {
            if(pro.dot_position == static_cast<int>(G[pro.pro_num].right.size()))
                continue;
            std::string V = G[pro.pro_num].right[pro.dot_position];
            if(!inVT(V)) 
                continue;
            std::unordered_set<std::string> new_successor;

            //求新项目的后继符集
            if(pro.dot_position == static_cast<int>(G[pro.pro_num].right.size()) - 1)
                new_successor = pro.successors;
            else {
                std::vector<std::string> vtmp(G[pro.pro_num].right.begin()+pro.dot_position+1,G[pro.pro_num].right.end()); 
                judge_first(vtmp, new_successor);
                if(new_successor.find("ε") != new_successor.end()) {
                    new_successor.insert(pro.successors.begin(),pro.successors.end());
                    new_successor.erase("ε");
                }
            }
            Project ptmp;
            for(auto &i : index[V]) {
                ptmp.pro_num = i;
                ptmp.dot_position = 0;
                ptmp.successors = new_successor;
                if(p.find(ptmp) == p.end()) {
                    p.insert(ptmp);
                    change = true;
                }
                else {
                    auto original = p.find(ptmp)->successors.size(); 
                    ptmp.successors.insert(p.find(ptmp)->successors.begin(),p.find(ptmp)->successors.end());
                    p.erase(*p.find(ptmp));
                    p.insert(ptmp);

                    if(original < ptmp.successors.size()) 
                        change = true;
                }   
            }
        }
    }
}

bool GetTable::judge_repeat(Status s1, Status s2) {
    if(s1.size() == s2.size()) {
        for(auto pros : s1) {
            if(s2.find(pros) == s2.end())
                return false;   
            else {
                Project tmp = *(s2.find(pros));
                if(!(tmp == pros))
                    return false;
            }
        }
        return true;
    }
    return false;
}


bool GetTable::judge_conflict(Status & s, std::unordered_set<std::string> &result) {
    bool flag = false;
    std::unordered_set<std::string> tmp;
    for(auto pro : s) {
        if(pro.dot_position == static_cast<int>(G[pro.pro_num].right.size())) 
            tmp.insert(pro.successors.begin(),pro.successors.end());
    }

    for(auto pro : s) {
        if(pro.dot_position < static_cast<int>(G[pro.pro_num].right.size())) {
            std::string next = G[pro.pro_num].right[pro.dot_position];
            if(tmp.find(next) != tmp.end()) {
                result.insert(next);
                flag = true;
            }
        }
    }

    return flag;

}
    

void GetTable::get_status() {
    //初始化第一个状态0
    int t = 0;
    Project ptmp;
    ptmp.dot_position = 0;
    ptmp.pro_num = 0;
    ptmp.successors = {"#"};
    Status tmp_status(10,hasher,eq);
    tmp_status.insert(ptmp);
    get_closure(tmp_status);

    statuses.insert({t,tmp_status});

    //获取状态集合
    bool change = true;
    std::unordered_set<int> record; //记录已经求过的状态
    std::unordered_map<int, Status> sstmp;
    std::unordered_set<std::string> conflict;

    while(change) {
        change = false;
        sstmp = statuses;
        
        for(auto &sta : sstmp) {
            if(record.find(sta.first) != record.end()) 
                continue;
            record.insert(sta.first);
            //judge_conflict(sta.second,conflict);
            /*
             * 开始球状态转移,用一个set<string>记录被转移过的状态
             */
            std::unordered_set<std::string> record_status;
            for(auto pros : sta.second) {
                if(G[pros.pro_num].right[0] == "ε" || pros.dot_position == static_cast<int>(G[pros.pro_num].right.size())) {
                    for(auto sucess : pros.successors) {  
                        if(action[sta.first].find(sucess) == action[sta.first].end())
                        action[sta.first][sucess] = "r" + std::to_string(pros.pro_num);
                    }
                    continue;
                }
                std::string trans = G[pros.pro_num].right[pros.dot_position];
                if(record_status.find(trans) != record_status.end()) 
                    continue;
                record_status.insert(trans);
                tmp_status.clear();
                ptmp.pro_num = pros.pro_num;
                ptmp.dot_position = pros.dot_position + 1;
                ptmp.successors = pros.successors;
                tmp_status.insert(ptmp);
                for(auto protmp : sta.second) {
                    if(G[protmp.pro_num].right[protmp.dot_position] == trans && !(protmp == pros)) {
                        ptmp.pro_num = protmp.pro_num;
                        ptmp.dot_position = protmp.dot_position + 1;
                        ptmp.successors = protmp.successors;
                        tmp_status.insert(ptmp);
                    }
                }
                get_closure(tmp_status);
                bool flag = true;
                for(auto s : sstmp) {
                    if(judge_repeat(s.second,tmp_status)) {
                        if(inVT(trans))
                            goTo[sta.first][trans] = s.first;
                        else
                            action[sta.first][trans] = "s" + std::to_string(s.first);
                        flag = false;
                        break;
                    }                           
                }
                if(!flag) 
                    continue;
                statuses.insert({++t, tmp_status});
                change = true;

                //填写goTo表和action表
                if(inVT(trans)) 
                    goTo[sta.first][trans] = t;
                else 
                    action[sta.first][trans] = "s" + std::to_string(t);
            }
        }
    }
    //添加acc
    action[goTo[0]["Program"]]["#"] = "acc";

}


void GetTable::print(std::ofstream &o1, std::ofstream &o2) {
    for(auto f1 : action) {
        for(auto f2 : f1.second)
            o1 << f1.first << " " << f2.first << " " << f2.second << std::endl;
    }

    for(auto f1 : goTo) {
        for(auto f2 : f1.second)
            o2 << f1.first << " " << f2.first << " " << f2.second << std::endl;
    }
}


int main() {
    GetTable test;
    std::ifstream gin("grammer.txt");
    std::ifstream lin("lexer.out");
    std::ofstream action("action.out");
    std::ofstream goTo("goto.out");
    test.get_grammer(gin);
    test.get_first();
    test.get_follow();
    test.get_status();
    test.print(action,goTo);



    gin.close();
    lin.close();
    action.close();
    goTo.close();
    return 0;
}

转载于:https://www.cnblogs.com/vachester/p/6884350.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值