语法分析相关函数记录

本文记录了用C++实现语法分析实验的相关代码:定义Gramma和Prifuncnode类,读取文法;计算FirstVT集、LastVT集,并据此构造算符优先关系矩阵和优先函数;计算First集、Follow集,并在此基础上实现一个递归下降分析器。
摘要由CSDN通过智能技术生成

以下函数,对象,方法为个人本学期课程实验记录留存,本年度(2023秋)希望同大家交流学习,切勿完全复制使用。

func.h

#ifndef __FUNC_H__
#define __FUNC_H__
#include <vector>
#include <string>
#include <set>
#include <unordered_map>
using namespace std;

// Forward declarations so the aliases below can name classes that are
// defined further down in this header.
class Gramma;
class Prifuncnode;

// Type aliases shared by the grammar-analysis functions. Real aliases are used
// instead of object-like macros so the names obey scope rules and show up in
// diagnostics.
using tGramma  = std::vector<Gramma>;                       // list of productions
using tFirst   = std::unordered_map<char, std::set<char>>;  // symbol -> FIRST set
using tFollow  = std::unordered_map<char, std::set<char>>;  // non-terminal -> FOLLOW set
using tFirstvt = std::unordered_map<char, std::set<char>>;  // non-terminal -> FIRSTVT set
using tLastvt  = std::unordered_map<char, std::set<char>>;  // non-terminal -> LASTVT set
using tMat     = std::vector<std::vector<int>>;             // precedence relation matrix
using tPrifunc = std::unordered_map<char, Prifuncnode>;     // terminal -> (f, g) values

// One grammar production: a left-hand symbol plus the symbols of its
// right-hand side, each symbol being a single character.
class Gramma
{
public:

    // Creates an empty production.
    Gramma();

    // Builds a production from its textual form: gm[0] is the left-hand
    // symbol, gm[1] is skipped (separator), the rest is the right-hand side.
    Gramma(string gm);

    // Left-hand symbol of the production.
    char get_start() const { return start; }

    // Right-hand side symbols; the reference is valid while this object lives.
    const vector<char>& get_rule() const { return rule; }

private:
    char start = '\0';   // default keeps the value determinate for every constructor
    vector<char> rule;
};

// Pair of precedence-function values stored per terminal (mapped type of tPrifunc).
class Prifuncnode
{
public:
    // Initializes both f and g to 0.
    Prifuncnode();
    int f;  // value of f for this terminal
    int g;  // value of g for this terminal
};

// Returns true when the character is an uppercase ASCII letter ('A'..'Z'),
// which this code treats as a non-terminal; everything else is a terminal.
bool isVn(char);

// Reads whitespace-separated productions from standard input until the word
// "exit" is read, and returns them as a list.
tGramma input_gramma();

// Reads productions from the named file, one per line; lines shorter than
// three characters are skipped. Prints "filename error!" and exits the
// process when the file cannot be opened.
tGramma read_gramma(string filename);

// Set of the left-hand symbols of all productions.
set<char> get_all_vn(tGramma&);

// Set of all right-hand-side symbols for which isVn() is false.
set<char> get_all_vt(tGramma&);

// Set of every symbol that appears on either side of any production.
set<char> get_all_v(tGramma&);

// Computes the FIRSTVT sets, keyed by non-terminal.
tFirstvt get_firstvt(tGramma&);

// Computes the LASTVT sets, keyed by non-terminal.
tLastvt get_lastvt(tGramma&);

// Builds the precedence relation matrix. Rows/columns follow the iteration
// order of the terminal set; cells start at -2 (no relation).
tMat get_priority_mat(tFirstvt&, tLastvt&, tGramma&, set<char>& );      //0: equal; -1: lower; 1: greater;

// Derives per-terminal f/g values from the relation matrix by repeated
// adjustment, starting from f = g = 1.
tPrifunc get_priority_func(tMat&, set<char>&);

// Prints f and g for every entry to standard output.
void print_func(tPrifunc&);

// Prints every key of the map followed by the members of its set.
void print_first(tFirst&);


//Recursive-Descent_parsing
// Inserts c into target unless it is already there; '#' is only inserted when
// the bool argument is true. Returns false when c was already present.
bool set_insert(set<char>& target, char c, bool);



// Computes the FIRST set of every symbol of the grammar ('#' stands for epsilon).
tFirst get_first(tGramma&);

// Computes the FOLLOW set of every non-terminal. The char argument is the
// start symbol, whose FOLLOW set receives the end marker '#'.
tFollow get_follow(tGramma&, tFirst&, char);

#endif

func.cpp

#include "func.h"
#include <iostream>
#include <stack>
#include "fstream"

// Creates an empty production. `start` is set explicitly so that reading it
// through get_start() never yields an indeterminate value.
Gramma::Gramma() : start('\0') { }

// Parses one production from text. Layout: gm[0] is the left-hand symbol,
// gm[1] is a one-character separator that is ignored, and every following
// character is one right-hand-side symbol.
// Line terminators ('\n', '\r') and NUL are dropped so that text read from
// CRLF files does not produce a bogus '\r' symbol. An empty string yields an
// empty production with start == '\0'.
Gramma::Gramma(string gm) : start('\0') {
    if (gm.empty()) return;
    start = gm[0];
    for (size_t i = 2; i < gm.size(); ++i) {
        const char c = gm[i];
        if (c != '\n' && c != '\r' && c != '\0') rule.emplace_back(c);
    }
}

Prifuncnode::Prifuncnode():f(0), g(0) { }

// Reads productions from standard input, one whitespace-separated token per
// production, and returns them in input order.
// Reading stops at the sentinel word "exit" or as soon as the stream reaches
// EOF / fails; without the stream check a closed stdin would loop forever
// while appending empty productions.
tGramma input_gramma() {
    tGramma bag;
    string token;
    while (cin >> token && token != "exit") {
        bag.emplace_back(token);
    }
    return bag;
}

// Loads productions from `filename`, one per line, and returns them in file
// order. Lines shorter than three characters (after removing a trailing
// carriage return) are skipped.
// If the file cannot be opened, prints "filename error!" and terminates the
// process with exit(-1).
tGramma read_gramma(string filename) {
    ifstream file(filename, ios::in);
    if (!file) {
        cout << "filename error!" << endl;
        exit(-1);
    }

    tGramma bag;
    string line;
    // getline as the loop condition stops exactly when no more lines exist.
    while (getline(file, line)) {
        // Files with CRLF line endings leave a '\r' at the end of each line;
        // drop it so it is not parsed as a grammar symbol.
        if (!line.empty() && line.back() == '\r') line.pop_back();
        if (line.length() < 3) continue;
        bag.emplace_back(line);
    }
    // The stream closes itself when `file` goes out of scope.
    return bag;
}


// A symbol is a non-terminal exactly when it is an uppercase ASCII letter.
bool isVn(char c) {
    return 'A' <= c && c <= 'Z';
}


// Collects the left-hand symbol of every production in `bag`.
set<char> get_all_vn(tGramma& bag) {
    set<char> nonterminals;
    for (auto& production : bag) {
        nonterminals.insert(production.get_start());
    }
    return nonterminals;
}


// Collects every right-hand-side symbol that isVn() does not classify as a
// non-terminal.
set<char> get_all_vt(tGramma& bag) {
    set<char> terminals;
    for (auto& production : bag) {
        for (char symbol : production.get_rule()) {
            if (isVn(symbol)) continue;
            terminals.insert(symbol);
        }
    }
    return terminals;
}


// Computes FIRSTVT for the grammar in `bag` with a worklist:
//   1. P -> a...  or  P -> Qa...   puts a into FIRSTVT(P);
//   2. P -> Q...  and a in FIRSTVT(Q)  puts a into FIRSTVT(P).
// Returns a map from non-terminal to its FIRSTVT set; only non-terminals that
// were touched by the computation have an entry.
// Productions with an empty right-hand side are ignored (indexing them would
// be out of bounds).
tFirstvt get_firstvt(tGramma& bag) {
    tFirstvt firstvt;
    stack<pair<char, char>> pending;   // (non-terminal, terminal) facts still to propagate

    // Seed the worklist from rule 1.
    for (auto& production : bag) {
        const vector<char>& rule = production.get_rule();
        if (rule.empty()) continue;
        if (!isVn(rule[0])) {
            pending.push({production.get_start(), rule[0]});
        } else if (rule.size() > 1 && !isVn(rule[1])) {
            pending.push({production.get_start(), rule[1]});
        }
    }

    // Propagate through rule 2 until no new fact appears.
    while (!pending.empty()) {
        const pair<char, char> cur = pending.top();
        pending.pop();
        firstvt[cur.first].insert(cur.second);

        for (auto& production : bag) {
            const vector<char>& rule = production.get_rule();
            if (rule.empty() || rule[0] != cur.first) continue;
            const char start = production.get_start();
            if (firstvt[start].count(cur.second) == 0) {
                pending.push({start, cur.second});
            }
        }
    }

    return firstvt;
}

// Computes LASTVT for the grammar in `bag` with a worklist:
//   1. P -> ...a  or  P -> ...aQ   puts a into LASTVT(P);
//   2. P -> ...Q  and a in LASTVT(Q)  puts a into LASTVT(P).
// Returns a map that has an entry (possibly empty) for every left-hand symbol.
// Productions with an empty right-hand side are ignored.
tLastvt get_lastvt(tGramma& bag) {
    tLastvt lastvt;
    for (char c : get_all_vn(bag)) lastvt[c] = set<char>();

    stack<pair<char, char>> pending;   // (non-terminal, terminal) facts still to propagate

    // Seed the worklist from rule 1.
    for (auto& production : bag) {
        const vector<char>& rule = production.get_rule();
        if (rule.empty()) continue;
        const char start = production.get_start();
        const size_t last = rule.size() - 1;
        if (!isVn(rule[last])) {
            pending.push({start, rule[last]});
        } else if (last >= 1 && !isVn(rule[last - 1])) {
            pending.push({start, rule[last - 1]});
        }
    }

    // Propagate through rule 2. The relevant position is the LAST symbol of
    // the right-hand side (the FIRSTVT counterpart looks at the first one).
    while (!pending.empty()) {
        const pair<char, char> cur = pending.top();
        pending.pop();
        lastvt[cur.first].insert(cur.second);

        for (auto& production : bag) {
            const vector<char>& rule = production.get_rule();
            if (rule.empty() || rule.back() != cur.first) continue;
            const char start = production.get_start();
            if (lastvt[start].count(cur.second) == 0) {
                pending.push({start, cur.second});
            }
        }
    }

    return lastvt;
}


// Builds the operator-precedence relation matrix.
//   first / last : FIRSTVT and LASTVT sets keyed by non-terminal
//   bag          : the productions
//   vt           : the terminals; row/column k belongs to the k-th element of
//                  vt in iteration order
// Cell values: 0 equal, -1 lower, 1 greater, -2 no relation.
// For every adjacent pair in a right-hand side:
//   a b    -> a = b
//   a V b  -> a = b
//   a V    -> a < every terminal in FIRSTVT(V)
//   V a    -> every terminal in LASTVT(V) > a
// Symbols that are not members of vt are skipped instead of silently landing
// in row/column 0, and the input maps are not modified.
tMat get_priority_mat(tFirstvt& first, tLastvt& last, tGramma& bag, set<char>& vt) {
    // terminal -> matrix index
    unordered_map<char, int> table;
    int tindex = 0;
    for (char c : vt) table[c] = tindex++;

    tMat prmat(tindex, vector<int>(tindex, -2));

    // Records `a rel b` when both terminals are known.
    auto set_relation = [&](char a, char b, int rel) {
        const auto ia = table.find(a);
        const auto ib = table.find(b);
        if (ia == table.end() || ib == table.end()) return;
        prmat[ia->second][ib->second] = rel;
    };

    for (auto& production : bag) {
        const vector<char>& rule = production.get_rule();
        // `i + 1 < size` avoids the unsigned underflow of `size - 1` on an empty rule.
        for (size_t i = 0; i + 1 < rule.size(); ++i) {
            const char cur = rule[i];
            const char next = rule[i + 1];

            // a b
            if (!isVn(cur) && !isVn(next)) {
                set_relation(cur, next, 0);
            }
            // a V b
            if (i + 2 < rule.size() && !isVn(cur) && isVn(next) && !isVn(rule[i + 2])) {
                set_relation(cur, rule[i + 2], 0);
            }
            // a V
            if (!isVn(cur) && isVn(next)) {
                const auto it = first.find(next);
                if (it != first.end()) {
                    for (char b : it->second) set_relation(cur, b, -1);
                }
            }
            // V a
            if (isVn(cur) && !isVn(next)) {
                const auto it = last.find(cur);
                if (it != last.end()) {
                    for (char a : it->second) set_relation(a, next, 1);
                }
            }
        }
    }

    return prmat;
}


// Derives precedence functions f and g from the relation matrix by iterative
// adjustment.
//   prmat : relation matrix; assumed to be at least |vt| x |vt|, indexed in
//           the iteration order of vt (as produced by get_priority_mat)
//   vt    : the terminals
// Every f and g starts at 1. The matrix is swept repeatedly:
//   a = b : f(a) and g(b) are both raised to the larger of the two
//   a < b : g(b) is raised while g(b) <= f(a)
//   a > b : f(a) is raised while f(a) <= g(b)
// until a sweep changes nothing.
// Returns the map terminal -> (f, g). If any value grows beyond 2 * |vt| the
// relations are cyclic and no precedence functions exist; an EMPTY map is
// returned in that case instead of looping forever.
tPrifunc get_priority_func(tMat& prmat, set<char>& vt) {
    tPrifunc func;

    // index -> terminal, in the same order used to build the matrix
    const vector<char> symbols(vt.begin(), vt.end());
    const int count = static_cast<int>(symbols.size());

    for (char c : symbols) {
        func[c].f = 1;
        func[c].g = 1;
    }

    const int limit = 2 * count;   // upper bound on any value when a solution exists
    bool changed = true;
    while (changed) {
        changed = false;
        for (int i = 0; i < count; ++i) {
            for (int j = 0; j < count; ++j) {
                Prifuncnode& left = func[symbols[i]];
                Prifuncnode& right = func[symbols[j]];
                switch (prmat[i][j]) {
                case 0: {
                    const int level = max(left.f, right.g);
                    if (left.f != level || right.g != level) {
                        left.f = level;
                        right.g = level;
                        changed = true;
                    }
                    break;
                }
                case -1:
                    if (right.g <= left.f) {
                        ++right.g;
                        changed = true;
                    }
                    break;
                case 1:
                    if (left.f <= right.g) {
                        ++left.f;
                        changed = true;
                    }
                    break;
                default:
                    break;
                }
                if (left.f > limit || right.g > limit) return tPrifunc();
            }
        }
    }

    return func;
}

// Writes one line per terminal to standard output showing its f and g values.
void print_func(tPrifunc& func) {
    for (auto& entry : func) {
        const char symbol = entry.first;
        const Prifuncnode& values = entry.second;
        cout << "f(" << symbol << ") = " << values.f
             << "       "
             << "g(" << symbol << ") = " << values.g << endl;
    }
}


// Collects every symbol of the grammar: each left-hand symbol together with
// all symbols of each right-hand side.
set<char> get_all_v(tGramma& bag) {
    set<char> symbols;
    for (auto& production : bag) {
        symbols.insert(production.get_start());
        const vector<char>& rhs = production.get_rule();
        symbols.insert(rhs.begin(), rhs.end());
    }
    return symbols;
}

// Inserts symbol `c` into a FIRST/FOLLOW-style set.
//   target : set to update
//   c      : symbol to add
//   allowe : whether the epsilon symbol '#' may be added
// Returns true only when `target` actually grew. Callers use the result as
// their "something changed" flag, so reporting true for a rejected '#' would
// keep their fixed-point loops running forever.
bool set_insert(set<char>& target, char c, bool allowe) {
    if (c == '#' && !allowe) return false;
    return target.insert(c).second;
}

// Computes the FIRST set of every grammar symbol ('#' denotes epsilon).
// Returns a map with an entry for each symbol that occurs in `bag`; the FIRST
// set of a terminal is the terminal itself.
// For a production A -> X1 X2 ... Xn the sets FIRST(Xi) \ {'#'} are added to
// FIRST(A) for as long as all earlier Xj are nullable, and '#' itself is
// added only when EVERY Xi is nullable (or the right-hand side is empty).
// Copying '#' from X1 alone would wrongly mark A as nullable.
tFirst get_first(tGramma& bag) {
    tFirst first;
    for (char symbol : get_all_v(bag)) first[symbol] = set<char>();
    for (char terminal : get_all_vt(bag)) first[terminal].insert(terminal);

    // Repeat until a full pass over the productions adds nothing.
    bool change = true;
    while (change) {
        change = false;
        for (auto& production : bag) {
            set<char>& target = first[production.get_start()];
            bool all_nullable = true;
            for (char symbol : production.get_rule()) {
                bool nullable = false;
                for (char c : first[symbol]) {
                    if (c == '#') {
                        nullable = true;   // remembered, but not copied here
                        continue;
                    }
                    if (set_insert(target, c, true)) change = true;
                }
                if (!nullable) {
                    all_nullable = false;
                    break;                 // later symbols cannot start a derivation of A
                }
            }
            if (all_nullable && set_insert(target, '#', true)) change = true;
        }
    }

    return first;
}


// Computes the FOLLOW set of every non-terminal.
//   bag         : the productions
//   first       : FIRST sets as produced by get_first ('#' denotes epsilon)
//   startsymbol : start symbol of the grammar
// Returns a map non-terminal -> FOLLOW set, where '#' is used as the
// end-of-input marker.
//
// Algorithm:
//   1. '#' is put into FOLLOW(startsymbol).
//   2. Each production A -> X1 ... Xn is scanned right to left while keeping
//      `trailer`, the set of terminals that can appear directly after the
//      current position. It starts as FOLLOW(A). At a non-terminal Xi the
//      trailer is added to FOLLOW(Xi); then the trailer becomes
//      FIRST(Xi) \ {'#'}, united with the old trailer when Xi is nullable.
//      At a terminal the trailer becomes just that terminal. An epsilon
//      symbol '#' on the right-hand side leaves the trailer untouched.
//   3. Step 2 repeats until no FOLLOW set grows.
// Nullability is taken from FIRST(Xi), so symbols hidden behind a nullable
// neighbour are seen, and the loop ends because `change` is raised only by a
// real insertion.
tFollow get_follow(tGramma& bag, tFirst& first, char startsymbol = 'S') {
    tFollow follow;
    for (char c : get_all_vn(bag)) follow[c] = set<char>();

    // Rule 1
    set_insert(follow[startsymbol], '#', true);

    bool change = true;
    while (change) {
        change = false;
        for (auto& production : bag) {
            const vector<char>& rule = production.get_rule();
            // Work on a copy: FOLLOW(A) itself may grow during this scan.
            set<char> trailer = follow[production.get_start()];

            for (size_t i = rule.size(); i-- > 0; ) {
                const char symbol = rule[i];
                if (symbol == '#') continue;          // epsilon contributes nothing
                if (!isVn(symbol)) {
                    trailer.clear();
                    trailer.insert(symbol);
                    continue;
                }

                set<char>& target = follow[symbol];
                for (char c : trailer) {
                    if (set_insert(target, c, true)) change = true;
                }

                const auto it = first.find(symbol);
                const bool nullable = it != first.end() && it->second.count('#') != 0;
                if (!nullable) trailer.clear();
                if (it != first.end()) {
                    for (char c : it->second) {
                        if (c != '#') trailer.insert(c);
                    }
                }
            }
        }
    }
    return follow;
}


// Dumps a symbol -> set map to standard output: a separator line, the key
// followed by ':', then the set members separated by single spaces.
void print_first(tFirst& hashset) {
    for (auto& entry : hashset) {
        cout << "------------------------" << endl;
        cout << entry.first << ":" << endl;
        for (char member : entry.second) {
            cout << member << ' ';
        }
        cout << endl;
    }
}

递归下降分析器

头文件

#ifndef __RECURSIONER_H__
#define __RECURSIONER_H__
#include <string>
#include "func.h"
using namespace std;

// Recursive-descent recognizer driven by a grammar file and its FIRST/FOLLOW
// sets.
class Recursioner
{
public:
    // Uses "test.txt" as the grammar file until set_filename() is called.
    Recursioner();

    // Selects the grammar file read by init(). `this->` is required: the
    // parameter shadows the member, so a plain `filename = filename` would
    // assign the parameter to itself and leave the member unchanged.
    void set_filename(string filename) { this->filename = filename; }

    // Loads the grammar and computes its FIRST and FOLLOW sets.
    void init();

    // Tries to match the given grammar symbol against `target` starting at
    // position `cur`; returns whether the match succeeded.
    bool run(char);

    // Interactive loop: initializes, then checks each word read from standard
    // input until "exit". Returns 0.
    int Main();


private:
    string filename;   // grammar file path
    string target;     // input string currently being recognized
    tGramma bag;       // productions loaded from `filename`
    tFirst first;      // FIRST sets of `bag`
    tFollow follow;    // FOLLOW sets of `bag`
    int cur;           // index of the next unread character of `target`
    int nextcur;       // kept at cur + 1 by the visible code
};




#endif

cpp文件

#include "Recursioner.h"
#include <iostream>
using namespace std;

// Defaults to the grammar file "test.txt". The position counters are given
// their start-of-input values so they are never read uninitialized, even if
// run() is called before init().
Recursioner::Recursioner() : filename("test.txt"), cur(0), nextcur(1) { }

// Resets the input position, then loads the grammar from `filename` and
// derives its FIRST and FOLLOW sets with 'S' as the start symbol.
void Recursioner::init() {
    cur = 0;
    nextcur = 1;

    bag = read_gramma(filename);
    first = get_first(bag);
    follow = get_follow(bag, first, 'S');
}

// Tries to derive a prefix of the remaining input (target[cur..]) from the
// grammar symbol `start`. Returns true on success, with `cur` advanced past
// the consumed characters. Each visited symbol is echoed to standard output
// as a trace.
//   - At end of input the symbol matches only if it is nullable ('#' is in
//     its FIRST set).
//   - A terminal matches only the identical input character.
//   - A non-terminal whose FIRST set contains the look-ahead character has
//     its non-epsilon productions tried in grammar order.
//   - Otherwise the non-terminal matches the empty string if it is nullable
//     and the look-ahead is in its FOLLOW set.
// The input position is rewound before every alternative so a partially
// matched alternative cannot corrupt the next attempt. After a complete
// failure the position is left where the last attempt stopped, which is what
// Main() reports.
bool Recursioner::run(char start) {
    const bool eps = first[start].count('#') != 0;
    if (cur == static_cast<int>(target.size())) return eps;
    cout << start << ' ';

    if (!isVn(start)) {
        if (start != target[cur]) return false;
        ++cur;
        ++nextcur;
        return true;
    }

    const char lookahead = target[cur];
    if (first[start].count(lookahead) == 0) {
        // Only the epsilon derivation can apply here.
        return eps && follow[start].count(lookahead) != 0;
    }

    const int savedCur = cur;
    const int savedNext = nextcur;
    for (auto& production : bag) {
        if (production.get_start() != start) continue;
        const vector<char>& rule = production.get_rule();
        // Epsilon productions are handled by the FOLLOW branch above.
        if (rule.size() == 1 && rule[0] == '#') continue;

        // Backtrack to where this non-terminal started.
        cur = savedCur;
        nextcur = savedNext;

        bool matched = true;
        for (char symbol : rule) {
            if (!run(symbol)) {
                matched = false;
                break;
            }
        }
        if (matched) return true;
    }
    return false;
}

// Interactive driver. Calls init(), then reads words from standard input and
// reports for each whether it is derivable from 'S' ("yes"), or prints the
// position and character where recognition stopped followed by "No".
// The loop ends on the word "exit" or when standard input reaches EOF / fails;
// without the stream check a closed stdin would make the loop spin forever.
// Always returns 0.
int Recursioner::Main() {
    init();

    string command;
    while (cin >> command && command != "exit") {
        cur = 0;
        nextcur = 1;
        target = command;

        const bool res = run('S');
        cout << endl;

        // Success requires both a derivation and that all input was consumed.
        if (res && cur == static_cast<int>(target.size())) {
            cout << "yes" << endl;
        }
        else {
            // Error report
            cout << "第" << cur <<"个字符: " << target[cur] << " 异常" << endl;
            cout << "No" << endl;
        }
        cout << "----------------------" << endl;
    }
    return 0;
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值