合工大编译原理实验二:构造LL1分析程序

#include<unordered_set>
#include<unordered_map>
#include<iostream>
#include<vector>
#include<stack>
#include<string>
#include<algorithm>
#include<iomanip>
#include<fstream>

using namespace std;

// Start symbol of the grammar; getFollow seeds its FOLLOW set with the end marker "#".
#define START_SYMBOL "E"

// Shared container aliases used by every function in this file.
// NOTE(review): `set` and `multimap` reuse the names of std::set / std::multimap; together
// with `using namespace std;` they become ambiguous as soon as <set> or <map> is included.
using set = unordered_set<string>;              // a set of symbols or of right-hand sides
using multimap = unordered_map<string, set>;    // key -> set of strings (rules, FIRST, FOLLOW)
using table = unordered_map<string, string>;    // "<non-terminal>&<terminal>" -> production

// Terminal alphabet including the empty-string marker "~".
set terminals{"+", "-", "~", "*", "/", "(", ")", "i"};
// Terminal alphabet without the empty-string marker.
set Terminals{"+", "-", "*", "/", "(", ")", "i"};
// Non-terminals of the grammar; each one is a single upper-case letter.
set Non_Terminals{"E", "T", "G", "F", "S"};

multimap init(multimap &production, string *grammar) {
    /**
     * @params:production:map that receives the parsed rules (non-terminal -> set of right-hand sides);
     *         grammar:array of rule strings such as "G->+TG|-TG", terminated by the sentinel "end"
     * @return:a copy of production after every rule has been merged into it
     * @function:split each rule at "->" and at every '|' and store the alternatives under the
     *           rule's single-letter non-terminal; several rules for one non-terminal are merged
     */
    for (const string *rule = grammar; *rule != "end"; ++rule) {
        const string &line = *rule;
        const size_t arrow = line.find("->");
        const bool has_head = !line.empty() && line[0] >= 'A' && line[0] <= 'Z';
        if (!has_head || arrow == string::npos) {
            // Without a head or an arrow there is nothing sensible to store; scanning on
            // would read past the end of the string.
            cerr << "ERROR!Malformed grammar rule skipped:" << line << endl;
            continue;
        }

        // operator[] creates the entry on first use, so new and existing heads share one path.
        set &alternatives = production[string(1, line[0])];

        size_t begin = arrow + 2;   // first character after "->"
        while (true) {
            const size_t bar = line.find('|', begin);
            if (bar == string::npos) {
                alternatives.insert(line.substr(begin));    // last (or only) alternative
                break;
            }
            alternatives.insert(line.substr(begin, bar - begin));
            begin = bar + 1;
        }
    }
    return production;
}

bool isInSet(const set &sets, string str) {
    /**
     * @params:set,string
     * @return:bool
     * @function:if string str is in set,return true
     */
    bool isIn = std::any_of(sets.begin(), sets.end(), [&str](const std::string &item) {
        return item == str;
    });
    return isIn;
}

bool isInSet(const set &sets, char ch) {
    /**
     * @params:集合,字符
     * @return:bool
     * @function:if char ch is in set,return true;
     */
    bool isIn = std::any_of(sets.begin(), sets.end(), [&ch](const std::string &item) {
        string temp;
        temp += ch;
        return temp == item;
    });
    return isIn;
}

bool isBelongSet(const set &sets, const string &symbol) {
    /**
     * @params:set,string
     * @return:bool
     * @funciton:if string is substring of any item in set,return true
     */
    bool isBelong = std::any_of(sets.begin(), sets.end(), [&symbol](const std::string &item) {
        size_t found = item.find(symbol);
        if (found != std::string::npos)
            return true;
        else
            return false;
    });
    return isBelong;
}

bool isInString(const string &mother, const string &son) {
    /**
     * @params:mother:the string to search in;son:the text to look for
     * @return:bool
     * @function:return true if son occurs somewhere inside mother
     */
    return mother.find(son) != string::npos;
}

multimap getFirst(multimap &production, const string &symbol) {
    /**
     * @params:production:the grammar (non-terminal -> set of right-hand sides);
     *         symbol:the non-terminal whose FIRST set is wanted
     * @return:a map with the single entry symbol -> FIRST(symbol); the set is empty when
     *         symbol has no productions
     * @function:walk every right-hand side from the left: a terminal is added and ends the walk,
     *           a non-terminal contributes its FIRST set without "~" and the walk only continues
     *           past it when it can derive "~"; "~" is added when the whole right-hand side can
     *           vanish. Meeting symbol itself during the walk is reported as left recursion and
     *           terminates the program; indirect left recursion is not detected.
     */
    set first_set;
    const auto entry = production.find(symbol);
    if (entry != production.end()) {
        for (const string &product : entry->second) {
            bool derives_empty = true;      // stays true while every symbol seen so far is nullable
            for (const char ch : product) {
                if (ch == '~')
                    continue;               // the empty string contributes no terminal
                const string current(1, ch);
                if (isInSet(Non_Terminals, ch)) {
                    if (current == symbol) {
                        cout << "ERROR!当前文法为左递归文法!" << endl;
                        exit(0);
                    }
                    multimap nested = getFirst(production, current);
                    bool nullable = false;
                    for (const string &item : nested[current]) {
                        if (item == "~")
                            nullable = true;
                        else
                            first_set.insert(item);
                    }
                    if (nullable)
                        continue;           // look at the next symbol of the right-hand side
                } else if (isInSet(terminals, ch)) {
                    first_set.insert(current);
                }
                // A terminal, a non-nullable non-terminal or an unknown symbol ends the walk.
                derives_empty = false;
                break;
            }
            if (derives_empty)
                first_set.insert("~");
        }
    }

    multimap first;
    first.insert({symbol, first_set});
    return first;
}

multimap getFollow(multimap &production, const string &symbol, multimap &first_set) {
    multimap follow;
    set follow_set;
    if (symbol == START_SYMBOL)
        follow_set.insert("#");
    for (auto &item: production) {  // 首先在各个生成式之中查找该非终结符
        if (isBelongSet(item.second, symbol)) {
            // 如果是在当前产生式集合之中,依旧需要对产生式集合进行遍历
            for (auto &product: item.second) {
                if (isInString(product, symbol)) { //存在于当前的产生式之中,
                    auto index = product.find(symbol);
                    if (index < product.size() - 1) {       // 如果当前字符不在产生式字符的尾部
                        if (isInSet(Non_Terminals, product.at(index + 1))) {
                            string temp;
                            temp += product.at(index + 1);
                            bool flag = false;
                            for (auto &temp_str: first_set[temp]) {
                                if (temp_str != "~")
                                    follow_set.insert(temp_str);
                                if (temp_str == "~")
                                    flag = true;
                            }
                            multimap left_symbol_follow;
                            if (flag && item.first != temp && item.first != symbol)
                                left_symbol_follow = getFollow(production, item.first, first_set);
                            follow_set.insert(left_symbol_follow[item.first].begin(),
                                              left_symbol_follow[item.first].end());
                        }
                    } else if (index == (product.length() - 1) && item.first != symbol) {
                        multimap left_symbol_follow = getFollow(production, item.first, first_set);
                        follow_set.insert(left_symbol_follow[item.first].begin(), left_symbol_follow[item.first].end());
                    }
                }
            }
        }
    }
    follow.insert({symbol, follow_set});
    return follow;
}

void initFirst(multimap &production, multimap &first) {
    /**
     * @params:production:the grammar;first:map that receives the FIRST sets
     * @return:none
     * @function:compute FIRST for every entry of Non_Terminals and add it to first;
     *           entries already present in first are left as they are
     */
    for (const string &symbol : Non_Terminals) {
        multimap first_of_symbol = getFirst(production, symbol);
        for (const auto &entry : first_of_symbol)
            first.insert(entry);
    }
}

void initFollow(multimap &production, multimap &first, multimap &follow) {
    /**
     * @params:production:the grammar;first:FIRST sets of the non-terminals;
     *         follow:map that receives the FOLLOW sets
     * @return:none
     * @function:compute FOLLOW for every entry of Non_Terminals and add it to follow;
     *           entries already present in follow are left as they are
     */
    for (const string &symbol : Non_Terminals) {
        multimap follow_of_symbol = getFollow(production, symbol, first);
        for (const auto &entry : follow_of_symbol)
            follow.insert(entry);
    }
}

string getFirstSymbol(string &production) {
    /**
     * @params:production:a string, e.g. the right-hand side of a rule
     * @return:the first character of production as a string, or "" when production is empty
     */
    return string(production, 0, 1);
}

bool isEqual(string &str, char &ch) {
    /**
     * @params:str:a symbol;ch:a character
     * @return:bool
     * @function:return true if str consists of exactly the one character ch
     */
    return str.size() == 1 && str[0] == ch;
}

string QueryParsingTable(string &left, const string &terminal, table &parsing_table) {
    string queryString = left + "&" + terminal;
    return parsing_table[queryString];
}

string char2string(char ch) {
    /**
     * @params:ch:a character
     * @return:a string of length one holding ch
     */
    return string(1, ch);
}

multimap getFirstOfProduction(multimap &production, multimap &first) {
    /**
     * @params:production:the grammar (non-terminal -> set of right-hand sides);
     *         first:FIRST sets of the non-terminals, keyed by non-terminal
     * @return:a map from "<non-terminal>-><right-hand side>" to the FIRST set of that right-hand side
     * @function:walk each right-hand side from the left: a terminal is added and ends the walk,
     *           a non-terminal contributes its FIRST set without "~" and the walk continues only
     *           if that set contains "~"; "~" is added when the whole right-hand side can vanish.
     *           first is only read, never extended.
     */
    multimap firstOfProduction;
    for (const auto &rule : production) {
        for (const string &body : rule.second) {
            set firsts;
            bool derives_empty = true;      // stays true while every symbol seen so far is nullable
            for (const char ch : body) {
                if (ch == '~')
                    continue;               // the empty string contributes no terminal
                if (isInSet(Non_Terminals, ch)) {
                    bool nullable = false;
                    const auto known = first.find(char2string(ch));
                    if (known != first.end()) {
                        for (const string &item : known->second) {
                            if (item == "~")
                                nullable = true;
                            else
                                firsts.insert(item);
                        }
                    }
                    if (nullable)
                        continue;           // look at the next symbol of the right-hand side
                } else if (isInSet(Terminals, ch)) {
                    firsts.insert(char2string(ch));
                }
                // A terminal, a non-nullable non-terminal or an unknown symbol ends the walk.
                derives_empty = false;
                break;
            }
            if (derives_empty)
                firsts.insert("~");
            firstOfProduction.insert({rule.first + "->" + body, firsts});
        }
    }
    return firstOfProduction;
}

table getParsingTable(string *grammar) {
    /**
     * @params:grammar:array of rule strings terminated by the sentinel "end"
     * @return:the parsing table, keyed by "<non-terminal>&<terminal>", holding the production to apply
     * @function:parse the grammar, compute FIRST, FOLLOW and the FIRST set of every production,
     *           print all of them, then fill and print the table. A cell that would receive two
     *           different productions keeps the first one and a warning is written to cerr.
     */
    multimap production;
    production = init(production, grammar);
    multimap FIRST;
    initFirst(production, FIRST);
    multimap FOLLOW;
    initFollow(production, FIRST, FOLLOW);
    multimap firstOfProduction = getFirstOfProduction(production, FIRST);

    // Prints one "key : member member ..." line per entry, followed by a blank line.
    const auto print_sets = [](const char *title, const multimap &sets) {
        cout << title << endl;
        for (const auto &item : sets) {
            cout << item.first << " : ";
            for (const auto &str : item.second)
                cout << str << " ";
            cout << endl;
        }
        cout << endl;
    };
    print_sets("FIRST set:", FIRST);
    print_sets("FOLLOW set:", FOLLOW);
    print_sets("FIRST set of production:", firstOfProduction);

    table parsing_table;      // the SELECT information: which production to use for which lookahead
    // Stores rule in the cell unless the cell is taken; a different rule there means a conflict.
    const auto add_entry = [&parsing_table](const string &key, const string &rule) {
        const auto result = parsing_table.insert({key, rule});
        if (!result.second && result.first->second != rule)
            cerr << "WARNING:grammar is not LL(1),cell " << key << " keeps " << result.first->second
                 << " and ignores " << rule << endl;
    };

    for (const auto &rule : production) {
        const string &left = rule.first;
        for (const string &body : rule.second) {
            const string whole = left + "->" + body;
            const auto selected = firstOfProduction.find(whole);
            if (selected == firstOfProduction.end())
                continue;
            // Every real terminal in FIRST(body) selects this production.
            for (const string &item : selected->second) {
                if (isInSet(Terminals, item))
                    add_entry(left + "&" + item, whole);
            }
        }
        // If the non-terminal can derive "~", every symbol of its FOLLOW set (including "#")
        // selects the empty production; done once per non-terminal.
        const auto left_first = FIRST.find(left);
        const auto left_follow = FOLLOW.find(left);
        if (left_first != FIRST.end() && left_follow != FOLLOW.end() && isInSet(left_first->second, "~")) {
            for (const string &item : left_follow->second)
                add_entry(left + "&" + item, left + "->" + "~");
        }
    }

    cout << "parsing_table :" << endl;
    for (const auto &item : parsing_table) {
        cout << item.first << " : " << item.second << endl;
    }
    cout << endl;
    return parsing_table;
}

string getStackContent(stack<string> &Stack) {
    /**
     * @params:Stack:the parse stack
     * @return:the stack content as one string, bottom of the stack first
     * @function:work on a copy so that Stack itself is not modified; the symbols are joined
     *           top-first and the joined text is then reversed character by character
     */
    stack<string> scratch = Stack;
    string top_first;
    for (; !scratch.empty(); scratch.pop())
        top_first.append(scratch.top());
    return string(top_first.rbegin(), top_first.rend());
}

string getRestInput(string &str, string::iterator iter) {
    /**
     * @params:str:the input string;iter:iterator into str marking the current position
     * @return:the part of str from iter to the end, i.e. the input not consumed yet
     */
    return string(iter, std::end(str));
}

string getMatchedInput(string &str, string::iterator iter) {
    /**
     * @params:str:the input string;iter:iterator into str marking the current position
     * @return:the part of str before iter, i.e. the input matched so far ("" at the beginning)
     */
    return string(std::begin(str), iter);
}

void LL1(string &str, string *grammar) {
    /**
     * @params:str:the sentence to analyse, terminated by "#";
     *         grammar:array of rule strings terminated by the sentinel "end"
     * @return:none
     * @function:build the parsing table for grammar and run the table-driven analysis on str,
     *           printing one row (matched input, stack, remaining input, action) per step.
     *           The analysis stops at the first error; "Accept" is printed only when the stack
     *           is reduced to "#" and exactly "#" is left in the input.
     */
    table parsingTable = getParsingTable(grammar);

    cout << "Input String:" << str << endl;
    cout << setw(20) << std::left << "Matched" << setw(20) << std::left << "Stack" << setw(20) << std::left << "Input"
         << setw(20) << std::left << "Action" << endl;

    stack<string> Stack;    // parse stack with "#" at the bottom
    Stack.emplace("#");
    Stack.emplace(START_SYMBOL);
    auto iter = std::begin(str);

    // Prints the current state of the analysis together with the action just taken.
    const auto print_step = [&](const string &action) {
        cout << setw(20) << std::left << getMatchedInput(str, iter)
             << setw(20) << std::left << getStackContent(Stack)
             << setw(20) << std::left << getRestInput(str, iter)
             << setw(20) << std::left << action << endl;
    };

    bool failed = false;
    while (!failed && Stack.top() != "#") {
        string top = Stack.top();
        if (iter == std::end(str)) {
            // The input has no more symbols (missing "#"); reading *iter would be invalid.
            cout << "Wrong Input String.The input ended while the Stack top was:" << top << endl;
            failed = true;
        } else if (isEqual(top, *iter)) {       // stack top matches the next input symbol
            Stack.pop();
            ++iter;
            print_step("Match:" + top);
        } else if (top == "~") {                // the empty string consumes no input
            Stack.pop();
            print_step("Empty String ~");
        } else if (isInSet(Terminals, top)) {   // a terminal that differs from the input symbol
            cout << "Wrong;" << endl;
            failed = true;                      // stop instead of reporting the same mismatch forever
        } else {
            const string production = QueryParsingTable(top, char2string(*iter), parsingTable);
            if (production.empty()) {
                cout << "Wrong Input String.The Stack top:" << top << ",doesn't match current input symbol-" << *iter
                     << endl;
                failed = true;
            } else {
                Stack.pop();
                // Push the right-hand side in reverse so that its first symbol ends up on top.
                const size_t arrow = production.find("->");
                const size_t body = arrow == string::npos ? 0 : arrow + 2;
                for (size_t i = production.size(); i > body; --i)
                    Stack.emplace(char2string(production[i - 1]));
                print_step("Production:" + production);
            }
        }
    }

    if (!failed) {
        const string rest = getRestInput(str, iter);
        if (rest == "#")
            cout << "Accept" << endl;
        else
            cout << "Wrong Input String.The Stack is empty but the input is not finished:" << rest << endl;
    }
}


int main() {
    // Expression grammar; "~" stands for the empty string and "end" terminates the rule list.
    string grammar[] = {"E->TG", "G->+TG|-TG", "G->~", "T->FS", "S->*FS|/FS", "S->~", "F->(E)", "F->i", "end"};
    // Sentence to analyse, terminated by "#". It contains two adjacent "i", which these rules
    // do not derive, so the run is expected to end with an error report.
    string str = "i*i*ii+i#";

    LL1(str, grammar);
    return 0;
}
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值