#include<unordered_set>
#include<unordered_map>
#include<iostream>
#include<vector>
#include<stack>
#include<string>
#include<algorithm>
#include<iomanip>
#include<fstream>
using namespace std;
// Start symbol of the grammar; its FOLLOW set receives the end marker "#".
#define START_SYMBOL "E"
// A set of symbols or of production right-hand sides, each stored as a string.
using set = unordered_set<string>;
// Maps a key (a non-terminal, or a "X->rhs" production) to a set of strings.
using multimap = unordered_map<string, set>;
// Parsing table: maps "X&t" (non-terminal X, lookahead t) to the production "X->rhs".
using table = unordered_map<string, string>;
// Terminals including "~", the marker used for the empty string.
set terminals = {"+", "-", "~", "*", "/", "(", ")", "i"};
// Terminals without the empty-string marker "~".
set Terminals = {"+", "-", "*", "/", "(", ")", "i"};
// Non-terminals of the grammar; each one is a single upper-case letter.
set Non_Terminals = {"E", "T", "G", "F", "S"};
/**
 * @brief Parses grammar rule strings into a map from non-terminal to its alternatives.
 *
 * A rule has the form "X->alt1|alt2|...", where X is a single upper-case letter.
 * Alternatives of rules that share a left-hand side are merged into one set.
 * A rule without a complete arrow is reported on stderr and skipped.
 *
 * @param production map that receives the parsed alternatives (updated in place)
 * @param grammar    array of rule strings; the sentinel string "end" must terminate it
 * @return a copy of the updated production map
 */
multimap init(multimap &production, string *grammar) {
    for (const string *rule = grammar; *rule != "end"; ++rule) {
        // The left-hand side is the leading upper-case letter; it stays empty otherwise.
        string nonTerminal;
        if (!rule->empty() && rule->front() >= 'A' && rule->front() <= 'Z')
            nonTerminal = rule->front();

        // The arrow starts at the first '-' or '>' after the left-hand side and is two
        // characters wide. Searching by position never reads past the end of the rule.
        const size_t arrow = rule->find_first_of("->", nonTerminal.size());
        if (arrow == string::npos || arrow + 2 > rule->size()) {
            cerr << "Skipping malformed grammar rule: " << *rule << endl;
            continue;
        }

        // Split the right-hand side on '|' and merge the pieces into the existing set.
        set &alternatives = production[nonTerminal];
        size_t start = arrow + 2;
        for (;;) {
            const size_t bar = rule->find('|', start);
            if (bar == string::npos) {
                alternatives.insert(rule->substr(start));
                break;
            }
            alternatives.insert(rule->substr(start, bar - start));
            start = bar + 1;
        }
    }
    return production;
}
/**
 * @brief Tests whether a string is an element of a set.
 * @param sets set to search
 * @param str  candidate element
 * @return true when str is contained in sets
 */
bool isInSet(const set &sets, const string &str) {
    // Hash lookup instead of comparing against every element in turn.
    return sets.find(str) != sets.end();
}
/**
 * @brief Tests whether the one-character string made of ch is an element of a set.
 * @param sets set to search
 * @param ch   character to look up
 * @return true when the string consisting only of ch is contained in sets
 */
bool isInSet(const set &sets, char ch) {
    // Build the key once and use a hash lookup rather than rebuilding it per element.
    return sets.find(string(1, ch)) != sets.end();
}
/**
 * @brief Tests whether a symbol occurs inside any element of a set.
 * @param sets   set whose elements are searched
 * @param symbol text to look for
 * @return true when symbol is a substring of at least one element of sets
 */
bool isBelongSet(const set &sets, const string &symbol) {
    for (const string &candidate : sets) {
        if (candidate.find(symbol) != string::npos)
            return true;
    }
    return false;
}
/**
 * @brief Tests whether one string occurs inside another.
 * @param mother string that is searched
 * @param son    text to look for
 * @return true when son is a substring of mother
 */
bool isInString(const string &mother, const string &son) {
    return mother.find(son) != string::npos;
}
/**
 * @brief Computes the FIRST set of one non-terminal.
 *
 * Every alternative is scanned left to right. The FIRST set of each leading
 * non-terminal (without "~") is added for as long as the symbols seen so far can
 * derive the empty string. "~" itself is added only when a whole alternative can
 * derive the empty string.
 *
 * Left recursion through the symbol itself is reported on stdout and ends the
 * program. Recursion through other non-terminals is not detected, so such a
 * grammar makes this function recurse without bound.
 *
 * @param production map from non-terminal to its alternatives
 * @param symbol     non-terminal whose FIRST set is wanted
 * @return a map with the single entry {symbol, FIRST(symbol)}; the set is empty
 *         when symbol has no productions
 */
multimap getFirst(multimap &production, const string &symbol) {
    set first_set;
    const auto rule = production.find(symbol);
    if (rule != production.end()) {
        for (const string &product : rule->second) {
            bool allNullable = true;  // true while every symbol seen so far can vanish
            for (const char ch : product) {
                const string current(1, ch);
                if (isInSet(Non_Terminals, current)) {
                    if (current == symbol) {
                        cout << "ERROR!当前文法为左递归文法!" << endl;
                        exit(0);
                    }
                    multimap nested = getFirst(production, current);
                    bool nullable = false;
                    for (const string &item : nested[current]) {
                        if (item == "~")
                            nullable = true;
                        else
                            first_set.insert(item);
                    }
                    if (!nullable) {
                        allNullable = false;
                        break;
                    }
                } else if (current == "~") {
                    // Explicit empty-string marker: contributes nothing by itself.
                    continue;
                } else {
                    // A terminal ends the scan; symbols outside both sets are ignored.
                    if (isInSet(terminals, current))
                        first_set.insert(current);
                    allNullable = false;
                    break;
                }
            }
            if (allNullable)
                first_set.insert("~");
        }
    }
    multimap first;
    first.insert({symbol, first_set});
    return first;
}
multimap getFollow(multimap &production, const string &symbol, multimap &first_set) {
multimap follow;
set follow_set;
if (symbol == START_SYMBOL)
follow_set.insert("#");
for (auto &item: production) { // 首先在各个生成式之中查找该非终结符
if (isBelongSet(item.second, symbol)) {
// 如果是在当前产生式集合之中,依旧需要对产生式集合进行遍历
for (auto &product: item.second) {
if (isInString(product, symbol)) { //存在于当前的产生式之中,
auto index = product.find(symbol);
if (index < product.size() - 1) { // 如果当前字符不在产生式字符的尾部
if (isInSet(Non_Terminals, product.at(index + 1))) {
string temp;
temp += product.at(index + 1);
bool flag = false;
for (auto &temp_str: first_set[temp]) {
if (temp_str != "~")
follow_set.insert(temp_str);
if (temp_str == "~")
flag = true;
}
multimap left_symbol_follow;
if (flag && item.first != temp && item.first != symbol)
left_symbol_follow = getFollow(production, item.first, first_set);
follow_set.insert(left_symbol_follow[item.first].begin(),
left_symbol_follow[item.first].end());
}
} else if (index == (product.length() - 1) && item.first != symbol) {
multimap left_symbol_follow = getFollow(production, item.first, first_set);
follow_set.insert(left_symbol_follow[item.first].begin(), left_symbol_follow[item.first].end());
}
}
}
}
}
follow.insert({symbol, follow_set});
return follow;
}
void initFirst(multimap &production, multimap &first) {
for (const auto &Non_Terminal: Non_Terminals) {
multimap first_of_symbol = getFirst(production, Non_Terminal);
first.insert(first_of_symbol.begin(), first_of_symbol.end());
}// 获取所有元素的FIRST集合
}
void initFollow(multimap &production, multimap &first, multimap &follow) {
for (const auto &Non_Terminal: Non_Terminals) {
multimap first_of_symbol = getFollow(production, Non_Terminal, first);
follow.insert(first_of_symbol.begin(), first_of_symbol.end());
}
}
/**
 * @brief Returns the first character of a production as a string.
 * @param production text to read from
 * @return the leading character, or an empty string when production is empty
 */
string getFirstSymbol(string &production) {
    return string(production, 0, 1);
}
/**
 * @brief Tests whether a string consists of exactly one given character.
 * @param str string to compare
 * @param ch  character to compare against
 * @return true when str has length one and its only character equals ch
 */
bool isEqual(string &str, char &ch) {
    return str.size() == 1 && str.front() == ch;
}
/**
 * @brief Looks up the production for a non-terminal and a lookahead symbol.
 *
 * The key is built as left + "&" + terminal. The table is only read: a missing
 * key does not create an entry.
 *
 * @param left          non-terminal on top of the stack
 * @param terminal      current lookahead symbol
 * @param parsing_table table keyed by "X&t"
 * @return the stored production, or an empty string when there is no entry
 */
string QueryParsingTable(string &left, const string &terminal, table &parsing_table) {
    const auto entry = parsing_table.find(left + "&" + terminal);
    return entry == parsing_table.end() ? string() : entry->second;
}
/**
 * @brief Converts a character into a string of length one.
 * @param ch character to convert
 * @return a string holding only ch
 */
string char2string(char ch) {
    return string(1, ch);
}
/**
 * @brief Computes the FIRST set of every single production alternative.
 *
 * Each alternative is scanned left to right. A terminal is added and ends the
 * scan; a non-terminal contributes its FIRST set without "~" and lets the scan
 * continue only when that set contains "~". "~" is added when the whole
 * alternative can derive the empty string, which includes the alternative "~".
 * The FIRST map is only read.
 *
 * @param production map from non-terminal to its alternatives
 * @param first      FIRST sets keyed by non-terminal
 * @return map from "X->rhs" to the FIRST set of that alternative
 */
multimap getFirstOfProduction(multimap &production, multimap &first) {
    multimap firstOfProduction;
    for (const auto &rule : production) {
        for (const string &product : rule.second) {
            set firsts;
            bool allNullable = true;  // true while every symbol seen so far can vanish
            for (const char ch : product) {
                if (ch == '~')
                    continue;  // explicit empty-string marker
                if (isInSet(Non_Terminals, ch)) {
                    bool nullable = false;
                    const auto known = first.find(char2string(ch));
                    if (known != first.end()) {
                        for (const string &item : known->second) {
                            if (item == "~")
                                nullable = true;
                            else
                                firsts.insert(item);
                        }
                    }
                    if (!nullable) {
                        allNullable = false;
                        break;
                    }
                } else {
                    // A terminal ends the scan; symbols outside both sets add nothing.
                    if (isInSet(Terminals, ch))
                        firsts.insert(char2string(ch));
                    allNullable = false;
                    break;
                }
            }
            if (allNullable)
                firsts.insert("~");
            firstOfProduction.insert({rule.first + "->" + product, firsts});
        }
    }
    return firstOfProduction;
}
// Prints `title`, then one "key : item item ..." line per entry, then a blank line.
static void printSymbolSets(const char *title, const multimap &sets) {
    cout << title << endl;
    for (const auto &entry : sets) {
        cout << entry.first << " : ";
        for (const auto &str : entry.second)
            cout << str << " ";
        cout << endl;
    }
    cout << endl;
}

/**
 * @brief Builds the LL(1) parsing table for a grammar and prints the intermediate sets.
 *
 * The FIRST sets, the FOLLOW sets, the FIRST set of every alternative and the
 * finished table are written to stdout. For an alternative X->rhs the table gets
 * "X&t" -> "X->rhs" for every terminal t in FIRST(rhs). When rhs can derive the
 * empty string, it also gets that entry for every symbol t in FOLLOW(X). An
 * existing entry is never overwritten, so for a grammar with conflicts the first
 * inserted production stays in the table.
 *
 * @param grammar array of rule strings terminated by the sentinel string "end"
 * @return table mapping "X&t" to the production to apply
 */
table getParsingTable(string *grammar) {
    table parsing_table;
    multimap production;
    production = init(production, grammar);
    multimap FIRST;
    initFirst(production, FIRST);
    multimap FOLLOW;
    initFollow(production, FIRST, FOLLOW);
    multimap firstOfProduction = getFirstOfProduction(production, FIRST);

    printSymbolSets("FIRST set:", FIRST);
    printSymbolSets("FOLLOW set:", FOLLOW);
    printSymbolSets("FIRST set of production:", firstOfProduction);

    for (const auto &rule : production) {
        const auto follow = FOLLOW.find(rule.first);
        for (const string &product : rule.second) {
            const string queryString = rule.first + "->" + product;
            // The literal "~" alternative is nullable even if its FIRST set is empty.
            bool nullable = (product == "~");
            const auto firsts = firstOfProduction.find(queryString);
            if (firsts != firstOfProduction.end()) {
                for (const auto &item : firsts->second) {
                    if (item == "~")
                        nullable = true;
                    else if (isInSet(Terminals, item))
                        parsing_table.insert({rule.first + "&" + item, queryString});
                }
            }
            // A nullable alternative is chosen on every symbol that may follow the left-hand side.
            if (nullable && follow != FOLLOW.end()) {
                for (const auto &item : follow->second)
                    parsing_table.insert({rule.first + "&" + item, queryString});
            }
        }
    }

    cout << "parsing_table :" << endl;
    for (const auto &item : parsing_table) {
        cout << item.first << " : " << item.second << endl;
    }
    cout << endl;
    return parsing_table;
}
/**
 * @brief Renders the content of the parse stack from bottom to top.
 *
 * The stack is copied, so the caller's stack is left unchanged. Each symbol is
 * appended as a whole, which keeps symbols longer than one character readable.
 *
 * @param Stack parse stack
 * @return the symbols concatenated in bottom-to-top order
 */
string getStackContent(stack<string> &Stack) {
    stack<string> remaining(Stack);
    vector<string> symbols;  // collected top-to-bottom
    symbols.reserve(remaining.size());
    while (!remaining.empty()) {
        symbols.push_back(remaining.top());
        remaining.pop();
    }
    string stackContent;
    for (auto symbol = symbols.rbegin(); symbol != symbols.rend(); ++symbol)
        stackContent += *symbol;
    return stackContent;
}
/**
 * @brief Returns the part of the input that has not been consumed yet.
 * @param str  input string
 * @param iter position of the current input symbol inside str
 * @return the characters from iter up to the end of str
 */
string getRestInput(string &str, string::iterator iter) {
    return string(iter, std::end(str));
}
/**
 * @brief Returns the part of the input that has already been matched.
 * @param str  input string
 * @param iter position of the current input symbol inside str
 * @return the characters from the start of str up to, but not including, iter
 */
string getMatchedInput(string &str, string::iterator iter) {
    return string(std::begin(str), iter);
}
/**
 * @brief Runs a table-driven LL(1) analysis of a sentence and prints the trace.
 *
 * The parsing table is built from the grammar first (which prints the
 * intermediate sets). The stack starts as "#" plus the start symbol. Every step
 * prints the matched input, the stack, the remaining input and the action taken.
 * The analysis stops at the first mismatch. An exhausted input is read as the
 * end marker '#'. Input left over once the stack is reduced to "#" is reported
 * as a mismatch.
 *
 * @param str     sentence to analyse; expected to end with '#'
 * @param grammar array of rule strings terminated by the sentinel string "end"
 */
void LL1(string &str, string *grammar) {
    stack<string> Stack;  // parse stack
    table parsingTable = getParsingTable(grammar);
    cout << "Input String:" << str << endl;
    cout << setw(20) << std::left << "Matched" << setw(20) << std::left << "Stack" << setw(20) << std::left << "Input"
         << setw(20) << std::left << "Action" << endl;
    auto iter = std::begin(str);
    Stack.emplace("#");
    Stack.emplace(START_SYMBOL);

    // Prints one row of the trace for the current stack and input position.
    const auto printStep = [&](const string &action) {
        cout << setw(20) << std::left << getMatchedInput(str, iter) << setw(20) << std::left
             << getStackContent(Stack) << setw(20) << std::left
             << getRestInput(str, iter)
             << setw(20) << std::left << action << endl;
    };

    while (Stack.top() != "#") {
        string top = Stack.top();
        // Never dereference the end of the input; treat it as the end marker instead.
        char lookahead = (iter == std::end(str)) ? '#' : *iter;
        if (isEqual(top, lookahead)) {
            // The stack top matches the next input symbol.
            Stack.pop();
            ++iter;
            printStep("Match:" + top);
        } else if (isInSet(Terminals, top)) {
            // A terminal that does not match can never be resolved: stop instead of looping.
            cout << "Wrong;" << endl;
            break;
        } else {
            const string production = QueryParsingTable(top, char2string(lookahead), parsingTable);
            if (!production.empty()) {
                Stack.pop();
                // Push the right-hand side (everything after the last '>') in reverse order.
                const size_t rhsStart = production.rfind('>') + 1;
                for (size_t i = production.size(); i > rhsStart; --i)
                    Stack.emplace(char2string(production[i - 1]));
                printStep("Production:" + production);
            } else if (top == "~") {
                // The empty-string marker is dropped without consuming input.
                Stack.pop();
                printStep("Empty String ~");
            } else {
                cout << "Wrong Input String.The Stack top:" << top << ",doesn't match current input symbol-" << lookahead
                     << endl;
                break;
            }
        }
    }

    // The stack is reduced to "#": the input must be at its end marker as well.
    if (Stack.top() == "#" && iter != std::end(str) && *iter != '#') {
        cout << "Wrong Input String.The Stack top:" << Stack.top() << ",doesn't match current input symbol-" << *iter
             << endl;
    }
}
// Analyses a fixed sample sentence with a fixed expression grammar.
int main() {
    // Rule list; the sentinel "end" marks the end of the array.
    string grammar[] = {"E->TG", "G->+TG|-TG", "G->~", "T->FS", "S->*FS|/FS", "S->~", "F->(E)", "F->i", "end"};
    string sentence = "i*i*ii+i#";
    LL1(sentence, grammar);
    return 0;
}
// 06-27
// 06-27