还是先把代码给贴出来,回头再找时间写篇文章总结一下,分析一下数据结构和算法
#pragma once
#include <iostream>
#include <algorithm>
#include <fstream>
#include <map>
#include <set>
#include <string>
#include <stack>
using namespace std;
/*---------------------------------------- Global state: grammar symbols, productions and the computed sets ---------------------------------------------------*/
set<string> non_terminal; // non-terminals (left-hand sides recorded by divide_words)
set<string> productions; // right-hand sides recorded by divide_words
std::map<string, string> match_map; // non-terminal -> text of its right-hand side
std::map<string, set<string>> first; // key: non-terminal; value: the FIRST set of that non-terminal
std::map<string, set<string>> follow; // key: non-terminal; value: the FOLLOW set of that non-terminal
bool is_not_changing = false; // find_follow loops while this is false and sets it to true once a pass leaves the total FOLLOW size unchanged
/*
 * Parse one grammar line of the form "LEFT->RIGHT" and record it.
 *
 * grammar   : the raw line; everything before the first "->" is the
 *             non-terminal, everything after it is the right-hand side
 *             (alternatives may be separated by '|').
 * match_map : receives the LEFT -> RIGHT association.
 *
 * Side effects: LEFT is added to the global non_terminal set and RIGHT to the
 * global productions set. A line that contains no "->" is ignored.
 *
 * When LEFT already has an entry in match_map (the same non-terminal appears
 * on several lines) the new right-hand side is appended as an additional
 * '|' alternative instead of being silently dropped.
 */
void divide_words(string grammar, map<string, string>& match_map) {
    const size_t arrow = grammar.find("->");
    if (arrow == string::npos) {
        return;  // not a production line
    }

    const string left = grammar.substr(0, arrow);
    const string right = grammar.substr(arrow + 2);  // up to the end of the line

    non_terminal.insert(left);
    productions.insert(right);

    auto inserted = match_map.insert(make_pair(left, right));
    if (!inserted.second) {
        // Key was already present: keep the earlier alternatives and add this one.
        string& existing = inserted.first->second;
        existing += '|';
        existing += right;
    }
}
/*
 * Split a right-hand side on '|' and insert every piece into small_right.
 *
 * Empty pieces are kept: a leading, trailing or doubled '|' yields an empty
 * string, and a right-hand side without any '|' is inserted as a single piece.
 */
void divide_right(string grammar_right, set<string>& small_right) {
    size_t piece_start = 0;
    for (;;) {
        const size_t bar = grammar_right.find('|', piece_start);
        if (bar == string::npos) {
            // Last (or only) piece: everything from piece_start to the end.
            small_right.insert(grammar_right.substr(piece_start));
            return;
        }
        small_right.insert(grammar_right.substr(piece_start, bar - piece_start));
        piece_start = bar + 1;
    }
}
/*
 * Helper for find_first: accumulate into `first` the FIRST symbols reachable
 * from `non_term`, remembering in `visited` every non-terminal that has
 * already been expanded so that left-recursive or cyclic grammars terminate.
 */
static void find_first_visit(const string& non_term, set<string>& first, set<string>& visited) {
    if (!visited.insert(non_term).second) {
        return;  // already expanded during this computation
    }
    const auto search = match_map.find(non_term);
    if (search == match_map.end()) {
        return;  // no production recorded for this symbol
    }

    set<string> or_words;  // the '|'-separated alternatives of the production
    divide_right(search->second, or_words);

    for (const string& alternative : or_words) {
        if (alternative.empty()) {
            continue;  // nothing to inspect
        }

        // Pick the LONGEST non-terminal that prefixes the alternative, so that
        // an alternative starting with "E'" is not mistaken for one starting
        // with "E".
        const string* leading = nullptr;
        for (const string& candidate : non_terminal) {
            const bool is_prefix = !candidate.empty() &&
                                   alternative.compare(0, candidate.size(), candidate) == 0;
            if (is_prefix && (leading == nullptr || candidate.size() > leading->size())) {
                leading = &candidate;
            }
        }
        if (leading != nullptr) {
            find_first_visit(*leading, first, visited);
            continue;
        }

        // The alternative starts with a terminal.
        const char head = alternative[0];
        if (head >= 'a' && head <= 'z') {
            // NOTE(review): the whole alternative is inserted, not only its
            // leading token; this matches the original behaviour.
            first.insert(alternative);
        }
        switch (head) {
        case '(':
        case ')':
        case '+':
        case '*':
        case '#':
            first.insert(string(1, head));
            break;
        default:
            break;
        }
    }
}

/*
 * Compute the FIRST set of the non-terminal `non_term` into `first`.
 *
 * non_term : the non-terminal to analyse; it is looked up in the global
 *            match_map, and nothing is added when it has no entry.
 * first    : output set; symbols are added to whatever it already contains.
 *
 * For every alternative of the production only the leading symbol is
 * examined: a leading non-terminal contributes its own FIRST symbols, a
 * leading lowercase letter contributes the alternative itself, and a leading
 * '(', ')', '+', '*' or '#' contributes that single character.
 *
 * NOTE(review): a leading non-terminal whose FIRST set contains "#" does not
 * cause the following symbol to be examined.
 */
void find_first(string non_term, set<string>& first) {
    set<string> visited;
    find_first_visit(non_term, first, visited);
}
// Seed the global follow map with one entry per non-terminal.
// The first non-terminal in the iteration order of the non_terminal set is
// treated as the start symbol and receives {"$"}; every other one receives an
// empty set. Entries that already exist in follow are left untouched.
void initial_follow() {
    bool at_start_symbol = true;
    for (const string& symbol : non_terminal) {
        set<string> seed;
        if (at_start_symbol) {
            seed.insert("$");
            at_start_symbol = false;
        }
        follow.insert(make_pair(symbol, seed));
    }
}
// Report whether a FIRST set contains the empty-string marker "#".
bool first_contains_null(set<string> &first) {
    return first.count("#") != 0;
}
// Return true when str is a member of the global non_terminal set,
// false otherwise.
bool is_non_terminal(string str) {
    return non_terminal.count(str) != 0;
}
// True when a is a lowercase ASCII letter ('a'..'z').
bool is_letter(char a) {
    return 'a' <= a && a <= 'z';
}
// True when a is an uppercase ASCII letter ('A'..'Z').
bool is_cap_letter(char a) {
    if (a < 'A') {
        return false;
    }
    return a <= 'Z';
}
// Remove the last grammar symbol from str and return it.
//
// str : right-hand side (or what is left of it); it is shortened in place by
//       the symbol that is returned.
// Returns an empty string when str is empty. Otherwise the symbol is
//   - the last two characters when str ends with an apostrophe (a primed
//     non-terminal such as "E'") or with two lowercase letters, or
//   - the last character alone.
// A one-character string always yields that single character; the
// two-character rules are only applied when two characters are available.
string find_last(string &str) {
    if (str.empty()) {
        return string();
    }

    const auto is_lower = [](char c) { return c >= 'a' && c <= 'z'; };

    const size_t size = str.size();
    size_t symbol_length = 1;
    if (size >= 2) {
        const char last = str[size - 1];
        const char before_last = str[size - 2];
        if (last == '\'' || (is_lower(last) && is_lower(before_last))) {
            symbol_length = 2;
        }
    }

    const string symbol = str.substr(size - symbol_length);
    str.erase(size - symbol_length);
    return symbol;
}
// Sum of the sizes of every set stored in the global follow map.
int cal_follow_total_size() {
    int total_size = 0;
    for (const auto& entry : follow) {
        total_size += static_cast<int>(entry.second.size());
    }
    return total_size;
}
/*
 * Fill `Follow` with the FOLLOW set of every non-terminal by iterating to a
 * fixed point.
 *
 * Follow : map non-terminal -> FOLLOW set, updated in place. Missing keys are
 *          created with an empty set when they are first needed.
 *
 * Reads the global match_map (productions) and the global first map (FIRST
 * sets). The loop runs while the global is_not_changing flag is false and
 * sets it to true after the first pass that does not grow the sets of
 * `Follow`; if the flag is already true on entry nothing is done.
 *
 * Each alternative is scanned from right to left with find_last. `temp`
 * holds what can follow the symbol currently being looked at: it starts as
 * FOLLOW(left), becomes {t} after a terminal t, and after a non-terminal B
 * becomes FIRST(B) (or is extended by FIRST(B) when FIRST(B) contains "#").
 */
void find_follow(std::map<string, set<string>>& Follow) {
    // Convergence is measured on the map being filled, not on a global.
    const auto total_size = [&Follow]() {
        size_t total = 0;
        for (const auto& entry : Follow) {
            total += entry.second.size();
        }
        return total;
    };

    while (!is_not_changing) {
        const size_t former_size = total_size();

        for (const auto& rule : match_map) {  // every production A -> ...
            const string& left = rule.first;
            set<string> or_words;
            divide_right(rule.second, or_words);

            for (const string& alternative : or_words) {
                set<string> temp = Follow[left];
                string word = alternative;  // consumed from the right by find_last

                while (!word.empty()) {
                    const string symbol = find_last(word);

                    if (!is_non_terminal(symbol)) {  // terminal
                        temp.clear();
                        temp.insert(symbol);
                        continue;
                    }

                    // Everything that can follow this position, except the
                    // empty-string marker, follows the non-terminal.
                    set<string>& symbol_follow = Follow[symbol];
                    for (const string& candidate : temp) {
                        if (candidate != "#") {
                            symbol_follow.insert(candidate);
                        }
                    }

                    const auto first_entry = first.find(symbol);
                    if (first_entry == first.end()) {
                        // No FIRST set recorded: treat it as empty and not nullable.
                        temp.clear();
                        continue;
                    }
                    set<string>& symbol_first = first_entry->second;
                    if (first_contains_null(symbol_first)) {
                        temp.insert(symbol_first.begin(), symbol_first.end());
                    }
                    else {
                        temp = symbol_first;
                    }
                }
            }
        }

        // Stop once a full pass added nothing.
        is_not_changing = (former_size == total_size());
    }
}
/*
 * Compute the FIRST_S set for the production `left -> production`.
 *
 * left       : left-hand non-terminal; its entry in the global follow map is
 *              used when every symbol of the production is nullable.
 * production : one alternative of the right-hand side.
 *
 * The production is scanned left to right:
 *   - an uppercase letter (optionally followed by an apostrophe) is a
 *     non-terminal; its FIRST set is added, and scanning continues only when
 *     that set contains "#";
 *   - a lowercase letter starts a terminal of one or two lowercase letters,
 *     any other character is a one-character terminal; the terminal is added
 *     and the result is returned.
 * When the scan reaches the end, the result is replaced by FOLLOW(left), or
 * by an empty set when `left` has no entry in follow.
 *
 * NOTE(review): on full nullability the accumulated FIRST symbols are
 * discarded in favour of FOLLOW(left), as in the original code; confirm that
 * this is the intended definition.
 */
set<string> find_first_s(string left, string production) {
    set<string> FIRST_S;
    const size_t length = production.size();

    for (size_t i = 0; i < length; i++) {
        const char current = production[i];
        const bool has_next = i + 1 < length;  // guards every look-ahead below

        if (is_cap_letter(current)) {
            const bool primed = has_next && production[i + 1] == '\'';
            const string M = production.substr(i, primed ? 2 : 1);

            set<string> M_FIRST;
            find_first(M, M_FIRST);
            FIRST_S.insert(M_FIRST.begin(), M_FIRST.end());
            if (!first_contains_null(M_FIRST)) {
                return FIRST_S;
            }
            if (primed) {
                ++i;  // skip the apostrophe that belongs to this non-terminal
            }
            continue;
        }

        if (is_letter(current)) {
            const bool two_letters = has_next && is_letter(production[i + 1]);
            FIRST_S.insert(production.substr(i, two_letters ? 2 : 1));
        }
        else {
            FIRST_S.insert(string(1, current));
        }
        return FIRST_S;
    }

    const auto follow_entry = follow.find(left);
    return follow_entry != follow.end() ? follow_entry->second : set<string>();
}
// Print one table of sets: a header line, then for every entry a separator
// line followed by "<non-terminal>\t|\t{ a, b }". The elements are written in
// reverse set order, and the trailing ", " is erased with two backspaces.
static void print_sets(const char* title, const char* separator,
                       map<string, set<string>>& sets) {
    cout << "非终结符" << "\t" << title << endl;
    for (map<string, set<string>>::iterator entry = sets.begin(); entry != sets.end(); ++entry) {
        cout << separator << endl;
        cout << entry->first << "\t|\t";
        cout << "{ ";
        for (set<string>::reverse_iterator item = entry->second.rbegin(); item != entry->second.rend(); ++item) {
            cout << *item << ", ";
        }
        cout << "\b\b }";
        cout << endl;
    }
}

// Read the grammar from "wenfa.txt", then compute and print the FIRST and
// FOLLOW sets of every non-terminal.
// Returns -1 when the file cannot be opened, 0 otherwise.
int main() {
    /* Read the grammar file. */
    const char* filename = "wenfa.txt";
    ifstream inFile(filename);
    if (!inFile) {
        cout << "\nFiled to open file " << filename;
        return -1;
    }

    // One production per line; reading stops at the first empty line, at a
    // line that is exactly "end", or at end of file. Lines of any length are
    // read whole.
    string st;
    while (std::getline(inFile, st)) {
        if (st.empty() || st == "end") {
            break;
        }
        divide_words(st, match_map);  // record the non-terminal and its right-hand side
    }
    inFile.close();

    /* Compute the FIRST set of every non-terminal. */
    for (set<string>::iterator i = non_terminal.begin(); i != non_terminal.end(); ++i) {
        set<string> the_first;  // FIRST set of the current non-terminal
        find_first(*i, the_first);
        first.insert(make_pair(*i, the_first));
    }
    print_sets("First集合", "-------------------------", first);
    cout << endl;

    /* Compute the FOLLOW sets. */
    initial_follow();
    find_follow(follow);
    print_sets("Follow集合", "------------------------------", follow);

    // Printing the FIRST_S sets (find_first_s) is not implemented; the
    // original author's note says the function's results were checked.
    return 0;
}