编译原理 实验3 基于LR(0)方法的语法分析

#include<algorithm>
#include<cstdio>
#include<cstring>
#include<fstream>
#include<iomanip>
#include<iostream>
#include<map>
#include<set>
#include<string>
#include<vector>
using namespace std;
// Number of grammar symbols printed per row when the terminal and
// non-terminal lists are dumped.
const int wordnumPerLine = 7;
// writeFile: true when the report should go to the file named by outfname.
// isErr:     set when item-set construction fails, so main can stop early.
bool writeFile = false,isErr = false;
// Handle of the grammar file; it is read through the readLine macro below.
FILE* grammerFile;
// fname / outfname / reply: buffers for the user's answers to the prompts.
// c: receives the single character that readLine consumes after each line.
char fname[50], outfname[50], c, reply[50];
// The sentence to be parsed, symbols separated by spaces.
string analyzedString;
// Column order of the analysis table.
vector<string> syms;

// Reads the next line of grammerFile into a buffer named `theLine` that the
// expanding function must declare, stores the character that follows it in
// `c`, and returns from the enclosing (void) function when fscanf reports EOF.
// NOTE(review): "%[^\n]" has no field width, so a line longer than the buffer
// overflows it; a blank line matches nothing (fscanf returns 0, not EOF) and
// its newline is left unread - confirm the input never contains either.
#define readLine if( fscanf(grammerFile,"%[^\n]%c",theLine,&c)==EOF)	return;

// One grammar production, optionally carrying an LR(0) "dot" position so the
// same type can also represent an item (A -> alpha . beta).
struct Production {
	string left;			// left-hand side symbol
	vector<string>right;	// right-hand side symbols, in order (empty for an empty production)
	bool isEmpty = false;	// true when the production derives the empty string
	int dotPos;				// number of right-hand symbols already to the left of the dot

	// Returns the right-hand side joined by single spaces ("" when it is empty).
	// The separator is emitted before every symbol except the first, so an
	// empty right-hand side never indexes into the vector.
	string getRight() const {
		string ans;
		for (size_t i = 0; i < right.size(); i++) {
			if (i > 0)
				ans += " ";
			ans += right[i];
		}
		return ans;
	}

	// Returns the production as "left->sym1 sym2 ...".
	string getProduction() const {
		return left + "->" + getRight();
	}

	Production() : dotPos(0) {
	}

	// Returns the item text: every symbol is followed by a space and the dot
	// marker is inserted at dotPos.
	string getItem() const {
		string retStr = left+"->";
		if (dotPos == 0) {
			retStr += "· ";
		}
		for (size_t i = 0; i < right.size(); i++) {
			retStr += right[i]+" ";
			if (dotPos == static_cast<int>(i) + 1){
				retStr += "· ";
			}
		}
		return retStr;
	}

	// Two items are equal when they are the same production with the dot in
	// the same place. The cheap integer comparison runs first.
	bool operator == (const Production& other) const {
		return dotPos == other.dotPos && left == other.left && right == other.right;
	}
};
struct Closure {
	vector<Production> items;//项目集集合
	int coreLen = 0;	//核项目的长度
	vector<pair<string, int> >next;//pair< Action或goto的字符串,到达的状态编号>的数组
	bool guiYue = false;//记录是否可规约
	bool operator ==(Closure other) {//判断相等只比较核项目
		if (coreLen != other.coreLen)//核项目长度不等,return false
			return false;
		for (int i = 0; i < coreLen; i++) {//分别比较每一项,此处利用了Production对运算符==的重载
			if (!(items[i] == other.items[i])) {//不确定重载==是否会重载!= 此处保险起见这么写
				return false;
			}
		}
		return true;
	}
	string getClosure() {
		string ansStr = "";
		for (int i = 0; i < items.size(); i++) {
			ansStr += items[i].getItem() + "\n";
		}
		for (int i = 0; i < next.size(); i++) {
			ansStr += next[i].first + "/" + to_string(next[i].second)+" ";
		}
		return ansStr;
	}
	
};

// One row of the parse trace: a snapshot of the parser plus what it did next.
struct Description
{
	vector<int> state;		// state stack
	string symbol;			// symbol stack, already rendered as text
	string inputString;		// remaining input, already rendered as text
	string action;			// ACTION entry applied in this step
	string GOTO;			// GOTO entry applied in this step (if any)

	Description() {}

	// Builds a row from copies of the five given values.
	Description(vector<int> state,string symbol,string inputString, string action,string GOTO)
		: state(state),
		  symbol(symbol),
		  inputString(inputString),
		  action(action),
		  GOTO(GOTO) {
	}
};

// The whole grammar together with everything derived from it.
struct Grammer
{
	vector<Production> productions;	// productions, in input order
	vector<string> nonTerminal;		// non-terminal symbols
	vector<string> terminal;		// terminal symbols
	string startSymbol = "";		// start symbol ("" until the first production is read)
	map<pair<int, string>, string> predictionMap;	// (state, symbol) -> ACTION / GOTO entry
	vector<Closure>itemSet;			// all LR(0) item sets, i.e. the DFA states
	vector<Description> description;	// recorded steps of the parse
	// Must match the operator<< that is actually defined in this file, which
	// takes a non-const Grammer; a `const Grammer &` declaration here would
	// introduce a second overload that has no definition.
	friend ostream & operator <<(ostream & os, Grammer & g);
}grammer;

ostream & operator <<(ostream & os, Grammer & g) {
	filebuf fb;
	ostream* pos;
	if (writeFile) {
		fb.open(outfname, ios::out);
		pos = new ostream(&fb);
	}
	else {
		pos = &os;
	}
	*pos << "非终结符:\n";
	for (int i = 0; i < g.nonTerminal.size(); i++) {
		*pos << g.nonTerminal[i] << "\t" << " ";
		if ((i + 1) % wordnumPerLine == 0)
			*pos << endl;
	}
	*pos << endl;
	*pos << "终结符:\n";
	for (int i = 0; i < g.terminal.size(); i++) {
		*pos << g.terminal[i] << "\t" << " ";
		if ((i + 1) % wordnumPerLine == 0)
			*pos << endl;
	}
	*pos << endl;
	*pos << "文法:\n";
	for (int i = 0; i < g.productions.size(); i++) {
		*pos << g.productions[i].left << "->";
		for (int j = 0; j < g.productions[i].right.size(); j++) {
			*pos << " " << g.productions[i].right[j];
		}
		*pos << endl;
	}	
	*pos << "得到LR(0)项目集规范族:\n";
	for (int i = 0; i < g.itemSet.size(); i++) {
		*pos << "I" + to_string(i) << ":" << g.itemSet[i].getClosure()<<endl;
	}
	*pos << "得到LR(0)分析表:\n";
	*pos <<left<< setw(6)<< "状态"<<setw(6*(g.terminal.size()+1))<<"|ACTION"<< setw(6 * g.nonTerminal.size()) << "|GOTO"<<endl;
	for (int i = 0; i < g.itemSet.size(); i++) {
		*pos << setw(6) << i;
		for (int j = 0; j < syms.size(); j++) {
			if (g.predictionMap.find(pair<int, string>(i, syms[j]))!= g.predictionMap.end()){
				if (j==0||j==g.terminal.size()+1){
					*pos << setw(6) << "|"+g.predictionMap[pair<int, string>(i, syms[j])];
				}
				else
					*pos << setw(6) << g.predictionMap[pair<int, string>(i, syms[j])];
			}
			else {
				if (j == 0 || j == g.terminal.size() + 1) {
					*pos << setw(6) << "|";
				}
				else
					*pos << setw(6) << " ";
			}
		}
		*pos << endl;
	}
	*pos << "得到"+analyzedString+"LR(0)分析过程:\n";
	*pos << left << setw(4) << "状态" << setw(18) << "|状态栈" << setw(18) << "|符号栈"<< setw(18) << "|输入串"<< setw(18) << "|ACTION"<<setw(18) << "|GOTO" << endl;
	for (int i = 0; i < g.description.size(); i++) {
		*pos << left << setw(4) << i << setw(18);
		string symStr;
		for (int j = 0; j < g.description[i].state.size(); j++) {
			symStr += g.description[i].state[j] > 9 ? "(" + to_string(g.description[i].state[j]) + ")" : to_string(g.description[i].state[j]);
		}
		*pos<< "|" + symStr<< setw(18) << "|"+g.description[i].symbol << setw(18) << "|"+ g.description[i].inputString << setw(18) << "|"+ g.description[i].action << setw(18) << "|"+ g.description[i].GOTO << endl;
	}
	if (writeFile)
		fb.close();
	return *pos;
}

#pragma region 工具函数
// Linear search for val in vec.
// Returns the 1-based position of the first element equal to val, or 0 when
// there is none (the +1 keeps "found at index 0" distinct from "not found").
// The vector is taken by const reference so a lookup does not copy it; val
// stays by value and on the left of ==, so element types whose operator== is
// a non-const member still work.
template <class T>
inline int isInVector(const vector<T>& vec, T val) {
	for (size_t i = 0; i < vec.size(); i++) {
		if (val == vec[i])
			return static_cast<int>(i) + 1;
	}
	return 0;
}

// A symbol counts as a non-terminal when its first character is an ASCII
// upper-case letter; the empty string is never one.
inline bool isNoneTerminal(string x) {
	if (x.length() == 0)
		return false;
	const char first = x[0];
	return 'A' <= first && first <= 'Z';
}
// Splits oriString at every occurrence of splitChar.
// Empty pieces are kept, so the result always has (number of separators + 1)
// elements; an empty input yields a single empty string.
// The scan keeps a start offset of type size_type rather than narrowing the
// position to int and re-copying the remainder after every piece.
inline vector<string> splitString(string oriString, char splitChar = ' ') {
	vector<string> pieces;
	string::size_type begin = 0;
	while (true) {
		const string::size_type end = oriString.find(splitChar, begin);
		if (end == string::npos) {
			// last piece: everything after the final separator
			pieces.push_back(oriString.substr(begin));
			break;
		}
		pieces.push_back(oriString.substr(begin, end - begin));
		begin = end + 1;
	}
	return pieces;
}
// Concatenates the elements of vec, each followed by one space.
// With reverse == true the elements are emitted from last to first.
inline string vectorToString(vector<string> vec, bool reverse = false) {
	string joined;
	if (!reverse) {
		for (vector<string>::iterator it = vec.begin(); it != vec.end(); ++it) {
			joined += *it;
			joined += " ";
		}
		return joined;
	}
	for (vector<string>::reverse_iterator it = vec.rbegin(); it != vec.rend(); ++it) {
		joined += *it;
		joined += " ";
	}
	return joined;
}
// Returns the index in grammer.productions of the production behind the first
// item of `closure`, or -1 when no production matches.
int getGuiyueIndex(Closure closure) {
	// `closure` is a private copy, so its first item can be reset in place:
	// productions are stored with the dot at position 0.
	closure.items[0].dotPos = 0;
	// isInVector is 1-based with 0 meaning "absent"; shift to 0-based / -1.
	return isInVector<Production>(grammer.productions, closure.items[0]) - 1;
}
#pragma endregion

// Parses one grammar line of the form "Left->sym1 sym2 ..." into a Production
// and registers every right-hand symbol in grammer.terminal or
// grammer.nonTerminal (decided by isNoneTerminal).
//
// - The first left-hand side seen becomes grammer.startSymbol.
// - A right-hand side that starts with '#' or contains no symbols is an empty
//   production: `right` stays empty and isEmpty is set.
// - Symbols are separated by spaces or tabs; repeated or trailing separators
//   are ignored, so no empty "" symbol is ever produced.
// - A line without "->" yields a production whose left side is the trimmed
//   line and whose right side is empty.
Production lineToProduction(string line) {
	Production production;
	const char* const blanks = " \t";

	// Drop a trailing CR/LF so files with Windows line endings parse the same.
	while (!line.empty() && (line.back() == '\r' || line.back() == '\n'))
		line.pop_back();

	// Find the arrow as a whole; size_type keeps npos intact.
	const string::size_type arrowPos = line.find("->");
	const string leftPart = (arrowPos == string::npos) ? line : line.substr(0, arrowPos);
	const string::size_type leftBegin = leftPart.find_first_not_of(blanks);
	if (leftBegin != string::npos) {
		const string::size_type leftEnd = leftPart.find_last_not_of(blanks);
		production.left = leftPart.substr(leftBegin, leftEnd - leftBegin + 1);
	}
	if (grammer.startSymbol == "")// no start symbol yet: the first left-hand side becomes it
		grammer.startSymbol = production.left;

	if (arrowPos == string::npos) {
		production.isEmpty = true;
		return production;
	}

	const string rightSide = line.substr(arrowPos + 2);
	const string::size_type firstSym = rightSide.find_first_not_of(blanks);
	if (firstSym == string::npos || rightSide[firstSym] == '#') {// empty right-hand side
		production.isEmpty = true;
		return production;
	}

	string symbol;// the right-hand symbol currently being collected
	for (string::size_type i = firstSym; i <= rightSide.length(); i++) {
		const bool atEnd = (i == rightSide.length());
		if (!atEnd && rightSide[i] != ' ' && rightSide[i] != '\t') {
			symbol += rightSide[i];
			continue;
		}
		if (symbol.empty())// consecutive or trailing separators
			continue;
		// Non-terminals start with an upper-case letter, everything else is a terminal.
		vector<string>& bucket = isNoneTerminal(symbol) ? grammer.nonTerminal : grammer.terminal;
		if (!isInVector<string>(bucket, symbol))
			bucket.push_back(symbol);
		production.right.push_back(symbol);
		symbol.clear();
	}
	return production;
}

// Maps an error code to its message.
//   'A' - the dot position of an item is past the end of its production
//   'B' - the grammar is not LR(0)
// Any other code returns a generic message, so every path returns a value.
string getErrorInfo(char index) {
	switch (index) {
		case 'A':
			return "错误类型[A]:dotpos超出界限;";
		case 'B':
			return "错误类型[B]:该文法不是LR(0)文法;";
		default:
			return string("错误类型[") + index + "]:未知错误;";
	}
}


void getInput() {
	char theLine[100];//这一行产生式
	while (true) {
		readLine;
		Production production = lineToProduction(theLine);
		grammer.productions.push_back(production);
		if (!isInVector<string>(grammer.nonTerminal, production.left))
			grammer.nonTerminal.push_back(production.left);
		memset(theLine, 0, sizeof(theLine));
	}
}

void getItemSet() {
	Closure nowClosure;
	nowClosure.items.push_back(grammer.productions[0]);
	grammer.itemSet.push_back(nowClosure);
	int tp = 0;//指向In
	while (tp< grammer.itemSet.size()){
		int np = 0;//当前closureI的第几项的指针
		nowClosure = grammer.itemSet[tp];
		//判断当前Closure(I)是否含有可规约串
		for (int i = 0; i < nowClosure.items.size(); i++) {
			if (nowClosure.items[i].dotPos== nowClosure.items[i].right.size()){//说明可归约
				//查看是否有移入规约冲突或规约规约冲突
				if (nowClosure.items.size() > 1) {//LR(0)文法,当有可规约串,而当前closure的大小大于0说明会产生冲突
					cout << getErrorInfo('B') << endl;			
					isErr = true;
					return;//下同,发现error直接结束函数
				}
				else {
					//进行规约
					grammer.itemSet[tp].guiYue = true;
					break;
				}
			}
			else if(nowClosure.items[i].dotPos> nowClosure.items[i].right.size()){
				cout << getErrorInfo('A') << endl;
				isErr = true;
				return;
			}
		}
		if (grammer.itemSet[tp].guiYue) {
			++tp;
			continue;
		}

		//扩充当前Closure(I)
		while (np< nowClosure.items.size()){
			if (isNoneTerminal(nowClosure.items[np].right[nowClosure.items[np].dotPos])){//点在非终结符前
				for (int i = 0; i < grammer.productions.size(); i++) {
					if (nowClosure.items[np].right[nowClosure.items[np].dotPos]== grammer.productions[i].left&&!isInVector<Production>(nowClosure.items, grammer.productions[i])){//如果左部相等且当前ClosureI没有该项目
						nowClosure.items.push_back(grammer.productions[i]);//压入当前产生式
					}
				}			
			}
			++np;
		}
		grammer.itemSet[tp] = nowClosure;
		//根据当前Closure(I)扩展其他Closure
		map<string, Closure> nextState;//存储当前Closure能扩展出的Closure
		vector<string> alltransSym;//所有能发生动作或跳转的符号
		//计算nextState
		for (int i = 0; i < nowClosure.items.size(); i++) {
			Production production = nowClosure.items[i];
			string transSym = nowClosure.items[i].right[nowClosure.items[i].dotPos];
			if (!isInVector(alltransSym, transSym)) {
				alltransSym.push_back(transSym);
			}
			++production.dotPos;
			if (!isInVector(nextState[transSym].items, production)) {
				nextState[transSym].items.push_back(production);
			}
			
		}
		for (int i = 0; i < alltransSym.size(); i++) {
			nextState[alltransSym[i]].coreLen = nextState[alltransSym[i]].items.size();
			int index = isInVector(grammer.itemSet, nextState[alltransSym[i]]);
			if (index) {//找到了
				grammer.itemSet[tp].next.push_back(pair<string,int>(alltransSym[i],index-1));//直接连上线
			}
			else {//没找到
				grammer.itemSet.push_back(nextState[alltransSym[i]]);//将此Closeure添加到DFA中
				grammer.itemSet[tp].next.push_back(pair<string,int>(alltransSym[i], grammer.itemSet.size()-1));//连上线
			}
		}
		++tp;
	}
}

// Fills grammer.predictionMap (the ACTION/GOTO table) from grammer.itemSet and
// fixes the column order in syms: all terminals, then "#", then every
// non-terminal except the start symbol.
void getPredict() {
	syms = grammer.terminal;
	syms.push_back("#");
	for (size_t k = 0; k < grammer.nonTerminal.size(); ++k) {
		const string& name = grammer.nonTerminal[k];
		if (name == grammer.startSymbol)
			continue;
		syms.push_back(name);
	}

	// Pass 1: reducing states. A state whose item has the start symbol on the
	// left accepts on "#"; any other gets "r<production index>" in every
	// column up to (not including) the first non-terminal column.
	for (int state = 0; state < grammer.itemSet.size(); ++state) {
		if (!grammer.itemSet[state].guiYue)
			continue;
		if (grammer.itemSet[state].items[0].left == grammer.startSymbol) {
			grammer.predictionMap[pair<int, string>(state, "#")] = "acc";
			continue;
		}
		for (size_t k = 0; k < syms.size() && !isNoneTerminal(syms[k]); ++k) {
			grammer.predictionMap[pair<int, string>(state, syms[k])] =
				"r" + to_string(getGuiyueIndex(grammer.itemSet[state]));
		}
	}

	// Pass 2: transitions. A terminal gives a shift "S<target>", a
	// non-terminal gives the bare target number as its GOTO entry.
	for (int state = 0; state < grammer.itemSet.size(); ++state) {
		for (int t = 0; t < grammer.itemSet[state].next.size(); ++t) {
			const pair<string, int>& edge = grammer.itemSet[state].next[t];
			string entry = to_string(edge.second);
			if (!isNoneTerminal(edge.first))
				entry = "S" + entry;
			grammer.predictionMap[pair<int, string>(state, edge.first)] = entry;
		}
	}
}
/*
1.若Aciton[S,a]=Sj,a->符号栈,j->状态栈
2.若Aciton[S,a]=rj,第j个产生式规约,两者指针减去右部符号串的长度,在GOTO[S,A]
3.GOTO[S,A]=j,A->符号栈,j->状态栈
4.Acion[S,a]为空白,移入出错处理
5.若Aciton[S,a]=acc,接受
*/

// Runs the LR driver over analyzedString (symbols separated by spaces) using
// grammer.predictionMap and appends one Description per step to
// grammer.description.
//
// Each step looks up ACTION[top state, next input symbol]:
//   "S<j>" - shift: push the symbol and state j, consume the input symbol
//   "r<j>" - reduce by production j: pop |right side| entries from both
//            stacks, push the left side, then push GOTO[top state, left side]
//   "acc"  - accept and stop
//   other  - report that the input is not a sentence of the grammar and stop
void analyzeString() {
	const string notASentence = "err:此句子不是本文法的句子";

	// Read-only table lookup. find() is used instead of operator[] so a miss
	// does not insert an empty entry into the table.
	auto lookup = [](int state, const string& symbol) -> string {
		map<pair<int, string>, string>::const_iterator it =
			grammer.predictionMap.find(pair<int, string>(state, symbol));
		return it == grammer.predictionMap.end() ? string() : it->second;
	};

	vector<int> stateStack;		// state stack
	vector<string> symbolStack;	// symbol stack
	vector<string> inputStr;	// remaining input
	for (const string& token : splitString(analyzedString)) {
		if (!token.empty())// ignore repeated / leading / trailing spaces
			inputStr.push_back(token);
	}
	inputStr.push_back("#");
	stateStack.push_back(0);
	symbolStack.push_back("#");

	while (true){
		if (inputStr.empty()) {
			// Possible only if the end marker itself was shifted.
			cout << notASentence << endl;
			break;
		}
		// Snapshot taken before this step changes the stacks.
		Description description(stateStack,vectorToString(symbolStack), vectorToString(inputStr),"","");
		const string action = lookup(stateStack.back(), inputStr.front());
		if (!action.empty() && action[0] == 'S') {// shift
			stateStack.push_back(stoi(action.substr(1)));
			symbolStack.push_back(inputStr.front());
			inputStr.erase(inputStr.begin());
			description.action = action;
			grammer.description.push_back(description);
		}
		else if (!action.empty() && action[0] == 'r') {// reduce
			const int productionIndex = stoi(action.substr(1));
			if (productionIndex < 0 || productionIndex >= static_cast<int>(grammer.productions.size())) {
				cout << notASentence << endl;
				break;
			}
			size_t popNum = grammer.productions[productionIndex].right.size();
			// At least one state must remain for the GOTO lookup, hence >=.
			// Both stacks grow and shrink together, so one check covers both.
			if (popNum >= stateStack.size()){
				cout << notASentence << endl;
				break;
			}
			while (popNum--){
				stateStack.pop_back();
				symbolStack.pop_back();
			}

			symbolStack.push_back(grammer.productions[productionIndex].left);
			const string goTo = lookup(stateStack.back(), symbolStack.back());
			if (!goTo.empty() && goTo[0] >= '0' && goTo[0] <= '9'){// GOTO entries are bare state numbers
				description.GOTO = goTo;
				stateStack.push_back(stoi(goTo));
			}
			else {
				cout << notASentence << endl;
				break;
			}
			description.action = action;
			grammer.description.push_back(description);
		}
		else if (action == "acc") {// accept
			description.action = action;
			grammer.description.push_back(description);
			break;
		}
		else {// blank table entry: syntax error
			description.action = action;
			grammer.description.push_back(description);
			cout << notASentence << endl;
			break;
		}
	}
}

// Interactive entry point: asks for the grammar file and an optional output
// file, builds the LR(0) item sets and analysis table, then reads one sentence
// and prints the full report.
// Returns 1 when input cannot be read or the grammar file cannot be opened,
// 0 otherwise (including when the grammar is rejected).
int main() {
	printf("Input grammer file?\n");
	// The field width keeps the answer inside the 50-byte buffers.
	if (scanf("%49s", fname) != 1)
		return 1;
	printf("Write in file?(Y/N)\n");
	if (scanf("%49s", reply) != 1)
		return 1;
	if (reply[0] == 'Y' || reply[0] == 'y') {
		printf("Output file name?\n");
		if (scanf("%49s", outfname) != 1)
			return 1;
		writeFile = true;
	}
	grammerFile = fopen(fname, "r");
	// Test the handle itself; the address of the variable is never null.
	if (grammerFile == NULL) {
		printf("Cannot open grammer file: %s\n", fname);
		return 1;
	}
	getInput();// read the grammar into `grammer`
	fclose(grammerFile);
	grammerFile = NULL;
	getItemSet();// build the DFA
	if (isErr)
		return 0;
	getPredict();
	printf("Input the string?\n");
	getchar();// consume the newline left behind by the last scanf
	getline(cin, analyzedString);
	analyzeString();
	cout << grammer;
	// Keep the console window open until the user presses a key.
	getchar();
	getchar();
	return 0;
}
//begin d ; s end
//b c c d

测试文法一 t1.txt(产生式右部的各符号以空格隔开,非终结符以大写字母开头)

S'->E
E->a A
E->b B
A->c A
A->d
B->c B
B->d

句子(空格隔开):b c c d
运行结果:
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
测试文法二 t2.txt(产生式右部的各符号以空格隔开,非终结符以大写字母开头)

Program->Block
Program->CompoundStatement
Block->BlockHead ; CompoundTail
BlockHead->begin d
BlockHead->BlockHead ; d
CompoundTail->s end
CompoundTail->s ; CompoundTail
CompoundStatement->begin CompoundTail

句子(空格隔开):begin d ; s end
运行结果:
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

在这里插入图片描述

  • 5
    点赞
  • 45
    收藏
    觉得还不错? 一键收藏
  • 3
    评论
LR语法分析器是编译原理中的一种语法分析方法,它可以根据给定的文法规则,对输入的程序代码进行分析和解析。LR语法分析器使用自底向上的分析方法,通过构建一个状态机来识别输入的符号串是否符合给定的文法规则。 LR语法分析器的实现通常使用工具,如Bison(Yacc的GNU版本),它可以根据给定的文法规则自动生成相应的语法分析器。LR语法分析器的工作原理如下: 1. 构建LR分析表:根据给定的文法规则,LR语法分析器会构建一个分析表,该表记录了在不同状态下,对应不同输入符号的移进、规约或接受操作。 2. 状态转移:LR语法分析器通过状态转移来处理输入符号串。它从初始状态开始,根据当前状态和下一个输入符号,查找分析表中对应的操作,并执行相应的移进或规约操作。 3. 移进操作:当分析表在当前状态和下一个输入符号下给出移进动作时,LR语法分析器会将该终结符号及对应的新状态压入栈中,并读取下一个输入符号。 4. 规约操作:当栈顶的符号串构成某个产生式的右部(句柄)时,LR语法分析器会按该产生式进行规约:弹出栈顶的这些符号,压入产生式左部的非终结符,并根据GOTO表转移到新状态。 5. 接受操作:当输入符号串被完全分析并符合文法规则时,LR语法分析器会执行接受操作,表示输入符号串被成功地分析和解析。 通过使用LR语法分析器,可以对程序代码进行语法分析,并生成相应的抽象语法树(AST)。抽象语法树可以用于后续的语义分析和代码生成等编译过程。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值