编译原理C++实现LR0语法分析器(课程设计报告)

环境:VS2019debug模式

准备:在代码所在目录下新建三个文件:fsym.txt 存储终结符,各符号以空格分隔并以 end 结尾(注意其中要包含结束符 #);nfsym.txt 存储非终结符,同样以 end 结尾;expression.txt 存储增广文法,每条产生式的格式为“左部 空格 -> 空格 右部”(例如 A -> B)。

例如:

fsym.txt中存储

a b c d # end

nfsym.txt中存储

E A B end

expression.txt中存储

S' -> E
E -> aA
E -> bB
A -> cA
A -> d
B -> cB
B -> d

注意:输入以#结尾的待分析串。

由于报告中的部分图片无法上传,已将其放到百度网盘中,需要的读者可自行下载参考(链接永久有效)。

链接:https://pan.baidu.com/s/1CaAIBcoEzjcKtPYnl3XGQQ
提取码:8lns

程序的代码设计及注释

#include<cstdlib>
#include<cstring>
#include<fstream>
#include<iomanip>
#include<iostream>
#include<map>
#include<stack>
#include<string>
#include<utility>
#include<vector>
using namespace std;

// Terminal symbols loaded by readFile() from "fsym.txt"; only the first flag1 entries are in use.
string fsym[200];
// Number of terminal symbols currently stored in fsym.
int flag1 = 0;
// Non-terminal symbols loaded by readFile() from "nfsym.txt"; only the first flag2 entries are in use.
string nfsym[200];
// Number of non-terminal symbols currently stored in nfsym.
int flag2 = 0;

// One grammar production. The same type is reused for LR(0) items: in an item,
// `right` additionally contains one '*' character that marks the dot position.
struct production
{
	string left;   // left-hand side symbol
	string right;  // right-hand side; the code inspects it one character per symbol
};
// Productions of the augmented grammar in file order; p[0] is used as the start production.
vector<production*>p;

// One state (item set) of the LR(0) automaton.
// Every scalar member has a default so that both `new DFA` and `new DFA()`
// produce a state with defined values; pre == -1 means "no predecessor".
struct DFA
{
	int id = -1;           // index of this state in itemState
	vector<production*>p;  // items of the state; p[0] is the item the state was created from
	int pre = -1;          // state this one was first reached from, -1 if none
	char prea = '\0';      // symbol on the edge from `pre` to this state
	vector<char> nexta;    // symbols of the outgoing edges, parallel to nextn
	vector<int>nextn;      // nextn[k] is the state reached through nexta[k]
};
// All automaton states; a state's position is its state number.
vector<DFA*>itemState;

map<int, map<string, string>>ACTION;// ACTION table: state -> terminal -> "S<n>", "r<n>", "acc" or "error"
map<int, map<string, int>>GOTO;     // GOTO table: state -> non-terminal -> target state, -1 when empty

stack<string>symbol;// symbol stack used by analyze()
stack<int>status; // state stack used by analyze()

void readFile()
{
	ifstream infile;
	infile.open("fsym.txt", ios::in);
	if (!infile) 
	{
		cout << "不能打开终结符文件,请将在程序存储路径下建立“fsym.txt”存储终结符" << endl;
		system("pause");
		exit(-1);
	}
	string tmp;
	while (!infile.eof())
	{
		infile >> tmp;
		if (tmp == "end")
			break;
		fsym[flag1++] = tmp;
	}
	cout << "读入的终结符如下:" << endl;
	for (int i = 0; i < flag1; i++)
		cout << fsym[i] << " ";
	cout << endl;
	infile.close();//从文件读入终结符
	infile.open("nfsym.txt", ios::in);
	if (!infile)
	{
		cout << "不能打开非终结符文件,请将在程序存储路径下建立“nfsym.txt”存储终结符" << endl;
		system("pause");
		exit(-1);
	}
	while (!infile.eof())
	{
		infile >> tmp;
		if (tmp == "end")
			break;
		nfsym[flag2++] = tmp;
	}
	cout << "读入的非终结符如下:" << endl;
	for (int i = 0; i < flag2; i++)
		cout << nfsym[i] << " ";
	cout << endl;
	infile.close();//从文件读入非终结符
	infile.open("expression.txt", ios::in);
	if (!infile)
	{
		cout << "不能打开表达式文件,请将在程序存储路径下建立“expression.txt”存储终结符" << endl;
		system("pause");
		exit(-1);
	}
	while (!infile.eof()) {/*读到产生式的左部,  右部*/

		production* tmp = new production();
		string s1, s2, s3;
		infile >> s1 >> s2 >> s3;
		tmp->left = s1;

		tmp->right = s3;
		p.push_back(tmp);
	}
	cout << "从文件读入的文法式如下:" << endl;
	for (int i = 0; i < p.size(); i++)
	{
		cout << p[i]->left << " -> " << p[i]->right << endl;
	}
}

// Returns the index of the first '*' (the item dot) in str, or -1 when str
// contains no '*'.
int getDotPos(const string& str)
{
	int index = 0;
	for (char ch : str)
	{
		if (ch == '*')
			return index;
		++index;
	}
	return -1;
}

// Returns false when the pair (x, r) already occurs at a common index of
// `left` and `right`, and true otherwise. Indices 0 .. left.size()-1 are examined.
bool check(string &x,string &r, vector<string> &left, vector<string> &right)
{
	for (size_t k = 0; k != left.size(); ++k)
	{
		const bool alreadyPresent = (left[k] == x) && (right[k] == r);
		if (alreadyPresent)
			return false;
	}
	return true;
}

void closesure(vector<string> &left, vector<string> &right,string x)//求闭包

{
	int sz = p.size();
	for (int i = 0; i < sz; i++) 
	{
		if (x == p[i]->left)//找到x所在的产生式子,加入新状态下的项目
		{
			string r= "*" + p[i]->right;
			if (check(x, r,left,right))//与已有的闭包中的某个表达式不重复
			{
			left.push_back(x);
			right.push_back(r);
			if (p[i]->right[0] >= 'A' && p[i]->right[0] <= 'Z') 
			{
				string nextArg = "";
				nextArg += p[i]->right[0];
				closesure(left, right, nextArg);//递归扩展
			}
			}
		}
	}
	return;
}

// Extends state x from its first item x->p[0]: when the character right after
// the dot is an uppercase letter, every item that closesure() yields for that
// character is allocated and appended to x->p. Otherwise x is left unchanged.
void extendFirstState(DFA *&x)
{
	const string kernel = x->p[0]->right;// e.g. "*E" for the item A -> *E
	const int len = kernel.length();
	const int dot = getDotPos(kernel);
	if (!(dot < len - 1))
		return;// the dot is the last character, nothing follows it
	const char next = kernel[dot + 1];
	if (next < 'A' || next > 'Z')
		return;// the symbol after the dot is not an uppercase letter
	vector<string> lhsList;
	vector<string> rhsList;
	closesure(lhsList, rhsList, string(1, next));
	const int added = lhsList.size();
	for (int k = 0; k < added; k++)
	{
		production* item = new production;
		item->left = lhsList[k];
		item->right = rhsList[k];
		x->p.push_back(item);
	}
}

void initFirstState()
{
	int id = itemState.size();
	DFA* dfa = new DFA;
	dfa->id = id;
	production* pro = new production;
	pro->left = p[0]->left;	
	pro->right = "*" + p[0]->right;
	dfa->p.push_back(pro);
	itemState.push_back(dfa);
	extendFirstState(dfa);
}

// Returns true when the last character of item string str is not the dot
// ('*'), i.e. the dot can still be advanced. An empty string returns false.
bool canMoveDot(const string& str) {
	// The emptiness test prevents indexing str[-1].
	return !str.empty() && str[str.length() - 1] != '*';
}

// Advances the dot ('*') in item string str by one symbol, in place:
// "a*bc" becomes "ab*c". The string is left untouched when it contains no dot
// or when the dot is already the last character.
void moveDot(string& str)
{
	const string::size_type dot = str.find('*');
	if (dot == string::npos || dot + 1 >= str.length())
		return;
	// Moving the dot across one symbol is a swap with its right neighbour.
	std::swap(str[dot], str[dot + 1]);
}

void createItem(string lf, string rg) {//创建一个新状态,新状态的首行是lf -> (rg右移一个黑点的串
	DFA* tmp = new DFA();
	production* p = new production;
	tmp->id = itemState.size();
	moveDot(rg);//改变右部点位置再压入右部
	p->left = lf;
	p->right = rg;
	tmp->p.push_back(p);
	itemState.push_back(tmp);//加入集合
}

int  excludeDupli()//去掉重复的状态
{
	int sz = itemState.size();
	for (int i = 0; i < sz - 1; i++) {
		if (itemState[i]->p[0]->left == itemState[sz - 1]->p[0]->left && itemState[i]->p[0]->right == itemState[sz - 1]->p[0]->right)
		{
			itemState.pop_back(); 
			{
				return i;
			}
		}
	}
	return -1;
}

// Extends state x from its first item: when the symbol after the dot is an
// uppercase letter, the closure items for it are appended to x->p.
// This is the same operation extendFirstState() performs, so the work is
// delegated instead of keeping a second copy of the same code.
void extendFirstLine(DFA*& x)
{
	extendFirstState(x);
}

void checkItem() 
{
	int f=0,f1=0;
	cout << "----------检查所有的状态------------------" << endl;
	int sz = itemState.size();
	for (int i = 0; i < sz; i++) {
		cout << "I" << i << endl;
		for (int j = 0; j < itemState[i]->p.size(); j++) {
			cout << itemState[i]->p[j]->left << "->" << itemState[i]->p[j]->right << endl;
		}
		cout << "------------------" << endl;
	}
	sz = itemState.size();
	for (int j = 0; j < sz; j++)
	{
		if (itemState[j]->p.size() != 1)
		{
			for (int k = 0; k < itemState[j]->p.size(); k++)
			{
				int len = itemState[j]->p[k]->right.length();
				if (itemState[j]->p[k]->right[len - 1] == '*')
					f++;
			}
			if (f == 0)
				cout << "项目I" << j << "不存在冲突" << endl;
			else if (f > 0 && f < itemState[j]->p.size())
			{
			cout << "项目I" << j << "存在移进规约冲突" << endl;
			f1++;
			}
			else
			{
				cout<< "项目I" << j << "存在规约规约冲突" << endl;
				f1++;
			}		
		}
			
	}
	if (f1 == 0)
		cout << "此文法是LR0文法" << endl;
	else
	{
		cout << "此文法不是LR0文法,请在程序目录下修改“expression.txt”" << endl;
		exit(-1);
	}
}

void addItem() //增加一个新的状态
{
	for (int i = 0; i < itemState.size(); i++)
	{	
		int t=0,flag=0;
		for (int j = 0; j < itemState[i]->p.size(); j++) 
		{
			string rg = itemState[i]->p[j]->right;
			if (!canMoveDot(rg))//不可移动黑点了
				continue;
			createItem(itemState[i]->p[j]->left, itemState[i]->p[j]->right);//黑点可以移动则创建一个新的项目
			int dup = excludeDupli();//若有重复则去除重复的状态并返回重复状态的编号,若无重复则返回-1
			if (dup == -1) //状态不是重复的
			{
				extendFirstLine(itemState[itemState.size() - 1]);//用第一行项目扩展
				int dotPos = getDotPos(rg);
				itemState[i]->nextn.push_back( itemState.size() - 1);//记录状态i到状态itemSet.size()-1经过的路径
				itemState[i]->nexta.push_back(rg[dotPos+1]);
				itemState[itemState.size() - 1]->pre = i;
				itemState[itemState.size() - 1]->prea = rg[getDotPos(rg)+1];
				for (int b=0;b<itemState.size()-1;b++)
				{
					if (itemState[b]->pre == itemState[itemState.size() - 1]->pre && itemState[b]->prea == itemState[itemState.size() - 1]->prea)
					{
						for (int k = 0; k < itemState[j]->p.size(); k++)
						{
							itemState[b]->p.push_back(itemState[itemState.size() - 1]->p[k]);
						}
					itemState.pop_back();
					flag = 1;
					break;
					}	
				}
				if (flag == 1)
				{
					itemState[i]->nextn.pop_back();//记录状态i到状态itemSet.size()-1经过的路径
					itemState[i]->nexta.pop_back();
					flag = 0;
				}

			}
			else //状态是和以前的重复的
			{
				int dotPos = getDotPos(rg);
				itemState[i]->nextn.push_back( dup);
				itemState[i]->nexta.push_back(rg[getDotPos(rg)+1]);
			}
		}	
	}
	return;
}

// Returns a copy of str with every '*' character removed, so that an item's
// right side can be compared with the right side of a grammar production.
string deleteDot(const string& str)
{
	string stripped;
	for (char ch : str)
	{
		if (ch == '*')
			continue;
		stripped.push_back(ch);
	}
	return stripped;
}

int address(const string& left, const string& right) 
{
	int sz = p.size();
	for (int i = 1; i < sz; i++) {
		if (p[i]->left == left && p[i]->right == right)
			return i;
	}
	return -1;
}

void initTable() //填写ACITON表和GOTO表
{
	for (int i = 0; i < itemState.size(); i++) 
	{
		for (int j = 0; j < flag1; j++)
		{
			ACTION[i][fsym[j]] = "error";
		}
		for (int j = 0; j < flag2; j++) 
		{
			GOTO[i][nfsym[j]] = -1;	//GOTO表全部为-1
		}

	}
	int sz = itemState.size();
	for (int i = 0; i < sz; i++) //先填写移进关系,比如s1, s2到ACTION表和GOTO表
	{
		for (int it = 0; it != itemState[i]->nexta.size(); it++) 
		{
			if (itemState[i]->nexta[it] >= 'a' && itemState[i]->nexta[it] <= 'z') //是移进关系
			{
				string tmp = "";
				tmp += itemState[i]->nexta[it];
				int status = itemState[i]->nextn[it];
				cout << "状态" << i << " 经过" << itemState[i]->nexta[it] << " 到达状态" << status << endl;
				ACTION[i][tmp] = "S" + to_string(status);
			}
			else //规约关系
			{
				string tmp = "";
				tmp += itemState[i]->nexta[it];
				int status = itemState[i]->nextn[it];
				cout << "状态" << i << " 经过" << itemState[i]->nexta[it] << " 到达状态" << status << endl;
				GOTO[i][tmp] = status;
			}
		}
	}
	//找到并填写表的acc
	for (int i = 0; i < sz; i++)
	{
			string rg = itemState[i]->p[0]->right;
			int len = rg.length();
			//cout << rg << endl;
			if (itemState[i]->p.size()==1&& len == 2 && rg[0] == p[0]->right[0] && rg[1] =='*')
			{
			ACTION[i]["#"] = "acc";
			break;
			}
				
	}
	for (int i = 0; i < sz; i++) 
	{
		if (itemState[i]->p.size() == 1 && ACTION[i]["#"] != "acc")
		{//规约项目并且不是acc状态
			string lf = itemState[i]->p[0]->left;
			string rg = itemState[i]->p[0]->right;
			rg = deleteDot(rg);
			int index = address(lf, rg);
			for (int j = 0; j < flag1; j++) {
				ACTION[i][fsym[j]] = "r" + to_string(index+1);
			}
		}
	}
	
}

// Prints the ACTION and GOTO tables: a header row with one column per terminal
// in fsym followed by one per non-terminal in nfsym, then one row per state.
// Every cell is left-aligned in a field of width 6 and followed by a tab.
void checkTable()
{
	cout << "****************************************************" << endl;
	cout << "ACTION表没有数据的用error填充,GOTO表没有数据的用-1填充" << endl;
	cout << "****************************************************" << endl;
	cout << "-----------ACTION表和GOTO表如下---------------------------" << endl;

	// The alignment flag stays set on the stream, so it is set once up front.
	cout << setiosflags(ios::left);

	cout << setw(6) << "状态" << "	";
	for (int col = 0; col < flag1; col++)
		cout << setw(6) << fsym[col] << "	";
	for (int col = 0; col < flag2; col++)
		cout << setw(6) << nfsym[col] << "	";
	cout << endl;

	for (int row = 0; row < itemState.size(); row++)
	{
		cout << setw(6) << row << "	";
		for (int col = 0; col < flag1; col++)
			cout << setw(6) << ACTION[row][fsym[col]] << "	";
		for (int col = 0; col < flag2; col++)
			cout << setw(6) << GOTO[row][nfsym[col]] << "	";
		cout << endl;
	}

	cout << "******************以上为GOTO表和ACTION表*******************" << endl;
}

// Renders a state stack as text, bottom element first, each number preceded by
// one space (e.g. " 0 2 5"). The stack is taken by value, so the caller's
// stack is not modified.
string outStatusStack(stack<int>status) {
	string text;
	for (; !status.empty(); status.pop())
	{
		const long long value = status.top();
		// Elements come off top-first, so each one is placed in front.
		text.insert(0, " " + to_string(value));
	}
	return text;
}

// Renders a symbol stack as text by concatenating its elements, bottom element
// first, without separators. The stack is taken by value, so the caller's
// stack is not modified.
string outSymbolStack(stack<string>symbol) {
	string text;
	for (; !symbol.empty(); symbol.pop())
	{
		// Elements come off top-first, so each one is placed in front.
		text.insert(0, symbol.top());
	}
	return text;
}

// Returns the part of str that starts at index p (the character at p is
// included). The result is empty when p is at or past the end of str.
string getStr(const string& str, const int& p)
{
	const int length = str.length();
	string tail;
	if (p < length)
		tail.assign(str.begin() + p, str.end());
	return tail;
}

bool analyze(const string& str) {
	status.push(0);
	symbol.push("#");
	cout << setw(16) << setiosflags(ios::left) << "状态栈" << "	" 
		<<setw(16) << setiosflags(ios::left) << "符号栈" << "	"
		<<setw(16) << setiosflags(ios::left) << "输入串" << "	" 
		<<setw(16) << setiosflags(ios::left) << "动作" << "	" << endl;

		int length = str.length();
		int now = 0;

	while (now < length) {
		//输出表头
		cout << setw(16) << setiosflags(ios::left) << outStatusStack(status) << "	"
			<< setw(16) << setiosflags(ios::left) << outSymbolStack(symbol) << "	"
			<< setw(16) << setiosflags(ios::left) << getStr(str, now) << "	";
		int topStatus = status.top();//状态栈顶
		string symb = "";
		symb += str[now];//输入串当前符号,比较状态栈栈顶topStatus和当前输入符号对比
		if (ACTION[topStatus][symb] == "acc") 
		{
			cout << "acc 分析成功" << endl;
			return true;
		}
		if (ACTION[topStatus][symb] == "error") {//ACTION表找不到分析失败

			cout << "分析失败" << endl;
			return false;
		}

		if (ACTION[topStatus][symb][0] == 'S')
		{//是移进关系
			string tmp;
			for (int i = 1; i < ACTION[topStatus][symb].length(); i++)
			{
				tmp += ACTION[topStatus][symb][i];
			}
			int st = std::stoi(tmp);//获得查ACITON表之后的状态
			status.push(st);//状态入状态栈
			symbol.push(symb);//符号入符号栈
			now++;//输入串指针右移
			cout << "ACTION[" << topStatus << "]["
				<< symb << "]=" << ACTION[topStatus][symb] << "即状态" << st << "入栈" << endl;
			continue;
		}
		else 
		{//是规约关系,获得第num条产生式
			string tmp="";
			for (int i = 1; i < ACTION[topStatus][symb].length(); i++)
			{
				tmp += ACTION[topStatus][symb][i];	
			}
			int num = std::stoi(tmp); 
			num -= 1;
			string right = p[num]->right;//第num条产生式右部的长度
			for (int i = 0; i < right.size(); i++)
			{//同时出栈
				symbol.pop();
				status.pop();
			}
			symbol.push(p[num]->left);//产生式左部入符号栈
			//查GOTO表
			int newStatus = GOTO[status.top()][symbol.top()];

			if (newStatus == -1)
				return false;

			cout << "r" << num+1 << ": " << "用" << p[num]->left << "->" << p[num]->right << "规约且"
				<< "GOTO(" << status.top() << "," << symbol.top() << ")=";
			cout << newStatus << "入栈" << endl;
			//把查GOTO表得到的状态入栈
			status.push(newStatus);
			cout << endl;
			continue;
		}
	}
	return false;
}

// Entry point: loads the grammar, builds the LR(0) automaton and its tables,
// then repeatedly reads an input string and reports whether analyze() accepts
// it. The loop ends when the word "end" is entered or standard input is
// exhausted.
int main()
{
	readFile();
	cout << endl;
	initFirstState();
	addItem();
	checkItem();
	initTable();// fill the ACTION and GOTO tables
	checkTable();
	string str;
	while (true)
	{
		cout << "输入要分析的串,并以#键结束,退出输入end" << endl;
		// A failed read (EOF or closed stdin) leaves str unchanged; without
		// this test the loop would re-analyse the previous string forever.
		if (!(cin >> str) || str == "end")
			break;
		if (analyze(str))
			cout << "输入串文法正确" << endl;
		else
			cout << "输入串文法不正确" << endl;
		cout << "------------------------------------------" << endl;
		status = stack<int>();// reset the state stack
		symbol = stack<string>();// reset the symbol stack
	}
	return 0;
}

 

  • 4
    点赞
  • 42
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
LR语法分析器是一种自底向上的语法分析器,其实现算法比较复杂。下面是一个简单的Python实现,以LR(0)语法分析器为例。

首先,我们需要定义一些数据结构来表示语法规则和状态集合。我们可以用一个字典来表示语法规则,其键是非终结符,值是由该非终结符推导出的终结符和非终结符组成的列表。我们还需要一个类来表示状态,每个状态包含一个项目集合和一个转移函数集合。

```python
class State:
    def __init__(self, items):
        self.items = set(items)
        self.transitions = {}

class LR0Parser:
    def __init__(self, grammar):
        self.grammar = grammar
        self.states = []
        self.build_states()
```

接下来,我们需要实现一个函数来计算项目集闭包。给定一个项目集合,它应该返回包含该集合所有项目以及可以从这些项目推导出的其他项目的集合。

```python
def closure(self, items):
    closure = set(items)
    while True:
        new_items = set()
        for item in closure:
            symbol = item.symbol_after_dot()
            if symbol in self.grammar.nonterminals:
                for rule in self.grammar.rules[symbol]:
                    new_item = Item(symbol, [], 0)
                    if new_item not in closure:
                        new_items.add(new_item)
        if not new_items:
            return closure
        closure.update(new_items)
```

然后,我们需要实现一个函数来计算从一个项目集合出发的所有转移。对于每个终结符和非终结符,该函数应该返回一个新的状态和一个转移函数,这个函数将输入符号移动到项目符号之后。

```python
def goto(self, items, symbol):
    goto = set()
    for item in items:
        if item.symbol_after_dot() == symbol:
            new_item = Item(item.nonterminal, item.production, item.dot + 1)
            goto.add(new_item)
    return State(self.closure(goto))
```

接下来,我们可以实现一个函数来构建所有状态。我们从文法的开始符号开始,计算其闭包,并将其标记为初始状态。然后,我们对于每个状态和每个符号,计算其转移,并将新状态添加到状态集合。

```python
def build_states(self):
    start_rule = Rule(self.grammar.start, [self.grammar.start_symbol])
    start_item = Item(start_rule.nonterminal, start_rule.production, 0)
    initial_state = State(self.closure([start_item]))
    self.states.append(initial_state)
    unprocessed_states = [initial_state]
    while unprocessed_states:
        state = unprocessed_states.pop()
        for symbol in self.grammar.terminals + self.grammar.nonterminals:
            next_state = self.goto(state.items, symbol)
            if next_state.items and next_state not in self.states:
                unprocessed_states.append(next_state)
                self.states.append(next_state)
            state.transitions[symbol] = next_state
```

最后,我们可以实现一个函数来解析输入。我们从初始状态开始,读入输入符号并使用转移函数移动到下一个状态,直到我们达到接受状态或无法继续移动。

```python
def parse(self, input):
    stack = [self.states[0]]
    for symbol in input:
        state = stack[-1]
        if symbol in state.transitions:
            stack.append(state.transitions[symbol])
        else:
            return False
    return True if stack[-1] in self.accept_states else False
```

这就是一个简单的LR(0)语法分析器的Python实现。当然,这只是一个基础版本,可以根据需要进行更改和扩展。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值