编译原理--实验2 语法分析

最新推荐文章于 2024-05-30 20:06:09 发布

杨大熊的代码世界

最新推荐文章于 2024-05-30 20:06:09 发布

阅读量1.1w

点赞数 26

分类专栏： c++ 文章标签： c++

本文链接：https://blog.csdn.net/weixin_45775703/article/details/122471396

版权

c++ 专栏收录该内容

12 篇文章 1 订阅

订阅专栏

文章目录

前言
1.1实验目的
1.2 实验任务
1.3 实验内容
1.4 程序
1.5 总结

前言

编译原理课程实验的实验课内容—构造自顶向下的语法分析程序。通过本次实验，可以熟练掌握对于LL(1)分析表的构造方法。

1.1实验目的

（1）熟练掌握 LL(1)分析表的构造方法。

（2）掌握设计、编制和调试典型的语法分析程序，进一步掌握常用的语法分析方法。

1.2 实验任务

根据 LL(1)分析法自己编写一个语法分析程序，语言不限，文法不限。

1.3 实验内容

1.3.1 实验要求

你的程序应具有通用性，能够识别由词法分析得出的词法单元序列是否是给定文法的正确句子（程序），并能够输出分析过程和识别结果。

1.3.2 输入格式

输入一行源代码，通过一次扫描完成语法分析。

1.3.3 输出格式

实验要求通过标准输出打印程序的运行结果（包括 First 集、Follow 集、LL(1)分析表），此外，要求可以保存 LL(1)分析表。你的程序需要输出语法分析过程和相应的分析结果（即此串是否为 LL(1)文法的句子）。

1.3.4 样例

输入示例1：

在这里插入图片描述

输出示例1：

在这里插入图片描述

输入示例2：

在这里插入图片描述

输出示例2：

在这里插入图片描述

1.4 程序

1.4.1 程序流程图

在这里插入图片描述

1.4.2 算法描述

本 LL(1) 语法分析程序只针对一般的 LL(1) 文法：它不能把复杂的非 LL(1) 文法自动转化为 LL(1) 文法（实现过于复杂，时间也不够，本人目前无法实现），因此也无法分析 C-- 语言文法，这里只针对以大写字母表示非终结符的文法进行分析。另外，因为 Visual Studio 无法正常打印 ε 这个代表空串的字符，这里采用另一套约定：在分析表和分析过程中，$ 代表空串（ε），# 代表输入串的结束符（同时也是分析栈的栈底符号）。需要注意，计算 First/Follow 集的 FF 类沿用了参考代码的约定，与此相反：# 代表空串，$ 代表结束符。

程序的整体设计思路上，通过几个函数来分别实现计算并输出first集，计算并输出follow集，保存并输出LL(1)分析表，最后利用分析表求一个给定输入串的分析过程。

首先说说First集合的求法，大致的思路就是先从右部的第一个文法符号开始，如果是终结符直接加入first集并结束，如果是非终结符，那么就将该非终结符作为新的参数，递归调用求first的函数，直到遇到终结符。

对于follow集，首先看该符号后面是终结符、非终结符，还是位于产生式末尾：如果是终结符，直接将其加入follow集中；如果是非终结符，就要去求该非终结符的first集；如果位于末尾，则要用到左部非终结符的follow集，并通过多次重复调用求follow集的函数来得到完整的follow集（也就是直到follow集不再增大为止）。

对于利用分析表分析输入符号串的过程，我们可以用二维数组存放LL（1）分析表；在分析过程中，对于指针指向的输入符号，根据分析栈的栈顶符号，通过行和列查分析表，判断是用产生式进行推导还是进行匹配操作，同时输出整个分析过程。

本程序采用直接输入根据已知文法构造的分析表 M。

1.4.3 程序源码

#include<iostream>
#include<cstring>
#include<cstdio>
#include<algorithm>
#include<stack>
#include<set>
#include<map>
#include <Windows.h>
#include <fstream>
#include<string>
using namespace std;

// LL(1) parse table (the grammar is the left-recursion-free sample grammar
// from page 93 of the textbook).
// Rows follow the order of the non-terminals in H ("EMTNF"), columns follow
// the order of the terminals in L ("i+*()#"); analyze() indexes the table as
// LL1[findH(top)][findL(lookahead)].
// A cell holds the production to apply without its left-hand side ("->TM"),
// "->$" for an empty (epsilon) production, or "null" for an error entry.
//                          i       +       *      (        )      #
char LL1[50][50][100] = { {"->TM","null" ,"null","->TM" ,"null","null" },  // each cell is a string of up to 100 chars; 50 rows by 50 columns are reserved
						 {"null","->+TM" ,"null","null" ,"->$", "->$"},
						 {"->FN","null" ,"null" ,"->FN","null","null"  },
						 {"null","->$" ,"->*FN" ,"null"  ,"->$","->$"  },
						 {"->i" ,"null","null","->(E)" ,"null","null" } };
// Non-terminals, in parse-table row order.
char H[200] = "EMTNF";
// Terminals, in parse-table column order; '#' is the end-of-input marker.
char L[200] = "i+*()#";
// Parse stack used by analyze(); main() pushes '#' and 'E' before the analysis starts.
stack<char>cmp;

// Looks up the parse-table row of a non-terminal.
// Returns the position of `a` among the first five characters of H,
// or -1 when `a` is not one of them.
int findH(char a)
{
	int row = 0;
	while (row < 5)
	{
		if (H[row] == a)
			return row;
		++row;
	}
	return -1;
}
// Looks up the parse-table column of a terminal.
// Returns the position of `b` among the first six characters of L,
// or -1 when `b` is not one of them.
int findL(char b)
{
	int column = 0;
	while (column < 6)
	{
		if (L[column] == b)
			return column;
		++column;
	}
	return -1;
}
// Prints one error row of the analysis trace: the step number, the printable
// stack `p`, the input characters str[i] .. str[len - 1] and the error label.
// Always returns `len`; analyze() assigns it to its input index so that its
// loop terminates after the error has been reported.
int error(int i, int cnt, int len, char p[], char str[])
{
	printf("%d\t%s\t", cnt, p);
	int pos = i;
	while (pos < len)
	{
		cout << str[pos];
		++pos;
	}
	printf("\t报错\n");
	return len;
}

void analyze(char str[], int len)
{
	int cnt = 1; //输出Step专用
	int i = 0;
	char p[200] = "#E"; //输出stack专用，即输出分析栈
	int pindex = 2;
	printf("Step\tStack\tString\tRule\n");
	while (i < len)
	{
		int x, y;
		char ch = cmp.top();  //cmp是分析状态栈，ch是栈顶符号
		if (ch >= 'A'&&ch <= 'Z')
		{
			cmp.pop();  //出栈
			x = findH(ch);   //x是对应的非终结符位置
			y = findL(str[i]);  //y是对应的终结符位置
			if (x != -1 && y != -1)
			{
				int len2 = strlen(LL1[x][y]);  //记录这个字符串的长度
				if (strcmp(LL1[x][y], "null") == 0)
				{
					i = error(i, cnt, len, p, str);
					continue;
				}
				printf("%d\t%s\t", cnt, p);  //输出状态栈信息
				if (p[pindex - 1] != '#')
				{
					p[pindex] = '\0';  //字符串结束标志
					pindex--;
				}
				if (LL1[x][y][2] != '$')  //如果文法不是推出空
				{
					for (int q = len2 - 1; q > 1; q--)  
					{
						p[pindex++] = LL1[x][y][q];  //将候选产生式倒着装入分析栈中
						cmp.push(LL1[x][y][q]);  //装入分析栈
					}
				}
				else
				{
					p[pindex] = '\0';  //消除当前非终结符
					pindex--;
				}
				for (int q = i; q < len; q++)
				{
					cout << str[q];    //输出剩余输入串
				}
				printf("\t%c%s\n", ch, LL1[x][y]);  //输出所用到的产生式或匹配
			}
			else
			{
				i = error(i, cnt, len, p, str);
				continue;
				///未找到，报错
			}

		}
		else  //是终结符
		{
			if (ch == str[i])
			{
				cmp.pop();   //直接弹栈
				printf("%d\t%s\t", cnt, p);   //输出该终结符
				if (ch == '#'&&str[i] == '#')   //直接接受并返回
				{
					printf("#\t接受\n");
					return;
				}
				for (int q = i; q < len; q++)  //输出剩余输入串的信息
				{
					cout << str[q];
				}
				printf("\t%c匹配\n", ch);
				pindex--;
				p[pindex] = '\0';
				i++;
			}
			else
			{
				i = error(i, cnt, len, p, str);
				continue;
				///报错
			}
		}
		cnt++;
	}


}

// Computes and prints the First and Follow sets of a grammar read from a file.
// Within this class "#" stands for the empty string (see getOneFirst) and "$"
// is the marker that init() puts into the Follow set of the start symbol.
class FF {
public:
	string fileName = "productions.txt";   // text file containing the grammar rules
	set<string> productions; // the productions as read, one per line; std::set keeps them in ascending order
	map<string, set<string>> split_productions; // left-hand side -> its alternatives (right-hand side split at '|')
	set<string> Vt; // terminals
	set<string> Vn; // non-terminals
	map<string, set<string>> first; // First sets
	map<string, set<string>> follow; // Follow sets

	void init(); // reads the productions from the file
	void splitProductions(); // splits every production into left-hand side and alternatives
	void findVtAndVn(); // collects the terminals and non-terminals
	bool isVn(string s); // true if s is a known non-terminal
	bool isVt(string s); // true if s is a known terminal
	set<string> getOneFirst(string s); // computes the First set of a single non-terminal
	void getFirst(); // computes and prints the First sets of all non-terminals
	void getFollow(); // computes and prints the Follow sets of all non-terminals
	void get_follow_again(); // step 3 of the Follow computation (mainly non-terminals at the end of a production)
};

// Reads the grammar from `fileName`, one production per line, echoing every
// line to standard output and storing it in `productions`.
// The first character of the first line is taken as the start symbol and the
// marker "$" is put into its Follow set.
// If the file cannot be opened or contains no line at all, a message is
// written to standard error and nothing is stored.
void FF::init() {
	ifstream in(fileName);
	if (!in) {
		cerr << "无法打开文法文件：" << fileName << endl;
		return;
	}
	string line;
	if (!getline(in, line)) {
		// An empty file has no start symbol; do not store a bogus empty production.
		cerr << "文法文件为空：" << fileName << endl;
		return;
	}
	// The start symbol's Follow set receives the "$" marker.
	productions.insert(line);
	follow[line.substr(0, 1)].insert("$");
	cout << line << endl;
	while (getline(in, line)) {
		productions.insert(line);
		cout << line << endl;
	}
}

void FF::splitProductions() {  //分割产生式
	int position = 0;
	for (set<string>::iterator it = productions.begin(); it != productions.end(); it++) {
		string temp = *it;
		for (int i = 0; i < temp.length(); i++) {
			position = temp.find("->");  //找到->
			string s = temp.substr(0, position);  //截取->之前的
			string ss = temp.substr(position + 2);  //截取->之后的
			set<string>sss;
			string t;
			for (int j = 0; j < ss.length(); j++) {
				if (ss[j] == '|') {   //如果ss[j]=='|'，代表有新的产生式
					sss.insert(t);
					t = "";
				}
				else
				{
					t.append(ss.substr(j, 1));  
				}

			}
			sss.insert(t);
			split_productions.insert(pair<string, set<string>>(s, sss));  //插入的一个非终结符对应的所有文法
		}
	}

	//输出一下分割的元素
	for (map<string, set<string>>::iterator it = split_productions.begin(); it != split_productions.end(); it++) {
		cout << it->first << "    ";
		for (set<string>::iterator ii = it->second.begin(); ii != it->second.end(); ii++) {
			cout << *ii << "    ";
		}
		cout << endl;
	}
}

void FF::findVtAndVn() {
	for (set<string>::iterator it = productions.begin(); it != productions.end(); it++) {
		string temp = *it;
		for (int i = 0; i < temp.length(); i++) {
			if (temp[i] == '-' || temp[i] == '>' || temp[i] == '|')
				continue;
			//是大写字母
			if (temp[i] >= 'A' && temp[i] <= 'Z') {
				//后面带'
				if (temp[i + 1] == '\'') {  //这一步其实用不到，我把E'改成了M把T'改成了N
					Vn.insert(temp.substr(i, 2));
					i++;
				}
				else {
					Vn.insert(temp.substr(i, 1));
				}
			}
			//是终结符
			else
			{
				Vt.insert(temp.substr(i, 1));   //终结符直接插入
			}
		}
	}

	cout << "非终结符" << endl;
	for (set<string>::iterator it = Vn.begin(); it != Vn.end(); it++) {
		cout << *it << endl;
	}
	cout << endl;
	cout << "终结符" << endl;
	for (set<string>::iterator it = Vt.begin(); it != Vt.end(); it++) {
		cout << *it << endl;
	}
}
// Returns true when `s` is a member of the non-terminal set `Vn`.
bool FF::isVn(string s) {
	return Vn.count(s) != 0;
}
// Returns true when `s` is a member of the terminal set `Vt`.
bool FF::isVt(string s) {
	return Vt.count(s) != 0;
}
// Computes the First set of `s` by recursive descent over its alternatives,
// accumulates it in first[s] and returns a copy of first[s].
// "#" denotes the empty string: it is added for an alternative that is "#"
// itself, or when every symbol of an alternative can derive the empty string.
// NOTE(review): there is no guard against left-recursive grammars; a rule such
// as A->Aa makes this function call itself without end.
set<string> FF::getOneFirst(string s) {
	if (split_productions.count(s) != 0) {
		const set<string> alternatives = split_productions[s];
		for (const string& body : alternatives) {
			if (body == "#") {
				first[s].insert("#");
				continue;
			}
			bool bodyNullable = false;  // every symbol of this alternative derives "#"
			for (string::size_type pos = 0; pos < body.length(); pos++) {
				// A following apostrophe makes this a two-character non-terminal.
				const bool primed = pos + 1 < body.length() && body[pos + 1] == '\'';
				const string symbol = body.substr(pos, primed ? 2 : 1);
				bool symbolNullable = false;  // First(symbol) contains "#"
				if (primed || isVn(symbol)) {
					// Non-terminal: take over its First set; "#" is only noted
					// here and added at the end if the whole body is nullable.
					const set<string> inner = getOneFirst(symbol);
					for (const string& item : inner) {
						if (item == "#") {
							symbolNullable = true;
						}
						else {
							first[s].insert(item);
						}
					}
					if (primed) {
						pos++;  // step over the apostrophe
					}
				}
				else {
					// Terminal: it starts every string derived from this body.
					first[s].insert(symbol);
				}
				if (symbolNullable && pos == body.length() - 1) {
					bodyNullable = true;
				}
				if (!symbolNullable) {
					break;  // later symbols cannot contribute to First
				}
			}
			if (bodyNullable) {
				first[s].insert("#");
			}
		}
	}
	return first[s];
}
// Computes the First set of every left-hand side in `split_productions`
// and prints all entries of `first`, one symbol per line.
void FF::getFirst() {
	for (const auto& rule : split_productions) {
		getOneFirst(rule.first);
	}
	cout << "First集" << endl;
	for (const auto& entry : first) {
		cout << entry.first << ":  ";
		for (const string& item : entry.second) {
			cout << item << "    ";
		}
		cout << endl;
	}

}
void FF::getFollow() {  
	for (map<string, set<string>>::iterator it = split_productions.begin(); it != split_productions.end(); it++) {
		string left = it->first;
		set<string>right = it->second;
		for (set<string>::iterator ii = right.begin(); ii != right.end(); ii++) {
			string temp = *ii;

			for (int i = 0; i < temp.length(); i++) {
				if (isVt(temp.substr(i, 1))) {  //终结符没有follow集，跳过
					continue;
				}
				else if (i + 1 < temp.length() && temp[i + 1] == '\'') {//带有’的非终结符
					if (isVt(temp.substr(i + 2, 1))) {//非终结符后面是终结符
						follow[temp.substr(i, 2)].insert(temp.substr(i + 2, 1));
						i++;
					}
					else {//非终结符后面是非终结符s
						//把后面非终结符的first集ff加入follow集中
						string s;
						if (i + 3 < temp.length() && temp[i + 3] == '\'') {
							s = temp.substr(i + 2, 2);
						}
						else {
							s = temp.substr(i + 2, 1);
						}
						set<string> ff = first[s];
						for (set<string>::iterator nn = ff.begin(); nn != ff.end(); nn++) {
							if (*nn != "#")
								follow[temp.substr(i, 2)].insert(*nn);
						}
					}
				}
				else {//不带’的非终结符

					if (i + 1 < temp.length() && isVt(temp.substr(i + 1, 1))) {  //非终结符后面是终结符
						follow[temp.substr(i, 1)].insert(temp.substr(i + 1, 1));
						i++;
					}
					else {//非终结符后面是非终结符s
						//把后面非终结符的first集ff加入follow集中
						string s;
						if (i + 2 < temp.length() && temp[i + 2] == '\'') {
							s = temp.substr(i + 1, 2);
						}
						else {
							s = temp.substr(i + 1, 1);   //找到后面的非终结符
						}
						set<string> ff = first[s];   //把该非终结符的follow集加入
						for (set<string>::iterator nn = ff.begin(); nn != ff.end(); nn++) {
							if (*nn != "#")
								follow[temp.substr(i, 1)].insert(*nn);
						}
					}
				}
			}
		}
	}
	//这一个需要多进行几次，因为follow是不断增长的
	get_follow_again();
	get_follow_again();

	cout << "Follow集" << endl;
	for (map<string, set<string>>::iterator it = follow.begin(); it != follow.end(); it++) {
		cout << it->first << ":  ";
		for (set<string>::iterator ii = it->second.begin(); ii != it->second.end(); ii++)
		{
			cout << *ii << "    ";
		}
		cout << endl;
	}
}
void FF::get_follow_again() {
	for (map<string, set<string>>::iterator it = split_productions.begin(); it != split_productions.end(); it++) {
		string left = it->first;
		set<string>right = it->second;
		for (set<string>::iterator ii = right.begin(); ii != right.end(); ii++) {
			string temp = *ii;
			for (int j = temp.length() - 1; j > 0; j--) {  //倒着检查
				string now;
				if (temp[j] == '\'') {
					now = temp.substr(j - 1, 2);
					j--;
				}
				else now = temp.substr(j, 1);
				if (isVt(now)) {//产生式最后是终结符
					break;
				}
				else {//产生式最后是非终结符
					set<string>aa = follow[left];   //最后是非终结符，需要把左部非终结符的部分放到这里。 
					for (set<string>::iterator pp = aa.begin(); pp != aa.end(); pp++) {
						follow[now].insert(*pp);
					}
				}
				if (first[now].find("#") == first[now].end())
					break;
			}
		}
	}
}

// Program entry: prints the grammar, its symbols and the First/Follow sets,
// reads one sentence from standard input, prints the stored LL(1) table from
// "LL(1).txt" and finally runs the table-driven analysis on the sentence.
int main()
{
	FF ff;
	ff.init();
	ff.splitProductions();
	cout << endl;
	ff.findVtAndVn();
	cout << endl;
	ff.getFirst();
	cout << endl;
	ff.getFollow();
	cout << endl;
	cout << "请输入将要被分析的句子：" << endl;
	char str[200] = "";
	// Limit the extraction to the buffer size (at most 199 characters plus
	// the terminating NUL) so that a long input cannot overflow `str`.
	cin.width(sizeof(str));
	cin >> str;
	cout << endl;
	// The table is entered by hand rather than generated, so it is simply
	// echoed from the file it is stored in.
	cout << "LL(1)分析表：" << endl;
	ifstream infile("LL(1).txt");
	if (!infile)
	{
		cerr << "无法打开分析表文件 LL(1).txt" << endl;
	}
	string ans;
	// Testing getline itself avoids printing a spurious extra line at end of file.
	while (getline(infile, ans))
	{
		cout << ans << endl;
	}
	infile.close();
	cout << endl;
	int len = static_cast<int>(strlen(str));
	// Initial parse stack: end marker at the bottom, start symbol on top.
	cmp.push('#');
	cmp.push('E');
	// len + 1 lets analyze() also see the terminating NUL, so a sentence that
	// lacks the closing '#' ends in an error row instead of stopping silently.
	analyze(str, len + 1);
	return 0;
}

1.5 总结

在这之中我遇到最棘手的问题就是基本的数据结构搭建和first集合的求解算法，因为first集合要用到递归去求（递归真是永远的痛），虽然思路大概有，但是很久也没有敲出这个能运行的算法，最后还是靠着csdn上大佬的算法实现了这个功能。

参考文献：(40条消息) 编译原理-实验二-LL(1)语法分析程序的设计_wys5的博客-CSDN博客_编译原理实验ll1语法分析

杨大熊的代码世界

关注

26
点赞
踩
288

收藏

觉得还不错? 一键收藏
打赏
0
评论
编译原理--实验2 语法分析

文章目录前言1.1实验目的1.2 实验任务1.3 实验内容1.3.1 实验要求1.3.2 输入格式1.3.3 输出格式1.3.4 样例1.4 程序1.4.1 程序流程图1.4.2 算法描述1.4.3 程序源码1.5 总结前言编译原理课程实验的实验课内容—构造自顶向下的语法分析程序。通过本次实验，可以熟练掌握对于LL(1)分析表的构造方法。1.1实验目的（1）熟练掌握 LL(1)分析表的构造方法。（2）掌握设计、编制和调试典型的语法分析程序，进一步掌握常用的语法分析方法。1.2 实验任务根据
复制链接

扫一扫