编译原理程序段的单词识别

Frank_杨

已于 2024-01-05 11:11:55 修改

阅读量398

点赞数 8

文章标签： c# 开发语言

于 2024-01-05 11:11:37 首次发布

本文链接：https://blog.csdn.net/Frank_Yy03Aug/article/details/135404886

版权

实验名称

程序段的单词识别

实验目的

1.掌握根据DFA构造词法分析程序的方法；

2.解决词法分析程序构造的相关问题。

实验内容和要求

实验环境

VS 2022

算法设计思想

根据设定好的DFA设计程序：若DFA有n个状态，每个状态可接收k个终结符并转移到另一个状态，则设置n个case，每个case对应一个状态，并包含k条以上的分支语句。

字符以字符流接收处理，每接收一个待分析字符，则到达一个状态，进入该状态之后，先将该字符装入WORD[]集，然后再接收新字符，并且根据该字符，转入下一个状态，若该字符非预想的接受字符，则打包当前WORD[wordsum]（WORD[wordsum]为每次当前最新的一个单词），然后重新回到state0。

识别过程中关键字和保留字都先默认为标识符进行打包，待输出的时候再对标识符类的单词进行进一步区分，识别出关键字和保留字。具体过程为，输出时，如果遇到类别为标识符，则进入该分支，根据单词表，对关键字和保留字进行比较，如果相同，则输出该类关键字类别和单词，否则输出标识符类别和单词，其他则正常输出单词类别和单词。

主要问题与解决方法

状态分析是在一个while循环里嵌套的switch中进行的，字符用getchar()函数逐个接收。如何退出此循环？这里用到一个getout标记，默认为0；当接收到的字符为#号时，将getout置为1。退出switch、再次进入while循环时，第一条语句就判断是否退出，即if（getout）break；因此一旦接收到结束符#，getout被置为1，随后即退出while循环。
如何对空格和换行符进行处理。当接收到的字符为非预想接收的字符，则不做任何动作，直接接收下一个字符，即可跳过空格和换行符，以及更多其它非使用字符。

实验结果

体会、质疑、建议

代码中的奥妙，其乐无穷！！！

源代码

#include<cstdio>
#include<iostream>
#include<string>
using namespace std;
// Capacity of the WORD table below (number of token slots).
#define  N 100
// Forward declaration; recognize() is defined after main().
void  recognize();
// One recognized token: its text plus a numeric category code.
struct {		// token record
	string word;	// token text
	int state;	// category code assigned by recognize(): 1 identifier, 2 integer, 3 delimiter
}WORD[N];                     // table of tokens waiting to be printed
int main()
{
	// Entry point: run the recognizer once, then offer a minimal
	// "try again / exit" menu until the user stops.
	recognize();
	while (true) {
		cout << "1.Try again./2.Exit." << endl;
		cout << "Press:";
		int choice = 0;
		// The extraction result must be checked: when the stream is already at
		// end-of-input the target variable is left untouched, so relying on the
		// value alone could repeat a previous "1" forever.
		if (!(cin >> choice) || choice != 1) return 0;
		recognize();
	}
}

void recognize()       //程序段单词识别
{
	/***********************************主要识别分析部分*****************************************************/
	/***********************************输出自定义单词表*****************************************************/
	cout << "                                           单词类别" << endl;
	cout << "单词" << "    标识符" << "      整数" << "      界符" << "       var" << "     begin" << "       end" << "       str" << "   integer" << endl;
	cout << "类别" << "       1  " << "       2  " << "       3 " << "         4" << "        5 " << "         6" << "         7" << "       8" << endl;
	cout << "请输入要识别的程序段(以#号结束):" << endl;
	char now_char;				//取一个字符做处理
	int wordsum = 0;			//待输出单词总数
	now_char = getchar();		//开始取第一个字符
	int state=0,getout=0;		//state为到达状态，初始为0状态，getout作为退出while分析循环
	while (1) {
		if (getout)break;
		switch (state)
		{
		case 0:                   //0为初态
			if (now_char >= 'a' && now_char <= 'z' || now_char >= 'A' && now_char <= 'Z')state = 1;
			else if (now_char >= '0' && now_char <= '9')state = 2;
			else if (now_char == '+' || now_char == '-' || now_char == '=' || now_char == ':' || now_char == ';' || now_char == ',' || now_char == '\'')state = 3;
			else {				//如果啥也不是，就继续取下一个字符做分析
				now_char = getchar();
				if (now_char == '#')getout = 1;
				state = 0;
			}
			break;
		
		case 1:               //1为标识符
			WORD[wordsum].word += now_char;		//入包
			now_char = getchar();				//接受新字符
			if (now_char >= 'a' && now_char <= 'z' || now_char >= 'A' && now_char <= 'Z'||now_char>='0'&&now_char<='9')state = 1;
			else {								//如果接收到的字符为非上类别，则打包
				WORD[wordsum].state = 1;		//此处先不管是否有关键字，统一先置为标识符
				wordsum++;						//打包完单词总数加一
				if (now_char == '#')getout = 1;
				state = 0;
			}
			break;
		
		case 2:              //2为整数
			WORD[wordsum].word += now_char;	   //入包
			now_char = getchar();			   //接受新字符
			if (now_char >= '0' && now_char <= '9')state = 2;
			else{							   //分析同上state为1时打包过程
				WORD[wordsum].state = 2;
				wordsum++;
				if (now_char == '#')getout = 1;
				state = 0;
			}
			break;
		
		case 3:             //3为{ ; , ' :  = + - } 界符
			WORD[wordsum].word = now_char;		//界符，直接打包
			WORD[wordsum].state = 3;			//
			wordsum++;
			now_char = getchar();
			if (now_char == '#')getout = 1;
			state = 0;
		}
	}
	/***********************************输出单词总集*****************************************************/
	for (int i = 0;i < wordsum;i++)					//4为var，5为begin，6为integer,7为str，8为integer
		if (WORD[i].state == 1) {
			if (!WORD[i].word.compare("var"))cout << "<" << WORD[i].word << ",4>" << endl;
			else if (!WORD[i].word.compare("begin"))cout << "<" << WORD[i].word << ",5>" << endl;
			else if (!WORD[i].word.compare("end"))cout << "<" << WORD[i].word << ",6>" << endl;
			else if (!WORD[i].word.compare("str"))cout << "<" << WORD[i].word << ",7>" << endl;
			else if (!WORD[i].word.compare("integer"))cout << "<" << WORD[i].word << ",8>" << endl;
			else cout << "<" << WORD[i].word << ",1>" << endl;
		}
		else cout << "<" << WORD[i].word << "," << WORD[i].state << ">" << endl;
}
/* 案例数据
var name:str;
age,a,b;
integer;
name='lanyan';
begin=12;
b=14;
age=a+h;
end;
#
*/