编译原理词法分析(正规式转NFA)

【问题描述】

正规表达式→NFA问题的一种描述是:
编写一个程序,输入一个正规表达式,输出与该正规式等价的有穷自动机(NFA)。

【基本要求】

设置FA初始状态X,终态Y,过程态用数字表示:0 1 2 3………

【测试用例】

测试数据:

(a|b)*abb

输出结果应为:

X X-~->3
Y
0 0-a->1
1 1-b->2
2 2-b->Y
3 3-~->0 3-a->3 3-b->3

【解决步骤】

正规式转NFA方法步骤:

  1. 判断一下正规式有没有错误,括号对不对,并添加符号“+”
  2. 运算符的优先级为:*>|>+
  3. 根据符号优先级,把中缀表达式转后缀表达式便于计算
  4. 根据后缀表达式依次构建图。有三种运算:*、|、+

图的构建过程:

(1) 定义边结构体用来储存边:edge

// A single NFA transition.
struct edge {
	int start;    // index of the state the transition leaves
	int end;      // index of the state the transition enters
	char accept;  // symbol carried by the transition
};

(2) 定义一个类用来表示图结构:grup

// One NFA fragment; a larger NFA is assembled by combining smaller fragments.
class grup {
public:
	vector<edge> edges;  // transitions owned by this fragment
	int stateCount;      // number of states
	int StartState;      // index of the start state
	int EndState;        // index of the end state

	// A fresh fragment has two states: start state 0 and end state 1.
	grup() : stateCount(2), StartState(0), EndState(1) {}
};

(3) 构建出的图结构中,状态是从0开始编号的。构建过程:以(a|b)*abb为例

  • 输入字符串添加+号,并判断是否合法:
    算法:

    • 定义一个栈s用来匹配括号,判断括号是否符合条件。
      从左往右扫描字符串,如果是字母或数字,则放入输出表达式后
      然后判断后一个是什么符号判断是否要加上+号
      对于如下几种情况需要加上+:AA、A(、*A、*(、)A、)(。
      完成后(a|b)*abb变为:(a|b)*+a+b+b
  • 中缀表达式转后缀表达式
    算法:

    • 定义一个栈s,从左往右扫描表达式,
      遇见字母或数字拼接在输出表达式后
      遇见“(”压入栈中
      遇见“)”弹出栈中元素,直到遇到了“(”
      遇到“*、|、+”判断栈顶元素的优先级,把比自己优先级高或等的弹出去
      然后自己再压栈。
      扫描完成后再把栈中元素依次弹出,拼接在输出字符串后
      完成后(a|b)*+a+b+b应该变为:ab|*a+b+b+
      核心算法:
// Converts an infix regular expression, in which concatenation is already
// written as '+', into postfix form.
//
// Letters and digits are operands and are copied straight to the output.
// Operator precedence is '*' > '|' > '+'; operators of equal or higher
// precedence are popped before a '|' or '+' is pushed. Any other character
// is ignored.
//
// @param text infix expression
// @return the postfix expression
string change_text(string text) {
	// Letters and digits are the operand alphabet.
	auto isOperand = [](char c) {
		return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
	};
	// Binding strength of an operator; 0 for '(' so that it is never popped
	// by an operator.
	auto rank = [](char op) -> int {
		switch (op) {
		case '*': return 3;
		case '|': return 2;
		case '+': return 1;
		default:  return 0;
		}
	};

	stack<char> ops;
	string postfix;
	for (char c : text) {
		if (isOperand(c)) {
			postfix += c;
		}
		else if (c == '(' || c == '*') {
			// '*' is pushed without popping anything: it binds tightest.
			ops.push(c);
		}
		else if (c == ')') {
			// Flush operators back to the matching '('. The emptiness checks
			// keep an unmatched ')' from reading an empty stack.
			while (!ops.empty() && ops.top() != '(') {
				postfix += ops.top();
				ops.pop();
			}
			if (!ops.empty()) {
				ops.pop();
			}
		}
		else if (c == '|' || c == '+') {
			while (!ops.empty() && rank(ops.top()) >= rank(c)) {
				postfix += ops.top();
				ops.pop();
			}
			ops.push(c);
		}
	}
	// Whatever is still pending goes out in stack order.
	while (!ops.empty()) {
		postfix += ops.top();
		ops.pop();
	}
	return postfix;
}

  • 构建图结构:
    算法:
    • 定义一个栈用来储存图结构,从左向右扫描后缀表达式
      如果遇到的是字母或数字,构建一个如下状态的图:一条边两个状态
      1111

压入栈中
如果遇到运算符,分三种情况:“*、|、+”
遇到“*”,栈顶元素出栈
1111111

遇到“+”
在这里插入图片描述

遇到“|”
在这里插入图片描述

  • 核心算法:
getNFA(string text) {
	stack<grup> s;
	for (int i = 0; i < int(text.length()); i++) {
		if ((text[i] <= 'z' && text[i] >= 'a') || (text[i] <= 'Z' && text[i] >= 'A')||(text[i] <= '9' && text[i] >= '0')) {
			grup tempG;
			edge tempE;
			tempE.accept = text[i];
			tempE.start = 0;
			tempE.end = 1;
			tempG.edges.push_back(tempE);
			s.push(tempG);
		}
		else {
			if (text[i] == '+') {
				grup tempG2 = s.top();
				s.pop();
				grup tempG1 = s.top();
				s.pop();
				//图和图合成
				grup tempG = tempG1;
				tempG.stateCount = tempG1.stateCount + tempG2.stateCount-1;
				tempG.StartState = 0;
				tempG.EndState = tempG.stateCount - 1;
				//获取后一条边
				for (int i = 0; i < int(tempG2.edges.size());i++) {
					edge e2 = tempG2.edges[i];
					e2.start += tempG1.stateCount-1;
					e2.end += tempG1.stateCount-1;
					tempG.edges.push_back(e2);
				}
				//合成图压栈
				s.push(tempG);
			}
			else if (text[i] == '*') {
				grup tempG2 = s.top();
				s.pop();
				grup tempG;
				if (tempG2.StartState!=tempG2.EndState) {
					tempG.stateCount = tempG2.stateCount + 1;
				}
				else {
					tempG.stateCount = tempG2.stateCount + 2;
				}
				tempG.EndState = tempG.stateCount-1;
				//图内的边编号加1
				for (int i = 0; i < int(tempG2.edges.size());i++) {
					edge e2 = tempG2.edges[i];
					if (e2.end==tempG2.EndState) {
						e2.start += 1;
						e2.end = e2.start;
					}
					else {
						e2.start += 1;
						e2.end += 1;
					}
					tempG.edges.push_back(e2);
				}
				//添加两条边
				edge e1;
				e1.accept = '~';
				e1.start = 0;
				e1.end = 1;
				edge e2;
				e2.accept = '~';
				e2.start = tempG.EndState-1;
				e2.end = e2.start + 1;
				tempG.edges.push_back(e1);
				tempG.edges.push_back(e2);
				//合成图压栈
				s.push(tempG);
			}
			else if (text[i] == '|') {
				grup tempG2 = s.top();
				s.pop();
				grup tempG1 = s.top();
				s.pop();
				grup tempG;
				tempG.StartState = 0;
				tempG.EndState = 0;
				tempG.stateCount = tempG1.stateCount + tempG2.stateCount-3;
				for (int i = 0; i < int(tempG1.edges.size());i++) {
					edge e1 = tempG1.edges[i];
					if (e1.end == tempG1.EndState) {
						e1.end = 0;
					}
					tempG.edges.push_back(e1);
				}
				for (int i = 0; i < int(tempG2.edges.size());i++) {
					edge e2 = tempG2.edges[i];
					if (e2.end == tempG2.EndState) {
						e2.end = 0;
					}
					tempG.edges.push_back(e2);
				}
				//合成图压栈
				s.push(tempG);
			}
		}
	}
	return s.top();
}
  • 输出图结构:
    输出时按照格式输出:构建好的图状态编号为0到n,把0输出为X,最后一个状态输出为Y,中间状态的编号减一即可。

完整代码:

#include<iostream>
#include<fstream>
#include<stack>
#include<vector>
#include<string>
#define MAX 100
using namespace std;

// A single NFA transition (edge).
struct edge{
	int start;   // index of the state the edge leaves
	int end;     // index of the state the edge enters
	char accept; // symbol on the edge; getNFA uses '~' for the edges it adds around '*'
};

// One NFA fragment; a larger NFA is assembled by combining smaller fragments.
class grup {
public:
	vector<edge> edges;  // transitions owned by this fragment
	int stateCount;      // number of states
	int StartState;      // index of the start state
	int EndState;        // index of the end state

	// A fresh fragment has two states: start state 0 and end state 1.
	grup() : stateCount(2), StartState(0), EndState(1) {}
};

// Driver for the regular-expression-to-NFA conversion: validates the input,
// converts it to postfix, builds the NFA and prints it.
class WordAnstary {
private :
	// NOTE(review): only referenced from commented-out code in start().
	string input_file = "testfile.txt";
	// NOTE(review): not referenced by any code visible in this file.
	string output_file = "output.txt";
public:
	// Entry point: reads an expression from standard input and prints the result.
	int start();
	// Validates the expression and inserts explicit '+' concatenation operators;
	// returns an empty string when the expression is rejected.
	string clear_text(string text);
	// Converts an infix expression to postfix.
	string change_text(string text);
	// Builds the NFA from a postfix expression.
	grup getNFA(string text);
	// Prints the NFA to standard output.
	void printNFA(grup out);
};
// Entry point: reads one regular expression from standard input and prints
// the expression with explicit '+' concatenation, its postfix form and the
// resulting NFA. When clear_text rejects the input (empty result), only a
// rejection message is printed.
//
// @return always 0
int WordAnstary::start() {
	string text;
	cin >> text;
	// clear_text signals invalid input with an empty string.
	string new_text = this->clear_text(text);
	if (new_text == "") {
		cout << "输入不合法" << endl;
		return 0;
	}
	cout << "添加+的表达式:" << new_text << endl;
	new_text = this->change_text(new_text);
	cout << "后缀表达式:" << new_text << endl;
	grup out = this->getNFA(new_text);
	this->printNFA(out);
	return 0;
}

// Prints the NFA, one state per line, each line listing that state's
// outgoing transitions as "from-symbol->to".
//
// State 0 is written as X and state stateCount - 1 as Y; every other state
// is written as its index minus one. The X line comes first, then the Y
// line, then the remaining states in index order with their '~' transitions
// before all other transitions.
void WordAnstary::printNFA(grup out) {
	const int last = out.stateCount - 1;

	// Writes the destination of a transition followed by a space.
	auto printTarget = [last](int target) {
		if (target == last) {
			cout << "Y ";
		}
		else {
			cout << target - 1 << " ";
		}
	};

	cout << "X ";
	for (const edge& e : out.edges) {
		if (e.start != 0) {
			continue;
		}
		cout << "X-" << e.accept << "->";
		printTarget(e.end);
	}
	cout << endl;

	cout << "Y ";
	for (const edge& e : out.edges) {
		if (e.start != last) {
			continue;
		}
		cout << "Y-" << e.accept << "->";
		printTarget(e.end);
	}
	cout << endl;

	for (int state = 1; state < last; state++) {
		cout << state - 1 << " ";
		// Two passes over the edges: '~' transitions first, the rest second.
		for (int pass = 0; pass < 2; pass++) {
			const bool wantEpsilon = (pass == 0);
			for (const edge& e : out.edges) {
				if (e.start != state || (e.accept == '~') != wantEpsilon) {
					continue;
				}
				cout << e.start - 1 << "-" << e.accept << "->";
				printTarget(e.end);
			}
		}
		cout << endl;
	}
}

// Builds an NFA fragment from a postfix regular expression.
//
// The expression is scanned left to right with a stack of fragments:
//   - a letter or digit pushes a two-state fragment 0 -symbol-> 1;
//   - '+' pops two fragments and appends the right one to the left one;
//   - '*' pops one fragment and wraps it with two '~' transitions;
//   - '|' pops two fragments and folds their end states onto state 0.
// Any other character is ignored.
//
// The stack is not checked for underflow, so text has to be a well-formed
// postfix expression.
//
// @param text postfix expression
// @return the fragment left on top of the stack
grup WordAnstary::getNFA(string text) {
	stack<grup> fragments;
	for (char symbol : text) {
		const bool isOperand = ('a' <= symbol && symbol <= 'z')
			|| ('A' <= symbol && symbol <= 'Z')
			|| ('0' <= symbol && symbol <= '9');
		if (isOperand) {
			edge step;
			step.start = 0;
			step.end = 1;
			step.accept = symbol;
			grup single;
			single.edges.push_back(step);
			fragments.push(single);
			continue;
		}
		switch (symbol) {
		case '+': {
			grup right = fragments.top();
			fragments.pop();
			grup left = fragments.top();
			fragments.pop();
			// The right fragment's first state coincides with the left
			// fragment's last state, hence the shift by stateCount - 1.
			const int shift = left.stateCount - 1;
			grup joined = left;
			joined.stateCount = left.stateCount + right.stateCount - 1;
			joined.StartState = 0;
			joined.EndState = joined.stateCount - 1;
			for (edge moved : right.edges) {
				moved.start += shift;
				moved.end += shift;
				joined.edges.push_back(moved);
			}
			fragments.push(joined);
			break;
		}
		case '*': {
			grup inner = fragments.top();
			fragments.pop();
			grup starred;
			// One extra state, or two when the operand's start and end
			// states are the same state.
			starred.stateCount = inner.stateCount + (inner.StartState != inner.EndState ? 1 : 2);
			starred.EndState = starred.stateCount - 1;
			for (edge moved : inner.edges) {
				// Every state index moves up by one; a transition that
				// entered the operand's end state becomes a self-loop.
				const bool entersEnd = (moved.end == inner.EndState);
				moved.start += 1;
				moved.end = entersEnd ? moved.start : moved.end + 1;
				starred.edges.push_back(moved);
			}
			// '~' transitions into and out of the repeated part.
			edge entry;
			entry.accept = '~';
			entry.start = 0;
			entry.end = 1;
			edge leave;
			leave.accept = '~';
			leave.start = starred.EndState - 1;
			leave.end = leave.start + 1;
			starred.edges.push_back(entry);
			starred.edges.push_back(leave);
			fragments.push(starred);
			break;
		}
		case '|': {
			grup right = fragments.top();
			fragments.pop();
			grup left = fragments.top();
			fragments.pop();
			grup merged;
			merged.StartState = 0;
			merged.EndState = 0;
			merged.stateCount = left.stateCount + right.stateCount - 3;
			// NOTE(review): state indices of the operands are kept as they
			// are (the right operand is not renumbered); this looks sound
			// only for single-symbol operands - confirm.
			for (edge kept : left.edges) {
				if (kept.end == left.EndState) {
					kept.end = 0;
				}
				merged.edges.push_back(kept);
			}
			for (edge kept : right.edges) {
				if (kept.end == right.EndState) {
					kept.end = 0;
				}
				merged.edges.push_back(kept);
			}
			fragments.push(merged);
			break;
		}
		default:
			break;
		}
	}
	return fragments.top();
}

// Validates a regular expression and makes concatenation explicit by
// inserting '+' between adjacent operands.
//
// Accepted characters are letters, digits, '|', '*', '(' and ')'. A '+' is
// inserted after a letter/digit, '*' or ')' whenever the next character is a
// letter/digit or '('.
//
// The expression is rejected (empty string returned) when it contains any
// other character, when parentheses are unbalanced, or when an operator has
// no operand to work on: '|' or '*' at the very start or right after '(' or
// '|', ')' right after '(' or '|', or a trailing '|'.
//
// @param text expression as typed by the user
// @return the expression with explicit '+', or "" when it is invalid
string WordAnstary::clear_text(string text) {
	const auto isOperand = [](char c) {
		return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
	};

	string new_text;
	int openParens = 0;   // '(' seen so far that are not closed yet
	char prev = '\0';     // previous input character; '\0' before the first one
	const int length = int(text.length());
	for (int i = 0; i < length; i++) {
		const char cur = text[i];
		// True when nothing that could serve as a left operand precedes cur.
		const bool missingLeft = (prev == '\0' || prev == '(' || prev == '|');
		if (isOperand(cur)) {
			// always acceptable
		}
		else if (cur == '(') {
			openParens++;
		}
		else if (cur == '|' || cur == '*') {
			if (missingLeft) {
				return "";
			}
		}
		else if (cur == ')') {
			if (missingLeft || openParens == 0) {
				return "";
			}
			openParens--;
		}
		else {
			return "";
		}
		new_text += cur;

		// Concatenation is implied when cur closes an operand and the next
		// character opens one. The look-ahead always inspects text[i + 1],
		// digits included.
		const bool closesOperand = isOperand(cur) || cur == '*' || cur == ')';
		if (closesOperand && i + 1 < length) {
			const char next = text[i + 1];
			if (isOperand(next) || next == '(') {
				new_text += '+';
			}
		}
		prev = cur;
	}
	if (openParens != 0 || prev == '|') {
		return "";
	}
	return new_text;
}

// Converts an infix regular expression, in which concatenation is already
// written as '+', into postfix form.
//
// Letters and digits are operands and are copied straight to the output.
// Operator precedence is '*' > '|' > '+'; operators of equal or higher
// precedence are popped before a '|' or '+' is pushed. Any other character
// is ignored.
// NOTE(review): conventional regex syntax gives concatenation a higher
// precedence than alternation; the order here is kept as the file defines it.
//
// @param text infix expression
// @return the postfix expression
string WordAnstary::change_text(string text) {
	// Letters and digits are the operand alphabet.
	auto isOperand = [](char c) {
		return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
	};
	// Binding strength of an operator; 0 for '(' so that it is never popped
	// by an operator.
	auto rank = [](char op) -> int {
		switch (op) {
		case '*': return 3;
		case '|': return 2;
		case '+': return 1;
		default:  return 0;
		}
	};

	stack<char> ops;
	string postfix;
	for (char c : text) {
		if (isOperand(c)) {
			postfix += c;
		}
		else if (c == '(' || c == '*') {
			// '*' is pushed without popping anything: it binds tightest.
			ops.push(c);
		}
		else if (c == ')') {
			// Flush operators back to the matching '('. The emptiness checks
			// keep an unmatched ')' from reading an empty stack.
			while (!ops.empty() && ops.top() != '(') {
				postfix += ops.top();
				ops.pop();
			}
			if (!ops.empty()) {
				ops.pop();
			}
		}
		else if (c == '|' || c == '+') {
			while (!ops.empty() && rank(ops.top()) >= rank(c)) {
				postfix += ops.top();
				ops.pop();
			}
			ops.push(c);
		}
	}
	// Whatever is still pending goes out in stack order.
	while (!ops.empty()) {
		postfix += ops.top();
		ops.pop();
	}
	return postfix;
}


//int main() {
//	WordAnstary wordAustary;
//	wordAustary.start();
//}

  • 12
    点赞
  • 61
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值