实验一:RegExp转NFA

1.中缀表达式转后缀表达式

后缀表达式是什么?

https://blog.csdn.net/a8425/article/details/119253258

核心操作:

* 属于操作数(数字/字母)的字符直接输出
* 操作符 `*`、`|`、`(`、`)`、`^` 需要与栈顶元素比较优先级
* 如果栈为空,或者当前操作符为 `(`,则直接入栈
* 如果当前操作符为 `)`,则依次弹出栈内元素并输出,直到弹出 `(` 为止
* 如果当前操作符的优先级小于等于栈顶元素,则依次弹出栈顶元素并输出,直到栈顶元素的优先级小于当前操作符(或栈为空),然后将当前操作符入栈
image.png

2.后缀表达式转NFA

// NFA structure (same definition as in the full listing below)
struct NFA {
	set<int> Q;                      // state numbers
	set<char> alpha;                 // symbols that label edges
	vector<int> sigma[250][250];     // sigma[state][symbol]: list of target states
	int start;                       // start state
	set<int> end;                    // accepting states
}NFA_instance;

STL set的使用

https://c.biancheng.net/view/7196.html
https://c.biancheng.net/view/7192.html

核心操作:

*核心算法:
image.png
image.png
image.pngimage.pngimage.png
* status_start 栈保存各子图的开始状态
* status_end 栈保存各子图的结束状态
* status_new 保存下一个可用的状态编号
* 状态转换用 vector 表示而不是单个整数,因为同一状态在同一输入符号下可能转移到多个状态

具体实现

#include <algorithm>
#include <cstddef>
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <set>
#include <stack>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
// Reports whether `str` is an operand symbol of the regular expression, i.e.
// an ASCII letter or a decimal digit. Despite the name, it returns false for
// '*', '|', '^', parentheses and every other character.
bool isOperator(char str) {
	const bool is_upper = ('A' <= str) && (str <= 'Z');
	const bool is_lower = ('a' <= str) && (str <= 'z');
	const bool is_digit = ('0' <= str) && (str <= '9');
	return is_upper || is_lower || is_digit;
}
// Returns the precedence used by the infix-to-postfix conversion; a larger
// number binds tighter.
//   1  '(' and ')'
//   2  '|'  (alternation)
//   3  '^'  (explicit concatenation)
//   4  '*'  (closure)
//   5  any other character
int getPriority(char str) {
	switch (str) {
	case '(':
	case ')':
		return 1;
	case '|':
		return 2;
	case '^':
		return 3;
	case '*':
		return 4;
	default:
		return 5;
	}
}
string mid2after(string& mid_str) {
	char after_str[255] = { ' ' };
	int index = 0;
	stack<char> char_st;
	for (int i = 0; i < mid_str.length(); i++) {
		//mid2after 入栈操作
		if (isOperator(mid_str[i])) {
			after_str[index++] = mid_str[i];
		}
		else {
			//栈空
			if (char_st.empty())
			{
				char_st.push(mid_str[i]);
			}
			else {
				//栈不为空
				//当前为(
				if (mid_str[i] == '(')
				{
					char_st.push(mid_str[i]);
				}
				else {
					//栈不为空
					//当前为)
					//退栈直到(退出
					char top = char_st.top();
					if (mid_str[i] == ')') {
						while (top != '(') {
							after_str[index++] = top;
							char_st.pop();
							top = char_st.top();
						}
						char_st.pop();
					}
					else {
						//栈不为空
						//当前符号优先级小于等于栈顶优先级元素
						//退栈直到当前符号优先级大于栈顶元素
						//且防止空栈的情况
						if (getPriority(mid_str[i]) <= getPriority(top)) {
							while (getPriority(mid_str[i]) <= getPriority(top)) {
								after_str[index++] = top;
								char_st.pop();
								if (char_st.empty()) {
									break;
								}
								top = char_st.top();
							}
							char_st.push(mid_str[i]);
						}
						else {
							char_st.push(mid_str[i]);
						}
					}
				}
			}
		}
		//判断是否添加连接符^
		if (i < mid_str.length() - 1) {
			if ((isOperator(mid_str[i]) && isOperator(mid_str[i + 1])) ||
				(isOperator(mid_str[i]) && mid_str[i + 1] == '(') ||
				(mid_str[i] == ')' && mid_str[i + 1] == '(') ||
				(mid_str[i] == '*' && isOperator(mid_str[i + 1])) ||
				(mid_str[i] == '*' && mid_str[i + 1] == '(') ||
				(mid_str[i] == ')' && isOperator(mid_str[i + 1]))
				)
			{
				if (char_st.empty()) {
					char_st.push('^');
				}
				else {
					if (getPriority('^') <= getPriority(char_st.top())) {
						char top = char_st.top();
						while (getPriority('^') <= getPriority(top)) {
							after_str[index++] = top;
							char_st.pop();
							if (char_st.empty()) {
								break;
							}
							top = char_st.top();
						}
						char_st.push('^');
					}
					else {
						char_st.push('^');
					}
				}
			}
		}
	}
	//pop栈内剩余元素
	while (!char_st.empty()) {
		after_str[index++] = char_st.top();
		char_st.pop();
	}
	after_str[index] = '\0';
	string temp(after_str);
	return temp;
}

// Nondeterministic finite automaton produced from the postfix expression.
struct NFA {
	std::set<int> Q;                      // state numbers in use
	std::set<char> alpha;                 // symbols that label edges ('$' stands for epsilon)
	std::vector<int> sigma[250][250];     // sigma[state][symbol]: list of target states
	int start;                            // start state
	std::set<int> end;                    // accepting states
};

// The one NFA object every stage of the program reads and writes.
NFA NFA_instance;

// Start state and end state of every sub-graph built so far, kept on two
// parallel stacks.
std::stack<int> status_start;
std::stack<int> status_end;

//后缀表达式转NFA
void afer_exp2NFA(string after_exp) {
	//status_new标记当前最新状态编号
	int status_new = 0;
	for (int i = 0; i < after_exp.length(); i++) {
		if (isOperator(after_exp[i])) {
			NFA_instance.alpha.insert(after_exp[i]);
			NFA_instance.Q.insert(status_new);
			status_start.push(status_new);
			status_new++;
			NFA_instance.Q.insert(status_new);
			status_end.push(status_new);
			NFA_instance.sigma[status_start.top()][after_exp[i]].push_back(status_end.top());
			status_new++;
		}
		else {
			if (after_exp[i] == '*') {
				int pre = status_start.top();
				int nxt = status_end.top();
				int pre1 = status_new;
				status_new++;
				int nxt1 = status_new;
				NFA_instance.Q.insert(pre1);
				NFA_instance.Q.insert(nxt1);
				NFA_instance.sigma[nxt]['$'].push_back(pre);
				NFA_instance.sigma[nxt]['$'].push_back(nxt1);
				NFA_instance.sigma[pre1]['$'].push_back(pre);
				NFA_instance.sigma[pre1]['$'].push_back(nxt1);
				status_start.pop();
				status_end.pop();
				status_start.push(pre1);
				status_end.push(nxt1);
				status_new++;
			}
			else {
				if (after_exp[i] == '^') {
					int pre2 = status_start.top();
					int nxt2 = status_end.top();
					status_start.pop();
					status_end.pop();
					int pre1 = status_start.top();
					int nxt1 = status_end.top();
					status_start.pop();
					status_end.pop();
					NFA_instance.sigma[nxt1]['$'].push_back(pre2);
					status_start.push(pre1);
					status_end.push(nxt2);
				}
				else {
					if (after_exp[i] == '|') {
						int pre2 = status_start.top();
						int nxt2 = status_end.top();
						status_start.pop();
						status_end.pop();
						int pre1 = status_start.top();
						int nxt1 = status_end.top();
						status_start.pop();
						status_end.pop();
						int pre3 = status_new;
						status_new++;
						int nxt3 = status_new;
						NFA_instance.Q.insert(pre3);
						NFA_instance.Q.insert(nxt3);
						NFA_instance.sigma[pre3]['$'].push_back(pre2);
						NFA_instance.sigma[pre3]['$'].push_back(pre1);
						NFA_instance.sigma[nxt2]['$'].push_back(nxt3);
						NFA_instance.sigma[nxt1]['$'].push_back(nxt3);
						status_start.push(pre3);
						status_end.push(nxt3);
						status_new++;
					}
				}
			}
		}
	}
	NFA_instance.start = status_start.top();
	NFA_instance.end.insert(status_end.top());
	NFA_instance.alpha.insert('$');
}

// Writes NFA_instance to `outputFile`: a "graph NFA" header followed by one
// line per edge in the form "<from> <to> <symbol>".
//
// Edges are listed by source state, then by symbol, then in the order the
// targets were stored. An edge leaving the start state gets a "(start)"
// prefix; any other edge whose target is an accepting state gets "(end)"
// after the target. An edge that leaves the start state is never marked
// "(end)", even when it enters an accepting state.
void printNFA(std::ofstream &outputFile) {
	outputFile << "graph NFA" << std::endl;
	for (const int from : NFA_instance.Q) {
		const bool leaves_start = (from == NFA_instance.start);
		for (const char symbol : NFA_instance.alpha) {
			for (const int to : NFA_instance.sigma[from][symbol]) {
				const bool mark_end = !leaves_start && NFA_instance.end.count(to) != 0;
				if (leaves_start) {
					outputFile << "(start)";
				}
				outputFile << from << " " << to;
				outputFile << (mark_end ? "(end) " : " ");
				outputFile << symbol << std::endl;
			}
		}
	}
}

// Deterministic finite automaton produced from NFA_instance.
struct DFA {
	std::set<int> Q;          // state numbers in use
	std::set<char> alpha;     // input symbols
	int sigma[25][250];       // sigma[state][symbol]: target state, -1 when there is none
	int start;                // start state
	std::set<int> end;        // accepting states
};

// The one DFA object the conversion fills in and the printer reads.
DFA DFA_instance;

// Working storage for the NFA-to-DFA conversion; all of it is reset by clear().
std::set<int> e_closure[25];   // e_closure[s]: epsilon-closure of NFA state s
std::stack<int> bfs;           // pending states of the closure search (a LIFO stack, despite the name)
std::set<int> Dstates[25];     // Dstates[d]: NFA states that make up DFA state d
std::set<int> temp;            // scratch set of states
std::set<char> temp_c;         // scratch set of symbols

void NFA2DFA() {
	test 初始化NFA
	//for (int i = 0; i <= 9; i++) {
	//	NFA_instance.Q.insert(i);
	//}
	//NFA_instance.start = 0;
	//NFA_instance.end.insert(9);
	//NFA_instance.alpha.insert('$');
	//NFA_instance.alpha.insert('a');
	//NFA_instance.alpha.insert('b');
	//NFA_instance.sigma[0]['$'].push_back(1);
	//NFA_instance.sigma[0]['$'].push_back(7);
	//NFA_instance.sigma[1]['$'].push_back(2);
	//NFA_instance.sigma[1]['$'].push_back(4);
	//NFA_instance.sigma[2]['a'].push_back(3);
	//NFA_instance.sigma[3]['$'].push_back(6);
	//NFA_instance.sigma[4]['b'].push_back(5);
	//NFA_instance.sigma[5]['$'].push_back(6);
	//NFA_instance.sigma[6]['$'].push_back(1);
	//NFA_instance.sigma[6]['$'].push_back(7);
	//NFA_instance.sigma[7]['a'].push_back(8);
	//NFA_instance.sigma[8]['b'].push_back(9);

	//DFA和NFA的alpha字符集合并
	set_union(NFA_instance.alpha.begin(), NFA_instance.alpha.end(), temp_c.begin(), temp_c.end(), inserter(DFA_instance.alpha,DFA_instance.alpha.begin()));
	//DFA中无$边
	DFA_instance.alpha.erase('$');
	//求出各状态的e-closure
	for (auto q = NFA_instance.Q.begin(); q != NFA_instance.Q.end(); q++) {
		if (!NFA_instance.sigma[*q]['$'].empty()) {
			bfs.push(*q);
			while (!bfs.empty()) {
				int top = bfs.top();
				bfs.pop();
				e_closure[*q].insert(top);
				if (!NFA_instance.sigma[top]['$'].empty()) {
					for (auto q1 = NFA_instance.sigma[top]['$'].begin(); q1 != NFA_instance.sigma[top]['$'].end(); q1++) {
						bfs.push(*q1);
					}
				}
			}
		}
		else {
			e_closure[*q].insert(*q);
		}
	}
	//验证e-closure输出的正确性
	//for (auto q = NFA_instance.Q.begin(); q != NFA_instance.Q.end(); q++) {
	//	cout << "e-closure of " << *q << " : ";
	//	for (auto q1 = e_closure[*q].begin(); q1 != e_closure[*q].end(); q1++) {
	//		cout << *q1 << " ";
	//	}
	//	cout << endl;
	//}

	//求出第一个状态子集
	//Dstates[0]对应NFA_instance.start的闭包
	set_union(e_closure[NFA_instance.start].begin(), e_closure[NFA_instance.start].end(), temp.begin(), temp.end(), inserter(Dstates[0], Dstates[0].begin()));
	int j = 0, p = 0;
	//转换矩阵DFA_instance.sigma
	for (int i = 0; i < 25; i++) {
		for (int j = 0; j < 250; j++) {
			DFA_instance.sigma[i][j] = -1;
		}
	}
	//j表示当前处理的状态子集
	//p表示总状态子集数
	while (j <= p) {
		
		for (auto alpha = DFA_instance.alpha.begin(); alpha != DFA_instance.alpha.end(); alpha++) {
			set<int> temp_Dstate;
			set<int> e_temp_Dstate;
			//得到Dstates[j]有符号边的转换集合temp_Dstate
			//Dstates[j]面对每一种符号输入的处理
			for (auto q = Dstates[j].begin(); q != Dstates[j].end(); q++) {
				if (!NFA_instance.sigma[*q][*alpha].empty()) {
					for (auto q1 = NFA_instance.sigma[*q][*alpha].begin(); q1 != NFA_instance.sigma[*q][*alpha].end(); q1++) {
						temp_Dstate.insert(*q1);
					}
				}
			}

			//test 测试符号闭包
			//发现未清空temp_Dstate和e_temp_Dstate导致错误
			//cout << "closure of " << j << " with alpha " << *alpha << " : ";
			//for (auto q = temp_Dstate.begin(); q != temp_Dstate.end(); q++) {
			//	cout << *q << " ";
			//}
			//cout << endl;

			//求出temp_Dstate转换集合的e闭包e_temp_Dstate
			//转换集合temp_Dstate为空,一定没有产生新状态,产生的e_temp_Dstate是原本的Dstate[j]
			//转换集合temp_Dstate不为空,可能产生新状态,需要将产生的新状态与之前的Dstates中记录的对比
			if (!temp_Dstate.empty()) {
				for (auto state = temp_Dstate.begin(); state != temp_Dstate.end(); state++) {
					int index = *state;
					set_union(e_closure[index].begin(), e_closure[index].end(), temp.begin(), temp.end(), inserter(e_temp_Dstate, e_temp_Dstate.begin()));
				}
				//flag 1表示产生新状态
				//	   0	未产生新状态
				int flag = 1;
				int index = -1;
				//index用于未产生新状态时,标记当前符号输入下产生的状态对应哪个旧状态
				for (int i = 0; i <= p; i++) {
					set<int> diff;
					set_difference(e_temp_Dstate.begin(), e_temp_Dstate.end(), Dstates[i].begin(), Dstates[i].end(), inserter(diff, diff.begin()));
					if (diff.empty()) {
						flag = 0;
						index = i;
						break;
					}
				}
				if (flag == 1) {
					p++;
					set_union(e_temp_Dstate.begin(), e_temp_Dstate.end(), temp.begin(), temp.end(), inserter(Dstates[p], Dstates[p].begin()));
					DFA_instance.sigma[j][*alpha] = p;
				}
				else {
					DFA_instance.sigma[j][*alpha] = index;
				}
			}
			else {
				DFA_instance.sigma[j][*alpha] = -1;
			}
		}

		//cout << "closure of " << j << " : ";
		//for (auto q = Dstates[j].begin(); q != Dstates[j].end(); q++) {
		//	cout << *q << " ";
		//}
		//cout << endl;

		j++;
	}
	for (int i = 0; i <= p; i++) {
		DFA_instance.Q.insert(i);
	}
	DFA_instance.start = 0;
	for (auto q = NFA_instance.end.begin(); q != NFA_instance.end.end(); q++) {
		for (int i = 0; i <= p; i++) {
			if (Dstates[i].find(*q) != Dstates[i].end()) {
				DFA_instance.end.insert(i);
			}
		}
	}
}

// Writes DFA_instance to `outputFile`: a "graph DFA" header followed by one
// line per transition in the form "<from> <to> <symbol>".
//
// Transitions are listed by source state, then by symbol; table cells holding
// -1 are skipped. A transition leaving the start state gets a "(start)"
// prefix; any other transition whose target is an accepting state gets
// "(end)" after the target. A transition that leaves the start state is never
// marked "(end)".
void printDFA(std::ofstream& outputFile) {
	outputFile << "graph DFA" << std::endl;
	for (const int from : DFA_instance.Q) {
		const bool leaves_start = (from == DFA_instance.start);
		for (const char symbol : DFA_instance.alpha) {
			const int to = DFA_instance.sigma[from][symbol];
			if (to == -1) {
				continue;
			}
			const bool mark_end = !leaves_start && DFA_instance.end.count(to) != 0;
			if (leaves_start) {
				outputFile << "(start)";
			}
			outputFile << from << " " << to;
			outputFile << (mark_end ? "(end) " : " ");
			outputFile << symbol << std::endl;
		}
	}
}

void clear() {
	//重新配置全局变量
	NFA_instance.Q.clear();
	NFA_instance.alpha.clear();
	NFA_instance.end.clear();
	NFA_instance.start = -1;
	for (int i = 0; i < 250; i++) {
		for (int j = 0; j < 250; j++) {
			NFA_instance.sigma[i][j].clear();
		}
	}
	while (!status_start.empty()) {
		status_start.pop();
	}
	while (!status_end.empty()) {
		status_end.pop();
	}
	DFA_instance.Q.clear();
	DFA_instance.alpha.clear();
	for (int i = 0; i < 25; i++) {
		for (int j = 0; j < 250; j++) {
			DFA_instance.sigma[i][j] = -1;
		}
	}
	DFA_instance.start = -1;
	DFA_instance.end.clear();
	for (int i = 0; i < 25; i++) {
		e_closure[i].clear();
		Dstates[i].clear();
	}
	while (!bfs.empty()) {
		bfs.pop();
	}
	temp.clear();
	temp_c.clear();
}

// Reads regular expressions from ./reg.txt (one per line, at most 100, up to
// the first empty line) and writes, for each of them, the postfix form, the
// NFA and the DFA to ./output.txt.
//
// Returns 0 on success and 1 when either file cannot be opened.
int main() {
	const std::string file_path = "./reg.txt";
	const std::size_t max_expressions = 100;

	std::ifstream file(file_path);
	if (!file) {
		std::cerr << "cannot open " << file_path << std::endl;
		return 1;
	}

	// The limit is tested before reading, so no line beyond it is ever stored.
	std::vector<std::string> expressions;
	std::string line;
	while (expressions.size() < max_expressions && std::getline(file, line)) {
		// Drop the '\r' left over from CRLF line endings; otherwise it would
		// be treated as part of the expression.
		if (!line.empty() && line.back() == '\r') {
			line.pop_back();
		}
		if (line.empty()) {
			break;
		}
		expressions.push_back(line);
	}
	file.close();

	std::ofstream outputFile("./output.txt");
	if (!outputFile) {
		std::cerr << "cannot open ./output.txt" << std::endl;
		return 1;
	}

	for (std::string& mid_exp : expressions) {
		clear();
		outputFile << "input mid_exp is : " << mid_exp << std::endl;
		try {
			const std::string after_exp = mid2after(mid_exp);
			outputFile << "after_exp is : " << after_exp << std::endl;
			afer_exp2NFA(after_exp);
			printNFA(outputFile);
			NFA2DFA();
			printDFA(outputFile);
		}
		catch (const std::exception& error) {
			// Report the failure for this expression and carry on with the rest.
			outputFile << "error: " << error.what() << std::endl;
		}
		outputFile << std::endl;
	}
	return 0;
}
  • 30
    点赞
  • 23
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值