【问题描述】
正规表达式→NFA问题的一种描述是:
编写一个程序,输入一个正规表达式,输出与该正规表达式等价的有穷自动机(NFA)。
【基本要求】
设置FA初始状态X,终态Y,过程态用数字表示:0 1 2 3………
【测试用例】
测试数据:
(a|b)*abb
输出结果应为:
X X-~->3
Y
0 0-a->1
1 1-b->2
2 2-b->Y
3 3-~->0 3-a->3 3-b->3
【解决步骤】
正规式转NFA方法步骤:
- 判断一下正规式有没有错误(例如括号是否匹配),并在相邻的两个运算对象之间添加连接符“+”
- 运算符的优先级为:* > | > +(注意:本文的实现中“|”的优先级高于连接符“+”)
- 根据符号优先级,把中缀表达式转后缀表达式便于计算
- 根据后缀表达式依次构建图。有三种运算:*、|、+
图的构建过程:
(1) 定义边结构体用来储存边:edge
// One labelled transition of an NFA.
// Members carry default values so that a freshly declared edge never holds
// indeterminate data.
struct edge {
    int start = 0;        // number of the state the transition leaves
    int end = 0;          // number of the state the transition enters
    char accept = '\0';   // symbol on the transition ('~' labels the empty moves added for closure)
};
(2) 定义一个类用来表示图结构:grup
// An NFA fragment. Larger fragments are assembled from smaller ones by the
// construction rules for concatenation, alternation and closure.
class grup {
public:
    vector<edge> edges;   // transitions owned by this fragment
    int stateCount;       // number of states in the fragment
    int StartState;       // number of the start state
    int EndState;         // number of the end state

    // A new fragment has two states: start 0 and end 1, and no edges yet.
    grup() : stateCount(2), StartState(0), EndState(1) {}
};
(3) 构建出的图结构中,状态从0开始编号。构建过程以(a|b)*abb为例:
-
输入字符串添加+号,并判断是否合法:
算法:- 定义一个栈s用来匹配括号,判断括号是否符合条件。
从左往右扫描字符串,如果是字母或数字,则放入输出表达式后
然后判断后一个是什么符号判断是否要加上+号
对于类似如下几种情况需要加上+:AA、A(、*A、*(、)A、)(。
完成后(a|b)*abb变为:(a|b)*+a+b+b
- 定义一个栈s用来匹配括号,判断括号是否符合条件。
-
中缀表达式转后缀表达式
算法:- 定义一个栈s,从左往右扫描表达式,
遇见字母或数字拼接在输出表达式后
遇见“(”压入栈中
遇见“)”弹出栈中元素并拼接在输出表达式后,直到遇到了“(”,再把“(”弹出
遇到“*”直接压栈;遇到“|、+”先判断栈顶元素的优先级,把比自己优先级高或相等的运算符弹出并拼接在输出表达式后,
然后自己再压栈。
扫描完成后再把栈中元素依次弹出,拼接在输出字符串后
完成后(a|b)*+a+b+b应该变为:ab|*a+b+b+
核心算法:
- 定义一个栈s,从左往右扫描表达式,
// Converts an infix regular expression (with explicit '+' concatenation) to
// postfix form.
//
// Letters and digits are operands and are copied straight to the output.
// '*' is pushed without popping anything; '|' first pops '*' and '|';
// '+' first pops '*', '+' and '|'. Any other character is ignored.
// An unmatched ')' is skipped instead of reading from an empty stack.
//
// Returns the postfix expression, e.g. "(a|b)*+a+b+b" -> "ab|*a+b+b+".
std::string change_text(std::string text) {
    const auto is_operand = [](char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
    };
    // True when the operator on top of the stack has to be emitted before
    // `incoming` may be pushed.
    const auto must_pop = [](char top, char incoming) {
        if (incoming == '|') {
            return top == '*' || top == '|';
        }
        if (incoming == '+') {
            return top == '*' || top == '+' || top == '|';
        }
        return false;  // '*' never pops anything
    };

    std::stack<char> pending;
    std::string postfix;
    for (const char c : text) {
        if (is_operand(c)) {
            postfix += c;
        }
        else if (c == '(') {
            pending.push(c);
        }
        else if (c == ')') {
            // Emit everything back to the matching '('; the emptiness checks
            // keep an unbalanced ')' from touching an empty stack.
            while (!pending.empty() && pending.top() != '(') {
                postfix += pending.top();
                pending.pop();
            }
            if (!pending.empty()) {
                pending.pop();
            }
        }
        else if (c == '*' || c == '|' || c == '+') {
            while (!pending.empty() && must_pop(pending.top(), c)) {
                postfix += pending.top();
                pending.pop();
            }
            pending.push(c);
        }
    }
    // Whatever is still pending goes to the output in stack order.
    while (!pending.empty()) {
        postfix += pending.top();
        pending.pop();
    }
    return postfix;
}
- 构建图结构:
算法:- 定义一个栈用来储存图结构,从左向右扫描后缀表达式
如果遇到的是字母或数字,构建一个如下状态的图:一条边两个状态
- 定义一个栈用来储存图结构,从左向右扫描后缀表达式
压入栈中
如果遇到运算符,分三种情况:“*”、“|”、“+”
遇到“*”,栈顶元素出栈,构造闭包后把结果压栈
遇到“+”,弹出栈顶的两个元素,首尾相接后把结果压栈
遇到“|”,弹出栈顶的两个元素,并联后把结果压栈
- 核心算法:
getNFA(string text) {
stack<grup> s;
for (int i = 0; i < int(text.length()); i++) {
if ((text[i] <= 'z' && text[i] >= 'a') || (text[i] <= 'Z' && text[i] >= 'A')||(text[i] <= '9' && text[i] >= '0')) {
grup tempG;
edge tempE;
tempE.accept = text[i];
tempE.start = 0;
tempE.end = 1;
tempG.edges.push_back(tempE);
s.push(tempG);
}
else {
if (text[i] == '+') {
grup tempG2 = s.top();
s.pop();
grup tempG1 = s.top();
s.pop();
//图和图合成
grup tempG = tempG1;
tempG.stateCount = tempG1.stateCount + tempG2.stateCount-1;
tempG.StartState = 0;
tempG.EndState = tempG.stateCount - 1;
//获取后一条边
for (int i = 0; i < int(tempG2.edges.size());i++) {
edge e2 = tempG2.edges[i];
e2.start += tempG1.stateCount-1;
e2.end += tempG1.stateCount-1;
tempG.edges.push_back(e2);
}
//合成图压栈
s.push(tempG);
}
else if (text[i] == '*') {
grup tempG2 = s.top();
s.pop();
grup tempG;
if (tempG2.StartState!=tempG2.EndState) {
tempG.stateCount = tempG2.stateCount + 1;
}
else {
tempG.stateCount = tempG2.stateCount + 2;
}
tempG.EndState = tempG.stateCount-1;
//图内的边编号加1
for (int i = 0; i < int(tempG2.edges.size());i++) {
edge e2 = tempG2.edges[i];
if (e2.end==tempG2.EndState) {
e2.start += 1;
e2.end = e2.start;
}
else {
e2.start += 1;
e2.end += 1;
}
tempG.edges.push_back(e2);
}
//添加两条边
edge e1;
e1.accept = '~';
e1.start = 0;
e1.end = 1;
edge e2;
e2.accept = '~';
e2.start = tempG.EndState-1;
e2.end = e2.start + 1;
tempG.edges.push_back(e1);
tempG.edges.push_back(e2);
//合成图压栈
s.push(tempG);
}
else if (text[i] == '|') {
grup tempG2 = s.top();
s.pop();
grup tempG1 = s.top();
s.pop();
grup tempG;
tempG.StartState = 0;
tempG.EndState = 0;
tempG.stateCount = tempG1.stateCount + tempG2.stateCount-3;
for (int i = 0; i < int(tempG1.edges.size());i++) {
edge e1 = tempG1.edges[i];
if (e1.end == tempG1.EndState) {
e1.end = 0;
}
tempG.edges.push_back(e1);
}
for (int i = 0; i < int(tempG2.edges.size());i++) {
edge e2 = tempG2.edges[i];
if (e2.end == tempG2.EndState) {
e2.end = 0;
}
tempG.edges.push_back(e2);
}
//合成图压栈
s.push(tempG);
}
}
}
return s.top();
}
- 输出图结构:
输出时按照格式输出:构建好的图状态编号为0到n,把状态0输出为X,最后一个状态输出为Y,中间状态的编号减一即可。
完整代码:
#include<iostream>
#include<fstream>
#include<stack>
#include<vector>
#include<string>
#define MAX 100
using namespace std;
// One labelled transition of an NFA.
// Members carry default values so that a freshly declared edge never holds
// indeterminate data.
struct edge {
    int start = 0;        // number of the state the transition leaves
    int end = 0;          // number of the state the transition enters
    char accept = '\0';   // symbol on the transition ('~' labels the empty moves added for closure)
};
// An NFA fragment. Larger fragments are assembled from smaller ones by the
// construction rules for concatenation, alternation and closure.
class grup {
public:
    vector<edge> edges;   // transitions owned by this fragment
    int stateCount;       // number of states in the fragment
    int StartState;       // number of the start state
    int EndState;         // number of the end state

    // A new fragment has two states: start 0 and end 1, and no edges yet.
    grup() : stateCount(2), StartState(0), EndState(1) {}
};
// Driver for the regular-expression-to-NFA conversion: validates the input,
// rewrites it to postfix form, builds the NFA and prints it.
class WordAnstary {
private:
    // File names for a file-based mode; no method visible in this file reads them.
    string input_file = "testfile.txt";
    string output_file = "output.txt";

public:
    // Entry point: reads one expression from standard input and runs the pipeline.
    int start();

    // Checks the expression and inserts '+' where concatenation is implied;
    // returns an empty string when the expression is rejected.
    string clear_text(string text);

    // Converts an infix expression to postfix form.
    string change_text(string text);

    // Builds the NFA described by a postfix expression.
    grup getNFA(string text);

    // Prints the NFA to standard output.
    void printNFA(grup out);
};
//主函数
int WordAnstary::start() {
//文件操作工具
ifstream read_file;
//读取文件
string text;
//read_file.open(input_file);
//read_file >> text;
cin >> text;
//cout << text;
//分析正规式,得到有穷自动机
//cout << "原表达式为:" << text << endl;
string new_text = this->clear_text(text);
if (new_text != "") {
cout << "添加+的表达式:"<<new_text << endl;
new_text = this->change_text(new_text);
cout << "后缀表达式:"<<new_text << endl;
grup out = this->getNFA(new_text);
this->printNFA(out);
}
else {
cout << "输入不合法" << endl;
}
read_file.close();
return 0;
};
// Prints the NFA, one row per state.
//
// State 0 is shown as "X", state stateCount-1 as "Y", and every state in
// between as its number minus one. Each row starts with the state name
// followed by its outgoing edges written as "<from>-<symbol>-><to> ".
// Rows are printed in the order X, Y, then the intermediate states in
// ascending order. Intermediate rows list their '~' edges before all other
// edges; the X and Y rows keep the edges in stored order.
void WordAnstary::printNFA(grup out) {
    const int last = out.stateCount - 1;

    // Writes the display name of an edge target. The check for the last state
    // comes first so a one-state graph still prints "Y"; a target of state 0
    // is printed as "X" rather than as the number -1.
    const auto print_target = [last](int state) {
        if (state == last) {
            cout << "Y ";
        }
        else if (state == 0) {
            cout << "X ";
        }
        else {
            cout << state - 1 << " ";
        }
    };

    // Prints one row for `state` under the display name `name`.
    const auto print_row = [&](int state, const string& name, bool epsilon_first) {
        cout << name << " ";
        const int passes = epsilon_first ? 2 : 1;
        for (int pass = 0; pass < passes; ++pass) {
            for (const edge& e : out.edges) {
                if (e.start != state) {
                    continue;
                }
                // With two passes, pass 0 takes only '~' edges and pass 1 the rest.
                if (epsilon_first && (e.accept == '~') != (pass == 0)) {
                    continue;
                }
                cout << name << "-" << e.accept << "->";
                print_target(e.end);
            }
        }
        cout << endl;
    };

    print_row(0, "X", false);
    print_row(last, "Y", false);
    for (int state = 1; state < last; ++state) {
        print_row(state, to_string(state - 1), true);
    }
}
// Builds an NFA from a postfix regular expression.
//
// `text` uses letters/digits as operands, '+' for concatenation, '|' for
// alternation and '*' for closure; any other character is ignored.
//
// Every fragment on the stack keeps state 0 as its start and state
// stateCount-1 as its end, with no edge entering the start and no edge
// leaving the end. That property is what makes it safe to merge states when
// fragments are combined.
//
// Returns the fragment on top of the stack after the scan. When an operator
// finds too few operands, or the expression contains no operand at all, a
// default grup (two states, no edges) is returned instead of reading from an
// empty stack.
grup WordAnstary::getNFA(string text) {
    stack<grup> fragments;

    // Moves the top fragment into `dst`; reports false when the stack is empty.
    const auto take = [&fragments](grup& dst) {
        if (fragments.empty()) {
            return false;
        }
        dst = fragments.top();
        fragments.pop();
        return true;
    };

    for (const char symbol : text) {
        const bool is_operand = (symbol >= 'a' && symbol <= 'z') ||
                                (symbol >= 'A' && symbol <= 'Z') ||
                                (symbol >= '0' && symbol <= '9');
        if (is_operand) {
            // Single symbol: 0 --symbol--> 1.
            grup unit;
            edge step;
            step.start = 0;
            step.end = 1;
            step.accept = symbol;
            unit.edges.push_back(step);
            fragments.push(unit);
        }
        else if (symbol == '+') {
            grup right;
            grup left;
            if (!take(right) || !take(left)) {
                return grup();
            }
            // Concatenation: the end of `left` and the start of `right`
            // become one state, so `right` is shifted by left.stateCount - 1.
            const int shift = left.stateCount - 1;
            grup joined = left;
            joined.stateCount = left.stateCount + right.stateCount - 1;
            joined.StartState = 0;
            joined.EndState = joined.stateCount - 1;
            for (edge e : right.edges) {
                e.start += shift;
                e.end += shift;
                joined.edges.push_back(e);
            }
            fragments.push(joined);
        }
        else if (symbol == '*') {
            grup body;
            if (!take(body)) {
                return grup();
            }
            // Closure: fold the end of `body` onto its start to form a loop,
            // shift everything up by one, then add a new start (0) and a new
            // end connected to the loop state by '~' edges.
            grup star;
            star.stateCount = body.stateCount + 1;
            star.StartState = 0;
            star.EndState = star.stateCount - 1;
            const auto relabel = [&body](int state) {
                return (state == body.EndState ? body.StartState : state) + 1;
            };
            for (edge e : body.edges) {
                e.start = relabel(e.start);
                e.end = relabel(e.end);
                star.edges.push_back(e);
            }
            const int loop_state = body.StartState + 1;
            edge enter;
            enter.accept = '~';
            enter.start = star.StartState;
            enter.end = loop_state;
            edge leave;
            leave.accept = '~';
            leave.start = loop_state;
            leave.end = star.EndState;
            star.edges.push_back(enter);
            star.edges.push_back(leave);
            fragments.push(star);
        }
        else if (symbol == '|') {
            grup right;
            grup left;
            if (!take(right) || !take(left)) {
                return grup();
            }
            // Alternation: both operands share one start and one end.
            // Interior states of `left` keep their numbers; interior states
            // of `right` are placed after them.
            grup either;
            either.stateCount = left.stateCount + right.stateCount - 2;
            either.StartState = 0;
            either.EndState = either.stateCount - 1;
            for (edge e : left.edges) {
                if (e.start == left.EndState) {
                    e.start = either.EndState;
                }
                if (e.end == left.EndState) {
                    e.end = either.EndState;
                }
                either.edges.push_back(e);
            }
            const int offset = left.stateCount - 2;
            const auto place_right = [&right, &either, offset](int state) {
                if (state == right.StartState) {
                    return either.StartState;
                }
                if (state == right.EndState) {
                    return either.EndState;
                }
                return state + offset;
            };
            for (edge e : right.edges) {
                e.start = place_right(e.start);
                e.end = place_right(e.end);
                either.edges.push_back(e);
            }
            fragments.push(either);
        }
    }

    if (fragments.empty()) {
        return grup();
    }
    return fragments.top();
}
// Validates a regular expression and makes concatenation explicit.
//
// Accepted characters are letters, digits, '|', '*', '(' and ')'. A '+' is
// inserted after an operand, a '*' or a ')' whenever the next character is an
// operand or a '('.
//
// Returns the rewritten expression, or an empty string when the input
// contains any other character or its parentheses do not match.
string WordAnstary::clear_text(string text) {
    const auto is_operand = [](char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
    };

    string with_concat;
    int open_groups = 0;  // number of '(' still waiting for their ')'
    const int length = static_cast<int>(text.length());
    for (int i = 0; i < length; i++) {
        const char current = text[i];
        if (current == '(') {
            ++open_groups;
        }
        else if (current == ')') {
            if (open_groups == 0) {
                return "";
            }
            --open_groups;
        }
        else if (current != '|' && current != '*' && !is_operand(current)) {
            return "";
        }
        with_concat += current;

        // Only an operand, '*' or ')' can end a sub-expression. The lookahead
        // tests the NEXT character in every case, digits included.
        const bool ends_operand = current != '|' && current != '(';
        if (ends_operand && i + 1 < length) {
            const char next = text[i + 1];
            if (is_operand(next) || next == '(') {
                with_concat += '+';
            }
        }
    }

    if (open_groups != 0) {
        return "";
    }
    return with_concat;
}
// Converts an infix regular expression (with explicit '+' concatenation) to
// postfix form.
//
// Letters and digits are operands and are copied straight to the output.
// '*' is pushed without popping anything; '|' first pops '*' and '|';
// '+' first pops '*', '+' and '|'. Any other character is ignored.
// An unmatched ')' is skipped instead of reading from an empty stack.
//
// Returns the postfix expression, e.g. "(a|b)*+a+b+b" -> "ab|*a+b+b+".
string WordAnstary::change_text(string text) {
    const auto is_operand = [](char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
    };
    // True when the operator on top of the stack has to be emitted before
    // `incoming` may be pushed.
    const auto must_pop = [](char top, char incoming) {
        if (incoming == '|') {
            return top == '*' || top == '|';
        }
        if (incoming == '+') {
            return top == '*' || top == '+' || top == '|';
        }
        return false;  // '*' never pops anything
    };

    stack<char> pending;
    string postfix;
    for (const char c : text) {
        if (is_operand(c)) {
            postfix += c;
        }
        else if (c == '(') {
            pending.push(c);
        }
        else if (c == ')') {
            // Emit everything back to the matching '('; the emptiness checks
            // keep an unbalanced ')' from touching an empty stack.
            while (!pending.empty() && pending.top() != '(') {
                postfix += pending.top();
                pending.pop();
            }
            if (!pending.empty()) {
                pending.pop();
            }
        }
        else if (c == '*' || c == '|' || c == '+') {
            while (!pending.empty() && must_pop(pending.top(), c)) {
                postfix += pending.top();
                pending.pop();
            }
            pending.push(c);
        }
    }
    // Whatever is still pending goes to the output in stack order.
    while (!pending.empty()) {
        postfix += pending.top();
        pending.pop();
    }
    return postfix;
}
//int main() {
// WordAnstary wordAustary;
// wordAustary.start();
//}