正则表达式转DFA&NFA
显然,正则表达式、NFA、DFA 的概念都很简单,所以直接上代码,注释应该解释得比较清楚。没有万能头文件(bits/stdc++.h)的读者请自行替换为所需的标准库头文件;如果有疑问,可以留言。
网盘链接
[自行补全]/s/1pbGT_wpB662TwFrnukXgGQ?pwd=TSIT
提取码:TSIT
运行截图
原理可以参考这篇博客传送门
本次程序由四个文件组成
文件名 | 描述 |
---|---|
Regular_Expression.cpp | 处理正则表达式 |
NFA.cpp | 正则表达式转NFA |
DFA.cpp | NFA转DFA |
MainProcess.cpp | 主要处理程序 |
//Regular_Expression.cpp
#ifndef __REGULAR_EXPRESSION__
#define __REGULAR_EXPRESSION__
#include<bits/stdc++.h>
using namespace std;
// Character classes returned by get_chartype().
const int OPERANDS = 1; // operand: a letter, a digit, or '.'
const int OPERATOR = 2; // operator: one of | * ( ) &
const int ILLEGAL_CHAR = -1; // illegal character (also used as the "no priority" value)
// Raised when the regular expression contains a character that is not part
// of the supported charset.
class Regular_Expression_Input_Exception
{
public:
    char c; // the offending character

    Regular_Expression_Input_Exception(char c) : c(c)
    {
    }

    // Returns the diagnostic "<c> is not in the charset".
    std::string what() const
    {
        // Start from a std::string holding the character: writing
        // `c + "literal"` would add the char's integer value to the
        // literal's address (pointer arithmetic) instead of concatenating.
        std::string ret(1, c);
        ret += " is not in the charset";
        return ret;
    }
};
// Raised when the regular expression is malformed.
class Regular_Expression_Format_Exceprion
{
public:
    Regular_Expression_Format_Exceprion() {}

    // Returns the fixed diagnostic text (note the trailing space).
    std::string what() const
    {
        return std::string("Format Error ");
    }
};
// Holds a regular expression and converts it to postfix form on construction.
class Regular_Expression
{
public:
    // Takes the infix expression `s`, runs the pretreatment pass and then the
    // infix-to-postfix pass, and stores the result.
    // The argument is only copied, so it is accepted by const reference; this
    // also allows temporaries and const strings to be passed.
    Regular_Expression(const std::string & s)
    {
        expression = s;
        expression = Regular_Expression_Pretreatment(expression);
        expression = Regular_Expression_Infix2PostFix(expression);
    }

    // Returns the stored (postfix) expression.
    std::string get_postfix() const
    {
        return expression;
    }

private:
    std::string expression; // after construction: the postfix form

    // Both passes are defined outside the class body.
    inline std::string Regular_Expression_Pretreatment(std::string&);
    inline std::string Regular_Expression_Infix2PostFix(std::string&);
};
// Classifies one character of a regular expression.
// Returns OPERANDS for a-z, A-Z, 0-9 and '.', OPERATOR for | * ( ) &,
// and ILLEGAL_CHAR for anything else.
int get_chartype(char c)
{
    const bool is_lower = (c >= 'a' && c <= 'z');
    const bool is_upper = (c >= 'A' && c <= 'Z');
    const bool is_digit = (c >= '0' && c <= '9');
    if (is_lower || is_upper || is_digit || c == '.')
    {
        return OPERANDS;
    }

    switch (c)
    {
    case '|':
    case '*':
    case '(':
    case ')':
    case '&':
        return OPERATOR;
    default:
        return ILLEGAL_CHAR;
    }
}
// Returns the precedence of an operator character:
// '*' -> 3, '&' -> 2, '|' -> 1, '(' -> 0.
// Any other character (including ')') yields ILLEGAL_CHAR.
int get_priorioty(char c)
{
    if (c == '*') return 3;
    if (c == '&') return 2;
    if (c == '|') return 1;
    if (c == '(') return 0;
    return ILLEGAL_CHAR;
}
// Validates the raw regular expression and makes concatenation explicit by
// inserting '&' between adjacent tokens.
//
// pre_expression: the infix expression as typed; must be non-empty (asserted).
// Returns a copy of the expression with '&' inserted wherever an operand, '*'
// or ')' is immediately followed by an operand or '('.
//
// If a character is rejected by get_chartype(), the diagnostic is written to
// stderr and the process exits with a failure status.
std::string Regular_Expression::Regular_Expression_Pretreatment(std::string & pre_expression)
{
    assert(pre_expression.size() > 0);
    std::string treated_expression;
    try
    {
        // The first character has no left neighbour, so it is only validated
        // and copied.
        char prev_char = pre_expression[0];
        int prev_type = get_chartype(prev_char);
        if (prev_type == ILLEGAL_CHAR) throw Regular_Expression_Input_Exception(prev_char);
        treated_expression.push_back(prev_char);

        for (std::size_t i = 1; i < pre_expression.size(); ++i)
        {
            const char cur_char = pre_expression[i];
            const int cur_type = get_chartype(cur_char);
            if (cur_type == ILLEGAL_CHAR) throw Regular_Expression_Input_Exception(cur_char);

            // Left side ends a sub-expression: operand, '*' or ')'.
            const bool left_closes = (prev_type == OPERANDS || prev_char == '*' || prev_char == ')');
            // Right side starts a sub-expression: operand or '('.
            const bool right_opens = (cur_type == OPERANDS || cur_char == '(');
            if (left_closes && right_opens)
            {
                // '&' is the explicit concatenation operator.
                treated_expression.push_back('&');
            }

            treated_expression.push_back(cur_char);
            prev_type = cur_type;
            prev_char = cur_char;
        }
    }
    catch (const Regular_Expression_Input_Exception& e)
    {
        std::cerr << e.what() << "\n";
        // Invalid input is an error, so do not report success to the caller's shell.
        exit(EXIT_FAILURE);
    }
    catch (const std::exception& e)
    {
        std::cerr << e.what() << "\n";
        exit(EXIT_FAILURE);
    }
    return treated_expression;
}
// 正则表达式的中缀表达式转成后缀表达式
string Regular_Expression::Regular_Expression_Infix2PostFix(string & pre_expression)
{
assert(pre_expression.size() > 0);
string treated_expression;
try
{
vector<char> op;
int len = pre_expression.length();
int ntype ;
char nchar;
for(int i=0 ; i<len ; ++i)
{
nchar = pre_expression[i];
ntype = get_chartype(nchar);
if(ntype == ILLEGAL_CHAR) throw Regular_Expression_Input_Exception(nchar);
// 遇到 ( 直接压入栈中;
if(nchar == '(')
{
op.push_back(nchar);
}
// 遇到 ) 将运算符出栈并输出,直到遇到 ( ,将 ( 出栈但不输出
else if(nchar == ')')
{