本程序实现了egrep命令,首先将正则表达式转换为NFA,并实现模拟NFA的算法。
本程序使用flex实现词法分析,bison实现语法分析
若给定的一行字符串中存在一个子串能被该NFA接受,则输出整行。所用语法如下:
S-->S|S
|SS
|S*
|(S)
|a
bison程序:
%{
#include <string>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <stack>
#include <algorithm>
#include <boost/foreach.hpp>
#include <set>
#include <map>
using namespace std;
/* An NFA state is identified by a string name. */
typedef string state;
/* A set of NFA states. */
typedef set<string> states;
/* Outgoing edges of one state: edge label -> set of destination states.
   The label "epsilon" is used for epsilon edges. */
typedef map<string, states> transition;
/* Semantic value attached to grammar symbols (selected through
   %define api.value.type). For an ASCII token only `symbol` is filled in by
   the scanner; for an expression S the remaining fields describe its NFA.
   All members are raw pointers to heap objects created with `new`; nothing
   in this file releases them. */
class value
{
public:
// Text of the input symbol of an ASCII token.
string* symbol;
// Name of the NFA's start state.
state *start;
// Name of the NFA's accepting state.
state *final;
// Transition table: source state -> (edge label -> destination states).
map<state,transition> *transitions;
};
/* Path of the text file to search; assigned from argv[1] in main(). */
char* filename;
int number;// counter used to generate state names
/* Scanner entry point; its definition comes from the included lex.yy.c. */
int yylex(void);
/* Error callback invoked by the parser; prints the message. */
void yyerror(char const *);
/* Converts a value to its textual form through a stringstream. */
template<class TYPE>
string transfer(TYPE init);
/* NFA constructions, one per grammar rule; each fills `result`. */
void Scopy(value S,value &result);
void Skleen(value S,value &result);
void SconnectS(value S1,value S2,value &result);
void SorS(value S1,value S2,value &result);
/* NFA simulation helpers. */
states epsilonClosure(states T,map<state,transition> transitions);
states move(states T, string a,map<state,transition> transitions);
void simulateNFA(string str,value NFA);
%}
/* Every semantic value ($$, $1, ...) is an object of class value. */
%define api.value.type { class value }
// Precedence: closure (KLEEN) > concatenation (CONNECT) > alternation (OR)
%token ASCII
%token LP
%token RP
%left OR
%left CONNECT
%left KLEEN
/* The grammar is declared to contain exactly 4 shift/reduce conflicts.
   NOTE(review): presumably they come from the juxtaposition rule `S S`,
   whose lookahead tokens ASCII and LP carry no precedence - confirm with
   `bison -v`. */
%expect 4
%%
/* lines: the whole input, a sequence of newline-terminated regular
   expressions. After each expression S has been turned into an NFA, every
   line of the file named by `filename` is run through simulateNFA, which
   prints the lines containing a match. */
lines: lines S '\n'
    {
        /* Restart state numbering for the next expression. */
        number=0;
        ifstream in(filename);
        /* Report an unreadable file instead of silently matching nothing. */
        if(!in)
            cerr<<"cannot open input file"<<endl;
        string line;
        while(getline(in,line))
            simulateNFA(line,$2);
    }
    /* A blank input line is ignored. */
    | lines '\n'
    /* Empty input. */
    |
    /* On a syntax error, discard tokens up to the end of the line and resume. */
    | error '\n' {yyerrok;}
    ;
/* S: a regular expression. Each action builds the NFA of the construct into
   $$ from the NFAs of its operands. */
S: S KLEEN
    {Skleen($1,$$);}
    |S S %prec CONNECT
    {SconnectS($1,$2,$$);}
    |S OR S
    {SorS($1,$3,$$);}
    |LP S RP
    {Scopy($2,$$);}
    |ASCII
    {
        /* Single symbol: two fresh states joined by one edge labelled with
           the token's text. The symbol is read from $1 explicitly instead of
           relying on $$ having been pre-initialised with a copy of $1. */
        $$.symbol=$1.symbol;
        $$.start=new string(transfer<int>(number++));
        $$.final=new string(transfer<int>(number++));
        states accepting;
        accepting.insert(*$$.final);
        $$.transitions=new map<state,transition>();
        (*$$.transitions)[*$$.start][*$1.symbol]=accepting;
    }
    ;
%%
#include "lex.yy.c"
int main(int argc,char*argv[]) {
number=0;
filename=argv[1];
return yyparse();
}
/* Parser error callback: writes the message followed by a newline to
   standard output and flushes the stream. */
void yyerror(char const *s)
{
    std::cout << s;
    std::cout << std::endl;
}
/* Computes the epsilon-closure of a state set.
   T           - the starting set of states (taken by value and extended).
   transitions - the NFA transition table (a private copy; looking up a
                 missing state only adds an empty entry to that copy).
   Returns T together with every state reachable from it through edges
   labelled "epsilon". */
states epsilonClosure(states T,map<state,transition> transitions)
{
    /* Work list of states whose epsilon edges still have to be followed. */
    stack<state> pending;
    for(states::const_iterator seed=T.begin();seed!=T.end();++seed)
        pending.push(*seed);

    while(!pending.empty())
    {
        const state current=pending.top();
        pending.pop();

        const states &reachable=transitions[current]["epsilon"];
        for(states::const_iterator next=reachable.begin();next!=reachable.end();++next)
        {
            /* insert() reports whether the state is new to the closure. */
            if(T.insert(*next).second)
                pending.push(*next);
        }
    }
    return T;
}
/* Computes the set of states reachable from any state of T through a single
   edge labelled `a`.
   T           - the current set of states.
   a           - the edge label to follow.
   transitions - the NFA transition table (a private copy).
   Returns the union of the destination sets; epsilon edges are not followed. */
states move(states T,string a,map<state,transition> transitions)
{
    states reached;
    for(states::const_iterator from=T.begin();from!=T.end();++from)
    {
        const states &targets=transitions[*from][a];
        reached.insert(targets.begin(),targets.end());
    }
    return reached;
}
/* Prints `str` followed by a newline when some substring of it, possibly the
   empty one, is accepted by `NFA`.
   str - one line of the searched file, without its line terminator.
   NFA - the automaton built for the regular expression.

   For every start position the automaton is advanced one character at a
   time and acceptance is checked after each step, so each substring is
   examined without re-running the simulation from scratch. Every character
   is converted to an edge label with transfer<char>, the same conversion
   for all positions. */
void simulateNFA(string str,value NFA)
{
    const map<state,transition> &table=*NFA.transitions;

    states initial;
    initial.insert(*NFA.start);
    initial=epsilonClosure(initial,table);

    /* The empty substring is accepted exactly when the accepting state is in
       the initial closure; this also covers an empty line. */
    bool matched=initial.count(*NFA.final)!=0;

    for(string::size_type begin=0;!matched&&begin<str.length();++begin)
    {
        states current=initial;
        for(string::size_type pos=begin;pos<str.length();++pos)
        {
            current=epsilonClosure(move(current,transfer<char>(str[pos]),table),table);
            /* No live states: no longer substring from `begin` can match. */
            if(current.empty())
                break;
            if(current.count(*NFA.final)!=0)
            {
                matched=true;
                break;
            }
        }
    }

    if(matched)
        cout<<str<<endl;
}
/* Converts `init` to text by streaming it into a stringstream.
   init - any value that can be written with operator<<.
   Returns exactly the characters that were written. The buffer is taken with
   str() rather than extracted with operator>>, because extraction skips
   leading whitespace and stops at the first whitespace character, which
   would turn a blank character into an empty string. */
template<class TYPE>
std::string transfer(TYPE init)
{
    std::stringstream ss;
    ss<<init;
    return ss.str();
}
/* result --> (S)
   Parentheses do not change the automaton: result receives its own copies of
   S's start state name, accepting state name and transition table.
   result.symbol is left untouched. */
void Scopy(value S,value &result)
{
    result.start=new string(*S.start);
    result.final=new string(*S.final);
    /* Copy-constructing the map duplicates every entry of S's table. */
    result.transitions=new map<state,transition>(*S.transitions);
}
/* result --> S*
   Starts from a copy of S's transition table and creates a fresh start state
   and a fresh accepting state for result. Epsilon edges are then added
   from result's start state to S's start state and to result's accepting
   state, and from S's accepting state to the same two states. */
void Skleen(value S,value &result)
{
    /* The start state takes the lower number, the accepting state the next. */
    result.start=new string(transfer<int>(number++));
    result.final=new string(transfer<int>(number++));
    result.transitions=new map<state,transition>(*S.transitions);
    map<state,transition> &table=*result.transitions;

    /* Both new epsilon edges lead to the same pair of states. */
    states targets;
    targets.insert(*S.start);
    targets.insert(*result.final);
    table[*result.start]["epsilon"]=targets;
    table[*S.final]["epsilon"]=targets;
}
/* result --> S1 S2
   Merges the transition tables of S1 and S2 into result, then fuses S2's
   start state into S1's accepting state by moving S2's outgoing edges under
   the name of S1's accepting state. result starts where S1 starts and
   accepts where S2 accepts. */
void SconnectS(value S1,value S2,value &result)
{
    result.start=new string(*S1.start);
    result.final=new string(*S2.final);

    map<state,transition> *merged=new map<state,transition>(*S1.transitions);
    /* Range insertion keeps an existing entry when a key is already present. */
    merged->insert(S2.transitions->begin(),S2.transitions->end());
    result.transitions=merged;

    map<state,transition>::iterator found=merged->find(*S2.start);
    if(found!=merged->end())
    {
        /* Re-key S2's start entry under S1's accepting state. */
        pair<state,transition> relocated(*S1.final,found->second);
        merged->erase(found);
        merged->insert(relocated);
    }
}
/* result --> S1 | S2
   Merges the transition tables of S1 and S2 into result and creates a fresh
   start state and a fresh accepting state. Epsilon edges lead from the new
   start state to the start states of S1 and S2, and from the accepting
   states of S1 and S2 to the new accepting state. */
void SorS(value S1,value S2,value &result)
{
    /* The start state takes the lower number, the accepting state the next. */
    result.start=new string(transfer<int>(number++));
    result.final=new string(transfer<int>(number++));
    result.transitions=new map<state,transition>(*S1.transitions);
    map<state,transition> &table=*result.transitions;
    /* Range insertion keeps an existing entry when a key is already present. */
    table.insert(S2.transitions->begin(),S2.transitions->end());

    /* Fan out from the new start state into both alternatives. */
    states branchStarts;
    branchStarts.insert(*S1.start);
    branchStarts.insert(*S2.start);
    table[*result.start]["epsilon"]=branchStarts;

    /* Join both alternatives in the new accepting state. */
    states joinTarget;
    joinTarget.insert(*result.final);
    table[*S1.final]["epsilon"]=joinTarget;
    table[*S2.final]["epsilon"]=joinTarget;
}
flex程序:
%{
/* Scanner for the regular-expression grammar. The generated lex.yy.c is
   #included by the bison program, which supplies yylval, the token codes
   and transfer(). */
#include<string>
%}
/* escape: a backslash followed by one of the metacharacters ( ) | * */
escape "\\("|"\\)"|"\\|"|"\\*"
%%
    /* Blanks and tabs have no action and are dropped. A newline is returned
       as its own character code. The four metacharacters map to LP, RP, OR
       and KLEEN; an escaped metacharacter and any other character become an
       ASCII token whose text is stored in yylval.symbol. */
[ \t]+
\n {return yytext[0];}
"(" {return LP;}
")" {return RP;}
"|" {return OR;}
"*" {return KLEEN;}
{escape} {
/* Drop the leading backslash and keep the escaped character only. */
yylval.symbol=new string(transfer(yytext).substr(1,1));
return ASCII;
}
. {
/* Any other character stands for itself. */
yylval.symbol=new string(transfer(yytext));
return ASCII;
}
假设将bison程序拷至bison.y,flex程序拷至lex.l。
运行如下:
flex lex.l
bison bison.y
g++ bison.tab.c -ly -ll
./a.out filename
最后可输入待测试正则表达式
版权声明:本文为博主原创文章,未经博主允许不得转载。