kmp 算法
KMP 算法的要点是:先由模式串 pat 构造一个 DFA,再在该 DFA 上逐字符模拟运行输入文本 txt;若到达接受状态(状态 m,即 pat 的长度),则接受并返回匹配的起始位置,否则返回不匹配(下面的代码以文本长度 n 表示不匹配)。
其dfa的模拟代码如下
int kmp(string pat,string txt)
{
int n=txt.size();
int m=txt.size();
int i,j; // j is current state,txt[i] is current input
auto dfa=buildDFA(string pat)
for(j=0,i=0;i<n && j<m;i++)
{
j=dfa[j][txt[i]];
}
if(j==m) return i-m; // if reachs accept state then accept
else return n; // refuse
}
其dfa的构造代码如下:
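+ Match: in state j, if the next char c == pat[j], go to state j+1.
+ Mismatch: in state j, if the next char c != pat[j], go to dfa[X][c], where X is the state reached by simulating pat[1..j-1] on the DFA. X is maintained incrementally while the DFA is built, so pat[1..j-1] never has to be re-simulated.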
// Builds the KMP transition table for pat.
//
// The result has one row per pattern position (states 0 .. m-1, where
// m = pat.size()) and R = 256 columns, one per unsigned char value.
// dfa[j][c] is the state entered from state j on input c; state m (which has
// no row of its own) is the accept state. An empty pattern yields an empty
// table.
std::vector<std::vector<int>> buildDFA(std::string pat)
{
    const int m = static_cast<int>(pat.size());
    const int R = 256;
    std::vector<std::vector<int>> dfa(m, std::vector<int>(R));
    if (m == 0) return dfa; // no states to fill in

    // Read pattern characters as unsigned char so bytes >= 0x80 stay inside
    // [0, R) even where plain char is signed.
    const auto sym = [&pat](int k) { return static_cast<unsigned char>(pat[k]); };

    dfa[0][sym(0)] = 1;
    // X is the restart state: the state reached by running pat[1..j-1]
    // through the DFA. It starts at 0 and advances by pat[j] at the end of
    // each iteration.
    for (int X = 0, j = 1; j < m; j++)
    {
        // Mismatch transitions of state j are those of the restart state X.
        // X < j always holds, so the two rows are distinct.
        dfa[j] = dfa[X];
        // Match transition: the expected character moves to the next state.
        dfa[j][sym(j)] = j + 1;
        // Advance the restart state by the pattern character, not by the index j.
        X = dfa[X][sym(j)];
    }
    return dfa;
}
代码合并如下
``` C++
// KMP substring search: builds the DFA for pat, then runs txt through it.
//
// pat: the pattern to look for.
// txt: the text to search in.
// Returns the index in txt where the first occurrence of pat starts, or
// txt.size() when pat does not occur. An empty pattern matches at index 0.
int kmp(std::string pat, std::string txt)
{
    const int n = static_cast<int>(txt.size());
    const int m = static_cast<int>(pat.size()); // m is also the accept state
    // The empty pattern matches immediately; there is no DFA row to index.
    if (m == 0) return 0;

    const int R = 256;
    // Subscript with unsigned char so bytes >= 0x80 stay inside [0, R) even
    // where plain char is signed.
    const auto sym = [](char c) { return static_cast<unsigned char>(c); };

    // Construct the DFA: dfa[state][input] is the next state.
    std::vector<std::vector<int>> dfa(m, std::vector<int>(R));
    dfa[0][sym(pat[0])] = 1;
    // X is the restart state: the state reached by running pat[1..k-1]
    // through the DFA.
    for (int X = 0, k = 1; k < m; k++)
    {
        dfa[k] = dfa[X];           // mismatch transitions come from the restart state
        dfa[k][sym(pat[k])] = k + 1; // match transition
        X = dfa[X][sym(pat[k])];   // advance the restart state
    }

    // Simulate the DFA: txt[i] is the current input, j is the current state.
    int i = 0;
    int j = 0;
    for (; i < n && j < m; i++)
        j = dfa[j][sym(txt[i])];

    if (j == m) return i - m; // accept state reached: match ends just before i
    return n;                 // text exhausted without a match
}