KMP算法,看了好久,才写出代码,理解起来有点难度,后来在捋顺了关系后,再回顾就没有之前那样困难了
KMP算法的关键在于求next,而next的计算则在于前缀字符串的计算,将问题分解,先求出f(最大前缀字符串),之后再求next
KMP是一个单模式匹配算法,匹配阶段的时间复杂度为O(m),其中m是待搜索目标串的长度(另外还需要O(n)的时间对长度为n的模式串做预处理,即求next数组)。
写出KMP算法代码,主要参考了2篇文章,在本处附上:
https://wenku.baidu.com/view/8f0bba4bb9d528ea80c7790a.html?from_page=view&from_mod=download
http://blog.csdn.net/joylnwang/article/details/6778316/
原理可参考上面的两篇文章,这里附上我的实现代码,希望对理解该算法有帮助。
#include<iostream>
#include<vector>
/*
 * Builds the optimized KMP failure table ("next") for `pattern`.
 *
 * All tables are 1-based: index j refers to the character pattern[j - 1].
 *
 *   f[j]    = 1 + length of the longest proper border (a string that is both
 *             a prefix and a suffix) of the first j - 1 characters; f[1] = 0.
 *   next[j] = position in the pattern to resume comparing from after a
 *             mismatch at position j. It is the largest k on the chain
 *             f[j], f[f[j]], ... with pattern[k - 1] != pattern[j - 1],
 *             or 0 when no such k exists (the caller must then advance
 *             the text by one character).
 *
 * Returns a vector of size pattern.size() + 1; entries 0 and 1 are always 0.
 * An empty pattern yields the single-element vector {0}.
 * Runs in time linear in the pattern length.
 */
std::vector<int> getNext(const std::string& pattern)
{
    const int n = static_cast<int>(pattern.size());
    std::vector<int> f(n + 1, 0);
    std::vector<int> next(n + 1, 0);

    /* Border table. t is a candidate value of f for a shorter prefix; it is
     * extended when the character following that border matches the newly
     * added character pattern[i - 2], otherwise we fall back along f. */
    for (int i = 2; i <= n; ++i)
    {
        int t = f[i - 1];
        while (t > 0 && pattern[t - 1] != pattern[i - 2])
        {
            t = f[t];
        }
        f[i] = t + 1; /* t == 0 means no border could be extended */
    }

    /* Optimized table: if the fallback position holds the same character
     * that just mismatched, comparing there would fail again, so reuse the
     * already computed answer for that position (f[i] < i always holds). */
    for (int i = 2; i <= n; ++i)
    {
        const int k = f[i];
        next[i] = (pattern[k - 1] != pattern[i - 1]) ? k : next[k];
    }
    return next;
}
/*
 * Searches `target` for occurrences of `pattern` using the KMP algorithm and
 * appends the 0-based start index of each occurrence to `matches`
 * (`matches` is not cleared first).
 *
 * After a full match the scan restarts from the beginning of the pattern at
 * the next text character, so only non-overlapping occurrences are reported.
 *
 * An empty pattern, or a pattern longer than the target, reports nothing.
 */
void kmp(const std::string& target, const std::string& pattern, std::vector<int>& matches)
{
    /* Guard: with an empty pattern the 1-based tables have no entry 1. */
    if (pattern.empty() || pattern.size() > target.size())
    {
        return;
    }

    const std::vector<int> next = getNext(pattern);
    const int targetLen = static_cast<int>(target.size());
    const int patternLen = static_cast<int>(pattern.size());

    int i = 0; /* 0-based position in target */
    int j = 1; /* 1-based position in pattern */
    while (i < targetLen)
    {
        if (target[i] == pattern[j - 1])
        {
            if (j == patternLen)
            {
                matches.push_back(i - patternLen + 1);
                j = 1; /* restart from scratch: overlaps are skipped */
            }
            else
            {
                ++j;
            }
            ++i;
        }
        else
        {
            j = next[j];
            if (0 == j)
            {
                /* No shorter alignment can work; move on in the text. */
                j = 1;
                ++i;
            }
        }
    }
}
/*
 * Demo driver: searches a fixed target string for a fixed pattern and prints
 * every reported occurrence as "<start index>:<matched text>", or
 * "not match" when nothing was found.
 */
int main(int argc, char *argv[])
{
    const std::string pattern = "accabcacac";
    const std::string target = "abcacbdefaccabcacacacadfabcacababcacbdefacaccabcacaccabcacacadf";

    std::vector<int> matches;
    kmp(target, pattern, matches);

    if (matches.empty())
    {
        std::cout << "not match" << std::endl;
        return 0;
    }

    for (const int start : matches)
    {
        std::cout << start << ":" << target.substr(start, pattern.size()) << std::endl;
    }
    return 0;
}
如果pattern的后缀与前缀有重复,比如abcdefab,那么在一次匹配完成之后,下一次匹配不必从pattern的开头重新比较,而是可以直接从c的位置继续比较。对比下面这份代码与上面代码的结果有什么不同:
#include<iostream>
#include<vector>
/*
 * Fills the KMP border table `f` and the optimized failure table `next`
 * for `pattern`. Both outputs are resized to pattern.size() + 1 and are
 * 1-based: index j refers to the character pattern[j - 1]; entries 0 and 1
 * are always 0.
 *
 *   f[j]    = 1 + length of the longest proper border (a string that is both
 *             a prefix and a suffix) of the first j - 1 characters.
 *   next[j] = position in the pattern to resume comparing from after a
 *             mismatch at position j: the largest k on the chain
 *             f[j], f[f[j]], ... with pattern[k - 1] != pattern[j - 1],
 *             or 0 when no such k exists.
 *
 * Any previous contents of `f` and `next` are discarded.
 */
void getNext(const std::string& pattern, std::vector<int>& f, std::vector<int>& next)
{
    const int n = static_cast<int>(pattern.size());
    f.assign(n + 1, 0);
    next.assign(n + 1, 0);

    /* Border table: try to extend the border of the previous prefix with the
     * newly added character pattern[i - 2]; on failure fall back along f. */
    for (int i = 2; i <= n; ++i)
    {
        int t = f[i - 1];
        while (t > 0 && pattern[t - 1] != pattern[i - 2])
        {
            t = f[t];
        }
        f[i] = t + 1; /* t == 0 means no border could be extended */
    }

    /* Optimized table: skip fallback positions that hold the very character
     * that just mismatched by reusing the answer already computed for that
     * position (f[i] < i always holds). */
    for (int i = 2; i <= n; ++i)
    {
        const int k = f[i];
        next[i] = (pattern[k - 1] != pattern[i - 1]) ? k : next[k];
    }
}
/*
 * Searches `target` for occurrences of `pattern` using the KMP algorithm and
 * appends the 0-based start index of each occurrence to `matches`
 * (`matches` is not cleared first).
 *
 * Unlike a version that restarts from the beginning of the pattern after a
 * hit, this one continues from the longest border of the pattern, so
 * overlapping occurrences are reported as well.
 *
 * An empty pattern, or a pattern longer than the target, reports nothing.
 */
void kmp(const std::string& target, const std::string& pattern, std::vector<int>& matches)
{
    /* Guard: with an empty pattern the 1-based tables have no entry 1. */
    if (pattern.empty() || pattern.size() > target.size())
    {
        return;
    }

    const int targetLen = static_cast<int>(target.size());
    const int patternLen = static_cast<int>(pattern.size());

    std::vector<int> f(patternLen + 1, 0);
    std::vector<int> next(patternLen + 1, 0);
    getNext(pattern, f, next);

    int i = 0; /* 0-based position in target */
    int j = 1; /* 1-based position in pattern */
    while (i < targetLen)
    {
        if (target[i] == pattern[j - 1])
        {
            if (j == patternLen)
            {
                matches.push_back(i - patternLen + 1);
                /* Key step: if a suffix of the pattern is also a prefix,
                 * resume right after that prefix. Walk the border chain of
                 * the first patternLen - 1 characters until one of them can
                 * be extended by the last character; k == 0 means the
                 * pattern has no border at all and we restart at 1. */
                int k = f[patternLen];
                while (k > 0 && pattern[k - 1] != pattern[patternLen - 1])
                {
                    k = f[k];
                }
                j = k + 1;
            }
            else
            {
                ++j;
            }
            ++i;
        }
        else
        {
            j = next[j];
            if (0 == j)
            {
                /* No shorter alignment can work; move on in the text. */
                j = 1;
                ++i;
            }
        }
    }
}
/*
 * Demo driver for the overlap-aware search: runs kmp on a fixed
 * target/pattern pair and prints each reported occurrence as
 * "<start index>:<matched text>", or "not match" when there is none.
 */
int main(int argc, char *argv[])
{
    const std::string pattern = "accabcacac";
    const std::string target = "abcacbdefaccabcacacacadfabcacababcacbdefacaccabcacaccabcacacadf";

    std::vector<int> matches;
    kmp(target, pattern, matches);

    if (!matches.empty())
    {
        /* Compare this output with that of the non-overlapping version. */
        for (std::vector<int>::const_iterator it = matches.begin(); it != matches.end(); ++it)
        {
            std::cout << *it << ":" << target.substr(*it, pattern.size()) << std::endl;
        }
    }
    else
    {
        std::cout << "not match" << std::endl;
    }
    return 0;
}