从开始了解学习KMP到现在已经第三遍了…………借着项目写RKR-GST算法也用到字符串模式匹配,顺便再把KMP弄清楚些。
以模式串“abaabcaba”为例,其 next 数组为 [-1, 0, 0, 1, 1, 2, 0, 1, 2]:
next[j] = k 表示:在 p[j] 之前的模式串子串 p[0..j-1] 中,最长的相同真前缀和真后缀的长度为 k(约定 next[0] = -1)。
已知 next[j] = k,求 next[j + 1](对应 pattern 的前 j + 1 个字符)时:
- 若 pattern[k] == pattern[j],则 next[j + 1] = next[j] + 1 = k + 1;
- 若 pattern[k] ≠ pattern[j],如果此时 pattern[next[k]] == pattern[j],则 next[j + 1] = next[k] + 1;否则令 k = next[k] 继续递归重复此过程,直到 k = -1 时取 next[j + 1] = 0。
#include <iostream>
#include <cstring>
using namespace std;
// Brute-force substring search.
//
// Walks the text and the pattern in lock-step; on a mismatch the text
// cursor is rewound to one past the start of the current attempt and the
// pattern cursor is reset to 0.
//
// s: NUL-terminated text to search in.
// p: NUL-terminated pattern to search for.
// Returns the index in s where the first occurrence of p starts,
// or -1 when p does not occur in s.
int violentMatch(char *s, char *p)
{
    const int textLen = static_cast<int>(std::strlen(s));
    const int patLen = static_cast<int>(std::strlen(p));
    int textPos = 0;
    int patPos = 0;

    while (textPos < textLen && patPos < patLen)
    {
        if (s[textPos] != p[patPos])
        {
            // Mismatch: the current attempt started at textPos - patPos,
            // so the next attempt starts one character after that.
            textPos -= patPos - 1;
            patPos = 0;
            continue;
        }
        // Characters agree: advance both cursors.
        ++textPos;
        ++patPos;
    }

    // The whole pattern was consumed exactly when a match was found.
    return (patPos == patLen) ? textPos - patPos : -1;
}
// Builds the KMP "next" (failure) table for pattern p.
//
// After the call, next[0] == -1 and, for every j in [1, strlen(p)),
// next[j] is the value k reached by the prefix/suffix matching below for
// the characters before p[j].
//
// p:    NUL-terminated pattern.
// next: output array; entries [0, max(strlen(p), 1)) are written.
void initNext(char *p, int next[])
{
    const int patLen = static_cast<int>(std::strlen(p));
    next[0] = -1;

    // border tracks the candidate prefix position, pos the suffix position.
    int border = -1;
    for (int pos = 0; pos < patLen - 1; )
    {
        if (border != -1 && p[pos] != p[border])
        {
            // Prefix and suffix characters differ: fall back to the next
            // shorter candidate and retry without moving pos.
            border = next[border];
            continue;
        }
        // Either nothing is left to fall back to (border == -1) or the
        // characters agree: extend by one and record the result.
        ++pos;
        ++border;
        next[pos] = border;
    }
}
// Builds the KMP "next" (failure) table for pattern p, written as a for
// loop with an inner fall-back loop.
//
// After the call, next[0] == -1 and, for every j in [1, strlen(p)),
// next[j] is the length of the longest proper prefix of p[0..j-1] that is
// also a suffix of it.
//
// p:    NUL-terminated pattern.
// next: output array; entries [0, max(strlen(p), 1)) are written.
void myInitNext(char *p, int next[])
{
    const int pLen = static_cast<int>(std::strlen(p));
    next[0] = -1;
    for (int j = 0, k = -1; j < pLen - 1; j++)
    {
        // Fall back through shorter candidates until p[k] matches p[j] or
        // no candidate is left (k == -1).
        while (k != -1 && p[j] != p[k])
            k = next[k];
        // k is now -1 or the index of a matching prefix character, so the
        // value for position j + 1 is k + 1.
        next[j + 1] = ++k;
    }
}
// KMP substring search driven by a precomputed next table.
//
// s:    NUL-terminated text to search in.
// p:    NUL-terminated pattern to search for.
// next: table for p in which next[0] == -1 (the form produced by
//       initNext / myInitNext in this file).
// Returns the index in s where the first occurrence of p starts,
// or -1 when p does not occur in s.
int kmpSearch(char *s, char *p, int next[])
{
    const int textLen = static_cast<int>(std::strlen(s));
    const int patLen = static_cast<int>(std::strlen(p));
    int textPos = 0;
    int patPos = 0;

    while (textPos < textLen && patPos < patLen)
    {
        if (patPos != -1 && s[textPos] != p[patPos])
        {
            // Mismatch with a fall-back available: keep the text cursor
            // where it is and move the pattern cursor to next[patPos].
            patPos = next[patPos];
            continue;
        }
        // Either the characters agree, or patPos == -1 (nothing left to
        // fall back to): advance both cursors.
        ++textPos;
        ++patPos;
    }

    return (patPos == patLen) ? textPos - patPos : -1;
}
// KMP substring search written as a for loop over the text.
//
// Fixes two defects of the earlier version of this function:
//   * after falling back with j = next[j] the current text character was
//     never re-compared, which produced false matches (e.g. "AB" was
//     reported inside "ACBX");
//   * a match ending on the last text character was missed because the
//     success test only ran inside the loop.
//
// s:    NUL-terminated text to search in.
// p:    NUL-terminated pattern to search for.
// next: table for p in which next[0] == -1 (the form produced by
//       initNext / myInitNext in this file).
// Returns the index in s where the first occurrence of p starts,
// or -1 when p does not occur in s. An empty pattern matches at 0.
int myKMPSearch(char *s, char *p, int next[])
{
    const int sLen = static_cast<int>(std::strlen(s));
    const int pLen = static_cast<int>(std::strlen(p));
    int i = 0;
    int j = 0;
    for (; i < sLen && j < pLen; ++i, ++j)
    {
        // Fall back until s[i] matches p[j] or nothing is left (j == -1).
        // The text cursor stays on s[i] during the whole fall-back, so
        // every candidate is actually compared against it.
        while (j != -1 && s[i] != p[j])
            j = next[j];
        // Here s[i] is consumed: either it matched p[j], or j == -1 and
        // the loop increment restarts the pattern at 0 on the next
        // text character.
    }
    // Checked after the loop so that a match at the very end of the text
    // is reported too.
    return (j == pLen) ? i - j : -1;
}
// KMP substring search that never lets the pattern cursor become
// negative: a fall-back that would go below 0 is handled by restarting
// the pattern at 0 and moving on to the next text character.
//
// Fixes two defects of the earlier version of this function:
//   * after a fall-back the current text character was skipped instead
//     of being re-compared, which produced false matches (e.g. "AB" was
//     reported inside "ACBX");
//   * a match ending on the last text character was missed because the
//     success test only ran inside the loop.
//
// s:    NUL-terminated text to search in.
// p:    NUL-terminated pattern to search for.
// next: next table for p as produced by initNext / myInitNext; the value
//       of next[0] is not relied upon.
// Returns the index in s where the first occurrence of p starts,
// or -1 when p does not occur in s. An empty pattern matches at 0.
int myKMPSearch1(char *s, char *p, int next[])
{
    const int sLen = static_cast<int>(std::strlen(s));
    const int pLen = static_cast<int>(std::strlen(p));
    int i = 0;
    int j = 0;
    while (i < sLen && j < pLen)
    {
        if (s[i] == p[j])
        {
            ++i;
            ++j;
        }
        else if (j == 0 || next[j] < 0)
        {
            // Nothing to fall back to: s[i] cannot start or continue a
            // match, so skip it and restart the pattern.
            ++i;
            j = 0;
        }
        else
        {
            // Fall back but stay on s[i] so it is compared against the
            // new pattern position on the next iteration.
            j = next[j];
        }
    }
    return (j == pLen) ? i - j : -1;
}
// KMP substring search that treats the first table entry as 0, so the
// pattern cursor never becomes negative.
//
// Fixes three defects of the earlier version of this function:
//   * it overwrote the caller's next[0] with 0, which breaks any later
//     search that relies on next[0] == -1; the table is now read-only;
//   * after a fall-back the current text character was skipped instead
//     of being re-compared, which produced false matches (e.g. "AB" was
//     reported inside "ACBX");
//   * a match ending on the last text character was missed because the
//     success test ran only at the top of the next iteration.
//
// s:    NUL-terminated text to search in.
// p:    NUL-terminated pattern to search for.
// next: next table for p as produced by initNext / myInitNext; entries
//       for j > 0 must lie in [0, j). next[0] is never read or written.
// Returns the index in s where the first occurrence of p starts,
// or -1 when p does not occur in s. An empty pattern matches at 0.
int myKMPSearch2(char *s, char *p, int next[])
{
    const int sLen = static_cast<int>(std::strlen(s));
    const int pLen = static_cast<int>(std::strlen(p));
    if (pLen == 0)
        return 0;

    int j = 0;
    for (int i = 0; i < sLen; ++i)
    {
        // Fall back while s[i] disagrees with p[j]; stopping at j == 0
        // plays the role of "next[0] = 0" without touching the table.
        while (j > 0 && s[i] != p[j])
            j = next[j];
        if (s[i] == p[j])
            ++j;
        // Test for success right after consuming s[i] so that a match
        // ending on the last text character is reported.
        if (j == pLen)
            return i + 1 - j;
    }
    return -1;
}
// Demo driver: builds the next table for a sample pattern with both
// table builders, prints each table, and prints the match position
// reported by every search routine in this file.
int main(int argc, const char * argv[])
{
    char s[] = "BBC ABCDAB ABCDABCDABDABABE";
    char p[] = "ABCDABC";
    int next[100];
    // The pattern is not modified below, so its length is computed once.
    const int patternLength = static_cast<int>(std::strlen(p));

    // Table from initNext, followed by the reference KMP search.
    initNext(p, next);
    for (int idx = 0; idx < patternLength; ++idx)
        std::cout << next[idx] << " ";
    std::cout << std::endl;
    std::cout << kmpSearch(s, p, next) << std::endl;

    // Table from myInitNext, followed by the for-loop search variants.
    myInitNext(p, next);
    for (int idx = 0; idx < patternLength; ++idx)
        std::cout << next[idx] << " ";
    std::cout << std::endl;
    std::cout << myKMPSearch(s, p, next) << std::endl;

    std::cout << "myKMPSearch1:" << std::endl;
    std::cout << myKMPSearch1(s, p, next) << std::endl;

    std::cout << "myKMPSearch2:" << std::endl;
    std::cout << myKMPSearch2(s, p, next) << std::endl;

    // Brute-force search for comparison.
    std::cout << "violentMatch:" << std::endl;
    std::cout << violentMatch(s, p) << std::endl;

    return 0;
}
详细KMP算法参考: http://blog.csdn.net/v_july_v/article/details/7041827