1. 例子
目标串S:ababcabcacbab
模式串P:abcac
匹配过程:
s
1 ababcabcacbab
abcacd
j
2 ababcabcacbab //KMP算法没有这一趟,在KMP算法中,目标串的指针是不会回溯的
abcacd
01s s 89
2 ababcabcacbab
abcabd
j j
0123456s s
3 ababcabcacbab //第3趟并没有再比较红色的ab,因为在第2趟已经比较到了第二个ab
abcabd
01j j //模式串两个ab重复 abcabd,如果目标串与模式串第二个ab匹配了,说明一定与第一个ab匹配,因此就不用再与第一个ab比较了
2.求next[j] = k的算法
next[j] = k中j是模式串指针的当前位置,当模式串的 j(5)位置与目标串s(7)位置的元素不匹配时,j(5)应该回溯到模式串的 k(2)位置(即k表示已经匹配上的元素个数),而目标串的s(7)就地不动(不回溯!)
求数组next[j]的算法如下,它只与模式串有关,与目标串无关
/*
 * Build the KMP failure table for the pattern p[0..len-1].
 *
 * p    - pattern characters; only p[0..len-1] are read.
 * next - output table with room for at least len ints.
 * len  - number of pattern characters.
 *
 * On return next[0] is -1 (sentinel meaning "restart the pattern and move
 * on in the text"). For j >= 1, next[j] is the length of the longest proper
 * prefix of p[0..j-1] that is also a suffix of it, i.e. the pattern index
 * to resume comparing from after a mismatch at p[j].
 *
 * Exactly next[0..len-1] are written; nothing is written when len <= 0.
 */
void cal_next(char *p, int *next, int len)
{
    if (len <= 0) {
        return; /* empty pattern: there is no table entry to fill */
    }

    int i = -1; /* length of the border being extended; -1 means "none" */
    int j = 0;  /* last pattern index whose table entry is already known */

    next[0] = -1;

    /*
     * Each successful step stores next[j + 1], so the loop must stop at
     * len - 1; running it up to len would store next[len], one element
     * past the end of a len-sized table.
     */
    while (j < len - 1) {
        if (i == -1 || p[i] == p[j]) {
            /* p[0..i] matches the text ending at p[j]: border grows by one */
            i++;
            j++;
            next[j] = i;
        } else {
            /* fall back to the next shorter border and retry */
            i = next[i];
        }
    }
}
//e.g1: p[] a b c a b c d e
// next[j] -1 0 0 0 1 2 3 0
//e.g2: p[] a b a a b c a c
// next[j] -1 0 0 1 1 2 0 1
3. KMP算法
/*
 * Search the NUL-terminated text s for the pattern p[0..len-1].
 *
 * Relies on the file-level table `next`, which must already hold the
 * failure table computed for p. The text index only ever moves forward;
 * on a mismatch only the pattern index is rewound through next[].
 *
 * Prints the outcome and returns the start index of the match in s,
 * or -1 when the text is exhausted before the whole pattern matched.
 */
int KMP(char* s, char *p, int len)
{
    int spos = 0; /* position in the text s */
    int ppos = 0; /* position in the pattern p; -1 means "restart" */

    for (;;) {
        if (s[spos] == '\0' || ppos >= len) {
            break; /* ran out of text, or the whole pattern matched */
        }
        if (ppos != -1 && p[ppos] != s[spos]) {
            /* mismatch: rewind the pattern only, the text stays put */
            ppos = next[ppos];
            continue;
        }
        /* characters agree, or the pattern restarts: advance both */
        spos++;
        ppos++;
    }

    if (ppos != len) {
        printf("no match!\n");
        return -1;
    }

    printf("\ni = %d, j = %d, match point = %d\n", spos, ppos, spos - ppos);
    return spos - ppos;
}
4. Test program (C)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Capacity, in entries, of the global failure table declared below. */
#define N 100
/*
 * Failure table shared by the test program: main() passes it to cal_next()
 * to be filled, and KMP() reads it whenever a comparison mismatches.
 * Nothing visible in this file checks the pattern length against N.
 */
int next[N];
/*
 * Build the KMP failure table for the pattern p[0..len-1] and print it.
 *
 * p    - NUL-terminated pattern (it is also printed with %s).
 * next - output table with room for at least len ints.
 * len  - number of pattern characters.
 *
 * next[0] is set to -1 (sentinel meaning "restart the pattern"). For
 * j >= 1, next[j] is the length of the longest proper prefix of p[0..j-1]
 * that is also a suffix of it. Exactly next[0..len-1] are written; the
 * table is left untouched when len <= 0.
 *
 * After building the table, the pattern is printed on one line followed by
 * the len table entries, with no separator between them.
 */
void cal_next(char *p, int *next, int len)
{
    if (len > 0) {
        int i = -1; /* length of the border being extended; -1 = none */
        int j = 0;  /* last pattern index whose entry is already known */

        next[0] = -1;

        /*
         * Each successful step stores next[j + 1], so stop at len - 1;
         * looping up to len would store next[len], past the table's end.
         */
        while (j < len - 1) {
            if (i == -1 || p[i] == p[j]) {
                i++;
                j++;
                next[j] = i;
            } else {
                /* fall back to the next shorter border and retry */
                i = next[i];
            }
        }
    }

    /* Dump the pattern and exactly the entries that were computed. */
    printf(" %s\n", p);
    for (int k = 0; k < len; k++) {
        printf("%d", next[k]);
    }
}
/*
 * Search the NUL-terminated text s for the pattern p[0..len-1].
 *
 * Reads the global table `next`, which must already contain the failure
 * table for p. The text index never moves backwards; a mismatch only
 * rewinds the pattern index through next[].
 *
 * Prints the outcome and returns the start index of the match in s,
 * or -1 when the text ends before the whole pattern has matched.
 */
int KMP(char* s, char *p, int len)
{
    int text_idx = 0; /* cursor into s */
    int pat_idx = 0;  /* cursor into p; -1 means "restart the pattern" */

    while (s[text_idx] != '\0' && pat_idx < len) {
        int advance = (pat_idx == -1) || (p[pat_idx] == s[text_idx]);

        if (!advance) {
            /* mismatch: resume from the fallback position in the pattern */
            pat_idx = next[pat_idx];
            continue;
        }
        text_idx++;
        pat_idx++;
    }

    if (pat_idx == len) {
        int start = text_idx - pat_idx; /* where the match begins in s */

        printf("\ni = %d, j = %d, match point = %d\n", text_idx, pat_idx, start);
        return start;
    }

    printf("\n miss match!\n");
    return -1;
}
int main()
{
char *p = "abcabcde";//par_string
char *s = "abcdabceabcabcdefabc";//des_string
cal_next(p, next, strlen(p));
KMP(s, p, strlen(p));
return 0;
}
reference click here