1.引入
如何在 10 GB 的文件中,比 BF(暴力匹配)算法更高效地查找关键字 "abcdefg" 呢?为此,我们引入一种字符串搜索算法:KMP 算法。
2.理解与源码
代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Build the KMP prefix table for `pattern`.
 *
 * After the call, next[i] holds the length of the longest proper prefix of
 * pattern[0..i] that is also a suffix of pattern[0..i].
 *
 * pattern: NUL-terminated pattern string.
 * next:    output array; receives one entry per pattern character.
 *          next[0] is always written, even when the pattern is empty.
 *
 * Worked example for the pattern "abcabcd":
 *   next[0] = 0
 *   pos=1, border=0: 'b' vs 'a' -> mismatch,          next[1] = 0
 *   pos=2, border=0: 'c' vs 'a' -> mismatch,          next[2] = 0
 *   pos=3, border=0: 'a' vs 'a' -> match, border=1,   next[3] = 1
 *   pos=4, border=1: 'b' vs 'b' -> match, border=2,   next[4] = 2
 *   pos=5, border=2: 'c' vs 'c' -> match, border=3,   next[5] = 3
 *   pos=6, border=3: 'd' vs 'a' -> fall back to 0,    next[6] = 0
 */
void make_next(const char *pattern, int *next) {
    int len = strlen(pattern);
    int border = 0; /* length of the prefix currently matched against the suffix */
    int pos = 1;    /* index of the suffix character being examined */

    next[0] = 0;
    while (pos < len) {
        /* On a mismatch, retry with the next shorter candidate border. */
        while (border > 0 && pattern[pos] != pattern[border]) {
            border = next[border - 1];
        }
        /* A matching character extends the border by one. */
        border += (pattern[pos] == pattern[border]);
        next[pos] = border;
        pos++;
    }
}
/**
 * Find the first occurrence of `pattern` in `text` using the
 * Knuth-Morris-Pratt algorithm.
 *
 * @param text     NUL-terminated string to search in.
 * @param pattern  NUL-terminated string to search for.
 * @param next     Caller-provided scratch array that make_next() fills with
 *                 the prefix table of `pattern`. It needs one int per pattern
 *                 character and at least one element, because make_next()
 *                 always writes next[0].
 * @return 0-based index of the first match; 0 when `pattern` is empty (the
 *         same convention as strstr()); -1 when `pattern` does not occur.
 */
int kmp(const char *text, const char *pattern, int *next) {
    int n = (int)strlen(text);
    int m = (int)strlen(pattern);

    make_next(pattern, next);

    /* An empty pattern trivially matches at the start of the text. */
    if (m == 0) {
        return 0;
    }

    int q = 0; /* number of pattern characters matched so far */
    for (int i = 0; i < n; i++) {
        /* On a mismatch, reuse the longest border instead of rescanning text. */
        while (q > 0 && pattern[q] != text[i]) {
            q = next[q - 1];
        }
        if (pattern[q] == text[i]) {
            q++;
        }
        if (q == m) {
            /* text[i] is the last matched character, so the match starts here. */
            return i - m + 1;
        }
    }

    /* Text exhausted without a full match: report failure explicitly. */
    return -1;
}
/*
 * Demo driver: searches a sample text for a sample pattern with kmp(),
 * prints the value it returns, then prints the prefix table that the
 * search left in `next`.
 */
int main() {
    int next[20] = {0};
    const char *text = "ababxbababababcdababcabddcadfdsss";
    const char *pattern = "abcabd";

    /* The printed value is the index at which the pattern was found. */
    int idx = kmp(text, pattern, next);
    printf("match pattern : %d\n", idx);

    /* Dump one prefix-table entry per pattern character. */
    int pattern_len = (int)strlen(pattern);
    for (int pos = 0; pos < pattern_len; pos++) {
        printf("%4d", next[pos]);
    }
    printf("\n");

    return 0;
}