字符串KMP匹配
KMP算法是什么
next数组:当模式串中某个字符与主串发生不匹配时,该字符对应的next值指出模式串中应改由哪个索引处的字符移动到与主串中这个不匹配的字符对齐,然后从该位置继续比较。
next数组的计算方法:公共前后缀——即某个字符之前的子串中,前缀和后缀相同的部分。若下标从1开始,next = 最长公共前后缀的长度 + 1;下文代码的下标从0开始并令next[0] = -1,此时next[i]就等于前i个字符的最长公共前后缀的长度。
较于暴力匹配有哪些改进
KMP是一种高效的模式匹配算法,它牺牲了一定的空间去保存next数组,以此提高匹配效率。与暴力匹配相比,KMP算法能更加智能地移动模式串:失配时主串的索引不回退,模式串根据next数组直接移动到下一个可能匹配的位置。
实现
初始化串
#include <stdio.h>
#include <stdlib.h>
/*
 * A character string that carries its own length.
 *   data - the characters (NULL when the string is empty)
 *   len  - number of characters in data
 */
typedef struct String {
    char* data;
    int len;
} String;
/*
 * Allocates a new, empty String (data == NULL, len == 0).
 *
 * Returns the new String, or NULL if the allocation fails.
 * The caller owns the result and releases it with free().
 */
String* initString() {
    String* s = malloc(sizeof *s);
    if (s == NULL) {
        return NULL;
    }
    s->data = NULL;
    s->len = 0;
    return s;
}
/*
 * Replaces the contents of s with a private copy of the C string data.
 *
 *   s    - destination string; any buffer it already holds is freed
 *   data - NUL-terminated source; NULL is treated like the empty string
 *
 * On return s->len is the number of characters copied and s->data is a
 * NUL-terminated buffer owned by s, or NULL when the source is empty.
 * If the copy cannot be allocated, s is left empty.
 */
void stringAssign(String* s, char* data) {
    if (s == NULL) {
        return;
    }

    /* Drop the old contents first so every early return leaves s empty. */
    free(s->data);
    s->data = NULL;
    s->len = 0;

    if (data == NULL) {
        return;
    }

    int len = 0;
    while (data[len] != '\0') {
        len++;
    }
    if (len == 0) {
        return;
    }

    /* One extra byte for the terminator so data is usable as a C string. */
    char* copy = malloc((size_t)len + 1);
    if (copy == NULL) {
        return;
    }
    for (int i = 0; i < len; i++) {
        copy[i] = data[i];
    }
    copy[len] = '\0';

    s->data = copy;
    s->len = len;
}
/*
 * Prints the characters of s on one line, separated by "-> ",
 * followed by a newline. An empty string prints only the newline.
 */
void printString(String* s) {
    for (int i = 0; i < s->len; i++) {
        /* Literal format strings let the compiler check the arguments. */
        if (i > 0) {
            printf("-> ");
        }
        printf("%c ", s->data[i]);
    }
    printf("\n");
}
int main(int argc, char* argv[])
{
String* master = initString();
String* sub = initString();
StringAssign(master, argv[1]);
StringAssign(sub, argv[2]);
int* next = getNext(master);
printNext(next, master -> len);
return 0;
}
求next数组
后一个next值可以根据前一个next值推断
/*
 * Builds the KMP next table for s.
 *
 * next[0] is -1. For i > 0, next[i] is the length of the longest proper
 * prefix of s->data[0..i-1] that is also a suffix of it, i.e. the index
 * in s to resume from after a mismatch at position i.
 *
 * Returns a malloc'd array of s->len ints that the caller must free,
 * or NULL if s is NULL/empty or the allocation fails.
 */
int* getNext(String* s) {
    if (s == NULL || s->len <= 0) {
        return NULL;
    }
    int* next = malloc(sizeof *next * (size_t)s->len);
    if (next == NULL) {
        return NULL;
    }

    int i = 0;   /* index into the string */
    int j = -1;  /* candidate prefix length, i.e. the next value being built */
    next[0] = -1;
    while (i < s->len - 1) {
        if (j == -1 || s->data[i] == s->data[j]) {
            /* The prefix extends by one character (or restarts from 0). */
            i++;
            j++;
            next[i] = j;
        } else {
            /* Fall back to the next shorter prefix; it must be indexed by j,
               since next[i] equals the current j and would never progress. */
            j = next[j];
        }
    }
    return next;
}
/*
 * Prints the first len entries of next on one line, separated by "-> ",
 * followed by a newline.
 *
 *   next - table to print; not read when len is 0
 *   len  - number of entries (an element count, so a plain int)
 */
void printNext(int* next, int len) {
    for (int i = 0; i < len; i++) {
        if (i > 0) {
            printf("-> ");
        }
        printf("%d ", next[i]);
    }
    printf("\n");
}
KMP匹配
/*
 * Reports whether sub occurs in master using KMP matching.
 *
 * Prints "KMP match succes." when sub is found (an empty sub always
 * matches) and "KMP match fail." otherwise. The next table for sub is
 * built with getNext and freed before returning.
 */
void kmpMatch(String* master, String* sub) {
    int* next = NULL;
    if (sub->len > 0) {
        next = getNext(sub);
        if (next == NULL) {
            fprintf(stderr, "KMP match aborted: could not build next table.\n");
            return;
        }
    }

    int i = 0;  /* index into master; never moves backwards */
    int j = 0;  /* index into sub */
    while (i < master->len && j < sub->len) {
        /* j == -1 means even sub[0] failed at master[i]: advance both,
           which moves i forward and resets j to 0. */
        if (j == -1 || master->data[i] == sub->data[j]) {
            i++;
            j++;
        } else {
            /* Mismatch: realign sub using its own table, indexed by j. */
            j = next[j];
        }
    }
    free(next);

    if (j == sub->len) {
        printf("KMP match succes.\n");
    } else {
        printf("KMP match fail.\n");
    }
}