教材只讲了字符串形式的KMP算法,其实二进制形式的KMP算法更加通用:二进制KMP在搜索二进制子串时速度非常快,字符串KMP只不过是二进制KMP的一个特例罢了。
以下是源码:
#include "StdAfx.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>
/*
 * Number of elements in the array `a`.
 * Only meaningful for true array objects: for a pointer (including an array
 * function parameter) sizeof(a) is the pointer size and the result is wrong.
 * The argument is parenthesized so any array-valued expression is indexed as
 * a whole.
 */
#define ARRAY_NUM(a) (sizeof(a) / sizeof((a)[0]))
/* Raw octet type used for both binary buffers and reinterpreted text. */
typedef unsigned char byte;
/*
 * Build the KMP fallback table for the pattern sub[0..subSize-1].
 * Conceptually this is the pattern being KMP-matched against itself.
 *
 * sub      pattern bytes
 * subSize  number of bytes in sub
 * next     output array with room for at least subSize ints
 *
 * On return next[0] is -1 and, for i > 0, next[i] is the pattern index to
 * resume from when sub[i] mismatches (-1 meaning "skip this text byte and
 * restart the pattern"). The table is the optimised variant: when the byte
 * at the fallback position equals sub[i], the fallback's own entry is stored
 * instead, because retrying the same byte value could not succeed.
 *
 * Side effect: prints the table to stdout on one line.
 * Does nothing (and prints nothing) when sub or next is NULL or subSize < 1,
 * so next[] is never written for an empty pattern.
 */
void getnext_bin(byte sub[], int subSize, int next[])
{
    int i = 0;   /* highest pattern index whose entry is already filled */
    int j = -1;  /* candidate fallback index; -1 = no border to extend */

    if (NULL == sub || NULL == next || subSize < 1)
    {
        return;
    }

    printf("sub bin array : ");
    next[0] = -1;
    printf("%d", next[0]);
    while (i + 1 < subSize)
    {
        if (j == -1 || sub[i] == sub[j])
        {
            ++i;
            ++j;
            /* Equal bytes at i and j: falling back to j would fail on the
             * same comparison, so inherit j's fallback directly. */
            next[i] = (sub[i] != sub[j]) ? j : next[j];
            printf(", %d", next[i]);
        }
        else
        {
            j = next[j];
        }
    }
    printf("\n");
}
/*
 * Find the first occurrence of sub[0..subSize-1] inside main[0..mainSize-1]
 * using KMP.
 *
 * main, mainSize  buffer to search and its length in bytes
 * sub, subSize    pattern and its length in bytes
 * next            fallback table for this pattern, as filled by getnext_bin()
 *
 * Returns the index in main of the first byte of the match, or -1 when there
 * is no match or the arguments are unusable (NULL pointer, subSize < 1, or a
 * pattern longer than the buffer).
 */
int kmp_bin(byte main[], int mainSize, byte sub[], int subSize, int next[])
{
    int i = 0;  /* position in main */
    int j = 0;  /* position in sub; -1 after falling back past sub[0] */

    /* An empty pattern would make the loop index sub[] and next[] past their
     * end; a pattern longer than the buffer can never match. */
    if (NULL == main || NULL == sub || NULL == next ||
        subSize < 1 || mainSize < subSize)
    {
        return -1;
    }

    while (i < mainSize)
    {
        if (j == -1 || main[i] == sub[j])
        {
            /* j == -1: consume this byte and restart at sub[0]. */
            ++i;
            ++j;
            if (j == subSize)
            {
                return i - j;
            }
        }
        else
        {
            j = next[j];
        }
    }
    return -1;
}
/*
 * Demo driver: runs the byte-oriented KMP search once on a raw byte sequence
 * and once on text reinterpreted as bytes, printing each result.
 */
int main(void)
{
    /* KMP over a raw byte sequence. The pattern {3, 4, 5} does not occur in
     * m (there is no 4), so this prints -1. */
    byte m[] = {0, 1, 2, 3, 6, 5, 6};
    byte s[] = {3, 4, 5};
    int next[ARRAY_NUM(s)] = {0};
    getnext_bin(s, (int)ARRAY_NUM(s), next);
    printf("kmp_bin = %d\n", kmp_bin(m, (int)ARRAY_NUM(m), s, (int)ARRAY_NUM(s), next));

    /* KMP over text. Arrays rather than pointers to string literals, so the
     * data may legitimately be viewed through a non-const byte pointer. */
    char mstr[] = "11111111112345678iasuasdfas";
    char sstr[] = "8iasua";
    int mlen = (int)strlen(mstr);
    int slen = (int)strlen(sstr);
    int nextstr[ARRAY_NUM(sstr)] = {0};  /* sized from the pattern itself */
    getnext_bin((byte *)sstr, slen, nextstr);
    printf("kmp_bin2 = %d\n", kmp_bin((byte *)mstr, mlen, (byte *)sstr, slen, nextstr));
    return 0;
}
增加一个一次性获取所有匹配位置的接口:
/*
 * Result set filled in by kmp_bin_all().
 * The typedef makes the bare name `kmp_result` a valid type in C as well as
 * in C++, which is how the functions below spell it.
 */
typedef struct kmp_result
{
    int num;      /* number of valid entries in offset */
    int *offset;  /* heap array of match offsets; release with kmp_free_result() */
} kmp_result;
/*
 * Collect the offsets of every non-overlapping occurrence of sub in main.
 *
 * main, mainSize  buffer to search and its length in bytes (must be >= 1)
 * sub, subSize    pattern and its length in bytes (must be >= 1)
 * pResult         receives the match count and a heap array of offsets
 *
 * Returns the number of matches (0 when there are none), or -1 on invalid
 * arguments or allocation failure. A result is only meaningful when the
 * return value is greater than 0.
 *
 * After each match the search resumes at the byte following that match, so
 * overlapping occurrences are not reported.
 *
 * Ownership: once argument checks and the table allocation have succeeded,
 * pResult->num and pResult->offset are overwritten (any previous offset
 * pointer is NOT freed here). The caller releases the array with
 * kmp_free_result(). On the early -1 returns *pResult is left untouched.
 *
 * Side effect: prints diagnostics and, when matches exist, the offsets to
 * stdout.
 */
int kmp_bin_all(void* main, int mainSize, void* sub, int subSize, kmp_result *pResult)
{
    enum { KMP_RESULT_GROW = 32 };  /* offsets added per reallocation */

    /* Plain unsigned char keeps this function free of the Windows-only
     * LPBYTE alias; the pointee type is the same. */
    unsigned char *haystack = (unsigned char *)main;
    unsigned char *needle = (unsigned char *)sub;
    unsigned char *cursor;
    int remaining;
    int capacity = 0;   /* number of ints pResult->offset can hold */
    int failed = 0;
    int *next;
    int idx;

    if (NULL == main || mainSize < 1)
    {
        printf("main bin size error\r\n");
        return -1;
    }
    if (NULL == sub || subSize < 1)
    {
        printf("sub bin size error\r\n");
        return -1;
    }
    if (NULL == pResult)
    {
        printf("result point error\r\n");
        return -1;
    }

    /* calloc checks the count * size multiplication for overflow. */
    next = (int *)calloc((size_t)subSize, sizeof(int));
    if (NULL == next)
    {
        printf("malloc next array error\r\n");
        return -1;
    }
    getnext_bin(needle, subSize, next);

    pResult->num = 0;
    pResult->offset = NULL;

    cursor = haystack;
    remaining = mainSize;
    while (remaining >= subSize)
    {
        int pos = kmp_bin(cursor, remaining, needle, subSize, next);
        if (pos == -1)
        {
            break;
        }

        if (pResult->num == capacity)
        {
            int *grown;

            capacity += KMP_RESULT_GROW;
            /* realloc takes a size in BYTES; keep the old pointer until the
             * call is known to have succeeded so it can still be freed. */
            grown = (int *)realloc(pResult->offset, (size_t)capacity * sizeof(int));
            if (NULL == grown)
            {
                printf("realloc result array error\r\n");
                free(pResult->offset);
                pResult->offset = NULL;
                pResult->num = 0;
                failed = 1;
                break;
            }
            pResult->offset = grown;
        }

        /* pos is relative to cursor; convert to an offset from main. */
        pResult->offset[pResult->num] = pos + (int)(cursor - haystack);
        pResult->num++;

        /* Resume right after this match. */
        cursor += pos + subSize;
        remaining -= pos + subSize;
    }

    free(next);
    if (failed)
    {
        return -1;
    }

    if (pResult->num > 0)
    {
        printf("result array: ");
        for (idx = 0; idx < pResult->num; idx++)
        {
            printf("%2d, ", pResult->offset[idx]);
        }
        printf("\r\n");
    }
    return pResult->num;
}
/*
 * Convenience wrapper around kmp_bin_all() for NUL-terminated strings: the
 * lengths are taken with strlen(), so the terminators are not searched.
 * m and s are each evaluated twice; pass expressions without side effects.
 */
#define kmp_str_all(m, s, r) kmp_bin_all((m), (int)strlen(m), (s), (int)strlen(s), (r))
/*
 * Release the offset array held by *pResult and reset it to the empty state
 * (num == 0, offset == NULL). A NULL pResult is ignored, and calling this
 * again on an already-reset result is harmless.
 * pResult->offset must be NULL or a pointer that may be passed to free().
 */
void kmp_free_result(kmp_result *pResult)
{
    if (NULL == pResult)
    {
        return;
    }

    free(pResult->offset);  /* free(NULL) is a no-op */
    pResult->offset = NULL;
    pResult->num = 0;
}