在我的实现中,BM 算法在 OJ 上比 KMP 算法慢一些,运行时间分别为 40ms 和 16ms。
BM算法
/**
 * Build the Boyer-Moore bad-character table.
 *
 * BC must have room for 256 ints. On return, BC[c] is the index of the last
 * occurrence of byte value c in needle, or -1 if c does not occur.
 */
void getBC(int * BC, char * needle)
{
    for (int c = 0; c < 256; c++)
        BC[c] = -1;
    // Index through unsigned char: plain char may be signed, and a byte
    // >= 0x80 would otherwise produce a negative (out-of-bounds) index.
    for (int i = 0; needle[i] != '\0'; i++)
        BC[(unsigned char)needle[i]] = i;
}

/**
 * Find the first occurrence of needle in haystack using Boyer-Moore
 * (bad-character and good-suffix rules).
 *
 * Returns a pointer to the first match inside haystack, haystack itself when
 * needle is empty, or NULL when there is no match.
 */
char *strStr(char *haystack, char *needle) {
    int nlen = (int)strlen(needle), hlen = (int)strlen(haystack);
    if (nlen == 0) return haystack;

    int BC[256];
    getBC(BC, needle);
    int* GS = new int[nlen];
    getGS(GS, needle);

    char *found = NULL;
    int start = 0;
    while (found == NULL && start + nlen <= hlen)
    {
        // Compare right-to-left within the current alignment.
        int ni = nlen - 1;
        while (ni >= 0 && haystack[start + ni] == needle[ni])
            ni--;

        if (ni < 0)
        {
            found = haystack + start;
        }
        else
        {
            // Same unsigned-char indexing as getBC so both sides agree.
            int bcShift = ni - BC[(unsigned char)haystack[start + ni]];
            int gsShift = GS[ni];
            start += (gsShift > bcShift) ? gsShift : bcShift;
        }
    }

    // Single exit point so the table is released on the match path too.
    delete [] GS;
    return found;
}
/**
 * Build the Boyer-Moore good-suffix shift table for needle.
 *
 * GS must have room for strlen(needle) ints. On return, GS[j] is the shift
 * to use when the comparison fails at needle[j]; every entry is in
 * [1, strlen(needle)]. An empty needle leaves GS untouched.
 */
void getGS(int * GS, char * needle)
{
    int nlen = (int)strlen(needle);
    // Nothing to build; also avoids writing ss[-1] on a zero-length array.
    if (nlen == 0)
        return;

    // Table of longest suffix matches:
    // ss[a] == b  ==>  needle[a-b+1 .. a] == needle[nlen-b .. nlen-1]
    int *ss = new int[nlen];
    ss[nlen - 1] = nlen;
    // (s, t] is the most recently scanned window that matches a suffix:
    // t is its right end, s is one position left of its start.
    int s = nlen - 1, t = nlen - 2;
    for (int j = nlen - 2; j >= 0; j--)
    {
        if ((j > s) && (j - s > ss[nlen - 1 - (t - j)]))
        {
            // j lies inside the known window and the mirrored entry fits
            // strictly inside it, so it can be copied without rescanning.
            ss[j] = ss[(nlen - 1 - t) + j];
        }
        else
        {
            t = j;          // the matching substring ends at the current character
            if (j < s)
                s = j;      // start scanning no further right than j
            while ((0 <= s) && (needle[s] == needle[nlen - 1 - (t - s)]))
                s--;
            ss[j] = t - s;  // length of the longest substring matching a suffix
        }
    }

    // Default: no reusable part, shift by the whole pattern.
    for (int i = 0; i < nlen; i++)
        GS[i] = nlen;

    // Case 1: a prefix needle[0..i] is also a suffix. Positions left of
    // nlen-1-i may shift by nlen-1-i; the longest such prefix (smallest
    // shift) is visited first and is never overwritten here.
    int j = 0;
    for (int i = nlen - 1; i >= 0; i--)
    {
        if (i + 1 == ss[i])
            for (; j < nlen - i - 1; j++)
                if (GS[j] == nlen)
                    GS[j] = nlen - i - 1;
    }

    // Case 2: the matched suffix reoccurs ending at i. Increasing i means
    // later writes carry smaller shifts and win.
    for (int i = 0; i < nlen - 1; i++)
        GS[nlen - ss[i] - 1] = nlen - i - 1;

    delete[] ss;
}
KMP算法
/**
 * Find the first occurrence of needle in haystack using Knuth-Morris-Pratt.
 *
 * Returns a pointer to the first match inside haystack, haystack itself when
 * needle is empty, or NULL when there is no match.
 */
char *strStr(char *haystack, char *needle) {
    if (strcmp("", needle) == 0)
        return haystack;

    int hsize = (int)strlen(haystack);
    int nsize = (int)strlen(needle);

    // failure[i] is read below as "index of the last character of the
    // border of needle[0..i]", with -1 meaning no border.
    int * failure = new int[nsize];
    getFailure(failure, needle);

    char *match = NULL;
    int hi = 0;
    int ni = 0;
    while (hi < hsize)
    {
        if (haystack[hi] == needle[ni])
        {
            if (ni == nsize - 1)
            {
                match = &haystack[hi - ni];
                break;
            }
            hi++;
            ni++;
        }
        else if (ni == 0)
        {
            hi++;
        }
        else
        {
            // Keep hi; resume just past the border of the matched prefix.
            ni = failure[ni - 1] + 1;
        }
    }

    // Single exit point so the table is released on every path.
    delete [] failure;
    return match;
}
/**
 * Build the KMP failure table for needle.
 *
 * failure must have room for strlen(needle) ints. On return, failure[i] is
 * the index of the last character of the longest proper prefix of
 * needle[0..i] that is also a suffix of needle[0..i], or -1 if there is
 * none (i.e. border length minus one). An empty needle leaves failure
 * untouched.
 */
void getFailure(int * failure, char* needle)
{
    int size = (int)strlen(needle);
    if (size == 0)
        return;

    failure[0] = -1;
    // border: index of the last character of the border of needle[0..i-1].
    int border = -1;
    for (int i = 1; i < size; i++)
    {
        // Fall back through successively shorter borders until one can be
        // extended by needle[i] or none is left.
        while (border >= 0 && needle[border + 1] != needle[i])
            border = failure[border];
        if (needle[border + 1] == needle[i])
            border++;
        failure[i] = border;
    }
}