通过一个prefix数组,用来保存最长公共前后缀的长度,这样在每次回溯时,可以减少回溯的长度。
text:用来匹配的字符数组
pattern:要查找的字符数组
prefix:匹配值(默认prefix[0]=0)
pattern:A | B | A | B | C | A | B | A | A |
text:A | B | A | B | A | B | A | B | C | A | B | A | A | B |
对于pattern来说:
AB的最大公共前后缀 0;
ABA的最大公共前后缀 1;
ABAB的最大公共前后缀 2;
ABABC的最大公共前后缀 0;
ABABCA的最大公共前后缀 1;
ABABCAB的最大公共前后缀 2;
ABABCABA的最大公共前后缀 3;
ABABCABAA的最大公共前后缀 1;
A | B | A | B | C | A | B | A | A |
0 | 0 | 1 | 2 | 0 | 1 | 2 | 3 | 1 |
0 | 0 | 1 | 2 | 0 | 1 | 2 | 3 | 1 |
则编写prefix的代码:
/*
 * Build the prefix table for a pattern.
 *
 * On return, prefix[i] holds the length of the longest proper prefix of
 * pattern[0..i] that is also a suffix of pattern[0..i].
 *
 * pattern: pattern characters; only indices 0..n-1 are read.
 * prefix:  output array; indices 0..n-1 are written.
 * n:       number of pattern characters. When n <= 0 nothing is written.
 */
void prefix_table(char pattern[], int prefix[], int n)
{
    // Nothing to fill for an empty pattern; writing prefix[0] here would
    // run past the end of a zero-length buffer.
    if (n <= 0)
    {
        return;
    }

    prefix[0] = 0;
    int len = 0; // length of the longest common prefix/suffix found so far
    int i = 1;
    while (i < n)
    {
        if (pattern[i] == pattern[len])
        {
            // The current border can be extended by one character.
            len++;
            prefix[i] = len;
            i++;
        }
        else if (len > 0)
        {
            // Fall back to the next shorter border and retry the same i.
            len = prefix[len - 1];
        }
        else
        {
            // No border ends at position i.
            prefix[i] = 0;
            i++;
        }
    }
}
为了后面方便写kmp算法,将prefix整体右移一位,前面补-1:
/*
 * Shift the prefix table one slot to the right, in place.
 *
 * Each prefix[i] (i >= 1) receives the old prefix[i - 1]; the old last
 * entry is discarded and prefix[0] becomes -1.
 *
 * prefix: table of n ints, modified in place.
 * n:      number of entries. When n <= 0 nothing is written.
 */
void move_prefix_table(int prefix[], int n)
{
    // An empty table has no prefix[0] to overwrite.
    if (n <= 0)
    {
        return;
    }

    // Walk from the back so each value is copied before it is overwritten.
    for (int i = n - 1; i > 0; i--)
    {
        prefix[i] = prefix[i - 1];
    }
    prefix[0] = -1;
}
kmp算法:
/*
 * Search text for every occurrence of pattern using the KMP algorithm and
 * print the results to cout.
 *
 * Output, in order:
 *   - "pattern_count=<length of pattern>"
 *   - "text_count=<length of text>"
 *   - the right-shifted prefix table on one line (entries separated by spaces)
 *   - one "Found pattern at <index>" line per occurrence, where <index> is
 *     the position in text at which the occurrence starts
 *
 * text:    NUL-terminated string to search in.
 * pattern: NUL-terminated string to search for. An empty pattern prints the
 *          counts and an empty table line, and reports no matches.
 */
void kmp_search(char text[], char pattern[])
{
    int pattern_count = 0;
    while (pattern[pattern_count] != '\0')
    {
        pattern_count++;
    }
    cout <<"pattern_count="<< pattern_count << endl;

    int text_count = 0;
    while (text[text_count] != '\0')
    {
        text_count++;
    }
    cout << "text_count=" << text_count << endl;

    // Nothing to search for; also avoids building a table in a
    // zero-length buffer.
    if (pattern_count == 0)
    {
        cout << endl;
        return;
    }

    // One int per pattern character (new[] takes an element count, not bytes).
    int *prefix = new int[pattern_count];
    prefix_table(pattern, prefix, pattern_count);
    move_prefix_table(prefix, pattern_count);
    for (int k = 0; k < pattern_count; k++)
    {
        cout << prefix[k] << " ";
    }
    cout << endl;

    int i = 0; // index into text
    int j = 0; // index into pattern
    while (i < text_count)
    {
        if (j == pattern_count - 1 && text[i] == pattern[j])
        {
            cout << "Found pattern at " << i - j << endl;
            // Treat the full match like a mismatch at j to look for the
            // next (possibly overlapping) occurrence.
            j = prefix[j];
            if (j == -1)
            {
                // Only reachable for a one-character pattern: there is no
                // shorter border, so move on to the next text character
                // instead of indexing pattern[-1].
                i++;
                j = 0;
                continue;
            }
        }

        if (text[i] == pattern[j])
        {
            i++;
            j++;
        }
        else
        {
            j = prefix[j];
            if (j == -1)
            {
                // Mismatch at the first pattern character: advance in text.
                i++;
                j++;
            }
        }
    }

    delete[] prefix;
}
完整代码:
/*
 * Build the prefix table for a pattern.
 *
 * On return, prefix[i] holds the length of the longest proper prefix of
 * pattern[0..i] that is also a suffix of pattern[0..i].
 *
 * pattern: pattern characters; only indices 0..n-1 are read.
 * prefix:  output array; indices 0..n-1 are written.
 * n:       number of pattern characters. When n <= 0 nothing is written.
 */
void prefix_table(char pattern[], int prefix[], int n)
{
    // Nothing to fill for an empty pattern; writing prefix[0] here would
    // run past the end of a zero-length buffer.
    if (n <= 0)
    {
        return;
    }

    prefix[0] = 0;
    int len = 0; // length of the longest common prefix/suffix found so far
    int i = 1;
    while (i < n)
    {
        if (pattern[i] == pattern[len])
        {
            // The current border can be extended by one character.
            len++;
            prefix[i] = len;
            i++;
        }
        else if (len > 0)
        {
            // Fall back to the next shorter border and retry the same i.
            len = prefix[len - 1];
        }
        else
        {
            // No border ends at position i.
            prefix[i] = 0;
            i++;
        }
    }
}
/*
 * Shift the prefix table one slot to the right, in place.
 *
 * Each prefix[i] (i >= 1) receives the old prefix[i - 1]; the old last
 * entry is discarded and prefix[0] becomes -1.
 *
 * prefix: table of n ints, modified in place.
 * n:      number of entries. When n <= 0 nothing is written.
 */
void move_prefix_table(int prefix[], int n)
{
    // An empty table has no prefix[0] to overwrite.
    if (n <= 0)
    {
        return;
    }

    // Walk from the back so each value is copied before it is overwritten.
    for (int i = n - 1; i > 0; i--)
    {
        prefix[i] = prefix[i - 1];
    }
    prefix[0] = -1;
}
/*
 * Search text for every occurrence of pattern using the KMP algorithm and
 * print the results to cout.
 *
 * Output, in order:
 *   - "pattern_count=<length of pattern>"
 *   - "text_count=<length of text>"
 *   - the right-shifted prefix table on one line (entries separated by spaces)
 *   - one "Found pattern at <index>" line per occurrence, where <index> is
 *     the position in text at which the occurrence starts
 *
 * text:    NUL-terminated string to search in.
 * pattern: NUL-terminated string to search for. An empty pattern prints the
 *          counts and an empty table line, and reports no matches.
 */
void kmp_search(char text[], char pattern[])
{
    int pattern_count = 0;
    while (pattern[pattern_count] != '\0')
    {
        pattern_count++;
    }
    cout <<"pattern_count="<< pattern_count << endl;

    int text_count = 0;
    while (text[text_count] != '\0')
    {
        text_count++;
    }
    cout << "text_count=" << text_count << endl;

    // Nothing to search for; also avoids building a table in a
    // zero-length buffer.
    if (pattern_count == 0)
    {
        cout << endl;
        return;
    }

    // One int per pattern character (new[] takes an element count, not bytes).
    int *prefix = new int[pattern_count];
    prefix_table(pattern, prefix, pattern_count);
    move_prefix_table(prefix, pattern_count);
    for (int k = 0; k < pattern_count; k++)
    {
        cout << prefix[k] << " ";
    }
    cout << endl;

    int i = 0; // index into text
    int j = 0; // index into pattern
    while (i < text_count)
    {
        if (j == pattern_count - 1 && text[i] == pattern[j])
        {
            cout << "Found pattern at " << i - j << endl;
            // Treat the full match like a mismatch at j to look for the
            // next (possibly overlapping) occurrence.
            j = prefix[j];
            if (j == -1)
            {
                // Only reachable for a one-character pattern: there is no
                // shorter border, so move on to the next text character
                // instead of indexing pattern[-1].
                i++;
                j = 0;
                continue;
            }
        }

        if (text[i] == pattern[j])
        {
            i++;
            j++;
        }
        else
        {
            j = prefix[j];
            if (j == -1)
            {
                // Mismatch at the first pattern character: advance in text.
                i++;
                j++;
            }
        }
    }

    delete[] prefix;
}
/*
 * Demo driver: runs kmp_search on a fixed sample text and pattern, then
 * issues the "pause" shell command before exiting with status 0.
 */
int main()
{
    char haystack[] = "ABABABABCABAAB"; // string to search in
    char needle[] = "ABABCABAA";        // string to search for

    kmp_search(haystack, needle);

    // NOTE(review): "pause" is presumably the Windows console command that
    // waits for a key press; on other platforms this call likely fails.
    system("pause");

    return 0;
}