分两个部分:
1. 子串预处理表
2. 搜索算法
子串W预处理表T[]的建立:
1. 设A为W的一个前缀,B为既是A的真前缀又是A的真后缀的子串。
2. 对W中的每个字符,设其前面的所有字符构成前缀A,则该字符在T中对应的值为最长的B的长度。
3. 若不存在这样的B,则该字符在T中对应的值为0。
4. T[0] = -1, T[1] = 0
搜索算法:
1. i 为当前正在比较的字符在W中的位置(从0开始),j为主串S中开始比较的字符的位置
2. 若比较成功,则i+1
3. 若比较失败,S中下一次开始比较的位置前移为 j + i - T[i];W中下一次开始比较的位置为T[i](若i > 0),否则保持为0。
4. 重复步骤2和3,直到W全部匹配成功,或S已比较到末尾(匹配失败)。
#include <iostream>
#include <vector>
using namespace std;
#ifdef _UNICODE
#define _tcout wcout
#else
#define _tcout cout
#endif
/**
 * Builds the KMP partial-match ("failure") table for a pattern.
 *
 * kmp_table[0] is the sentinel -1. For i >= 1, kmp_table[i] is the length of
 * the longest proper prefix of the pattern that is also a suffix of the
 * pattern's first i characters.
 *
 * @param psz_string  NUL-terminated pattern.
 * @param kmp_table   Output array; exactly _tcslen(psz_string) entries are
 *                    written (none for an empty pattern).
 */
void BuildKmpTable(TCHAR * psz_string, int kmp_table[])
{
    const int n_len = static_cast<int>(_tcslen(psz_string));

    // Only seed the entries that actually exist, so short patterns never
    // write past the end of the caller's array.
    if (n_len == 0)
    {
        return;
    }
    kmp_table[0] = -1;
    if (n_len == 1)
    {
        return;
    }
    kmp_table[1] = 0; // a single character has no proper prefix/suffix

    // Length of the prefix currently matched; it is also the index of the
    // next pattern character to compare against. Keeping a single variable
    // guarantees the stored length shrinks whenever we fall back.
    int n_prefix_len = 0;
    int i = 2;
    while (i < n_len)
    {
        if (psz_string[i - 1] == psz_string[n_prefix_len])
        {
            // The matched prefix grows by one character.
            ++n_prefix_len;
            kmp_table[i] = n_prefix_len;
            ++i;
        }
        else if (n_prefix_len > 0)
        {
            // Retry with the next shorter prefix that is also a suffix.
            n_prefix_len = kmp_table[n_prefix_len];
        }
        else
        {
            // No prefix of the pattern ends at position i - 1.
            kmp_table[i] = 0;
            ++i;
        }
    }
}
/**
 * Searches psz_main for the first occurrence of psz_sub using the
 * Knuth-Morris-Pratt algorithm.
 *
 * @param psz_main  NUL-terminated text to search in.
 * @param psz_sub   NUL-terminated pattern to search for.
 * @return 1-based position of the first match in psz_main, or -1 when the
 *         pattern does not occur (an empty pattern also yields -1).
 */
int KmpSearch(TCHAR * psz_main, TCHAR * psz_sub)
{
    const int n_sub_len = static_cast<int>(_tcslen(psz_sub));
    const int n_main_len = static_cast<int>(_tcslen(psz_main));
    if (n_sub_len == 0 || n_sub_len > n_main_len)
    {
        return -1;
    }

    // std::vector releases the table on every return path. Keep at least two
    // slots so a table builder that seeds entries 0 and 1 unconditionally
    // stays in bounds for a one-character pattern.
    std::vector<int> kmp_table(n_sub_len < 2 ? 2 : n_sub_len);
    BuildKmpTable(psz_sub, kmp_table.data());

    int n_main_index = 0; // start of the current alignment in psz_main
    int n_sub_index = 0;  // number of pattern characters matched so far
    while (n_main_index + n_sub_index < n_main_len)
    {
        if (psz_main[n_main_index + n_sub_index] == psz_sub[n_sub_index])
        {
            ++n_sub_index;
            if (n_sub_index == n_sub_len)
            {
                return n_main_index + 1; // positions are reported 1-based
            }
        }
        else
        {
            // Slide the alignment forward; kmp_table[0] == -1 makes this a
            // one-character advance when nothing had matched yet.
            n_main_index += n_sub_index - kmp_table[n_sub_index];
            if (n_sub_index > 0)
            {
                // Characters already known to match need not be re-compared.
                n_sub_index = kmp_table[n_sub_index];
            }
        }
    }
    return -1;
}
/**
 * Command-line driver: searches for argv[2] inside argv[1] and prints the
 * value returned by KmpSearch.
 *
 * @return 0 after a search was performed, 1 when arguments are missing.
 */
int _tmain( int argc, _TCHAR * argv[])
{
    // Both the text and the pattern are required; without this check
    // argv[1] / argv[2] could be read past the end of the argument list.
    if (argc < 3)
    {
        _tcout << _T("usage: <program> <text> <pattern>") << endl;
        return 1;
    }

    const int n_index = KmpSearch(argv[1], argv[2]);
    _tcout << _T("search \"") << argv[2] << _T("\" in \"") << argv[1] << _T("\"") << endl;
    _tcout << _T("it matched in ") << n_index << endl;
    return 0;
}
using namespace std;
#ifdef _UNICODE
#define _tcout wcout
#else
#define _tcout cout
#endif
// NOTE(review): an identical definition of BuildKmpTable appears earlier in
// this file; one of the two copies must be removed for the file to compile.
/**
 * Builds the KMP partial-match ("failure") table for a pattern.
 *
 * kmp_table[0] is the sentinel -1. For i >= 1, kmp_table[i] is the length of
 * the longest proper prefix of the pattern that is also a suffix of the
 * pattern's first i characters.
 *
 * @param psz_string  NUL-terminated pattern.
 * @param kmp_table   Output array; exactly _tcslen(psz_string) entries are
 *                    written (none for an empty pattern).
 */
void BuildKmpTable(TCHAR * psz_string, int kmp_table[])
{
    const int n_len = static_cast<int>(_tcslen(psz_string));

    // Only seed the entries that actually exist, so short patterns never
    // write past the end of the caller's array.
    if (n_len == 0)
    {
        return;
    }
    kmp_table[0] = -1;
    if (n_len == 1)
    {
        return;
    }
    kmp_table[1] = 0; // a single character has no proper prefix/suffix

    // Length of the prefix currently matched; it is also the index of the
    // next pattern character to compare against. Keeping a single variable
    // guarantees the stored length shrinks whenever we fall back.
    int n_prefix_len = 0;
    int i = 2;
    while (i < n_len)
    {
        if (psz_string[i - 1] == psz_string[n_prefix_len])
        {
            // The matched prefix grows by one character.
            ++n_prefix_len;
            kmp_table[i] = n_prefix_len;
            ++i;
        }
        else if (n_prefix_len > 0)
        {
            // Retry with the next shorter prefix that is also a suffix.
            n_prefix_len = kmp_table[n_prefix_len];
        }
        else
        {
            // No prefix of the pattern ends at position i - 1.
            kmp_table[i] = 0;
            ++i;
        }
    }
}
// NOTE(review): an identical definition of KmpSearch appears earlier in this
// file; one of the two copies must be removed for the file to compile.
/**
 * Searches psz_main for the first occurrence of psz_sub using the
 * Knuth-Morris-Pratt algorithm.
 *
 * @param psz_main  NUL-terminated text to search in.
 * @param psz_sub   NUL-terminated pattern to search for.
 * @return 1-based position of the first match in psz_main, or -1 when the
 *         pattern does not occur (an empty pattern also yields -1).
 */
int KmpSearch(TCHAR * psz_main, TCHAR * psz_sub)
{
    const int n_sub_len = static_cast<int>(_tcslen(psz_sub));
    const int n_main_len = static_cast<int>(_tcslen(psz_main));
    if (n_sub_len == 0 || n_sub_len > n_main_len)
    {
        return -1;
    }

    // std::vector releases the table on every return path. Keep at least two
    // slots so a table builder that seeds entries 0 and 1 unconditionally
    // stays in bounds for a one-character pattern.
    std::vector<int> kmp_table(n_sub_len < 2 ? 2 : n_sub_len);
    BuildKmpTable(psz_sub, kmp_table.data());

    int n_main_index = 0; // start of the current alignment in psz_main
    int n_sub_index = 0;  // number of pattern characters matched so far
    while (n_main_index + n_sub_index < n_main_len)
    {
        if (psz_main[n_main_index + n_sub_index] == psz_sub[n_sub_index])
        {
            ++n_sub_index;
            if (n_sub_index == n_sub_len)
            {
                return n_main_index + 1; // positions are reported 1-based
            }
        }
        else
        {
            // Slide the alignment forward; kmp_table[0] == -1 makes this a
            // one-character advance when nothing had matched yet.
            n_main_index += n_sub_index - kmp_table[n_sub_index];
            if (n_sub_index > 0)
            {
                // Characters already known to match need not be re-compared.
                n_sub_index = kmp_table[n_sub_index];
            }
        }
    }
    return -1;
}
// NOTE(review): an identical definition of _tmain appears earlier in this
// file; one of the two copies must be removed for the file to compile.
/**
 * Command-line driver: searches for argv[2] inside argv[1] and prints the
 * value returned by KmpSearch.
 *
 * @return 0 after a search was performed, 1 when arguments are missing.
 */
int _tmain( int argc, _TCHAR * argv[])
{
    // Both the text and the pattern are required; without this check
    // argv[1] / argv[2] could be read past the end of the argument list.
    if (argc < 3)
    {
        _tcout << _T("usage: <program> <text> <pattern>") << endl;
        return 1;
    }

    const int n_index = KmpSearch(argv[1], argv[2]);
    _tcout << _T("search \"") << argv[2] << _T("\" in \"") << argv[1] << _T("\"") << endl;
    _tcout << _T("it matched in ") << n_index << endl;
    return 0;
}