问题描述:
使用KMP算法快速查找字符串(可支持中文)
PS:
讲解KMP算法的方法多种多样,但我最喜欢的仍然是数据结构教材中的那种讲法,我认为它最为经典
参考代码:
#include <cstdio>
#include <cstring>
#include <iostream>
#include <locale>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
// Finds the first occurrence of strPattern in strText at or after nStartPos,
// using a precomputed KMP failure table.
//
// strText    - text to search in.
// strPattern - pattern to look for.
// m_pNext    - failure table for strPattern; entry 0 is expected to be -1 and
//              entries 0 .. strPattern.size()-1 are read. It is never touched
//              when the pattern is empty.
// nStartPos  - index in strText at which the search starts.
//
// Returns the index of the first match, or -1 when there is none. A start
// position outside [0, strText.size()] yields -1 instead of reading out of
// bounds. An empty pattern matches at nStartPos.
int KMPAlgorithm(const std::wstring& strText, const std::wstring& strPattern, int *m_pNext, int nStartPos)
{
    // Cache the lengths as int once so every comparison below is signed.
    const int nTextLen = static_cast<int>(strText.size());
    const int nPatternLen = static_cast<int>(strPattern.size());

    // Reject start positions that do not lie inside the text.
    if (nStartPos < 0 || nStartPos > nTextLen)
        return -1;

    int i = nStartPos; // cursor in the text
    int j = 0;         // cursor in the pattern
    while (i < nTextLen && j < nPatternLen)
    {
        // j == -1 means even the first pattern character failed at text
        // position i, so advance the text and restart the pattern at 0.
        if (j == -1 || strText[i] == strPattern[j])
        {
            ++i;
            ++j;
        }
        else
        {
            // Mismatch: fall back in the pattern without moving the text cursor.
            j = m_pNext[j];
        }
    }

    // The whole pattern was consumed, so the match starts j characters before i.
    return (j == nPatternLen) ? (i - j) : -1;
}
// Fills m_pNext with the KMP failure table for strPattern.
//
// strPattern - pattern to analyse.
// m_pNext    - output buffer. Indices 0 .. strPattern.size() (inclusive) are
//              written, so it must hold at least strPattern.size() + 1 ints.
//
// m_pNext[0] is always -1. For every later position the entry is the fallback
// index to resume from after a mismatch there; when the character at that
// fallback index equals the character at the position itself, the fallback's
// own entry is stored instead.
void GetNextVal(const std::wstring& strPattern, int *m_pNext)
{
    const int patternLen = static_cast<int>(strPattern.size());
    int pos = 0;     // position whose successor entry is being computed
    int border = -1; // current fallback candidate, -1 when there is none

    m_pNext[0] = -1;
    while (pos < patternLen)
    {
        // The candidate cannot be extended: shrink it and retry this position.
        if (border != -1 && strPattern[pos] != strPattern[border])
        {
            border = m_pNext[border];
            continue;
        }

        ++pos;
        ++border;
        // pos may now equal size(); reading a const string at size() yields
        // the null character, so the comparison is still well defined.
        m_pNext[pos] = (strPattern[pos] == strPattern[border]) ? m_pNext[border] : border;
    }
}
// Collects the start index of every occurrence of strPattern in strText,
// overlapping occurrences included, in increasing order.
//
// strText    - text to search in.
// strPattern - pattern to look for.
// vecans     - output; always cleared first, then filled with match positions.
//              It is left empty when either string is empty.
void GetAllSubPos(const std::wstring& strText, const std::wstring& strPattern, std::vector<int>& vecans)
{
    // Clear before the early return so results from an earlier call never
    // survive a call with empty input.
    vecans.clear();
    if (strText.empty() || strPattern.empty())
        return;

    // GetNextVal writes indices 0 .. size(), hence the extra slot. The vector
    // owns the table, so it is released on every exit path.
    std::vector<int> next(strPattern.size() + 1, 0);
    GetNextVal(strPattern, &next[0]);

    // Resume one character past each hit so overlapping matches are found.
    for (int nPos = KMPAlgorithm(strText, strPattern, &next[0], 0);
         nPos != -1;
         nPos = KMPAlgorithm(strText, strPattern, &next[0], nPos + 1))
    {
        vecans.push_back(nPos);
    }
}
// Interactive driver: reads a text line and a pattern line from standard
// input, finds every occurrence of the pattern, and prints the suffix of the
// text starting at each match position (one per line).
int main(int argc, char*argv[])
{
    (void)argc;
    (void)argv;

    // Wide streams need a real locale to decode and encode non-ASCII text such
    // as Chinese; the default "C" locale cannot. Adopt the user's locale.
    try
    {
        std::locale::global(std::locale(""));
        std::wcin.imbue(std::locale());
        std::wcout.imbue(std::locale());
    }
    catch (const std::runtime_error&)
    {
        // The environment names a locale this system does not provide;
        // carry on with the default locale (ASCII input still works).
    }

    std::vector<int> ansvec;
    std::wstring strText;
    std::wstring strPattern;

    // All output goes through wcout: mixing narrow printf with wide output on
    // the same stdout is not reliable once the stream has an orientation.
    std::wcout << L"输入主串: ";
    std::getline(std::wcin, strText);
    std::wcout << L"输入模式串: ";
    std::getline(std::wcin, strPattern);

    GetAllSubPos(strText, strPattern, ansvec);

    // Header for the result list, or a "nothing found" notice.
    if (!ansvec.empty())
        std::wcout << L"输出所有查找结果:\n";
    else
        std::wcout << L"没有查到\n";

    for (size_t idx = 0; idx < ansvec.size(); ++idx)
    {
        // Show the text from the match position to the end.
        std::wcout << strText.substr(ansvec[idx]) << std::endl;
    }
    return 0;
}
运行结果: