题目:给定一个文本,找出文本中包含某个指定字符串的句子,并按照句子中该字符串出现次数从多到少的顺序输出这些句子(匹配时不区分大小写)
思路:利用KMP算法进行字符串的匹配工作,并求出每个句子中包含的字符串的个数,之后用一个以匹配个数为键、按降序排列的multimap存放求出来的结果(个数相同的句子保持原文中的先后顺序),最后依次输出
例子:
文本: I have my own life.
We should accumulate knowledge in our day-to-day life.
Today is difficult.
Tomorrow is more difficult.
But the day after tomorrow is beautiful!
Today is Sunday, I want that every day is Sunday.
查询字符串为:Day(我们假定每个句子都独占一行)
当我们处理的时候就将一行看成是一个字符串,将文本看成是字符串数组进行查询
代码:
#include <cassert>
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>
using namespace std;
/// Counts the case-insensitive occurrences of strPattern inside strTarget
/// using the KMP algorithm.
///
/// Overlapping occurrences are counted separately (e.g. "aa" occurs twice in
/// "aaa"). Only letters are case-folded; every other byte must match exactly.
///
/// @param strTarget   NUL-terminated text to search.
/// @param strPattern  NUL-terminated pattern to look for.
/// @param next        Failure table of strPattern holding strlen(strPattern)+1
///                    entries with next[0] == -1 (the layout that
///                    GetNext_Count fills in).
/// @return Number of occurrences; 0 when the pattern is empty.
int KMPCount(const char* strTarget, const char* strPattern, int next[])
{
    assert(strTarget != nullptr && strPattern != nullptr && next != nullptr);

    // Fold case through tolower instead of testing "difference == 32": the
    // latter also equates unrelated characters such as '$' and 'D'.
    // The unsigned char cast keeps tolower defined for bytes >= 0x80.
    const auto sameLetter = [](char lhs, char rhs) {
        return std::tolower(static_cast<unsigned char>(lhs)) ==
               std::tolower(static_cast<unsigned char>(rhs));
    };

    const int nTargetLen = static_cast<int>(std::strlen(strTarget));
    const int nPatternLen = static_cast<int>(std::strlen(strPattern));
    if (nPatternLen == 0)
    {
        // An empty pattern has no meaningful occurrence count.
        return 0;
    }

    int nCurT = 0;
    int nCurP = 0;
    int nCount = 0;
    while (nCurT < nTargetLen)
    {
        if (nCurP == -1 || sameLetter(strTarget[nCurT], strPattern[nCurP]))
        {
            ++nCurT;
            ++nCurP;
            // Check for a complete match right after advancing, so that a
            // match ending on the last character of the text is still counted
            // before the loop condition terminates the scan.
            if (nCurP == nPatternLen)
            {
                ++nCount;
                // Fall back to the longest border to allow overlapping matches.
                nCurP = next[nCurP];
            }
        }
        else
        {
            // Mismatch: reuse the already matched prefix via the failure table.
            nCurP = next[nCurP];
        }
    }
    return nCount;
}
/// Builds the KMP failure table of strPattern for case-insensitive matching.
///
/// After the call, next[0] == -1 and, for 1 <= i <= strlen(strPattern),
/// next[i] is the length of the longest proper prefix of the first i pattern
/// characters that is also a suffix of them (letters compared ignoring case).
/// The extra entry next[strlen(strPattern)] lets a matcher continue after a
/// full match and thereby count overlapping occurrences.
///
/// @param strPattern  NUL-terminated pattern.
/// @param next        Output array; must have room for strlen(strPattern)+1
///                    ints.
void GetNext_Count(const char* strPattern, int next[])
{
    assert(strPattern != nullptr && next != nullptr);

    // Fold case through tolower instead of testing "difference == 32": the
    // latter also equates unrelated characters such as '!' and 'A', which
    // yields border lengths that are too large.
    // The unsigned char cast keeps tolower defined for bytes >= 0x80.
    const auto sameLetter = [](char lhs, char rhs) {
        return std::tolower(static_cast<unsigned char>(lhs)) ==
               std::tolower(static_cast<unsigned char>(rhs));
    };

    const int nPatternLen = static_cast<int>(std::strlen(strPattern));
    next[0] = -1;

    int nCurP = 1;      // index of the table entry being computed
    int nLastPos = -1;  // border length carried over from the previous entry
    while (nCurP <= nPatternLen)
    {
        if (nLastPos == -1 || sameLetter(strPattern[nCurP - 1], strPattern[nLastPos]))
        {
            // The current border can be extended by one character.
            ++nLastPos;
            next[nCurP] = nLastPos;
            ++nCurP;
        }
        else
        {
            // Try the next shorter border of the current prefix.
            nLastPos = next[nLastPos];
        }
    }
}
int main()
{
//按行读入字符串,求每行的匹配个数
ifstream fin(".\\Data\\test.txt");
if (!fin)
{
cout<<"Not open the document!"<<endl;
return -1;
}
string strSentence;
//将文本用字符串数组存起来
char** strSentenceArray = new char*[100];
int nSentenceCount = 0;
while (getline(fin,strSentence))
{
char* strWords = const_cast<char*>(strSentence.c_str());
int strWordsLen = strlen(strWords);
strSentenceArray[nSentenceCount] = new char[strWordsLen+1];
strcpy(strSentenceArray[nSentenceCount],strWords);
nSentenceCount++;
}
fin.close();
//求匹配串的next数组
const char* strPattern = "Day";
int* next_count = new int[4];
GetNext_Count(strPattern,next_count);
multimap<int,char*,greater<int>> Result;
<span style="white-space:pre"> </span>for (int i = 0; i < nSentenceCount; ++i)
<span style="white-space:pre"> </span>{
<span style="white-space:pre"> </span>int nPatternCount = KMPCount(strSentenceArray[i],strPattern,next_count);<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>
<span style="white-space:pre"> </span>if (nPatternCount > 0)
<span style="white-space:pre"> </span>{
<span style="white-space:pre"> </span>//若有匹配项则将匹配个数和字符串存起来
<span style="white-space:pre"> </span>Result.insert(make_pair(nPatternCount,strSentenceArray[i]));
<span style="white-space:pre"> </span>}
<span style="white-space:pre"> </span>}
<span style="white-space:pre"> </span>multimap<int,char*,greater<int>>::iterator it = Result.begin();
<span style="white-space:pre"> </span>for (;it != Result.end();it++)
<span style="white-space:pre"> </span>{
<span style="white-space:pre"> </span>cout<<it->first<<" "<<it->second<<endl;
<span style="white-space:pre"> </span>}
<span style="white-space:pre"> </span>//释放申请的空间
<span style="white-space:pre"> </span>delete[] next_count;
<span style="white-space:pre"> </span>for (int i = 0; i < nSentenceCount; ++i)
<span style="white-space:pre"> </span>{
<span style="white-space:pre"> </span>delete[] strSentenceArray[i];
<span style="white-space:pre"> </span>}
<span style="white-space:pre"> </span>delete[] strSentenceArray;
system("pause");
return 0;
}
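运行结果为(针对上面的示例文本,每行依次为匹配个数和对应的句子):
4 Today is Sunday, I want that every day is Sunday.
2 We should accumulate knowledge in our day-to-day life.
1 Today is difficult.
1 But the day after tomorrow is beautiful!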