统计文本中出现的所有不重复单词,并按字典序输出
使用STL(set)实现
/*统计文本中出现的单词的序列*/
#include <iostream>
#include <fstream>
#include <string>
#include <set>
using namespace std;
/*
 * Reads whitespace-separated words from "word.txt" and prints every distinct
 * word once, one per line, in the ascending order kept by std::set.
 *
 * Returns 0 on success and 1 when the input file cannot be opened.
 */
int main()
{
    std::ifstream in("word.txt");
    if (in.fail())
    {
        // Previously a missing file just produced empty output with no hint why.
        std::cout << "打开文件错误!" << std::endl;
        return 1;
    }

    std::set<std::string> DistinctWordSet;
    std::string str;
    while (in >> str)
    {
        // Inserting a word that is already present leaves the set unchanged.
        DistinctWordSet.insert(str);
    }

    // Output
    for (std::set<std::string>::const_iterator it = DistinctWordSet.begin();
         it != DistinctWordSet.end(); ++it)
    {
        std::cout << *it << std::endl;
    }

    system("pause");
    // 0 is the conventional "success" exit status; the old code returned 1.
    return 0;
}
统计文本中每个单词的出现次数
STL(map)实现
/*统计文本中每个单词的出现次数*/
#include <iostream>
#include <fstream>
#include <algorithm>
#include <map>
#include <string>
using namespace std;
/*
 * Counts how often each word occurs in "word.txt" (case-insensitively) and
 * prints "word count" lines in the ascending key order kept by std::map.
 *
 * Returns 0 on success and 1 when the input file cannot be opened.
 */
int main()
{
    std::ifstream in("word.txt"); // open the file
    if (in.fail())
    {
        std::cout << "打开文件错误!" << std::endl;
        // A failed open must not report success (the old code called exit(0)).
        return 1;
    }

    std::map<std::string, int> WordCountMap;
    std::string str;
    while (in >> str)
    {
        // Lower-case the word. tolower() is only defined for values that fit
        // in unsigned char (or EOF), so convert before calling it; a plain
        // char may be negative for non-ASCII bytes.
        for (std::string::size_type i = 0; i < str.size(); ++i)
        {
            str[i] = static_cast<char>(::tolower(static_cast<unsigned char>(str[i])));
        }
        // operator[] value-initialises a missing entry to 0 before the increment.
        ++WordCountMap[str];
    }
    in.close();

    // Output
    for (std::map<std::string, int>::const_iterator it = WordCountMap.begin();
         it != WordCountMap.end(); ++it)
    {
        std::cout << it->first << " " << it->second << std::endl;
    }

    system("pause");
    // 0 is the conventional "success" exit status; the old code returned 1.
    return 0;
}
Hash表(链地址法)实现
/*统计文本中每个单词的出现次数*/
#include <iostream>
#include <assert.h>
#include <string>
#include <fstream>
#include <algorithm>
using namespace std;
// Number of buckets in the hash table; HashIndex reduces its result modulo this value.
const int NHASH = 29989;
// Factor the running hash is multiplied by before each character is added in HashIndex.
const int MULT = 31;
/*
 * One node of a singly linked bucket chain: a distinct word together with the
 * number of times it has been seen.
 */
class StrNode
{
public:
    std::string word;   // the stored word
    unsigned int count; // occurrences recorded so far (starts at 1)
    StrNode* next;      // next node in the same chain, or NULL at the end
public:
    // Creates a node for a word that has just been seen for the first time.
    // The initialisers are written in declaration order (word, count, next),
    // which is the order the compiler actually runs them in, and the argument
    // is taken by const reference so it is copied only once.
    StrNode(const std::string& str) : word(str), count(1), next(NULL) {}
};
class CountStr
{
public:
CountStr();
public:
unsigned int HashIndex(string str);
void InsertWord(string str);
void InitStr(string FileName);
void Print();
private:
StrNode* bin[NHASH];
};
/*
 * Starts with every bucket empty.
 *
 * The buckets are assigned one by one rather than with memset: memset takes
 * an int fill byte (NULL was being passed there), needs <cstring>, which this
 * program does not include, and an all-zero byte pattern is not guaranteed
 * to be a null pointer.
 */
CountStr::CountStr()
{
    for (int i = 0; i < NHASH; i++)
    {
        bin[i] = NULL;
    }
}
/*如字符串abc的Hash值为(97 *31 + 98) * 31 + 99*/
unsigned int CountStr::HashIndex(string str)
{
unsigned int index = 0;
int strLen = str.size();
assert(strLen > 0);
for (int i = 0;i < strLen;i++)
{
index = MULT * index + str.at(i);
}
return index % NHASH;
}
void CountStr::InsertWord(string str)
{
StrNode* p = NULL;
unsigned int index = HashIndex(str);
for (StrNode* p = bin[index];p != NULL;p = p->next)
{
if (str == p->word)
{
p->count++;
return;
}
}
p = new StrNode(str);
//使用头插法插入节点
p->next = bin[index];
bin[index] = p;
}
void CountStr::InitStr(string fileName)
{
string str;
ifstream in(fileName.c_str());
while(in >> str)
{
transform(str.begin(),str.end(),str.begin(),::tolower);//大写变小写
InsertWord(str);
}
}
void CountStr::Print()
{
for (int i = 0;i < NHASH;i++)
{
for (StrNode* p = bin[i];p;p = p->next)
{
cout<<p->word<<" "<<p->count<<endl;
}
}
}
/*
 * Builds the hash-table word counter from "word.txt", prints every word with
 * its count and waits for a key press.
 *
 * Returns 0, the conventional "success" exit status (the old code returned 1,
 * which callers and shells interpret as failure).
 */
int main()
{
    CountStr countStr;
    countStr.InitStr("word.txt");
    countStr.Print();
    system("pause");
    return 0;
}
Trie树(字典树)实现
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <string>
#include <algorithm>
#include <assert.h>
using namespace std;
// Number of child slots per trie node; Trie::Insert indexes them with (ch - 'a').
const int MaxBranchNum = 26;
/*
 * Trie node. A node that terminates an inserted word stores a copy of that
 * word and its occurrence count; other nodes keep word == NULL and count == 0.
 */
class TrieNode
{
public:
    char* word;                         // heap copy of the word ending here, or NULL
    int count;                          // number of times that word was inserted
    TrieNode* nextBranch[MaxBranchNum]; // child per letter, NULL when absent
public:
    // Creates a node with no word and no children.
    // The children are assigned one by one rather than with memset: memset
    // takes an int fill byte (NULL was being passed there), needs <cstring>,
    // which this program does not include, and an all-zero byte pattern is
    // not guaranteed to be a null pointer.
    TrieNode() : word(NULL), count(0)
    {
        for (int i = 0; i < MaxBranchNum; i++)
        {
            nextBranch[i] = NULL;
        }
    }
};
/*
 * Trie of lowercase words that records how many times each word was inserted.
 * The trie owns all of its nodes and releases them in its destructor.
 */
class Trie
{
public:
    Trie();
    ~Trie();
    // Records one occurrence of the NUL-terminated word str.
    void Insert(const char* str);
    // Prints every stored word together with its count.
    void Print();
private:
    TrieNode* pRoot; // root node; allocated in the constructor
private:
    // Recursively frees the subtree rooted at pRoot.
    void Destory(TrieNode* pRoot);
    // Recursively prints the subtree rooted at pRoot.
    void Print(TrieNode* pRoot);
private:
    // Declared but intentionally not defined: the destructor deletes the whole
    // tree, so a member-wise copy would free the same nodes twice.
    Trie(const Trie&);
    Trie& operator=(const Trie&);
};
/* Creates an empty trie that consists of a freshly allocated root node. */
Trie::Trie() : pRoot(new TrieNode())
{
}
/* Releases the whole tree, starting from the root node. */
Trie::~Trie()
{
    this->Destory(this->pRoot);
}
/*
 * Records one occurrence of the NUL-terminated word str.
 *
 * Only the letters 'a'..'z' are accepted. As soon as any other character is
 * met the word is abandoned and not counted; nodes already created for its
 * leading letters stay in the tree. Extending the alphabet (for example to
 * keep upper case distinct) would require a larger MaxBranchNum and a
 * different index mapping.
 */
void Trie::Insert(const char* str)
{
    assert(NULL != str);

    TrieNode* pLoc = pRoot;
    for (int i = 0; str[i]; i++)
    {
        const int index = str[i] - 'a';
        // Valid slots are 0 .. MaxBranchNum - 1. The old test used
        // "index > MaxBranchNum", which let index == MaxBranchNum ('{')
        // through and accessed one element past the end of nextBranch.
        if (index < 0 || index >= MaxBranchNum)
        {
            return; // unsupported character: do not insert
        }
        if (NULL == pLoc->nextBranch[index])
        {
            pLoc->nextBranch[index] = new TrieNode();
        }
        pLoc = pLoc->nextBranch[index];
    }

    if (NULL == pLoc->word)
    {
        // First occurrence: keep a copy of the word at its terminal node.
        pLoc->word = new char[strlen(str) + 1];
        strcpy(pLoc->word, str);
    }
    pLoc->count++;
}
void Trie::Print()
{
Print(pRoot);
}
/*输出所有的单词*/
void Trie::Print(TrieNode* pRoot)
{
if (NULL == pRoot)
{
return;
}
//输出单词
if (NULL != pRoot->word)
{
if (strcmp(pRoot->word,"is") == 0)
{
cout<<"is"<<endl;
}
if (strcmp(pRoot->word,"it") == 0)
{
cout<<"it"<<endl;
}
cout<<pRoot->word<<" "<<pRoot->count<<endl;
}
//递归处理分支
for (int i = 0;i < MaxBranchNum;i++)
{
Print(pRoot->nextBranch[i]);
}
}
/*销毁trie树*/
void Trie::Destory(TrieNode* pRoot)
{
if (NULL == pRoot)
{
return;
}
for (int i = 0;i < MaxBranchNum;i++)
{
Destory(pRoot->nextBranch[i]);
}
//销毁单词占得空间
if (NULL != pRoot->word)
{
delete []pRoot->word;
pRoot->word = NULL;
}
delete pRoot;//销毁结点
pRoot = NULL;
}
/*
 * Reads whitespace-separated words from "word.txt", lower-cases them, inserts
 * them into a trie and prints every stored word with its count.
 *
 * The command-line arguments are not used.
 * Returns 0 on success and 1 when the input file cannot be opened.
 */
int main(int argc, char *argv[])
{
    std::ifstream in("word.txt");
    if (in.fail())
    {
        // Previously a missing file just produced empty output with no hint why.
        std::cout << "打开文件错误!" << std::endl;
        return 1;
    }

    Trie t;
    std::string str;
    // Feed the words into the trie.
    while (in >> str)
    {
        // Lower-case the word. tolower() is only defined for values that fit
        // in unsigned char (or EOF), so convert before calling it; a plain
        // char may be negative for non-ASCII bytes.
        for (std::string::size_type i = 0; i < str.size(); ++i)
        {
            str[i] = static_cast<char>(::tolower(static_cast<unsigned char>(str[i])));
        }
        t.Insert(str.c_str());
    }

    // Output
    t.Print();
    system("PAUSE");
    // 0 is the conventional "success" exit status; the old code returned 1.
    return 0;
}