// Forum page residue: "展开全部" ("Expand all").
// Original poster's note (translated): "Written previously; please award points."
#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>
using namespace std;
// Per-word corpus statistics. GenerateIDF() first stores the number of corpus
// files that contain the word, then overwrites that count in place with the
// word's inverse document frequency. (The template arguments were lost from
// the original text; float matches how the values are read back into `idf`.)
std::map<std::string, float> IDFTable;
// One scored term of the analysed document.
struct Words
{
    // The term text.
    std::string wd;
    // Occurrences of the term divided by the number of counted words.
    float freq;
    // freq multiplied by the term's IDF; the ranking key.
    float weight;
};
bool cmp(Words &w1,Words&w2)
{
return w1.weight>w2.weight;
}
map WordTable;
vector WordList;
char Comment[]=",.!\"?;:()";
int totalText=0;
// Returns true when every character of cs is an ASCII digit '0'..'9'.
// An empty string also yields true (no character violates the condition),
// which callers rely on to discard tokens that became empty after
// punctuation stripping.
//
// The loop header and the range test were corrupted in the original text and
// have been restored.
bool IsAllNumber(const std::string &cs)
{
    for (std::string::size_type i = 0; i < cs.size(); ++i)
    {
        if (cs[i] < '0' || cs[i] > '9')
            return false;
    }
    return true;
}
// Returns true when cs consists solely of spaces and tab characters.
// An empty string yields true as well.
//
// The loop header was truncated in the original text and has been restored.
bool Isblank(const std::string &cs)
{
    for (std::string::size_type i = 0; i < cs.size(); ++i)
    {
        if (cs[i] != ' ' && cs[i] != '\t')
            return false;
    }
    return true;
}
// Converts the ASCII upper-case letters of cs to lower case in place and
// returns a reference to the same string. All other characters are left
// untouched.
//
// The loop header was truncated in the original text and has been restored.
std::string &ToLower(std::string &cs)
{
    for (std::string::size_type i = 0; i < cs.size(); ++i)
    {
        if (cs[i] >= 'A' && cs[i] <= 'Z')
            cs[i] += ('a' - 'A');
    }
    return cs;
}
// Reads the whitespace-separated tokens of the file `fname` and stores the
// distinct, normalised words in `wds` (which is cleared first).
//
// Normalisation: every character listed in Comment is removed from the token;
// tokens that are then all digits or blank/empty are skipped; the rest are
// lower-cased before insertion.
//
// If the file cannot be opened, `wds` is simply left empty.
void readFile(const std::string &fname, std::set<std::string> &wds)
{
    wds.clear();
    std::ifstream fin(fname.c_str());
    std::string word;
    // Loop on the extraction itself rather than on eof(): eof() is never set
    // for a stream that failed to open (endless loop), and it only becomes
    // true after a failed read, which would re-process the last token.
    while (fin >> word)
    {
        for (int i = 0; Comment[i] != 0; i++)
        {
            // Compare against npos in the native size type instead of
            // narrowing the result of find() to int.
            std::string::size_type pos;
            while ((pos = word.find(Comment[i])) != std::string::npos)
            {
                word.erase(pos, 1);
            }
        }
        if (!IsAllNumber(word) && !Isblank(word))
        {
            wds.insert(ToLower(word));
        }
    }
    // fin is closed by its destructor.
}
// Builds IDFTable from the fixed list of corpus files.
//
// Pass 1: for every file, each distinct word increments its document count in
//         IDFTable, and totalText counts the files processed.
// Pass 2: every count df is replaced by log(totalText / (df + 1)).
//
// Container and iterator template arguments were lost from the original text
// and have been restored from usage.
void GenerateIDF()
{
    totalText = 0;
    // Start from an empty table as well, so that the counts below are not
    // added on top of IDF values left over from an earlier call.
    IDFTable.clear();

    const int fileCount = 7;
    const std::string files[fileCount] = {"curious.txt",
                                          "erotic.txt",
                                          "fall.txt",
                                          "hands.txt",
                                          "water.txt",
                                          "wifi.txt",
                                          "young.txt"};
    std::set<std::string> wds;
    for (int i = 0; i < fileCount; i++)
    {
        readFile(files[i], wds);
        for (std::set<std::string>::iterator it = wds.begin(); it != wds.end(); ++it)
        {
            // operator[] value-initialises a missing entry to 0, so this
            // covers both the "first sighting" and the "seen before" case.
            IDFTable[*it] += 1;
        }
        totalText++;
    }

    // Convert document counts into IDF values; the +1 in the denominator is
    // kept exactly as in the original formula.
    for (std::map<std::string, float>::iterator iter = IDFTable.begin(); iter != IDFTable.end(); ++iter)
    {
        iter->second = static_cast<float>(std::log((float)totalText / (iter->second + 1.0)));
    }
}
int GenerateTF(){
ifstream fin("Test.txt");
string word;
int textwords=0;
while (!fin.eof())
{
fin>>word;
for (int i=0;Comment[i]!=0;i++)
{
int pos;
while((pos=word.find(Comment[i]))!=-1)
{
word.replace(pos,1,"");
}
}
if(!IsAllNumber(word)&&!Isblank(word))
{
//wds.insert(ToLower(word));
textwords++;
ToLower(word);
map::iterator it;
if((it=WordTable.find(word))!=WordTable.end())
{
it->second++;
}
else
{
WordTable[word]=1;
}
}
}
fin.close();
//计算频率
for (map::iterator it=WordTable.begin();it!=WordTable.end();++it)
{
Words wd;
wd.wd=it->first;
wd.freq=(float)(it->second)/textwords;
float idf=0;
map::iterator iter;
if((iter=IDFTable.find(wd.wd))!=IDFTable.end())
{
idf=iter->second;
}
else
idf=log((float)totalText);
wd.weight=wd.freq*idf;
WordList.push_back(wd);
}
return textwords;
}
// Sorts WordList in place, using cmp as the ordering predicate.
void GenerateSort()
{
    std::sort(WordList.begin(), WordList.end(), &cmp);
}
int main(){
GenerateIDF();
int txtwd=GenerateTF();
GenerateSort();
int topnum=10;
cout<
cout<
for (int i=0;i
{
cout<
}
}