/****************************************/
//创建人:李航前
//时间:2014,9,11
//内容:余弦相似度,做法思路参见http://www.ruanyifeng.com/blog/2013/03/cosine_similarity.html
/****************************************/
#include <math.h>

#include <cmath>
#include <cstdlib>
#include <iostream>
#include <map>
#include <set>
#include <string>
using namespace std;
/**
 * Adds every space-delimited token of `sentence` to the vocabulary set `s`.
 *
 * The sentence is split on single ' ' characters. Tokens are inserted as-is,
 * so an empty sentence, or leading, trailing or consecutive spaces, insert the
 * empty string as a token. Duplicates are absorbed by the set.
 *
 * @param sentence  Text to tokenize.
 * @param s         Set that receives the tokens; existing entries are kept.
 */
void Insert_Sum(std::string sentence,std::set<std::string>& s){
    // Positions are kept as size_type: find() reports "not found" as npos,
    // which must not be squeezed through an int.
    std::string::size_type start=0;
    while(true){
        const std::string::size_type space=sentence.find(' ',start);
        if(space==std::string::npos){
            // Last token: everything from `start` to the end of the sentence.
            s.insert(sentence.substr(start));
            break;
        }
        s.insert(sentence.substr(start,space-start));
        start=space+1;
    }
}
/**
 * Adds the word frequencies of `sentence` to the map `s`.
 *
 * The sentence is split on ' ' characters and the counter of each non-empty
 * token is incremented (tokens not yet in the map start from zero). Empty
 * tokens, produced by an empty sentence or by leading, trailing or consecutive
 * spaces, are ignored so that they are not counted as a word.
 *
 * @param s         Frequency table to update; existing counts are kept.
 * @param sentence  Text whose words are counted.
 */
void Count(std::map<std::string,int>& s,std::string sentence){
    const std::string::size_type length=sentence.size();
    std::string::size_type start=0;
    while(start<=length){
        // find() reports "not found" as npos; treat that as end of sentence.
        std::string::size_type stop=sentence.find(' ',start);
        if(stop==std::string::npos)
            stop=length;
        if(stop>start)
            ++s[sentence.substr(start,stop-start)];
        start=stop+1;
    }
}
/**
 * Reads two sentences from standard input and prints their cosine similarity.
 *
 * Steps:
 *   1. Build the vocabulary of both sentences with Insert_Sum.
 *   2. Build one term-frequency map per sentence with Count, seeded with a
 *      zero entry for every vocabulary word.
 *   3. Print both frequency tables, the dot product, the two squared norms,
 *      the product of the norms and finally the similarity
 *      dot / (|v1| * |v2|).
 *
 * If either frequency vector has zero length the similarity is reported as 0
 * instead of dividing by zero.
 */
int main(){
    std::string sen1;
    std::string sen2;
    std::cout<<"请输入句子1"<<std::endl;
    std::getline(std::cin,sen1);
    std::cout<<"请输入句子2"<<std::endl;
    std::getline(std::cin,sen2);

    // Vocabulary: every distinct word that occurs in either sentence.
    std::set<std::string> word_sum;
    Insert_Sum(sen1,word_sum);
    Insert_Sum(sen2,word_sum);

    // Term-frequency vectors of the two sentences. Every vocabulary word gets
    // an explicit zero so both tables list the whole vocabulary.
    std::map<std::string,int> word1;
    std::map<std::string,int> word2;
    for(std::set<std::string>::const_iterator w=word_sum.begin();w!=word_sum.end();++w){
        word1[*w]=0;
        word2[*w]=0;
    }
    Count(word1,sen1);
    Count(word2,sen2);

    for(std::map<std::string,int>::const_iterator it=word1.begin();it!=word1.end();++it)
        std::cout<<it->first<<it->second<<std::endl;
    for(std::map<std::string,int>::const_iterator it=word2.begin();it!=word2.end();++it)
        std::cout<<it->first<<it->second<<std::endl;

    // Cosine of the angle between the two frequency vectors.
    double a=0; // dot product of word1 and word2
    double b=0; // squared length of word1
    double c=0; // squared length of word2

    // Pair the components by word rather than by iterator position, so the
    // result stays correct even if the two maps do not hold identical keys.
    for(std::map<std::string,int>::const_iterator it=word1.begin();it!=word1.end();++it){
        const double x=it->second;
        const std::map<std::string,int>::const_iterator other=word2.find(it->first);
        const double y=(other!=word2.end())?other->second:0;
        a+=x*y;
        b+=x*x;
    }
    for(std::map<std::string,int>::const_iterator it=word2.begin();it!=word2.end();++it){
        const double y=it->second;
        c+=y*y;
    }

    std::cout<<a<<std::endl;
    std::cout<<b<<std::endl;
    std::cout<<c<<std::endl;

    const double norm_product=std::sqrt(b)*std::sqrt(c);
    std::cout<<norm_product<<std::endl;

    // A zero-length vector has no direction; report 0 instead of NaN/inf.
    const double result=(norm_product>0.0)?a/norm_product:0.0;
    // endl flushes the result before the console is (optionally) paused.
    std::cout<<"余弦相似度为: "<<result<<std::endl;

#ifdef _WIN32
    // Keep the console window open when started from the Windows shell;
    // "pause" is a cmd.exe built-in and does not exist on other platforms.
    std::system("pause");
#endif
    return 0;
}