信息熵的简单做法
信息熵的计算公式为 H = -Σ p·log2(p),其中 log2 是以 2 为底的对数。在 C/C++ 中计算信息熵时,可以应用换底公式 log2(p) = log(p) / log(2)(log 为自然对数函数)来计算上面的公式。
直接贴代码了:
#include <map>
#include <cmath>
#include <iostream>
#include <string>
#include <stdio.h>
// Returns the sum of all counts stored in smap (0 for an empty map).
//
// The map is taken by const reference because it is only read; this lets
// callers pass const maps as well as mutable ones.
int GetSumCnt(const std::map<std::string, int> &smap)
{
    int cnt_sum = 0;
    for (std::map<std::string, int>::const_iterator iter = smap.begin();
         iter != smap.end(); ++iter)
    {
        cnt_sum += iter->second;
    }
    return cnt_sum;
}
// Computes the Shannon entropy, in bits, of the count distribution in smap,
// printing each entry's probability along the way.
//
// Each probability is count / GetSumCnt(smap). Returns 0.0 when the total is
// not positive, since no probabilities can be formed in that case.
static double PrintProbabilitiesAndEntropy(std::map<std::string, int> &smap)
{
    const int cnt_sum = GetSumCnt(smap);
    double score = 0.0;
    if (cnt_sum <= 0)
        return score;

    // Change of base: log2(p) = ln(p) / ln(2). ln(2) is loop-invariant.
    const double ln2 = std::log(2.0);

    for (std::map<std::string, int>::const_iterator iter = smap.begin();
         iter != smap.end(); ++iter)
    {
        const double p_tmp =
            static_cast<double>(iter->second) / static_cast<double>(cnt_sum);
        printf("The p_tmp is %f\n", p_tmp);
        // Skip non-positive probabilities: log() is not finite there, and
        // the p*log(p) term is taken as 0 when p is 0.
        if (p_tmp > 0.0)
            score -= p_tmp * (std::log(p_tmp) / ln2);
    }
    return score;
}

// Adds cnt to the count stored under str (creating the entry if needed),
// then prints the per-entry probabilities and the resulting entropy.
static void AddAndReport(std::map<std::string, int> &smap,
                         const std::string &str, int cnt)
{
    smap[str] += cnt;
    const double score = PrintProbabilitiesAndEntropy(smap);
    printf("The score is %f\n", score);
}

// Demo: builds up a small word-count map step by step and prints the entropy
// of the distribution after every update.
int main()
{
    std::map<std::string, int> smap;

    AddAndReport(smap, "baidu", 3);
    AddAndReport(smap, "beijing", 3);
    // Same key again: the count accumulates, so the distribution becomes 3 vs 6.
    AddAndReport(smap, "beijing", 3);

    return 0;
}
在此主要想记录的是换底公式:写代码时用它来计算以 2 为底的对数,可以让代码更加方便简洁。