熵代表信息的紊乱程度，用公式表示为：$H(X) = -\sum_{i} p(x_i) \log_2 p(x_i)$，其中 $p(x_i)$ 为第 $i$ 个符号出现的概率。
/**
 * Computes the Shannon entropy (in bits) of the letter distribution of a string.
 *
 * <p>Only the 26 ASCII letters are counted, case-insensitively; every other
 * character is ignored. The entropy is
 * {@code H = -sum(p_i * log2(p_i))}, where {@code p_i} is the number of
 * occurrences of letter {@code i} divided by the total number of letters.
 */
public class InfoEntropy {
    /** Number of distinct letters tracked ('a' through 'z'). */
    private static final int ALPHABET_SIZE = 26;

    /** Natural log of 2, used to convert natural logarithms to base 2. */
    private static final double LN_2 = Math.log(2);

    /** Occurrence count of each letter; index 0 is 'a', index 25 is 'z'. */
    public int[] count = new int[ALPHABET_SIZE];

    /** Most recently computed entropy, in bits. */
    private double entropy;

    /** Total number of counted letters (sum of the positive entries of {@link #count}). */
    private int sumCount;

    /**
     * Counts the letters of {@code str} and computes their entropy.
     *
     * @param str the text to analyse; letters are treated case-insensitively
     * @throws NullPointerException if {@code str} is null
     */
    public InfoEntropy(String str) {
        if (str == null) {
            throw new NullPointerException("str must not be null");
        }
        // Locale.ROOT keeps the mapping locale-independent (e.g. under a Turkish
        // default locale "I" would otherwise become a dotless i and be skipped).
        String lowStr = str.toLowerCase(java.util.Locale.ROOT);
        for (int i = 0; i < lowStr.length(); i++) {
            char c = lowStr.charAt(i);
            if (c >= 'a' && c <= 'z') {
                count[c - 'a']++;
                sumCount++;
            }
        }
        // Call the private helper rather than the overridable public method.
        computeEntropy();
    }

    /**
     * Recomputes the entropy from the current contents of {@link #count}.
     *
     * <p>Calling this repeatedly yields the same result; the value is reset
     * before each computation rather than accumulated.
     */
    public void calEntropy() {
        computeEntropy();
    }

    /** Derives the letter total from {@code count} and stores the entropy in bits. */
    private void computeEntropy() {
        int total = 0;
        for (int c : count) {
            if (c > 0) {
                total += c;
            }
        }
        sumCount = total;

        double result = 0.0;
        for (int c : count) {
            // A letter that never occurs contributes nothing (lim p->0 of p*log p is 0).
            if (c <= 0) {
                continue;
            }
            double p = (double) c / total;
            result -= p * (Math.log(p) / LN_2);
        }
        // With no letters at all the loop never runs and the entropy is 0.
        entropy = result;
    }

    /**
     * Returns the entropy of the letter distribution.
     *
     * @return the entropy in bits; {@code 0.0} when no letters were counted
     */
    public double getEntropy() {
        return this.entropy;
    }

    /** Small demo: prints the letter entropy of a sample sentence. */
    public static void main(String[] args) {
        String test = "I am a dog.";
        InfoEntropy entropy = new InfoEntropy(test);
        System.out.println(entropy.getEntropy());
    }
}