基于欧几里德距离的相似度是一种简单的相似度评价方法:返回 0 到 1 之间的值表示两者之间的相似度,1 表示完全相似,0 表示完全不相似。
上代码:
package com.Social.cbra;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.logging.Logger;
/**
 * Holds a set of item ratings and computes a Euclidean-distance based
 * similarity against another set of ratings.
 *
 * <p>Ratings live in {@link #rating_map}, keyed by item identifier. The
 * similarity returned by {@link #getsimilarity_bydim(Similarity)} lies in the
 * range 0 to 1, where 1 means identical ratings on fully overlapping items and
 * 0 means nothing in common.
 *
 * @author larry
 */
public class Similarity {
    static Logger logger = Logger.getLogger(Similarity.class.getName());

    /** Rating value per item identifier. */
    Map<String, Double> rating_map = new HashMap<String, Double>();

    /**
     * Demonstration entry point: builds two identical rating sets and logs
     * their similarity.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Similarity similarity1 = new Similarity();
        similarity1.rating_map.put("1", 1d);
        similarity1.rating_map.put("2", 1d);
        similarity1.rating_map.put("3", 1d);
        Similarity similarity2 = new Similarity();
        similarity2.rating_map.put("1", 1d);
        similarity2.rating_map.put("2", 1d);
        similarity2.rating_map.put("3", 1d);
        logger.info("" + similarity1.getsimilarity_bydim(similarity2));
    }

    /**
     * Computes the similarity between this rating set and {@code u}.
     *
     * <p>For the items rated by both sides, the root-mean-square of the rating
     * differences is mapped to a closeness score with {@code 1 - tanh(rms)},
     * and that score is then scaled by the fraction
     * {@code commonItems / min(size(this), size(u))}, i.e. how much of the
     * largest possible overlap is actually shared.
     *
     * <p>An item whose rating is {@code null} on either side is treated as not
     * rated and does not count as a common item.
     *
     * @param u the other rating set; must not be {@code null}
     * @return a value between 0 and 1; 0 when the two sets share no rated item
     * @throws NullPointerException if {@code u} is {@code null}
     */
    public double getsimilarity_bydim(Similarity u) {
        if (u == null) {
            throw new NullPointerException("u must not be null");
        }

        // Walk the smaller map and probe the larger one: one hash lookup per
        // entry instead of comparing every key against every other key.
        Map<String, Double> smaller = this.rating_map;
        Map<String, Double> larger = u.rating_map;
        if (smaller.size() > larger.size()) {
            Map<String, Double> swap = smaller;
            smaller = larger;
            larger = swap;
        }

        double squaredDiffSum = 0d;
        int commonItems = 0;
        for (Map.Entry<String, Double> entry : smaller.entrySet()) {
            Double left = entry.getValue();
            Double right = larger.get(entry.getKey());
            if (left == null || right == null) {
                // Item missing (or unrated) on one side: not a common item.
                continue;
            }
            double diff = right.doubleValue() - left.doubleValue();
            squaredDiffSum += diff * diff;
            commonItems++;
        }

        // No shared items: the similarity is defined as 0.
        if (commonItems == 0) {
            return 0d;
        }

        // Root-mean-square difference over the shared items.
        double rms = Math.sqrt(squaredDiffSum / commonItems);
        // rms is non-negative, so tanh(rms) is in [0, 1) and closeness in (0, 1].
        double closeness = 1.0d - Math.tanh(rms);
        // The largest possible overlap is the size of the smaller rating set.
        int maxCommonItems = smaller.size();
        return closeness * ((double) commonItems / maxCommonItems);
    }
}