在NLP自然语言处理中,常常会有推荐相关的算法,这就免不了要用相关的距离计算公式来计算两个向量之间的距离。这里用Java语言实现余弦定理和修正余弦,直接上代码~
【余弦定理】
package com.xzw.cos;
import java.util.Map;
import java.util.Map.Entry;
/**
* 相似度计算,余弦定理Java实现
* @author xzw
*
*/
public class Cos {
public static double getTwoVectorsSimilar(Map<String, Double> src, Map<String, Double> dest){
double score = 0;
if (src.size() == 0 || dest.size() == 0) {
return 0;
}
double v1 = 0, v2 = 0, fenzi = 0, fmOne = 0, fmTwo = 0;
for (Entry<String, Double> item : src.entrySet()) {
v1 = item.getValue();
fmOne += v1 * v1;
if (dest.containsKey(item.getKey())) {
v2 = dest.remove(item.getKey());
fmTwo += v2 * v2;
fenzi += v1 * v2;
}
}
for (double dv : dest.values()) {
fmTwo += dv * dv;
}
dest.clear();
dest = null;
if (fmOne == 0 || fmTwo == 0) {
return 0;
}else {
score = fenzi / Math.sqrt(fmOne * fmTwo);
}
return score;
}
}
【修正余弦】
package com.xzw.cos;
import java.util.Map;
import java.util.Map.Entry;
/**
* 相似度计算,修正余弦Java实现
* @author xzw
*
*/
public class NodCos {
public static double getSim(Map<String,Double> src,
Map<String,Double> dest){
double score = 0;
double avgs = 0;
double avgd = 0;
if(src.size() == 0 || dest.size() == 0){
return 0;
}
for( double v :src.values()){
avgs += v;
}
avgs = avgs / src.size();
for( double v :dest.values()){
avgd += v;
}
avgd = avgd / dest.size();
double v1 = 0, v2 = 0, fz = 0, fm1 = 0, fm2 = 0;
for (Entry<String, Double> items : src.entrySet()) {
v1 = items.getValue();
fm1 += (v1 - avgs) * (v1-avgs);
if(dest.containsKey(items.getKey())){
v2 = dest.remove(items.getKey());
fm2 += (v2 - avgd) * (v2 - avgd);
fz += (v1 - avgs) * (v2 - avgd);
}
}
for(double dv : dest.values()){
fm2 += (dv - avgd) * (dv - avgd);
}
dest.clear();
dest = null;
if(fm1 == 0 || fm2 == 0){
return 0;
} else{
score = fz / Math.sqrt((fm1 * fm2));
}
return score;
}
}