package sim;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
/**
 * Character-level cosine similarity between two strings.
 *
 * <p>Every distinct character that occurs in either input string is treated as one
 * dimension of a vector space. A string's coordinate on a dimension is the number of
 * times that character occurs in the string. "Character" here means a UTF-16 code
 * unit, as produced by {@link String#toCharArray()}.
 *
 * Created by panther on 15-7-20.
 */
public class Similarity {
    /** Maps each character to a two-element array: {occurrences in string1, occurrences in string2}. */
    Map<Character, int[]> vectorMap = new HashMap<Character, int[]>();

    /** The most recently allocated count pair; kept as a field only for backward compatibility. */
    int[] tempArray = null;

    /**
     * Builds the occurrence vectors for the two strings.
     *
     * @param string1 first string, must not be null
     * @param string2 second string, must not be null
     * @throws NullPointerException if either argument is null
     */
    public Similarity(String string1, String string2) {
        if (string1 == null || string2 == null) {
            throw new NullPointerException("string1 and string2 must not be null");
        }
        countCharacters(string1, 0);
        countCharacters(string2, 1);
    }

    /**
     * Adds the character occurrences of {@code text} to the given slot of each count pair.
     *
     * @param text string whose characters are counted
     * @param slot 0 for the first string, 1 for the second
     */
    private void countCharacters(String text, int slot) {
        for (char current : text.toCharArray()) {
            int[] counts = vectorMap.get(current);
            if (counts == null) {
                // First time this character is seen in either string: start both counts at 0.
                counts = new int[2];
                tempArray = counts;
                vectorMap.put(current, counts);
            }
            counts[slot]++;
        }
    }

    /**
     * Computes the cosine similarity of the two occurrence vectors.
     *
     * @return the dot product divided by the product of the two vector norms, or 0 when
     *         either string is empty (the norm product is 0 and the cosine is undefined)
     */
    public double sim() {
        double normProduct = sqrtMulti(vectorMap);
        if (normProduct == 0) {
            // Avoid 0/0 == NaN for empty input; an empty string shares nothing with anything.
            return 0;
        }
        return pointMulti(vectorMap) / normProduct;
    }

    /** Returns the product of the two vector norms: sqrt(sum of squares 1 * sum of squares 2). */
    private double sqrtMulti(Map<Character, int[]> paramMap) {
        return Math.sqrt(squares(paramMap));
    }

    /** Returns (sum of squared counts of string1) * (sum of squared counts of string2). */
    private double squares(Map<Character, int[]> paramMap) {
        double sumOfSquares1 = 0;
        double sumOfSquares2 = 0;
        for (int[] counts : paramMap.values()) {
            // Multiply in double so that large counts cannot overflow int.
            sumOfSquares1 += (double) counts[0] * counts[0];
            sumOfSquares2 += (double) counts[1] * counts[1];
        }
        return sumOfSquares1 * sumOfSquares2;
    }

    /** Returns the dot product of the two occurrence vectors. */
    private double pointMulti(Map<Character, int[]> paramMap) {
        double dotProduct = 0;
        for (int[] counts : paramMap.values()) {
            // Multiply in double so that large counts cannot overflow int.
            dotProduct += (double) counts[0] * counts[1];
        }
        return dotProduct;
    }

    /** Demo: prints the similarity of two sample strings. */
    public static void main(String[] args) {
        String s1 = "我是一个帅哥";
        String s2 = "帅哥是我";
        Similarity similarity = new Similarity(s1, s2);
        System.out.println(similarity.sim());
    }
}
输出结果:约为 0.8165(即 4/√24)
分析:
字符串s1的内容是“我是一个帅哥”,对应的向量各维度(即出现过的字符)为<我,是,一,个,帅,哥>,s1在这些维度上的取值为<1,1,1,1,1,1>,字符串s2“帅哥是我”对应的取值为<1,1,0,0,1,1>,向量s1点乘向量s2的结果为1*1+1*1+1*0+1*0+1*1+1*1 = 4,
向量s1的模为根号6,向量s2的模为2,所以相似度的结果为4/(√6×2)≈0.8165