1.计算方法
package main;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
/**
* 求集合相似度Jaccard距离 J(A,B) = |A∩B|/|A∪B|
* @author Administrator
*
*/
public class AA {
public static void main(String[] args) {
// TODO Auto-generated method stub
Set<String> set = new HashSet<String>();
set.add("1");
set.add("2");
set.add("3");
set.add("4");
Set<String> set1 = new HashSet<String>();
set1.add("1");
set1.add("3");
set1.add("5");
set1.add("7");
System.out.println(operInsertct(set,set1));
System.out.println(operUnin(set,set1));
double Jaccard = (operInsertct(set,set1).size()*1.0)/(operUnin(set,set1).size()*1.0)*100;
System.out.println("集合A,B相似度为:"+Jaccard+"%");
}
/**
* 求多个set数组的交集
* @param setArrays
* @return
*/
public static <String> Set<String> operInsertct(Set<String>... setArrays) {
// 计数map
Map<String, Integer> countMap = new HashMap<String, Integer>();
for (int i = 0; i < setArrays.length; i++) {
for (String element : setArrays[i]) {
Integer keyCount = (Integer) countMap.get(element);
if (keyCount == null) {
countMap.put(element, 1);
} else {
countMap.put(element, ++keyCount);
}
}
}
//元素出现setArrays.length代表元素在交集
Iterator<Entry<String, Integer>> iterator = countMap.entrySet().iterator();
while (iterator.hasNext()) {
if ((Integer) iterator.next().getValue() != setArrays.length) {
iterator.remove();
}
}
return countMap.keySet();
}
/**
* 求多个set数组的并集
* @param setArrays
* @return
*/
public static <String> Set<String> operUnin(Set<String>... setArrays) {
// 计数map
Map<String, Integer> countMap = new HashMap<String, Integer>();
for (int i = 0; i < setArrays.length; i++) {
for (String element : setArrays[i]) {
Integer keyCount = (Integer) countMap.get(element);
if (keyCount == null) {
countMap.put(element, 1);
}
}
}
return countMap.keySet();
}
}
2.运行结果
3.借鉴文章