package day0321.day0330;
import java.util.HashSet;
import java.util.Set;
/**
 * Finds the candidate string that is most similar to a query string.
 *
 * <p>The score of a candidate is a weighted blend of two terms:
 * <ul>
 *   <li>the Jaccard index of the two strings' sets of distinct {@code char}
 *       values (weight {@code 0.7}), and</li>
 *   <li>a length-closeness coefficient {@code 1 / (1 + |len1 - len2|)}
 *       (weight {@code 0.3}).</li>
 * </ul>
 */
public class JaccardSimilarity {

    /** Weight of the character-set Jaccard index in the combined score. */
    private static final double JACCARD_WEIGHT = 0.7;

    /** Weight of the length-closeness coefficient in the combined score. */
    private static final double LENGTH_WEIGHT = 0.3;

    /**
     * Scores every candidate in {@code strArray} against {@code str1} and
     * returns the highest combined score found.
     *
     * <p>The best score and the candidate that produced it are also printed to
     * standard output. When several candidates tie, the first one wins. When
     * the array is empty (or holds only {@code null} entries) the result is
     * {@code 0} and the reported string is empty.
     *
     * @param str1     the query string
     * @param strArray the candidate strings; {@code null} entries are skipped
     * @return the highest combined score, or {@code 0} if no candidate scored above zero
     * @throws NullPointerException if {@code str1} or {@code strArray} is {@code null}
     */
    public static double calculateJaccardSimilarity(String str1, String[] strArray) {
        if (str1 == null) {
            throw new NullPointerException("str1 must not be null");
        }
        if (strArray == null) {
            throw new NullPointerException("strArray must not be null");
        }

        // The query's character set never changes, so build it once.
        Set<Character> queryChars = toCharSet(str1);

        double maxSimilarity = 0;
        String mostSimilarString = "";

        for (String str2 : strArray) {
            if (str2 == null) {
                // A missing candidate cannot match anything; skip it rather
                // than abort the whole scan.
                continue;
            }
            double jaccard = jaccardIndex(queryChars, toCharSet(str2));
            double score = jaccard * JACCARD_WEIGHT
                    + calculateCoefficient(str1.length(), str2.length()) * LENGTH_WEIGHT;

            // Strict '>' keeps the earliest candidate on ties.
            if (score > maxSimilarity) {
                maxSimilarity = score;
                mostSimilarString = str2;
            }
        }

        System.out.println("Jaccard Similarity: " + maxSimilarity);
        System.out.println("Most Similar String: " + mostSimilarString);
        return maxSimilarity;
    }

    /**
     * Returns a coefficient in {@code (0, 1]} that shrinks as the two lengths
     * move apart: {@code 1 / (1 + |strLen - strLen2|)}.
     *
     * @param strLen  length of the first string
     * @param strLen2 length of the second string
     * @return {@code 1.0} when the lengths are equal, smaller values otherwise
     */
    public static double calculateCoefficient(int strLen, int strLen2) {
        // Subtract in long so that extreme int arguments cannot overflow and
        // produce a negative "absolute" difference.
        long difference = Math.abs((long) strLen - strLen2);
        return 1.0 / (1.0 + difference);
    }

    /** Runs the scorer on a small fixed example and prints the result. */
    public static void main(String[] args) {
        String str1 = "BH44+ BH44";
        String[] strArray = { "BK54+BK55", "BH45+", "BH44+", "BH44+BH45", "world" };
        calculateJaccardSimilarity(str1, strArray);
    }

    /** Collects the distinct {@code char} values of {@code text} into a set. */
    private static Set<Character> toCharSet(String text) {
        Set<Character> chars = new HashSet<>();
        for (char c : text.toCharArray()) {
            chars.add(c);
        }
        return chars;
    }

    /**
     * Computes |A ∩ B| / |A ∪ B|. Two empty sets are treated as identical
     * (index {@code 1.0}) instead of yielding {@code NaN} from {@code 0 / 0}.
     */
    private static double jaccardIndex(Set<Character> first, Set<Character> second) {
        if (first.isEmpty() && second.isEmpty()) {
            return 1.0;
        }
        // Iterate the smaller set and probe the larger one.
        Set<Character> smaller = first.size() <= second.size() ? first : second;
        Set<Character> larger = (smaller == first) ? second : first;

        int shared = 0;
        for (Character c : smaller) {
            if (larger.contains(c)) {
                shared++;
            }
        }
        // Inclusion-exclusion: |A ∪ B| = |A| + |B| - |A ∩ B|.
        int unionSize = first.size() + second.size() - shared;
        return (double) shared / unionSize;
    }
}
02-02
10-13
05-13
1091
![](https://csdnimg.cn/release/blogv2/dist/pc/img/readCountWhite.png)