// 使用词频来计算两个语句的相似度,假设语句已经使用分词工具完成分词。
public class Similar {
public static double getSimilarity(Vector<String> T1, Vector<String> T2) throws Exception {
int size = 0 , size2 = 0 ;
double sum=0;
if ( T1 != null && ( size = T1.size() ) > 0 && T2 != null && ( size2 = T2.size() ) > 0 ) {
Set<String> set=new HashSet<String>();
Map<String, Integer> s1 = new HashMap<String, Integer>();
Map<String, Integer> s2= new HashMap<String, Integer>();
//获取T1,T2所有的不重复单词
for (int i = 0; i < size; i++) {
set.add(T1.get(i));
}
for(int j=0;j<size2;j++){
set.add(T2.get(j));
}
//计算每个向量中的词语出现的次数
for (String string : set) {
int count=0;
int count1=0;