网页去重(二):最小编辑距离法
一、 例子
二、 Java算法实现
/**
 * Computes the minimum edit distance (weighted Levenshtein distance) for turning
 * {@code source} into {@code target}.
 *
 * <p>Costs come from {@link #insert_cost(char)}, {@link #delete_cost(char)} and
 * {@link #substitute_cost(char, char)}: insertion and deletion cost 1, substituting
 * two different characters costs 2, and matching characters cost 0. Strings are
 * compared one {@code char} (UTF-16 code unit) at a time.
 *
 * <p>Row {@code i} of the dynamic-programming table depends only on row {@code i-1},
 * so only two rows are kept. Memory is proportional to {@code source.length()}
 * instead of {@code target.length() * source.length()}.
 *
 * @param target the string to be produced
 * @param source the string to be transformed
 * @return the minimum total cost of the edits that turn {@code source} into {@code target}
 * @throws NullPointerException if {@code target} or {@code source} is {@code null}
 */
public static int min_edit_distance(String target,String source){
    if (target == null || source == null) {
        throw new NullPointerException("target and source must not be null");
    }
    int t_len = target.length();
    int s_len = source.length();

    // previous = row i-1 of the table, current = row i being filled.
    int[] previous = new int[s_len + 1];
    int[] current = new int[s_len + 1];

    // Initialise state 0: an empty target prefix is reached by deleting every source char.
    previous[0] = 0;
    for (int j = 1; j <= s_len; j++) {
        previous[j] = previous[j - 1] + delete_cost(source.charAt(j - 1));
    }

    for (int i = 1; i <= t_len; i++) {
        char targetChar = target.charAt(i - 1);
        int insertCost = insert_cost(targetChar);

        // Column 0: an empty source prefix is extended by inserting target chars.
        current[0] = previous[0] + insertCost;

        for (int j = 1; j <= s_len; j++) {
            char sourceChar = source.charAt(j - 1);
            int viaInsert = previous[j] + insertCost;
            int viaSubstitute = previous[j - 1] + substitute_cost(targetChar, sourceChar);
            int viaDelete = current[j - 1] + delete_cost(sourceChar);
            current[j] = min(viaInsert, min(viaSubstitute, viaDelete));
        }

        // The row just filled becomes the "previous" row for the next iteration.
        int[] swap = previous;
        previous = current;
        current = swap;
    }
    return previous[s_len];
}

/** Returns the smaller of the two values. */
private static int min(int a ,int b){
    return Math.min(a, b);
}

/** Cost of inserting character {@code c}; always 1. */
private static int insert_cost(char c){
    return 1;
}

/** Cost of deleting character {@code c}; always 1. */
private static int delete_cost(char c){
    return 1;
}

/** Cost of replacing {@code a} with {@code b}: 0 when they are equal, otherwise 2. */
private static int substitute_cost(char a,char b){
    return a == b ? 0 : 2;
}
测试:
/**
 * Checks the worked example: with insert/delete cost 1 and substitution cost 2,
 * the distance between "intention" and "execution" is 8.
 */
@Test
public void min_edit_distanceTest(){
    String target = "execution";
    String source = "intention";
    int result = SimilaryUtil.min_edit_distance(target, source);
    System.out.println(result);
    // Fail the test on a wrong value instead of only printing it.
    if (result != 8) {
        throw new AssertionError("expected 8 but was " + result);
    }
}
结果为 8(插入、删除代价各为 1,替换代价为 2 时,将 "intention" 变换为 "execution" 的最小编辑距离)。