一、需求背景
当前正在做一个文本纠错项目,需要对比两个字符串的差异并给出编辑方案:即通过何种变换(添加、删除、替换、插入等),能够将当前字符串变为目标字符串?
二、解决方案
基于上述的需求,其中一种解决方案就是使用最小编辑距离
思想实现:
/**
 * Computes the minimum edit distance (Levenshtein distance) between two strings and
 * returns both strings with every differing character wrapped in an HTML {@code <span>}.
 *
 * <p>Markup produced:
 * <ul>
 *   <li>{@code <span class='replace'>} wraps a substituted character, in both results;</li>
 *   <li>{@code <span class='delete'>} wraps a character that exists only in
 *       {@code sourceStr}, in the source result;</li>
 *   <li>{@code <span class='insert'>} wraps a character that exists only in
 *       {@code targetStr}, in the target result.</li>
 * </ul>
 *
 * <p>Characters are compared with {@code charAt} (UTF-16 code units) and copied into the
 * result verbatim; no HTML escaping is applied to them.
 *
 * @param sourceStr the string to be edited (the current text)
 * @param targetStr the string that {@code sourceStr} should be turned into
 * @return a two-element array: index 0 is the annotated {@code sourceStr},
 *         index 1 is the annotated {@code targetStr}
 * @throws NullPointerException if either argument is {@code null}
 */
public static String[] miniStrEdit(String sourceStr, String targetStr) {
    int len1 = sourceStr.length();
    int len2 = targetStr.length();

    // dp[i][j] = edit distance between the first i chars of sourceStr
    // and the first j chars of targetStr.
    int[][] dp = new int[len1 + 1][len2 + 1];

    // First column / first row: turning a prefix into the empty string (or back)
    // costs one operation per character.
    for (int i = 0; i <= len1; i++) {
        dp[i][0] = i;
    }
    for (int j = 0; j <= len2; j++) {
        dp[0][j] = j;
    }

    // Fill the table.
    for (int i = 1; i <= len1; i++) {
        for (int j = 1; j <= len2; j++) {
            if (sourceStr.charAt(i - 1) == targetStr.charAt(j - 1)) {
                // Same character: no operation needed.
                dp[i][j] = dp[i - 1][j - 1];
            } else {
                // Cheapest of replace, insert, delete, plus one operation.
                int min = Math.min(dp[i - 1][j - 1], Math.min(dp[i][j - 1], dp[i - 1][j]));
                dp[i][j] = min + 1;
            }
        }
    }

    StringBuilder sourceBuilder = new StringBuilder(sourceStr);
    StringBuilder targetBuilder = new StringBuilder(targetStr);

    // Walk back from the bottom-right corner. Because we move from the end of each
    // string towards its start, markup inserted at an index never shifts the indices
    // that are still to be visited.
    int i = len1;
    int j = len2;
    while (i > 0 && j > 0) {
        if (dp[i][j] == dp[i - 1][j - 1] && sourceStr.charAt(i - 1) == targetStr.charAt(j - 1)) {
            // Match: nothing to annotate.
            i--;
            j--;
        } else if (dp[i][j] == dp[i - 1][j - 1] + 1) {
            // Replace: mark the character on both sides.
            wrapInSpan(sourceBuilder, i - 1, "replace");
            wrapInSpan(targetBuilder, j - 1, "replace");
            i--;
            j--;
        } else if (dp[i][j] == dp[i - 1][j] + 1) {
            // Delete: the source character has no counterpart in the target.
            wrapInSpan(sourceBuilder, i - 1, "delete");
            i--;
        } else if (dp[i][j] == dp[i][j - 1] + 1) {
            // Insert: the target character has no counterpart in the source.
            wrapInSpan(targetBuilder, j - 1, "insert");
            j--;
        }
    }

    // Leftover prefix of the source: every remaining character is a deletion.
    // (The character at index i - 1 must be wrapped, not the gap after it.)
    while (i > 0) {
        wrapInSpan(sourceBuilder, i - 1, "delete");
        i--;
    }
    // Leftover prefix of the target: every remaining character is an insertion.
    while (j > 0) {
        wrapInSpan(targetBuilder, j - 1, "insert");
        j--;
    }

    return new String[]{sourceBuilder.toString(), targetBuilder.toString()};
}

/**
 * Wraps the single character at {@code index} in {@code <span class='cssClass'>…</span>}.
 * The closing tag is inserted first so the opening tag's length does not have to be
 * added to the closing position.
 */
private static void wrapInSpan(StringBuilder builder, int index, String cssClass) {
    builder.insert(index + 1, "</span>");
    builder.insert(index, "<span class='" + cssClass + "'>");
}
调用以后处理结果如下:
/**
 * Small demo that runs the edit-distance diff on two sample sentences and prints
 * the annotated results.
 */
public class tt {
    /**
     * Prints the annotated source string followed by the annotated target string.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Two sample sentences that differ in a handful of characters.
        String s1 = "撤销平果县,以原平果县的兴政区域为平果市的兴政区域平果市人民政府驻码头镇铝城大道1780-6号。";
        String s2 = "撤销平果市,以原平果县人民政府的行政区域为平果市人民政府的行政区域平果人民政府驻码头镇铝城大道1780-6号。";
        // Call the method presented in this article (miniStrEdit); index 0 is the
        // annotated source, index 1 the annotated target.
        String[] strings = StringUtils.miniStrEdit(s1, s2);
        System.out.println(strings[0]);
        System.out.println(strings[1]);
    }
}
//撤销平果<span class='replace'>县</span>,以原平果县的<span class='replace'>兴</span>政区域为平果市的<span class='replace'>兴</span>政区域平果<span class='delete'>市</span>。
//撤销平果<span class='replace'>市</span>,以原平果县<span class='insert'>人</span><span class='insert'>民</span><span class='insert'>政</span><span class='insert'>府</span>的<span class='replace'>行</span>政区域为平果市<span class='insert'>人</span><span class='insert'>民</span><span class='insert'>政</span><span class='insert'>府</span>的<span class='replace'>行</span>政区域平果。