问题描述
(a)执行一次插入操作,编辑距离加1
(b)执行一次删除操作,编辑距离加1
(c)修改一个字符,编辑距离加1
例如testing修改为kestan的编辑距离为3,即最少操作次数
问题求解
(a)需要用到一个二维矩阵dist[len1 + 1][len2 + 1],len1、len2分别为两个字符串的长度。
(b)dist[i][j]是指str1的前i个字符与str2的前j个字符之间的最短编辑距离,显然有:
for(i = 0; i <= len1; i++) dist[i][0] = i;
for(j = 0; j <= len2; j++) dist[0][j] = j;
(c)分析规定的三个操作:添加,删除,替换。
x = str1[i-1],y = str2[j-1]
if x == y, then dist[i][j] = dist[i-1][j-1]
if x != y, and we insert y for str1, then dist[i][j] = dist[i][j-1] + 1
if x != y, and we delete x for str1, then dist[i][j] = dist[i-1][j] + 1
if x != y, and we replace x with y for str1, then dist[i][j] = dist[i-1][j-1] + 1
When x!=y, dist[i][j] is the min of the three situations.
C语言代码
#include<limits.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* Return the smallest of the three integers a, b and c. */
int min(int a, int b, int c)
{
    int smallest = c;

    if (a < smallest)
        smallest = a;
    if (b < smallest)
        smallest = b;
    return smallest;
}
/*
 * Compute the edit (Levenshtein) distance between two NUL-terminated
 * strings: the minimum number of single-character insertions, deletions
 * and replacements needed to turn str1 into str2.
 *
 * Only two rows of the dynamic-programming table are kept at a time, and
 * they are heap-allocated from the actual input lengths, so inputs of any
 * length are handled without overrunning a fixed-size buffer.
 *
 * Returns the distance (>= 0), or -1 if either argument is NULL, a string
 * is longer than INT_MAX characters, or memory allocation fails.
 */
int myCharDistance(char str1[], char str2[])
{
    size_t len1, len2, cols, i, j;
    int *rows, *prev, *cur, *swap;
    int result;

    if (str1 == NULL || str2 == NULL)
        return -1;

    len1 = strlen(str1);
    len2 = strlen(str2);
    /* The distance is at most max(len1, len2) and must fit in an int. */
    if (len1 > (size_t)INT_MAX || len2 > (size_t)INT_MAX)
        return -1;

    /* One extra column for the empty prefix of str2. */
    cols = len2 + 1;
    /* Single allocation holding both rows; calloc checks the size product. */
    rows = calloc(cols, 2 * sizeof *rows);
    if (rows == NULL)
        return -1;
    prev = rows;
    cur = rows + cols;

    /* Row 0: turning the empty prefix of str1 into j characters takes j inserts. */
    for (j = 0; j < cols; j++)
        prev[j] = (int)j;

    for (i = 1; i <= len1; i++) {
        /* Column 0: turning i characters into the empty string takes i deletes. */
        cur[0] = (int)i;
        for (j = 1; j <= len2; j++) {
            /*
             * Table index i/j describes a prefix of length i/j, so the
             * character it ends with is at string index i - 1 / j - 1.
             */
            if (str1[i - 1] == str2[j - 1]) {
                cur[j] = prev[j - 1];
            } else {
                int best = prev[j - 1];      /* replace */

                if (cur[j - 1] < best)       /* insert */
                    best = cur[j - 1];
                if (prev[j] < best)          /* delete */
                    best = prev[j];
                cur[j] = best + 1;
            }
        }
        /* The row just filled becomes the previous row of the next pass. */
        swap = prev;
        prev = cur;
        cur = swap;
    }

    /* After the final swap (or with len1 == 0) the answer sits in prev. */
    result = prev[len2];
    free(rows);
    return result;
}
/*
 * Read one line from stdin into buf (capacity cap, including the NUL).
 * The trailing newline is removed; if the line does not fit, the excess
 * characters are discarded so they are not mistaken for the next line.
 * Returns 1 on success, 0 if nothing could be read (EOF or error).
 */
static int read_line(char *buf, size_t cap)
{
    size_t len;

    if (fgets(buf, (int)cap, stdin) == NULL)
        return 0;

    len = strcspn(buf, "\n");
    if (buf[len] == '\n') {
        buf[len] = '\0';
    } else {
        /* No newline stored: the line was truncated or input ended. */
        int ch;

        while ((ch = getchar()) != '\n' && ch != EOF)
            ;
    }
    return 1;
}

/*
 * Read two lines from stdin and print the edit distance between them.
 * Each line is limited to 99 characters; anything beyond that is ignored.
 * Returns 0 on success, 1 if either line could not be read.
 */
int main(void)
{
    char str1[100], str2[100];

    /* gets() cannot bound its write and was removed in C11; use fgets. */
    if (!read_line(str1, sizeof str1) || !read_line(str2, sizeof str2)) {
        fprintf(stderr, "failed to read two input lines\n");
        return 1;
    }
    printf("min distance between this two char is:%d\n", myCharDistance(str1, str2));
    return 0;
}