问题:
给定字符串firstStr,secondStr,长度分别为m,n。通过delete, insert, replace 操作把firstStr转化为secondStr,而这三种操作的每次执行成本分别为delCost, insCost, repCost。求完成这种转换的最小成本。
解决方案:
1. 递归(recursive)
base condition:用于终止递归。当 m == 0 时返回 n * insCost(插入secondStr的全部n个字符);当 n == 0 时返回 m * delCost(删除firstStr的全部m个字符);
Mincost( firstStr, secondStr, m, n, delCost, insCost, repCost ) =
min{ Mincost( firstStr, secondStr, m - 1, n, delCost, insCost, repCost ) + delCost, Mincost( firstStr, secondStr, m, n-1, delCost, insCost, repCost )+ insCost,
Mincost( firstStr, secondStr, m - 1, n-1, delCost, insCost, repCost)+ ( firstStr[m-1] == secondStr[n-1] ? 0 : repCost ) };
其中当两个字符串的最后一个字符相同时不需要替换,因此这一项的附加成本为0。
2.动态规划(dynamic programming)
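引入成本数组cost[m+1][n+1]:cost[i][j]代表把firstStr的前i个字符转成secondStr的前j个字符的最小成本;
初始条件:cost[0][j] = j * insCost, cost[i][0] = i * delCost;
建立递推关系:cost[i][j] = min{ cost[i-1][j] + delCost, cost[i][j-1] + insCost, cost[i-1][j-1] + ( firstStr[i-1] == secondStr[j-1] ? 0 : repCost ) };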
两种方法的区别很明显:
1.递归计算的整个搜索过程最终会形成一棵树,这棵树的不同树枝之间会有很多重复的计算,最终导致性能很差;
2.动态规划会把许多计算结果缓存到cost数组里,虽然有个空间的开销,但是算过的不用再计算,直接查表即可,
所以动态规划算法有很好的性能表现
代码如下:
#ifndef _EDIT_DISTANCE_H_
#define _EDIT_DISTANCE_H_
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
/*
 * Return the smaller of two integers.
 * When both arguments are equal, that common value is returned.
 */
int minVal( int a, int b )
{
    if( b <= a )
        return b;
    return a;
}
/*
 * Return the smallest of three integers.
 * Implemented as two pairwise comparisons through minVal().
 */
inline int minnum( int a, int b, int c )
{
    int smallerOfFirstTwo = minVal( a, b );
    return minVal( smallerOfFirstTwo, c );
}
/*
 * Plain recursive solution of the weighted edit distance problem.
 *
 * Computes the minimum cost of converting the first firstPos characters of
 * firstStr into the first secondPos characters of secondStr.
 *
 *   firstStr, secondStr : source and target strings
 *   firstPos, secondPos : lengths of the prefixes that still have to be matched
 *   delCost             : cost of deleting one character of firstStr
 *   insCost             : cost of inserting one character of secondStr
 *   rapCost             : cost of replacing a character by a different one
 *
 * Returns the minimum total cost. No results are cached, so the same
 * sub-problems are solved repeatedly and the running time grows
 * exponentially with the prefix lengths.
 */
int EditDistanceRecur( const char* firstStr, const char* secondStr, int firstPos, int secondPos, int delCost, int insCost, int rapCost )
{
    // Source prefix exhausted: every remaining target character must be inserted.
    // (Also covers the case where both prefixes are empty, giving 0.)
    if( 0 == firstPos )
        return secondPos * insCost;
    // Target prefix exhausted: every remaining source character must be deleted.
    if( 0 == secondPos )
        return firstPos * delCost;

    // Drop the last source character.
    int del = EditDistanceRecur( firstStr, secondStr, firstPos - 1, secondPos, delCost, insCost, rapCost ) + delCost;
    // Append the last target character.
    int insert = EditDistanceRecur( firstStr, secondStr, firstPos, secondPos - 1, delCost, insCost, rapCost ) + insCost;
    // Align the two last characters; a replacement is only paid for when they differ.
    int replace = EditDistanceRecur( firstStr, secondStr, firstPos - 1, secondPos - 1, delCost, insCost, rapCost );
    if( firstStr[firstPos - 1] != secondStr[secondPos - 1] )
        replace += rapCost;

    return minnum( del, insert, replace );
}
/*
 * Interface function for the recursive solver.
 *
 * Measures both strings with strlen() and hands the full lengths to
 * EditDistanceRecur(), returning whatever that call returns.
 */
int FindEditDistance( const char* strfirst, const char* strsecond, int delCost, int insCost, int rapCost )
{
    int firstLen = (int)strlen( strfirst );
    int secondLen = (int)strlen( strsecond );

    return EditDistanceRecur( strfirst, strsecond, firstLen, secondLen, delCost, insCost, rapCost );
}
/*
 * Dynamic-programming solution of the weighted edit distance problem.
 *
 * Fills a (firstLen + 1) x (secondLen + 1) table in which cell (i, j) holds the
 * minimum cost of converting the first i characters of firstStr into the
 * first j characters of secondStr.
 *
 *   firstStr, secondStr : source and target strings
 *   delCost             : cost of deleting one character of firstStr
 *   insCost             : cost of inserting one character of secondStr
 *   rapCost             : cost of replacing a character by a different one
 *
 * Returns the minimum total cost of converting firstStr into secondStr.
 * Time and memory are both proportional to firstLen * secondLen.
 */
int EditDistanceDP( const char* firstStr, const char* secondStr, int delCost, int insCost, int rapCost )
{
    const size_t firstLen = strlen( firstStr );
    const size_t secondLen = strlen( secondStr );
    const size_t cols = secondLen + 1;

    // The table is stored row by row in one flat allocation, so cell (i, j)
    // lives at table[i * cols + j] and a single delete [] releases everything.
    int* table = new int[ ( firstLen + 1 ) * cols ];

    //base condition: an empty source needs j insertions, an empty target needs i deletions
    for( size_t j = 0; j <= secondLen; j++ )
    {
        table[j] = (int)j * insCost;
    }
    for( size_t i = 0; i <= firstLen; i++ )
    {
        table[i * cols] = (int)i * delCost;
    }

    for( size_t i = 1; i <= firstLen; i++ )
    {
        const int* prevRow = table + ( i - 1 ) * cols;
        int* curRow = table + i * cols;
        for( size_t j = 1; j <= secondLen; j++ )
        {
            // Delete firstStr[i-1]: come from (i-1, j).
            int deleteCost = prevRow[j] + delCost;
            // Insert secondStr[j-1]: come from (i, j-1).
            int insertCost = curRow[j-1] + insCost;
            // Align firstStr[i-1] with secondStr[j-1]: come from (i-1, j-1);
            // the strings are 0-based, hence the -1 on both indices.
            int replaceCost = prevRow[j-1];
            if( firstStr[i-1] != secondStr[j-1] )
                replaceCost += rapCost;
            curRow[j] = minnum( deleteCost, insertCost, replaceCost );
        }
    }

    int res = table[ firstLen * cols + secondLen ];
    delete [] table;
    return res;
}
/*
 * Test method.
 *
 * Runs both solvers on one fixed pair of strings with costs
 * delete = 1, insert = 2, replace = 3, prints each result, and then waits
 * for a character on standard input before returning.
 */
void TestEditDistance()
{
    const char* first = "adddfooiedfafa";
    const char* second = "adafsadoiddf";
    int delCost = 1;
    int insCost = 2;
    int rapCost = 3;

    printf("Minimum cost %d by dynamic programming \n",
           EditDistanceDP( first, second, delCost, insCost, rapCost ) );
    // The format string has a single %d, so the computed cost must be the only argument.
    printf("Minimum cost %d by recursive \n",
           FindEditDistance( first, second, delCost, insCost, rapCost ) );
    getchar();
}
#endif