/// Task: compute the edit distance from string s to string t and print one
/// possible edit script.
/// The edit distance is the minimum number of insertions, deletions and
/// replacements needed to transform the source string (s) into the target
/// string (t); it is widely used in NLP.
#include <iostream>
#include <string.h>
#include <vector>

// One step of the edit script, stored as a node of a singly linked list.
struct OperInfo
{
    int oper;       // operation type: 0 = no operation, 1 = delete, 2 = insert, 3 = replace
    int i;          // 1-based position in the source string this step refers to
    int j;          // 1-based position in the target string this step refers to
    OperInfo *next; // next operation
};

// One candidate transition into cell (i, j) of the distance table.
struct DoRecord
{
    int i;     // row of the cell (prefix length of the source string)
    int j;     // column of the cell (prefix length of the target string)
    int prevI; // row of the predecessor cell
    int prevJ; // column of the predecessor cell
    int oper;  // 0 = no operation, 1 = delete, 2 = insert, 3 = replace
};

// Every optimal transition found while filling the table, in the order the
// cells were computed. Rebuilt from scratch by each getEditDistance() call.
// A growable container is used because the number of records is proportional
// to strlen(s) * strlen(t) and cannot be bounded by a fixed-size array.
std::vector<DoRecord> doInfo;
int cnt = 0;   // number of records currently stored in doInfo
int count = 0; // running step number printed by displayDoInfo()

// Appends one transition to doInfo and keeps cnt in sync.
static void recordStep(int i, int j, int prevI, int prevJ, int oper)
{
    DoRecord rec;
    rec.i = i;
    rec.j = j;
    rec.prevI = prevI;
    rec.prevJ = prevJ;
    rec.oper = oper;
    doInfo.push_back(rec);
    cnt = static_cast<int>(doInfo.size());
}

// Releases every node of an operation list, including the head node.
static void freeOperList(OperInfo *head)
{
    while (head != NULL)
    {
        OperInfo *next = head->next;
        delete head;
        head = next;
    }
}

// Returns the smallest of three integers.
int Minimum(int a, int b, int c)
{
    int mi = a;
    if (b < mi)
    {
        mi = b;
    }
    if (c < mi)
    {
        mi = c;
    }
    return mi;
}

// Computes the edit distance between two NUL-terminated strings.
// For the principle see: http://www.gdcp.cn/jpkc/sjjg/app/jm/edit_distance/problem.htm
//
// s: source string (a null pointer is treated as the empty string).
// t: target string (a null pointer is treated as the empty string).
// Returns the minimum number of single-character deletions, insertions and
// replacements that turn s into t.
//
// Side effect: doInfo/cnt are cleared and refilled with the transitions of
// this computation, so getOperater() can rebuild an edit script afterwards.
int getEditDistance(const char *s, const char *t)
{
    if (s == NULL)
    {
        s = "";
    }
    if (t == NULL)
    {
        t = "";
    }

    // Step 1: measure the inputs and allocate the (n+1) x (m+1) table.
    // The vector owns its memory, so every return path releases it.
    const int n = static_cast<int>(strlen(s));
    const int m = static_cast<int>(strlen(t));
    std::vector<std::vector<int> > d(n + 1, std::vector<int>(m + 1, 0));

    // Drop the trace of any previous call.
    doInfo.clear();
    cnt = 0;

    // Step 2: borders. Reaching an empty target needs i deletions; starting
    // from an empty source needs j insertions. These transitions are recorded
    // as well so that the backtrace can walk all the way to cell (0, 0).
    for (int i = 1; i <= n; i++)
    {
        d[i][0] = i;
        recordStep(i, 0, i - 1, 0, 1); // delete
    }
    for (int j = 1; j <= m; j++)
    {
        d[0][j] = j;
        recordStep(0, j, 0, j - 1, 2); // insert
    }

    // Steps 3-6: fill the interior of the table.
    for (int i = 1; i <= n; i++)
    {
        const char s_i = s[i - 1];
        for (int j = 1; j <= m; j++)
        {
            const char t_j = t[j - 1];
            const int cost = (s_i == t_j) ? 0 : 1;

            // If the last step is a delete:  d[i][j] = d[i-1][j] + 1
            // If the last step is an insert: d[i][j] = d[i][j-1] + 1
            // Otherwise the characters are aligned: no operation when
            // cost == 0, a replacement when cost == 1.
            d[i][j] = Minimum(d[i - 1][j] + 1,
                              d[i][j - 1] + 1,
                              d[i - 1][j - 1] + cost);

            // Record every optimal predecessor. The order (delete, insert,
            // replace / no operation) decides which script getOperater()
            // reports, because it takes the first record of each cell.
            if (d[i][j] == d[i - 1][j] + 1)
            {
                recordStep(i, j, i - 1, j, 1); // delete
            }
            if (d[i][j] == d[i][j - 1] + 1)
            {
                recordStep(i, j, i, j - 1, 2); // insert
            }
            if (d[i][j] == d[i - 1][j - 1] + cost)
            {
                recordStep(i, j, i - 1, j - 1, (cost == 1) ? 3 : 0); // replace / none
            }
        }
    }

    // Step 7: the bottom-right cell holds the distance of the full strings.
    return d[n][m];
}

// Rebuilds one edit script from the trace left by getEditDistance() and
// stores it in the linked list behind head.
//
// n, m: cell to start from, normally (strlen(s), strlen(t)).
// head: dummy head node; the steps are linked after it in execution order.
//
// The walk goes backwards from (n, m) to (0, 0) and pushes each step right
// behind head, which leaves the list in forward order. Only the first
// recorded transition of each cell is followed, so exactly one script is
// produced. The caller owns the allocated nodes.
void getOperater(int n, int m, OperInfo *head)
{
    if (head == NULL)
    {
        return;
    }

    while (n != 0 || m != 0)
    {
        // Locate the first transition recorded for cell (n, m).
        const DoRecord *found = NULL;
        for (std::vector<DoRecord>::size_type k = 0; k < doInfo.size(); ++k)
        {
            if (doInfo[k].i == n && doInfo[k].j == m)
            {
                found = &doInfo[k];
                break;
            }
        }
        if (found == NULL)
        {
            // No trace for this cell (e.g. getEditDistance() was not run for
            // these lengths); stop instead of looping forever.
            break;
        }

        OperInfo *node = new OperInfo;
        node->oper = found->oper;
        node->i = found->i;
        node->j = found->j;
        node->next = head->next;
        head->next = node;

        // Every transition moves to a cell with a smaller i + j, so the
        // loop terminates.
        n = found->prevI;
        m = found->prevJ;
    }
}

// Prints the edit script stored behind head.
//
// head: dummy head node of the list filled by getOperater().
// a:    source string, used to show deleted / replaced characters.
// b:    target string, used to show inserted / replacement characters.
void displayDoInfo(OperInfo *head, const char *a, const char *b)
{
    std::cout << "其中的一种操作步骤如下:" << '\n';
    if (head == NULL)
    {
        return;
    }

    count = 0; // restart the numbering for every script that is printed
    for (const OperInfo *node = head->next; node != NULL; node = node->next)
    {
        switch (node->oper)
        {
        case 0:
            std::cout << "没有操作,直接看下一位" << '\n';
            break;
        case 1:
            count++;
            std::cout << "第" << count << "步--" << "delete:" << a[node->i - 1] << '\n';
            break;
        case 2:
            count++;
            std::cout << "第" << count << "步--" << "insert:" << b[node->j - 1] << '\n';
            break;
        case 3:
            count++;
            std::cout << "第" << count << "步--" << "replace:" << a[node->i - 1]
                      << "->" << b[node->j - 1] << '\n';
            break;
        default:
            break;
        }
    }
    std::cout.flush();
}

// Demo: prints the edit distance between two fixed strings and one edit script.
int main()
{
    const char *a = "aded";
    const char *b = "acfbe";
    const int n = static_cast<int>(strlen(a));
    const int m = static_cast<int>(strlen(b));

    OperInfo *head = new OperInfo;
    head->oper = 0;
    head->i = 0;
    head->j = 0;
    head->next = NULL;

    std::cout << a << "-->" << b << "的编辑距离为:" << getEditDistance(a, b) << '\n';
    getOperater(n, m, head);
    displayDoInfo(head, a, b);

    freeOperList(head);
    return 0;
}