1. 描述
给定两个序列
X = { x1 , x2 , ... , xm },Y = { y1 , y2 , ... , yn }
求X和Y的一个最长公共子序列。
2. 分析
设 X 和 Y 的一个最长公共子序列为 Z = { z1 , ... , zk }
则
1、若 xm = yn , 则 zk = xm = yn,且Z[k-1] 是 X[m-1] 和 Y[n-1] 的最长公共子序列;
2、若 xm != yn ,且 zk != xm , 则 Z 是 X[m-1] 和 Y 的最长公共子序列;
3、若 xm != yn , 且 zk != yn , 则 Z 是 Y[n-1] 和 X 的最长公共子序列
设 c[i][j] 为前缀 Xi = { x1 , ... , xi } 和 Yj = { y1 , ... , yj } 的 LCS 长度,
则得出递推公式:
c[i][j] = 0 当 i = 0 或 j = 0 时 ,
c[i][j] = c[i-1][j-1] + 1 当 i , j > 0 ; xi = yj 时 ,
c[i][j] = max { c[i][j-1] , c[i-1][j] } 当 i , j > 0 ; xi != yj 时
3. 代码
/*
 * Compute and print a longest common subsequence (LCS) of x and y.
 *
 * Both strings are treated as 1-indexed: x[0] and y[0] are placeholder
 * characters that are never compared, so the sequences actually processed
 * are x[1..lenx] and y[1..leny].
 *
 * Written to stdout, in order: both sequences (without the placeholder),
 * the direction table b (one row per element of x), the LCS length, and
 * one LCS reconstructed by printLCS().
 *
 * A NULL or empty string has no placeholder and is rejected with a message
 * on stderr: strlen() - 1 would wrap around, the tables below would get a
 * non-positive dimension (undefined behaviour) and x + 1 would point past
 * the terminator.
 */
void LCS(char x[], char y[])
{
    if (!x || !y || x[0] == '\0' || y[0] == '\0') {
        fprintf(stderr, "LCS: x and y must start with a placeholder character\n");
        return;
    }

    /* Index of the last element of each sequence (x[0] and y[0] are unused). */
    int lenx = (int)strlen(x) - 1;
    int leny = (int)strlen(y) - 1;
    int i, j;

    /* c[i][j]: LCS length of x[1..i] and y[1..j]. */
    int c[lenx + 1][leny + 1];
    /*
     * b[i][j]: where c[i][j] came from -- '\\' diagonal (match),
     * '<' left (drop y[j]), '^' up (drop x[i]).
     * Row 0 and column 0 are never written and never read.
     */
    char b[lenx + 1][leny + 1];

    printf("x[] = %s\n", x + 1);
    printf("y[] = %s\n", y + 1);

    /* Base case: an empty prefix has an LCS of length 0 with anything. */
    for (i = 0; i <= lenx; i++)
        c[i][0] = 0;
    for (j = 0; j <= leny; j++)
        c[0][j] = 0;

    /* Fill both tables row by row; every cell only needs its left, upper
     * and upper-left neighbours, which are already computed. */
    for (i = 1; i <= lenx; i++) {
        for (j = 1; j <= leny; j++) {
            if (x[i] == y[j]) {
                c[i][j] = c[i - 1][j - 1] + 1;
                b[i][j] = '\\';
            } else if (c[i][j - 1] >= c[i - 1][j]) {
                /* On a tie the left neighbour wins. */
                c[i][j] = c[i][j - 1];
                b[i][j] = '<';
            } else {
                c[i][j] = c[i - 1][j];
                b[i][j] = '^';
            }
        }
    }

    /* Dump the direction table, one row per element of x. */
    for (i = 1; i <= lenx; i++) {
        for (j = 1; j <= leny; j++)
            printf("%c ", b[i][j]);
        printf("\n");
    }

    printf("%d\n", c[lenx][leny]);
    printLCS(lenx, leny, b, x);
}
其中构造一个LCS最优解的函数如下:
/*
 * Print one LCS recovered from the direction table b, followed by a newline.
 *
 * m, n: last valid row / column index of b (b has (m+1) x (n+1) entries).
 * x:    sequence the characters are taken from; indexed from 1.
 */
void printLCS(int m, int n, char b[m + 1][n + 1], char x[])
{
    /* The traceback starts at the bottom-right cell (m, n). */
    print(m, n, b, x, m, n);
    putchar('\n');
}
/*
 * Trace the direction table b back from cell (i, j) and print the matched
 * characters of x in left-to-right order.
 *
 * m, n: last valid row / column index of b.
 * x:    sequence the characters are taken from; indexed from 1.
 * i, j: cell at which the traceback starts.
 */
void print(int m, int n, char b[m + 1][n + 1], char x[], int i, int j)
{
    /* '<' and '^' cells emit nothing, so just walk over them until a
     * diagonal cell or the table border (row 0 / column 0) is reached. */
    while (i != 0 && j != 0 && b[i][j] != '\\') {
        if (b[i][j] == '<')
            j--;
        else
            i--;
    }

    if (i == 0 || j == 0)
        return;

    /* Diagonal cell: everything before it is printed first, then x[i]. */
    print(m, n, b, x, i - 1, j - 1);
    putchar(x[i]);
}
测试如下:
#include <stdio.h>
#include <string.h>
/* Computes the LCS tables for x and y and prints the result; both strings
 * are indexed from 1 (index 0 holds a placeholder character). */
void LCS(char x[], char y[]);
/* Prints one LCS from the direction table b, followed by a newline.
 * b is a variably modified parameter: its dimensions depend on m and n. */
void printLCS(int m, int n, char b[m + 1][n + 1], char x[]);
/* Recursive traceback helper used by printLCS; starts at cell (i, j) of b. */
void print(int m, int n, char b[m + 1][n + 1],char x[], int i, int j);
/* Demo driver: runs LCS() on two fixed sample sequences. */
int main(void)
{
    /* The leading '-' is only a placeholder; LCS() indexes both strings from 1. */
    char first[] = "-qawsedrftgyhujikol";
    char second[] = "-zaxscdvfbgnhmjkl";

    LCS(first, second);

    return 0;
}
输出: