给定两个序列x和y,称z是x和y的公共子序列,如果z既是x的子序列,又是y的子序列;最长的公共子序列称作最长公共子序列LCS(longest common subsequence)。
解题思路
(1)LCS的最优子结构
设zk是xm和yn的一个LCS,则,如果x和y的最后一个元素相同,则z中去掉最后一个元素之后zk-1仍为xm-1和yn-1的LCS。
如果xm!=yn,若zk!=xm,则z是xm-1和y的一个LCS,若zk!=yn,则z是xm和yn-1的LCS。
(2)一个递归解
设c[i][j]为序列xi和yj的一个LCS的长度,则有:
expression | condition |
---|---|
c[i][j]=0 | i=0或j=0 |
c[i][j]=c[i−1][j−1]+1 | xi=yj且i,j>0 |
c[i][j]=max(c[i][j−1],c[i−1][j]) | xi!=yj且i,j>0 |
(3)计算LCS的长度
实现代码
#include <iostream>
#include <cstring>
#include <cstdio>
using namespace std;
int lenLCS(const char *ch1, const char *ch2, int len1, int len2, int **c)
{
for (int i = 0; i <= len1; i++)
{
c[i][0] = 0;
}
for (int i = 0; i <= len2; i++)
{
c[0][i] = 0;
}
for (int i = 1; i <= len1; i++)
{
for (int j = 1; j <= len2; j++)
{
if (ch1[i-1] == ch2[j-1])
{
c[i][j] = c[i-1][j-1] + 1;
}
else
{
if (c[i-1][j] >= c[i][j-1])
{
c[i][j] = c[i-1][j];
}
else
{
c[i][j] = c[i][j-1];
}
}
}
}
// for (int i = 0; i <= len1; i++)
// for (int j = 0; j <= len2; j++)
// if (j == len2) printf("%d\n", c[i][j]);
// else printf("%d ", c[i][j]);
return c[len1][len2];
}
void printLCS(const char *ch1, const char* ch2, int len1, int len2, int **c)
{
int i = len1;
int j = len2;
while (c[i][j] > 0)
{
if (ch1[i-1] == ch2[j-1])
{
cout<<ch1[i-1];
i--;
j--;
}
else
{
if (c[i][j] == c[i-1][j])
{
i--;
}
else
{
j--;
}
}
}
}
int main()
{
char *ch1 = "ACCGGTCGAGTGCGCGGAAGCCGGCCGAA";
char *ch2 = "GTCGTTCGGAATGCCGTTGCTCTGTAAA";
int len1 = strlen(ch1);
int len2 = strlen(ch2);
int **c = new int*[len1 + 1];
for (int i = 0; i <= len1; i++)
{
c[i] = new int[len2 + 1];
}
cout<<lenLCS(ch1, ch2, len1, len2, c)<<endl;
printLCS(ch1, ch2, len1, len2, c);
for (int i = 0; i <= len1; i++)
{
delete [] c[i];
}
delete [] c;
return 0;
}
不连续情况的转移方程:
扩展到连续情况,转移方程为:
实现代码如下:
#include <iostream>
#include <cstring>
#include <cstdio>
using namespace std;
int fun(char *ch1, char *ch2, int len1, int len2, int **c)
{
for (int i = 0; i <= len1; i++)
{
c[i][0] = 0;
}
for (int j = 0; j <= len2; j++)
{
c[0][j] = 0;
}
int maxlen = 0;
for (int i = 1; i <= len1; i++)
{
for (int j = 1; j <= len2; j++)
{
if (ch1[i-1] == ch2[j-1])
{
c[i][j] = c[i-1][j-1] + 1;
}
else
{
c[i][j] = 0;
}
maxlen = max(maxlen, c[i][j]);
}
}
return maxlen;
}
int main()
{
char *ch1 = "acaccbabb";
char *ch2 = "acbac";
int len1 = strlen(ch1);
int len2 = strlen(ch2);
int **c = new int*[len1 + 1];
for (int i = 0; i <= len1; i++)
{
c[i] = new int[len2 + 1];
}
int maxlen = fun(ch1, ch2, len1, len2, c);
printf("The max length is : %d\n", maxlen);
for (int i = 0; i <= len1; i++)
{
delete [] c[i];
}
delete [] c;
}