最长公共子序列(The longest common subsequence)
问题描述:
Given two sequences X = (x1, x2, …, xm) and Y = (y1, y2, …, yn), find a maximum-length common subsequence (LCS) of X and Y.
E.g.:
X = (A, B, C, B, D, A, B), Y = (B, D, C, A, B, A),
(B, C, B, A) and (B, D, A, B) are longest common subsequences of X and Y (length = 4).
题目联想:
突然就想起了大二上学期数据结构里的算法 —— KMP 模式匹配算法,现在忘了……在这里码住,记得复习。
最长公共子串和最长公共子序列的区别:
子串要求在原字符串中是连续的,而子序列则只需保持相对顺序一致,并不要求连续。
❗算法描述❗:
回到动态规划上来,依然按照动态规划的四步走策略:
① 定义子问题:
c[i, j]:X 的前 i 个元素 (x1, …, xi) 与 Y 的前 j 个元素 (y1, …, yj) 的最长公共子序列的长度。
② 是否满足最优子结构:
嗯~ o(* ̄▽ ̄*)o 满足!
③ 确定子问题的求解顺序:
很明显咯,自底向上bottom - up!
$$
c[i, j] =
\begin{cases}
0, & i = 0 \ \text{or}\ j = 0 \\
c[i-1, j-1] + 1, & i, j > 0 \ \text{and}\ x_i = y_j \\
\max\bigl(c[i, j-1],\ c[i-1, j]\bigr), & i, j > 0 \ \text{and}\ x_i \neq y_j
\end{cases}
$$
④ 回溯最优解:
代码实现:
#include <iostream>
#include <vector>
using namespace std;
void printLCS(vector<vector<int>> b, vector<char> x, int i, int j);
// Reads one sequence of single-character items from standard input.
//
// Expected format: items separated by exactly one separator character
// (e.g. "A B C B D A B"), terminated by a newline. After every item exactly
// one following character is consumed; reading stops when that character is
// a newline, or when the stream is exhausted / fails.
//
// Returns the items in input order (empty if no item could be read).
static vector<char> readSequence()
{
    vector<char> items;
    char item;
    // Checking the extraction result stops the loop at end-of-input instead
    // of pushing an indeterminate character forever.
    while (cin >> item)
    {
        items.push_back(item);
        // The character right after an item is either the separator or the
        // terminating newline. At end-of-input get() fails, which makes the
        // next extraction in the loop condition fail as well.
        if (cin.get() == '\n')
        {
            break;
        }
    }
    return items;
}

// Reads sequences X and Y, then prints the length of their longest common
// subsequence followed by one such subsequence.
int main()
{
    cout << "请输入X序列:" << endl;
    const vector<char> x = readSequence();
    cout << "请输入Y序列:" << endl;
    const vector<char> y = readSequence();

    const int len1 = int(x.size());
    const int len2 = int(y.size());

    // c[i][j]: LCS length of the first i items of x and the first j items of y.
    // Row 0 and column 0 stay 0 and serve as the empty-prefix base case.
    vector<vector<int>> c(len1 + 1, vector<int>(len2 + 1, 0));
    // b[i][j]: which neighbour c[i][j] was derived from, consumed by printLCS:
    //   0 = diagonal (x[i-1] == y[j-1]), 1 = cell above, -1 = cell to the left.
    // -2 marks cells that are never filled (row 0 / column 0).
    vector<vector<int>> b(len1 + 1, vector<int>(len2 + 1, -2));

    for (int i = 1; i <= len1; ++i)
    {
        for (int j = 1; j <= len2; ++j)
        {
            // Table index i corresponds to sequence index i - 1 because of
            // the extra base-case row and column.
            if (x[i - 1] == y[j - 1])
            {
                c[i][j] = c[i - 1][j - 1] + 1;
                b[i][j] = 0;
            }
            else if (c[i - 1][j] >= c[i][j - 1])
            {
                // Ties go to the cell above; this decides which of several
                // equally long subsequences is printed.
                c[i][j] = c[i - 1][j];
                b[i][j] = 1;
            }
            else
            {
                c[i][j] = c[i][j - 1];
                b[i][j] = -1;
            }
        }
    }

    cout << "最长公共子序列的长度:" << endl;
    cout << c[len1][len2] << endl;
    cout << "最长公共子序列:" << endl;
    printLCS(b, x, len1, len2);
    cout << endl;
    return 0;
}
// Prints one longest common subsequence to standard output, each item
// followed by a single space.
//
// b    - direction table: b[r][c] == 0 means x[r-1] belongs to the LCS and
//        the walk moves diagonally to (r-1, c-1); 1 means move up to
//        (r-1, c); any other value means move left to (r, c-1).
// x    - the first input sequence (row r of the table maps to x[r-1]).
// i, j - cell to start walking back from (normally the bottom-right cell).
//
// The parameters stay by-value so the definition keeps matching its forward
// declaration. The walk is iterative, so the tables are copied once per call
// rather than once per step as in the previous recursive form.
void printLCS(vector<vector<int>> b, vector<char> x, int i, int j)
{
    // Walking back from (i, j) yields the subsequence last item first.
    vector<char> reversedLcs;
    while (i > 0 && j > 0)
    {
        const int direction = b[i][j];
        if (direction == 0)
        {
            reversedLcs.push_back(x[i - 1]);
            --i;
            --j;
        }
        else if (direction == 1)
        {
            --i;
        }
        else
        {
            --j;
        }
    }

    // Emit in forward order.
    for (vector<char>::const_reverse_iterator it = reversedLcs.rbegin();
         it != reversedLcs.rend(); ++it)
    {
        cout << *it << ' ';
    }
}
最后贴一个写的蛮全的博客叭:
史上最全最丰富的“最长公共子序列”、“最长公共子串”问题的解法与思路
2019/04/12次日更新:
发现上面从 c[1][1] 开始遍历其实有点绕。弄得简单点:不再额外保留恒为 0 的 c[i][0]、c[0][j] 这一行一列,而是让 c[i][j] 直接对应 x[0..i] 与 y[0..j],稍微修改一下,如下(修改时注意数组下标范围的更改):
#include <iostream>
#include <vector>
using namespace std;
int max(int, int);
// Reads one sequence of single-character items from standard input.
//
// Expected format: items separated by exactly one separator character
// (e.g. "A B C B D A B"), terminated by a newline. After every item exactly
// one following character is consumed; reading stops when that character is
// a newline, or when the stream is exhausted / fails.
//
// Returns the items in input order (empty if no item could be read).
static vector<char> readSequence()
{
    vector<char> items;
    char item;
    // Checking the extraction result stops the loop at end-of-input instead
    // of pushing an indeterminate character forever.
    while (cin >> item)
    {
        items.push_back(item);
        // At end-of-input get() fails, which makes the next extraction in
        // the loop condition fail as well.
        if (cin.get() == '\n')
        {
            break;
        }
    }
    return items;
}

// Reads sequences X and Y and prints the length of their longest common
// subsequence, using a table without the extra base-case row and column:
// c[i][j] is the LCS length of x[0..i] and y[0..j] (both inclusive).
int main()
{
    cout << "请输入X序列:" << endl;
    const vector<char> x = readSequence();
    cout << "请输入Y序列:" << endl;
    const vector<char> y = readSequence();

    const int len1 = int(x.size());
    const int len2 = int(y.size());

    // With no sentinel row/column the table cannot represent an empty
    // sequence, so that case is answered directly.
    if (len1 == 0 || len2 == 0)
    {
        cout << "最长公共子序列的长度:" << endl;
        cout << 0 << endl;
        return 0;
    }

    vector<vector<int>> c(len1, vector<int>(len2, 0));

    // Base cases: first column and first row. Once a match has been seen the
    // value must stay 1 for all longer prefixes, hence the carry-over from
    // the previous cell when the current items differ.
    c[0][0] = (x[0] == y[0]) ? 1 : 0;
    for (int i = 1; i < len1; ++i)
    {
        c[i][0] = (x[i] == y[0]) ? 1 : c[i - 1][0];
    }
    for (int j = 1; j < len2; ++j)
    {
        c[0][j] = (x[0] == y[j]) ? 1 : c[0][j - 1];
    }

    for (int i = 1; i < len1; ++i)
    {
        for (int j = 1; j < len2; ++j)
        {
            if (x[i] == y[j])
            {
                c[i][j] = c[i - 1][j - 1] + 1;
            }
            else
            {
                c[i][j] = max(c[i][j - 1], c[i - 1][j]);
            }
        }
    }

    cout << "最长公共子序列的长度:" << endl;
    cout << c[len1 - 1][len2 - 1] << endl;
    return 0;
}
// Returns the larger of the two integers.
int max(int a, int b)
{
    if (a > b)
    {
        return a;
    }
    return b;
}
and 优化:
如果只要求输出最长公共子序列的长度,我们可以再将上一算法进行优化,对记录最优解的数组 即 数组 c[] 使用滚动数组,只需要两行就 ok 啦!
具体的自己画下图就懂啦,嘻嘻~
#include <iostream>
#include <vector>
using namespace std;
int max(int, int);
// Reads one sequence of single-character items from standard input.
//
// Expected format: items separated by exactly one separator character
// (e.g. "A B C B D A B"), terminated by a newline. After every item exactly
// one following character is consumed; reading stops when that character is
// a newline, or when the stream is exhausted / fails.
//
// Returns the items in input order (empty if no item could be read).
static vector<char> readSequence()
{
    vector<char> items;
    char item;
    // Checking the extraction result stops the loop at end-of-input instead
    // of pushing an indeterminate character forever.
    while (cin >> item)
    {
        items.push_back(item);
        // At end-of-input get() fails, which makes the next extraction in
        // the loop condition fail as well.
        if (cin.get() == '\n')
        {
            break;
        }
    }
    return items;
}

// Reads sequences X and Y and prints only the length of their longest
// common subsequence, keeping just two rows of the DP table in memory.
int main()
{
    cout << "请输入X序列:" << endl;
    const vector<char> x = readSequence();
    cout << "请输入Y序列:" << endl;
    const vector<char> y = readSequence();

    const int len1 = int(x.size());
    const int len2 = int(y.size());

    // Rolling table: c[0] is the finished previous row, c[1] is the row
    // being filled. Column 0 is never written and stays 0, and the initial
    // all-zero c[0] is the "no items of x" row, so empty input needs no
    // special handling.
    vector<vector<int>> c(2, vector<int>(len2 + 1, 0));

    for (int i = 1; i <= len1; ++i)
    {
        for (int j = 1; j <= len2; ++j)
        {
            if (x[i - 1] == y[j - 1])
            {
                // The diagonal value must come from the previous row, which
                // is why that row is left untouched while this one is built.
                c[1][j] = c[0][j - 1] + 1;
            }
            else
            {
                c[1][j] = max(c[1][j - 1], c[0][j]);
            }
        }
        // The row just completed becomes the previous row for the next i.
        c[0].swap(c[1]);
    }

    cout << "最长公共子序列的长度:" << endl;
    cout << c[0][len2] << endl;
    return 0;
}
// Picks the greater of two ints; when they are equal that shared value is
// returned.
int max(int a, int b)
{
    int larger = b;
    if (a > b)
    {
        larger = a;
    }
    return larger;
}