1. 子序列和子串的区别
简单理解就是子串是连续的,子序列是不连续的。
例如 abcaurssas,其中 acusa就是一个子序列,abca就是一个子串。
2. 动态规划求解最长公共子序列(LCS)
对于母串X=<x1,x2,⋯,xm>, Y=<y1,y2,⋯,yn>,求LCS。
假设Z=<z1,z2,⋯,zk>是X与Y的LCS, 我们观察到
- 如果xm=yn,则zk=xm=yn,有Zk−1是Xm−1与Yn−1的LCS;
- 如果xm≠yn,则Zk是Xm与Yn−1的LCS,或者是Xm−1与Yn的LCS。
因此,求解LCS的问题则变成递归求解的两个子问题。但是,上述的递归求解的办法中,重复的子问题多,效率低下。改进的办法——用空间换时间,用数组保存中间状态,方便后面的计算。这就是动态规划(DP)的核心思想了。
用二维数组c[i][j]记录串x1x2⋯xi与y1y2⋯yj的LCS长度,则可得到状态转移方程
c[i,j]=0i=0 or j=0
c[i,j]= c[i−1,j−1]+1i,j>0 and xi=yj
c[i,j]=max(c[i,j−1],c[i−1,j])i,j>0 and xi≠yj
二维数组C[i,j]即是最长公共子序列生成的过程,将C生成的过程逆过来即可以求出最长公共子序列。
具体的过程可以参考:
http://blog.csdn.net/hrn1216/article/details/51534607
3. 代码实现(此代码可以找出全部的公共子序列)
void FillCountArray(std::string &s1, std::string &s2, int **countArr) {
int sizeX = s1.size();
int sizeY = s2.size();
for (int i = 1; i <= sizeX; ++i)
{
for (int j = 1; j <= sizeY; ++j)
{
if (s1[i - 1] == s2[j - 1]) {
countArr[i][j] = countArr[i - 1][j - 1] + 1;
}
else {
countArr[i][j] = std::max(countArr[i - 1][j], countArr[i][j - 1]);
}
}
}
}
std::vector<std::string > results;
void FindAllLCS(std::string &X, std::string &Y, int **countArr, int i, int j, std::string curRes) {
while (i > 0 && j > 0)
{
if (X[i - 1] == Y[j - 1])
{
curRes.push_back(X[i - 1]);
--i;
--j;
}
else
{
if (countArr[i - 1][j] > countArr[i][j - 1])
--i;
else if (countArr[i - 1][j] < countArr[i][j - 1])
--j;
else
{
FindAllLCS(X, Y, countArr, i - 1, j, curRes);
FindAllLCS(X, Y, countArr, i, j - 1, curRes);
return;
}
}
}
std::reverse(curRes.begin(), curRes.end());
results.emplace_back(curRes);
}
int main()
{
std::string X = "13456778";
std::string Y = "357486782";
int sizeX = X.size();
int sizeY = Y.size();
int **CountArray = new int *[sizeX + 1];
for (int i = 0; i < sizeY; ++i)
{
CountArray[i] = new int[sizeY + 1];
}
for (int i = 0; i <= sizeX; ++i)
{
for (int j = 0; j <= sizeY; ++j)
{
CountArray[i][j] = 0;
}
}
FillCountArray(X, Y, CountArray);
for (int i = 0; i <= sizeX; ++i)
{
for (int j = 0; j <= sizeY; ++j)
{
std::cout << CountArray[i][j] << " ";
}
std::cout << std::endl;
}
std::cout << std::endl;
std::string rest;
FindAllLCS(X, Y, CountArray, sizeX, sizeY, rest);
for (int i = 0; i < results.size(); ++i)
{
std::cout << results[i] << std::endl;
}
delete []CountArray;
system("pause");
return 0;
}
4.最长公共子串
如果是求最长公共子串,则只需要将状态转移方程改为:
c[i,j]=0i=0 or j=0
c[i,j]= c[i−1,j−1]+1i,j>0 and xi=yj
c[i,j]=0i,j>0 and xi≠yj
求解函数为:
int FindMaxSubString(std::string &s1, std::string &s2, int **countArr) {
int sizeX = s1.size();
int sizeY = s2.size();
int bg = 0;
int st = 0;
int ed = 0;
for (int i = 1; i <= sizeX; ++i)
{
for (int j = 1; j <= sizeY; ++j)
{
if (s1[i - 1] == s2[j - 1]) {
countArr[i][j] = countArr[i - 1][j - 1] + 1;
if (countArr[i][j] > bg){
bg = countArr[i][j];
st = i - bg + 2;//代表起始位置
ed = j - bg + 2;
}
}
else {
countArr[i][j] = 0;
}
}
}
return bg;
}
5.参考文献
http://blog.csdn.net/hrn1216/article/details/51534607
http://www.cnblogs.com/en-heng/p/3963803.html
http://blog.csdn.net/alps1992/article/details/47923041
http://blog.csdn.net/lisonglisonglisong/article/details/41596309