一、问题描述
查找两个字符串a,b中的最长公共子串。若有多个,输出在较短串中最先出现的那个。
输入描述:
输入两个字符串
输出描述:
输出两个字符串的最长公共子串(即两个串中都出现的最长连续字符序列)
输入例子:
abcdefghijklmnop
abcsafjklmnopqrstuvw
输出例子:
jklmnop
二、解题思路
1.第一种解法,动态规划,dp[i][j] 记录的是短字符串 s1 前 i 个字符和长字符串 s2 前 j 个字符的最长公共子串。当 s1[i-1] == s2[j-1] 时,考虑 s1[i-1] 是否是 dp[i-1][j-1] 的后续字符,如果是则 dp[i][j] = dp[i-1][j-1]+s1[i-1],如果不是则 dp[i][j] 是 dp[i-1][j-1]、dp[i-1][j] 和 dp[i][j-1] 中较长的字符串;如果 s1[i-1] != s2[j-1],dp[i][j] 是 dp[i-1][j-1]、dp[i-1][j] 和 dp[i][j-1] 中较长的字符串。这里要特别注意题目条件:有多个最长子串时,要输出在较短输入字符串中最先出现的那个。
代码:
#include <iostream>
#include <string>
#include <vector>
using namespace std;
/**
 * Longest common substring of two strings, using a DP table of strings.
 *
 * The shorter argument plays the role of s1 and the longer one of s2; when
 * both have the same length the first argument is treated as the shorter one.
 * dp[i][j] holds the longest common substring of the first i characters of s1
 * and the first j characters of s2. When several substrings share the maximum
 * length, the one that occurs first in s1 is kept.
 *
 * @param s3 first input string
 * @param s4 second input string
 * @return the longest common substring, or an empty string if the inputs
 *         share no character (or either input is empty)
 */
string maxCommonSubstr(string s3, string s4)
{
    // Both operands are lvalues of the same type, so the conditional yields an
    // lvalue and the references alias the parameters instead of copying them.
    const bool firstIsShorter = s3.length() <= s4.length();
    const string& s1 = firstIsShorter ? s3 : s4;
    const string& s2 = firstIsShorter ? s4 : s3;
    const size_t m = s1.length();
    const size_t n = s2.length();

    // Row 0 and column 0 stay empty: an empty prefix has no common substring.
    // The vectors own the table, so it is released on every exit path.
    vector<vector<string> > dp(m + 1, vector<string>(n + 1));

    for (size_t i = 1; i <= m; i++)
    {
        for (size_t j = 1; j <= n; j++)
        {
            const string& diag = dp[i - 1][j - 1];
            const string& up = dp[i - 1][j];
            const string& left = dp[i][j - 1];

            // Length of the common run that ends exactly at s1[i-1] / s2[j-1];
            // it is 0 when those two characters differ.
            size_t len = 0;
            while (len < i && len < j && s1[i - 1 - len] == s2[j - 1 - len])
            {
                len++;
            }

            if (len > diag.length() && len > up.length() && len > left.length())
            {
                // Strictly longer than everything seen so far: take the new run.
                dp[i][j] = s1.substr(i - len, len);
            }
            else
            {
                // Keep the longest earlier result. On a tie between `left` and
                // `up`, `up` wins because it only uses the first i-1 characters
                // of s1 and therefore starts earlier in the shorter string.
                const string& side = (left.length() > up.length()) ? left : up;
                dp[i][j] = (side.length() >= diag.length()) ? side : diag;
            }
        }
    }
    return dp[m][n];
}
// Reads whitespace-separated string pairs from standard input until a read
// fails, printing the longest common substring of each pair on its own line.
void test1()
{
    string first;
    string second;
    for (;;)
    {
        if (!(cin >> first >> second))
        {
            break;
        }
        cout << maxCommonSubstr(first, second) << endl;
    }
}
// Program entry point: runs the stdin-driven loop in test1() and exits with 0.
int main()
{
test1();
return 0;
}
2.第二种解法,也是动态规划,但是思路简单很多。dp[i][j] 记录短字符串 s1 前 i 个字符和长字符串 s2 前 j 个字符中,以 s1[i-1] 和 s2[j-1] 结尾的最长公共子串的长度,初始化所有值为 0。当 s1[i-1] == s2[j-1] 时,dp[i][j] = dp[i-1][j-1]+1,注意要使用额外的变量 start 来记录最长子串在短字符串 s1 中出现的起始位置,并用 maxlen 记录当前最长子串的长度:当 dp[i][j] > maxlen 时,令 maxlen = dp[i][j]、start = i-dp[i][j];当 s1[i-1] != s2[j-1] 时不需要任何操作,这样 dp[i][j] 就是默认值 0。
代码:
#include <iostream>
#include <string>
using namespace std;
/**
 * Longest common substring of two strings, using a DP table of run lengths.
 *
 * The shorter argument is used as s3 and the longer one as s4; when both have
 * the same length the first argument is treated as the shorter one.
 * Cell (i, j) of the table is the length of the common run that ends at
 * s3[i-1] and s4[j-1], or 0 when those characters differ. The longest run is
 * tracked while the table is filled; because the record is only replaced by a
 * strictly longer run, ties resolve to the run that ends first in s3.
 *
 * @param s1 first input string
 * @param s2 second input string
 * @return the longest common substring, or an empty string if the inputs
 *         share no character (or either input is empty)
 */
string maxCommonSubstr(string s1, string s2)
{
    // Both operands are lvalues of the same type, so the references alias the
    // parameters instead of copying them.
    const bool firstIsShorter = s1.length() <= s2.length();
    const string& s3 = firstIsShorter ? s1 : s2;
    const string& s4 = firstIsShorter ? s2 : s1;
    const size_t m = s3.length();
    const size_t n = s4.length();
    const size_t cols = n + 1;

    // One zero-initialised (m+1) x (n+1) table stored row by row. A single
    // allocation is released by a single delete[], and nothing between the two
    // can throw, so the table cannot leak.
    int* dp = new int[(m + 1) * cols]();

    size_t start = 0;
    int maxlen = 0;
    for (size_t i = 1; i <= m; i++)
    {
        const int* above = dp + (i - 1) * cols;
        int* row = dp + i * cols;
        for (size_t j = 1; j <= n; j++)
        {
            if (s3[i - 1] != s4[j - 1])
            {
                continue;  // cell keeps its initial 0
            }
            row[j] = above[j - 1] + 1;
            if (row[j] > maxlen)
            {
                maxlen = row[j];
                start = i - static_cast<size_t>(maxlen);
            }
        }
    }

    delete [] dp;
    return s3.substr(start, static_cast<size_t>(maxlen));
}
// Reads whitespace-separated string pairs from standard input until a read
// fails, printing the longest common substring of each pair on its own line.
void test1()
{
    string first;
    string second;
    for (;;)
    {
        if (!(cin >> first >> second))
        {
            break;
        }
        cout << maxCommonSubstr(first, second) << endl;
    }
}
// Program entry point: runs the stdin-driven loop in test1() and exits with 0.
int main()
{
test1();
return 0;
}
3.第三种解法,就是通过暴力解法。根据短字符串从最大长度依次递减(同一长度下从左到右枚举起始位置),在长字符串中查找是否有匹配的子串,第一个匹配到的子串就是答案。如果把一次查找的代价视为 O(n2),这种方法的时间复杂度是 O(n1*n1*n2),空间复杂度是 O(1)(不计临时子串);前面两种动态规划的时间复杂度是 O(n1*n2),但空间复杂度是 O(n1*n2)。
代码:
#include <iostream>
#include <string>
using namespace std;
/**
 * Longest common substring of two strings, found by brute force.
 *
 * Substrings of the shorter input are tried from the longest length down to
 * one character and, within a length, from left to right; the first one that
 * also occurs in the longer input is returned. When both inputs have the same
 * length the first argument is treated as the shorter one.
 *
 * @param s1 first input string
 * @param s2 second input string
 * @return the longest common substring, or an empty string if none exists
 */
string maxCommonSubstr(string s1, string s2)
{
    const bool firstIsShorter = s1.length() <= s2.length();
    const string shorter = firstIsShorter ? s1 : s2;
    const string longer = firstIsShorter ? s2 : s1;
    const size_t total = shorter.length();

    for (size_t width = total; width >= 1; --width)
    {
        const size_t lastBegin = total - width;
        for (size_t begin = 0; begin <= lastBegin; ++begin)
        {
            string candidate = shorter.substr(begin, width);
            if (longer.find(candidate) == string::npos)
            {
                continue;
            }
            return candidate;
        }
    }
    return "";
}
// Reads whitespace-separated string pairs from standard input until a read
// fails, printing the longest common substring of each pair on its own line.
void test1()
{
    string first;
    string second;
    for (;;)
    {
        if (!(cin >> first >> second))
        {
            break;
        }
        cout << maxCommonSubstr(first, second) << endl;
    }
}
// Program entry point: runs the stdin-driven loop in test1() and exits with 0.
int main()
{
test1();
return 0;
}