对于寻找两个字符串的最长公共子字符串的问题,暴力搜索的时间复杂度高达O(n^3), 而通过动态规划的方式可将时间复杂度降低到O(n^2)(若使用后缀树,还可以进一步降低到线性时间)。
以下是我实现的C++源码:
#include <iostream>
#include <string>
#include <vector>
using namespace std;
class Solution
{
public:
    /// @brief Finds every distinct longest common substring of two strings.
    ///
    /// Uses the classic dynamic-programming recurrence: the length of the
    /// common suffix ending at str1[i] and str2[j] is one more than the length
    /// ending at str1[i-1] and str2[j-1] when the two characters match, and 0
    /// otherwise. Only the previous and the current row of the table are kept.
    ///
    /// @param str1      First input string.
    /// @param str2      Second input string.
    /// @param MaxSubstr Output buffer. On return, entries [0, return value]
    ///                  hold the distinct longest common substrings in order
    ///                  of first occurrence in str1. The vector is grown when
    ///                  it is too small and is never shrunk; entries past the
    ///                  returned index are left untouched.
    /// @return Index of the last valid entry in MaxSubstr (number of results
    ///         minus one). When the inputs share no character, 0 is returned
    ///         and MaxSubstr[0] is set to the empty string.
    int FindLongestCommonSubstring(const std::string& str1, const std::string& str2,
                                   std::vector<std::string>& MaxSubstr)
    {
        using Index = std::string::size_type;

        const Index m = str1.length();
        const Index n = str2.length();

        // Rows are shifted by one column so that column 0 acts as the
        // "no previous character" boundary and needs no special casing.
        std::vector<Index> prev(n + 1, 0);
        std::vector<Index> cur(n + 1, 0);

        std::vector<std::string> found; // distinct substrings of length MaxSubstrLen
        Index MaxSubstrLen = 0;

        for (Index i = 0; i < m; i++)
        {
            for (Index j = 0; j < n; j++)
            {
                if (str1[i] != str2[j])
                {
                    // The row buffer is reused, so stale values must be cleared.
                    cur[j + 1] = 0;
                    continue;
                }

                const Index len = prev[j] + 1;
                cur[j + 1] = len;
                if (len < MaxSubstrLen)
                    continue;

                const std::string candidate = str1.substr(i + 1 - len, len);
                if (len > MaxSubstrLen)
                {
                    // A strictly longer match invalidates everything found so far.
                    MaxSubstrLen = len;
                    found.clear();
                    found.push_back(candidate);
                    continue;
                }

                // Several substrings may share the maximal length. Identical
                // content must be reported once only (e.g. str1 = "a",
                // str2 = "aaaa"), so compare against every stored result and
                // not just the most recent one.
                bool alreadyStored = false;
                for (Index r = 0; r < found.size(); r++)
                {
                    if (found[r] == candidate)
                    {
                        alreadyStored = true;
                        break;
                    }
                }
                if (!alreadyStored)
                    found.push_back(candidate);
            }
            prev.swap(cur);
        }

        // Keep the "index of the last entry" contract meaningful when nothing
        // matched: the longest common substring is then the empty string.
        if (found.empty())
            found.push_back(std::string());

        // Grow the caller's buffer instead of writing past its end.
        if (MaxSubstr.size() < found.size())
            MaxSubstr.resize(found.size());
        for (Index r = 0; r < found.size(); r++)
            MaxSubstr[r] = found[r];

        return static_cast<int>(found.size()) - 1;
    }
};
int main()
{
string str1 = "aaabaaaeabcd";
string str2 = "bbbababcde";
vector<string> MaxSubstr(str1.length()); //MaxSubstr个数不会超过str1和str2的长度,这里取str1.length
Solution a;
int MaxSubstrNum = a.FindLongestCommonSubstring(str1, str2, MaxSubstr);
for (int i = 0; i <= MaxSubstrNum; i++)
cout << MaxSubstr[i] << ' ';
cout << endl;
return 0;
}
参考:https://en.wikipedia.org/wiki/Longest_common_substring_problem