题意:给定一个字符串s,求s中的最长重复子串,只要该子串在s中出现2次及以上即可认定为重复子串(可有部分重叠);
传送门:LeetCode 1044. 最长重复子串(Longest Duplicate Substring)
输入:s = "banana"
输出:"ana"
输入:s = "abcd"
输出:""
题解:字符串hash+二分查找
- 二分子串长度,复杂度O(logN)
- 通过字符串hash(滚动hash)判断是否存在重复子串:hash值的滚动更新为O(N);下面的实现用std::set记录已出现的hash,因此单次判断实际为O(NlogN),整体为O(Nlog²N)
// Shorthand aliases.
using ll = long long;           // shorthand for long long
using pll = std::pair<ll, ll>;  // a pair of two ll values
// Modular exponentiation by repeated squaring: returns (a^m) mod `mod`.
//
//   a    base; may be negative or larger than `mod` (it is reduced first)
//   m    exponent; for m <= 0 the loop does not run and the result is 1 % mod
//   mod  modulus; must be positive
//
// The result is always in [0, mod).
long long pow(long long a, long long m, int mod) {
    // Reduce the base up front so that every product below is smaller than
    // mod^2 < 2^62 and cannot overflow a signed 64-bit integer.
    a %= mod;
    if (a < 0) a += mod;

    long long ret = 1 % mod;  // "1 % mod" so that mod == 1 yields 0
    while (m > 0) {
        if (m & 1) {
            ret = ret * a % mod;
        }
        a = a * a % mod;
        m >>= 1;
    }
    return ret;
}
// Double rolling hash with explicit verification of every hash hit.
//
// Scans all length-m windows of arr from left to right and returns the start
// index of the first window whose contents equal those of an earlier window,
// or -1 when no window repeats. Also returns -1 when m is negative or larger
// than arr.size(), so the function never reads outside arr.
//
// The two bases and two moduli are drawn at random once, on the first call,
// and reused afterwards. Because every hash hit is confirmed by comparing the
// actual elements, a collision can only cost time, never produce a wrong
// answer.
int check(const std::vector<int> &arr, int m) {
    typedef long long ll;
    typedef std::pair<ll, ll> Hash;      // (hash under mod1, hash under mod2)
    typedef std::pair<Hash, int> Entry;  // (window hash, window start index)

    struct Params {
        int a1, a2, mod1, mod2;
    };
    static const Params params = []() -> Params {
        // Seed once and draw with rand(): srand() does not seed POSIX random().
        std::srand(static_cast<unsigned>(std::time(nullptr)));
        const int kMinMod = 1000000007;
        // Number of admissible moduli in [kMinMod, INT_MAX]; this stays within
        // int, and kMinMod + (value below kModSpan) never exceeds INT_MAX.
        const int kModSpan = INT_MAX - kMinMod + 1;
        Params p;
        p.a1 = std::rand() % 75 + 26;  // base in [26, 100]
        p.a2 = std::rand() % 75 + 26;
        p.mod1 = kMinMod + std::rand() % kModSpan;
        p.mod2 = kMinMod + std::rand() % kModSpan;
        return p;
    }();

    const int n = static_cast<int>(arr.size());
    if (m < 0 || m > n) {
        return -1;  // no window of that length exists
    }

    const ll a1 = params.a1;
    const ll a2 = params.a2;
    const ll mod1 = params.mod1;
    const ll mod2 = params.mod2;

    // Hash of the first window; a1^m and a2^m are accumulated in the same pass.
    ll h1 = 0;
    ll h2 = 0;
    ll aL1 = 1;
    ll aL2 = 1;
    for (int i = 0; i < m; ++i) {
        h1 = (h1 * a1 % mod1 + arr[i]) % mod1;
        if (h1 < 0) h1 += mod1;
        h2 = (h2 * a2 % mod2 + arr[i]) % mod2;
        if (h2 < 0) h2 += mod2;
        aL1 = aL1 * a1 % mod1;
        aL2 = aL2 * a2 % mod2;
    }

    // Element-wise comparison of the windows starting at p and q.
    auto sameWindow = [&arr, m](int p, int q) -> bool {
        for (int k = 0; k < m; ++k) {
            if (arr[p + k] != arr[q + k]) return false;
        }
        return true;
    };

    std::set<Entry> seen;
    seen.insert(Entry(Hash(h1, h2), 0));
    for (int start = 1; start <= n - m; ++start) {
        // Slide the window: drop arr[start - 1], append arr[start + m - 1].
        h1 = (h1 * a1 % mod1 - arr[start - 1] * aL1 % mod1 + arr[start + m - 1]) % mod1;
        if (h1 < 0) h1 += mod1;
        h2 = (h2 * a2 % mod2 - arr[start - 1] * aL2 % mod2 + arr[start + m - 1]) % mod2;
        if (h2 < 0) h2 += mod2;

        const Hash key(h1, h2);
        // Entries are ordered by (hash, start) and starts are >= 0, so
        // lower_bound on (key, 0) lands on the first entry with this hash.
        for (auto it = seen.lower_bound(Entry(key, 0));
             it != seen.end() && it->first == key; ++it) {
            if (sameWindow(it->second, start)) {
                return start;
            }
        }
        seen.insert(Entry(key, start));
    }
    return -1;
}
// Returns the longest substring of s that occurs at least twice (occurrences
// may overlap), or "" when there is none. Binary-searches the candidate
// length and asks check() whether a repeated window of that length exists.
string longestDupSubstring(string s) {
    // Map each character to its offset from 'a'.
    vector<int> codes(s.begin(), s.end());
    for (int &code : codes) {
        code -= 'a';
    }

    int bestStart = -1;
    int bestLen = 0;
    int lo = 1;
    int hi = static_cast<int>(s.size()) - 1;
    while (lo <= hi) {
        // Upper midpoint of [lo, hi].
        const int mid = lo + (hi - lo + 1) / 2;
        const int hit = check(codes, mid);
        if (hit == -1) {
            // No repeat of this length: try shorter lengths.
            hi = mid - 1;
            continue;
        }
        // A repeat of length mid exists: remember it and try longer lengths.
        bestStart = hit;
        bestLen = mid;
        lo = mid + 1;
    }

    if (bestStart == -1) {
        return "";
    }
    return s.substr(bestStart, bestLen);
}