// 题意:给定一个长度为N(1<=N<=20000)的数列,找出其中至少出现了K次的最长子串,输出其长度;各次出现之间允许重叠。
// 思路:后缀数组经典应用。二分答案ans,找到满足条件的最大长度。对于每个二分的值x,按照x将height值分组(连续的height>=x的后缀归为同一组),只要某一组内的后缀个数不少于K个即可。
#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <queue>
#include <stack>
#include <cmath>
#include <list>
#include <cstdlib>
#include <set>
#include <map>
#include <vector>
#include <string>
using namespace std;
// Shorthand for loop counters used throughout this file. The `register`
// storage-class specifier was removed in C++17 (and was only ever a hint),
// so the macro expands to a plain int.
#define rint int
typedef long long ll;
// Large 64-bit "infinity" sentinel.
const ll inf = 0x3f3f3f3f3f3f3f3f;
// Capacity of the per-position arrays below; valid indices are 0..maxn-1.
const int maxn = 20005;
// 2147493647 is larger than INT_MAX (2147483647), so it must live in a
// 64-bit type; stored in an int it would not keep this value.
const ll mod = 2147493647;
// Input sequence, 1-based: main fills s[1..n].
int s[maxn];
// wa, wb  : scratch rank/order buffers swapped between rounds by get_SA.
// c       : counting-sort buckets; holds maxn * 50 = 1000250 entries, which
//           covers the symbol bound 1000005 that main passes to get_SA.
// sa[r]   : start index of the suffix with rank r (written by get_SA).
// rk[i]   : rank of the suffix starting at i (written by get_height).
// height[r]: common-prefix length of the suffixes ranked r and r-1
//           (written by get_height for r >= 2).
int wa[maxn],wb[maxn],c[maxn * 50],sa[maxn],rk[maxn],height[maxn];
// Builds the suffix array of s[1..n] by prefix doubling with counting sort.
//   n : length of the sequence (positions are 1-based).
//   m : upper bound of the symbol values; every s[i] is used as a bucket
//       index, so it must lie in [0, m] and c must hold at least m + 1 entries.
// Result: sa[r] (1 <= r <= n) is the start index of the suffix with rank r.
// wa and wb are used as scratch space and are overwritten.
void get_SA(int n, int m) {
    int *x = wa, *y = wb;  // pointers, so the two buffers can be swapped in O(1)

    // Round 0: counting sort of the suffixes by their first symbol.
    for (int i = 0; i <= m; ++i) c[i] = 0;
    for (int i = 1; i <= n; ++i) ++c[x[i] = s[i]];
    for (int i = 1; i <= m; ++i) c[i] += c[i - 1];
    for (int i = n; i >= 1; --i) sa[c[x[i]]--] = i;

    // Each round sorts by the pair (key of first k symbols, key of next k symbols).
    for (int k = 1; k <= n; k <<= 1) {
        // y lists the suffixes ordered by their second key. Suffixes that
        // start after n - k have no second half and therefore come first.
        int num = 0;
        for (int i = n - k + 1; i <= n; ++i) y[++num] = i;
        for (int i = 1; i <= n; ++i) if (sa[i] > k) y[++num] = sa[i] - k;

        // Stable counting sort by the first key, walking y backwards so the
        // second-key order is preserved inside each bucket.
        for (int i = 0; i <= m; ++i) c[i] = 0;
        for (int i = 1; i <= n; ++i) ++c[x[i]];
        for (int i = 1; i <= m; ++i) c[i] += c[i - 1];
        for (int i = n; i >= 1; --i) sa[c[x[y[i]]]--] = y[i];

        // From here on y holds the previous keys and x receives the new ranks.
        std::swap(x, y);
        x[sa[1]] = 1;
        num = 1;
        for (int i = 2; i <= n; ++i) {
            const int cur = sa[i];
            const int prev = sa[i - 1];
            // A missing second half is encoded as -1 instead of reading past
            // position n: that cell could hold a stale value from an earlier,
            // longer input, and a plain 0 would be indistinguishable from a
            // real symbol 0 in the first round.
            const int curSecond = (cur + k <= n) ? y[cur + k] : -1;
            const int prevSecond = (prev + k <= n) ? y[prev + k] : -1;
            x[cur] = (y[cur] == y[prev] && curSecond == prevSecond) ? num : ++num;
        }
        if (num == n) break;  // all ranks distinct: the order is final
        m = num;              // keys of the next round are ranks in 1..num
    }
}
void get_height(int n) {
rint k=0;
for (rint i=1; i<=n; ++i) rk[sa[i]]=i;
for (rint i=1; i<=n; ++i) {
if (rk[i]==1) continue;//第一名height为0
if (k) --k;//h[i]>=h[i-1]-1;
rint j=sa[rk[i]-1];
while (j+k<=n && i+k<=n && s[i+k]==s[j+k]) ++k;
height[rk[i]]=k;//h[i]=height[rk[i]];
}
}
int n;  // length of the current sequence (read in main)
int k;  // required number of occurrences (read in main)

// Feasibility test for the binary search in main.
// Scans height[2..n] and counts runs of consecutive entries that are >= x.
// A run of t such entries groups t + 1 adjacent suffixes, so the counter
// starts at 1. Returns true as soon as a group reaches k suffixes, and
// false if no group does.
bool jud(int x) {
    int groupSize = 1;
    for (int rank = 2; rank <= n; ++rank) {
        if (height[rank] < x) {
            groupSize = 1;  // run broken: this suffix starts a new group
            continue;
        }
        if (++groupSize >= k) {
            return true;
        }
    }
    return false;
}
int main() {
while (~scanf("%d%d", &n, &k)) {
for (int i = 1; i <= n; ++i) {
scanf("%d", &s[i]);
}
get_SA(n, 1000005);
get_height(n);
int l = 0, r = n;
while (r - l > 1) {
int mid = (r + l) >> 1;
if (jud(mid)) {
l = mid;
} else {
r = mid;
}
}
printf("%d\n", l);
}
return 0;
}