题面
解析
翻译一下题目要求:给定一个长为n(1≤n≤20000)的字符串,求其最长的至少出现了k次的可重叠子串长度。
答案显然具有二分性,如果长为$l$的串至少出现了k次,那么长为$l-1$的串也至少出现了k次,那么此题显然可以二分答案,那么如何check?
子串即是原串所有后缀的前缀,至少出现了k次,即是该串至少为k个后缀的公共前缀,联想到后缀数组的性质,可以求出所有排名相邻的串的最长公共前缀LCP,即$hei$数组。那么我们对排名分块,对于任意一块$[l, r]$满足$\forall i\in [l+1,r], hei[i] \geqslant ans$, 如果存在一个块$[l,r]$满足$r-l+1\geqslant k$,那么这个ans就可行,否则不行。这样就完成了check。
代码:
#include<cstdio> #include<cstring> #include<algorithm> #include<iostream> using namespace std; const int maxn = 20005, maxm = 1000004; int n, K, mx, ans; int a[maxn]; int sa[maxn], rk[maxn], fir[maxn], sec[maxn], c[maxm], hei[maxn]; void Build_SA() { for(int i = 1; i <= n; ++i) fir[i] = a[i]; for(int i = 1; i <= n; ++i) c[fir[i]] ++; for(int i = 1; i <= mx; ++i) c[i] += c[i-1]; for(int i = n; i >= 1; --i) sa[c[fir[i]]--] = i; for(int k = 1; k <= n; k <<= 1) { int t = 0; for(int i = n - k + 1; i <= n; ++i) sec[++t] = i; for(int i = 1; i <= n; ++i) if(sa[i] - k > 0) sec[++t] = sa[i] - k; for(int i = 0; i <= mx; ++i) c[i] = 0; for(int i = 1; i <= n; ++i) c[fir[sec[i]]] ++; for(int i = 1; i <= mx; ++i) c[i] += c[i-1]; for(int i = n; i; --i) sa[c[fir[sec[i]]]--] = sec[i], sec[i] = 0; swap(fir, sec); t = 0; fir[sa[1]] = ++t; for(int i = 2; i <= n; ++i) if(sec[sa[i]] != sec[sa[i-1]] || sec[sa[i] + k] != sec[sa[i-1] + k]) fir[sa[i]] = ++t; else fir[sa[i]] = t; if(t >= n) break; mx = max(mx, t); } } void get_height() { int h = 0; for(int i = 1; i <= n; ++i) rk[sa[i]] = i; for(int i = 1; i <= n; ++i) { int t = sa[rk[i]-1]; while(a[t+h] == a[i+h]) h++; hei[rk[i]] = h; h = max(0, h - 1); } } bool check(int x) { int p = 1; for(int i = 2; i <= n; ++i) if(hei[i] < x) { if(i - p >= K) return 1; p = i; } if(n - p + 1 >= K) return 1; return 0; } int main() { scanf("%d%d", &n, &K); for(int i = 1; i <= n; ++i) scanf("%d", &a[i]), mx = max(mx, a[i]); Build_SA(); get_height(); int l = 0, r = n, mid; while(l <= r) { mid = (l+r)>>1; if(check(mid)) ans = mid, l = mid + 1; else r = mid - 1; } printf("%d\n", ans); return 0; }