题意:给定一个整数序列(可视为字符串),求至少出现 K 次的最长重复子串的长度,这 K 次出现允许相互重叠。
做法:1.hash(二分答案长度;判定时枚举起点,对每个该长度的子串求 hash 值,并用哈希表统计每个 hash 值出现的次数,只要有某个值的出现次数 >= K,该长度即可行)
2.后缀数组(论文经典题:二分答案长度,把 height 数组按 height >= 该长度划分成若干连续段,若某一段包含的后缀个数 >= K,则该长度可行;详细看论文)
code:
1.
#include <cstdio>
#include <iostream>
#include <cstring>
#include <algorithm>
#include <vector>
#include <queue>
#include <map>
#include <set>
#include <cmath>
#include <string>
// zero(x): true when |x| < eps. Not referenced by the code visible in this solution.
#define zero(x) (((x)>0?(x):-(x))<eps)
// Multiplier (base) of the polynomial rolling hash; used by makeHash() and to fill fac[].
#define MAGIC 1121117
// Tolerance used by zero(x).
#define eps 1e-8
// Shorthand type alias; not referenced by the code visible in this solution.
#define ULL unsigned long long
// Debug helper that prints "END"; not referenced by the code visible in this solution.
#define Test puts("END")
using namespace std;
// MOD and INF are not referenced by the code visible in this solution.
const int MOD = 1000000007;
const int INF = 1000000000;
// Capacity of the input-sized arrays a[], hash[] and fac[].
const int N = 100005;
// Number of slots in the open-addressing table (times[], value[], used[]).
const int M = 311117;
// a[0..n-1]: input sequence, n: its length, K: required number of occurrences.
// hash[i]: rolling hash of the prefix a[0..i] (filled by makeHash()).
// fac[i]: MAGIC to the power i in wrapping int arithmetic (filled in main()).
// NOTE(review): the name `hash` can be ambiguous with std::hash because of the
// using-directive above when built as C++11 or later.
int a[N],n,K,hash[N],fac[N];
// Hash table storage, indexed by slot: times[s] = how often the key in slot s was
// inserted since the last reset, value[s] = the key stored in slot s.
int times[M],value[M];
// used[s] is true once slot s holds a key.
bool used[M];
void makeHash(){
hash[0] = a[0];
for(int i = 1;i < n;i ++){
hash[i] = hash[i - 1] * MAGIC + a[i];
}
}
// Finds the slot of key v in the open-addressing table using linear probing.
// Starting from v modulo M (made non-negative), it walks forward, wrapping
// at M, until it reaches either an unused slot or the slot that already
// stores v, and returns that slot index.
int getPos(int v){
    int slot = v % M;
    if(slot < 0) slot += M;
    for(;;){
        if(!used[slot] || value[slot] == v) return slot;
        ++slot;
        if(slot >= M) slot -= M;
    }
}
int getHash(int l,int r){
if(l == 0) return hash[r];
else return hash[r] - hash[l - 1] * fac[r - l + 1];
}
// Records one more occurrence of key v in the hash table and returns how many
// times v has been inserted since the table was last cleared. A slot that was
// still free is claimed for v first.
int insert(int v){
    const int slot = getPos(v);
    if(used[slot]) return ++times[slot];
    used[slot] = true;
    value[slot] = v;
    return ++times[slot];
}
// Decides whether some substring of length `limit` occurs at least K times
// (occurrences may overlap). Every window of that length is hashed and counted
// in the hash table; equal hash values are treated as equal substrings, so
// collisions are not verified.
//
// A non-positive length is answered directly: the empty substring occurs at
// each of the n + 1 positions. Running the loop for limit == 0 would ask for
// getHash(0, -1) and read hash[-1]; negative limits would index fac[] with a
// negative length.
bool check(int limit){
    if(limit <= 0) return n + 1 >= K;
    // Start from an empty table for this length.
    memset(times,0,sizeof(times));
    memset(used,false,sizeof(used));
    for(int start = 0;start + limit <= n;++start){
        const int key = getHash(start,start + limit - 1);
        if(insert(key) >= K) return true;
    }
    return false;
}
// Reads test cases "n K" followed by n integers until the input ends and, for
// each case, prints the largest length whose check() succeeds (-1 if none).
// check() is monotone in the length, so the answer is found by binary search.
int main(){
    // freopen("input.txt","r",stdin);

    // fac[i] = MAGIC^i (mod 2^32). The table does not depend on the input, so
    // it is built once instead of once per test case, and in unsigned
    // arithmetic because signed overflow is undefined behaviour.
    fac[0] = 1;
    for(int i = 1;i < N;++i)
        fac[i] = static_cast<int>(static_cast<unsigned>(fac[i - 1]) * MAGIC);

    // Require both header values: comparing against EOF alone loops forever
    // when the input is malformed (scanf returns 0 and consumes nothing).
    while(scanf("%d%d",&n,&K) == 2){
        // a[], hash[] hold n entries and fac[] is indexed up to n.
        if(n < 0 || n >= N){
            fprintf(stderr,"n = %d is outside the supported range [0, %d)\n",n,N);
            return 1;
        }
        bool complete = true;
        for(int i = 0;i < n;++i){
            if(scanf("%d",&a[i]) != 1){
                complete = false;
                break;
            }
        }
        if(!complete) break;   // truncated test case: treat as end of input

        makeHash();
        int lo = 0,hi = n;
        int ans = -1;
        while(lo <= hi){
            const int mid = (lo + hi) >> 1;
            if(check(mid)){
                ans = mid;
                lo = mid + 1;
            }
            else hi = mid - 1;
        }
        printf("%d\n",ans);
    }
    return 0;
}
2.后缀数组
#include <cstdio>
#include <iostream>
#include <cstring>
#include <algorithm>
#include <vector>
#include <queue>
#include <map>
#include <set>
#include <cmath>
#include <string>
// zero(x): true when |x| < eps. Not referenced by the code visible in this solution.
#define zero(x) (((x)>0?(x):-(x))<eps)
// MAGIC, eps, LL and Test are not referenced by the code visible in this solution.
#define MAGIC 11117
#define eps 1e-8
#define LL long long
#define Test puts("END")
using namespace std;
// MOD and INF are not referenced by the code visible in this solution.
const int MOD = 1000000007;
const int INF = 1000000000;
// Capacity of every array below.
const int N = 2000100;
// Not referenced by the code visible in this solution.
const int M = 2000100;
// s[0..n-1]: input sequence, n: its length, K: required number of occurrences.
int s[N],n,K;
// wa, wb, wv, wc: scratch arrays of da() (two key arrays, sort keys, counters).
// sa: suffix array written by da(); rank and height are written by calheight().
// NOTE(review): the name `rank` can be ambiguous with std::rank because of the
// using-directive above when built as C++11 or later.
int wa[N],wb[N],wv[N],wc[N],rank[N],height[N],sa[N];
void calheight(int *r,int *sa,int n)
{
int i,j,k = 0;
for(i = 1;i <= n;i ++) rank[sa[i]] = i;
for(i = 0;i < n;height[rank[i ++ ]] = k)
for(k ? k -- : 0,j = sa[rank[i] - 1];r[i + k] == r[j + k];k ++);
return ;
}
// Returns 1 when positions a and b carry the same key pair in r, i.e.
// r[a] == r[b] and r[a + l] == r[b + l]; returns 0 otherwise.
// The second pair is only read when the first pair matches.
int cmp(int *r,int a,int b,int l){
    if(r[a] != r[b]) return 0;
    return r[a + l] == r[b + l] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] into sa[0..n-1] by prefix doubling,
// sorting with counting sorts on the global scratch arrays wa, wb, wv and wc.
//   r  - input keys
//   sa - output: sa[i] is the start of the i-th smallest suffix
//   n  - number of entries of r to sort
//   m  - exclusive upper bound of the keys; only wc[0..m-1] is cleared and
//        prefix-summed, so keys outside [0, m) are not sorted correctly
// NOTE(review): cmp() reads one key at a + l / b + l without a bounds check;
// this presumably relies on r[n-1] being a unique smallest sentinel - confirm
// against the caller.
void da(int *r,int *sa,int n,int m){
// x holds the current key of every position, y is the second working array.
int i,j,p,*x = wa,*y = wb,*t;
// Counting sort of all positions by their single key r[i].
for(i = 0;i < m;i ++) wc[i] = 0;
for(i = 0;i < n;i ++) wc[x[i] = r[i]] ++;
for(i = 1;i < m;i ++) wc[i] += wc[i - 1];
for(i = n - 1;i >= 0;i --) sa[-- wc[x[i]]] = i;
// Each round sorts by the key pair (x[i], x[i + j]) and doubles j. p is the
// number of distinct keys after the round; the loop ends once p reaches n.
// m becomes p because the new keys lie in [0, p).
for(j = 1,p = 1;p < n;j *= 2,m = p){
// y = positions ordered by second key: the last j positions have no second
// half and go first, the rest follow in the order given by sa (shifted by j).
for(p = 0,i = n - j;i < n;i ++) y[p ++] = i;
for(i = 0;i < n;i ++) if(sa[i] >= j) y[p ++] = sa[i] - j;
// Counting sort of y by first key; walking backwards keeps the
// second-key order among equal first keys.
for(i = 0;i < n;i ++) wv[i] = x[y[i]];
for(i = 0;i < m;i ++) wc[i] = 0;
for(i = 0;i < n;i ++) wc[wv[i]] ++;
for(i = 1;i < m;i ++) wc[i] += wc[i - 1];
for(i = n - 1;i >= 0;i --) sa[-- wc[wv[i]]] = y[i];
// Swap x and y, then assign new keys in sa order: neighbours whose old key
// pairs are equal (cmp on the old keys, now in y) share a key.
for(t = x,x = y,y = t,p = 1,x[sa[0]] = 0,i = 1;i < n;i ++)
x[sa[i]] = cmp(y,sa[i - 1],sa[i],j) ? p - 1 : p ++;
}
return ;
}
// Decides whether the height array contains a run of consecutive entries,
// all >= limit, that is long enough: a run of len entries links len + 1
// neighbouring suffixes, and the test succeeds when len + 1 >= K.
// Indices 0..n of height[] are scanned.
bool check(int limit){
    int pos = 0;
    while(pos <= n){
        if(height[pos] >= limit){
            // Extend the run starting at pos; `stop` ends one past its last entry.
            int stop = pos;
            do{
                ++stop;
            }while(stop <= n && height[stop] >= limit);
            if(stop - pos + 1 >= K) return true;
            pos = stop;
        }
        else{
            ++pos;
        }
    }
    return false;
}
int main(){
// freopen("input.txt","r",stdin);
while(scanf("%d%d",&n,&K) != EOF){
for(int i = 0;i < n;i ++){
scanf("%d",&s[i]);
}
da(s,sa,n + 1,200);
calheight(s,sa,n);
/*for(int i = 0;i <= n;i ++)
printf("i:%d sa:%d height:%d\n",i,sa[i],height[i]);*/
// check(4);
int l = 0,r = n;
int ans = -1;
while(l <= r){
int mid = (l + r) >> 1;
// cout << l << ' ' << r << endl;
if(check(mid)){
ans = mid;
l = mid + 1;
}
else r = mid - 1;
}
printf("%d\n",ans);
}
return 0;
}