A string s is called a (k,l)-repeat if s is obtained by concatenating k>=1 copies of some seed string t of length l>=1. For example, the string
s = abaabaabaaba
is a (4,3)-repeat with t = aba as its seed string. That is, the seed string t is 3 characters long, and the whole string s is obtained by repeating t 4 times.
Write a program for the following task: Your program is given a long string u consisting of characters ‘a’ and/or ‘b’ as input. Your program must find some (k,l)-repeat that occurs as substring within u with k as large as possible. For example, the input string
u = babbabaabaabaabab
contains the underlined (4,3)-repeat s starting at position 5. Since u contains no other contiguous substring with more than 4 repeats, your program must output the maximum k.
Input
The first line of the input contains H, the number of test cases (H <= 20). H test cases follow. The first line of each test case is n, the length of the input string (n <= 50000). The next n lines contain the input string, one character (either ‘a’ or ‘b’) per line, in order.
Output
For each test case, you should write exactly one integer k on a line - the maximum repeat count.
Example
Input: 1 17 b a b b a b a a b a a b a a b a b Output: 4
since a (4, 3)-repeat is found starting at the 5th character of the input string.
#include<iostream>
#include<cstdio>
#include<cstring>
#include<algorithm>
#include<cmath>
using namespace std;
/// Suffix array, prefix-doubling algorithm: shared constants and work buffers.
/// maxn - capacity of every global buffer below.
/// str  - scratch buffer that main() reads each input token into.
/// wa/wb - the two rank arrays DA() alternates between (its x and y pointers).
/// wv   - sort keys of the current radix-sort pass inside DA().
/// wn   - counting (bucket) array of the radix sort inside DA().
/// a    - the text as integers, filled by main() and terminated with a 0.
/// sa   - the resulting suffix array.
const int maxn=500000;
char str[maxn];
int wa[maxn],wb[maxn],wv[maxn],wn[maxn],a[maxn],sa[maxn];
int n;/// length of the string
/// Compares the key pairs (r[a], r[a+l]) and (r[b], r[b+l]).
/// Returns 1 when both components are equal, 0 otherwise.
int cmp(int* r,int a,int b,int l)
{
    // The second component is only read when the first one already matches.
    if(r[a]!=r[b]) return 0;
    return (r[a+l]==r[b+l])?1:0;
}
/**n为字符串长度,m为字符的取值范围,r为字符串。后面的j为每次排
序时子串的长度*/
void DA(int* r,int* sa,int n,int m)
{
int i,j,p,*x=wa,*y=wb,*t;
///对R中长度为1的子串进行基数排序
for(i=0;i<m;i++)wn[i]=0;
for(i=0;i<n;i++)wn[x[i]=r[i]]++;
for(i=1;i<m;i++)wn[i]+=wn[i-1];
for(i=n-1;i>=0;i--)sa[--wn[x[i]]]=i;
for(j=1,p=1;p<n;j*=2,m=p)
{
/**利用了上一次基数排序的结果,对待排序的子串的第二关键字进行
了一次高效地基数排序*/
for(p=0,i=n-j;i<n;i++)y[p++]=i;
for(i=0;i<n;i++)if(sa[i]>=j)y[p++]=sa[i]-j;
///基数排序
for(i=0;i<n;i++)wv[i]=x[y[i]];
for(i=0;i<m;i++)wn[i]=0;
for(i=0;i<n;i++)wn[wv[i]]++;
for(i=1;i<m;i++)wn[i]+=wn[i-1];
for(i=n-1;i>=0;i--)sa[--wn[wv[i]]]=y[i];
///当p=n的时候,说明所有串都已经排好序了
///在第一次排序以后,_rank数组中的最大值小于p,所以让m=p
for(t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1;i<n;i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
}
return;
}
/// Suffix array: computing the height array.
/**
 * The height values start at height[1], and height[1] is expected to be 0.
 * The reason is that a 0 character was appended to the string, so that
 * suffix is necessarily the smallest one, while every other character is
 * greater than 0 (the doubling algorithm requires this). Hence the common
 * prefix of the second-ranked suffix and the 0 suffix (height[1]) is 0.
 * The height array covers the range [1..n], so the call must be
 * calheight(r,sa,n) and not calheight(r,sa,n+1).
 */
int _rank[maxn],height[maxn];
/**
 * Fills _rank and height for the text r[0..n-1] (followed by the sentinel).
 *
 * r  - the text as integers, with the 0 sentinel at r[n].
 * sa - suffix array over n+1 entries, as produced by DA(r,sa,n+1,...).
 * n  - length of the text without the sentinel.
 *
 * Afterwards _rank[i] is the position of suffix i in sa, and height[k] is
 * the length of the common prefix of the suffixes sa[k-1] and sa[k].
 */
void calheight(int* r,int* sa,int n)
{
    for(int pos=1;pos<=n;pos++) _rank[sa[pos]]=pos;

    int matched=0;
    for(int start=0;start<n;start++)
    {
        // The match length carried over from the previous suffix shrinks by
        // at most one, so the comparison resumes from there.
        if(matched) matched--;
        const int neighbour=sa[_rank[start]-1];
        while(r[start+matched]==r[neighbour+matched]) matched++;
        height[_rank[start]]=matched;
    }
    return;
}
// RMQ: minimum of an arbitrary interval.
// d holds the values (1-based); dpmin[i][j] is the minimum of d[i..i+2^j-1].
int d[maxn];
int dpmin[maxn][25];
/// Builds the sparse table dpmin over d[1..n].
void creat_dpmin(int n)
{
    // Level 0: a window of length one is the element itself.
    for(int pos=1;pos<=n;pos++) dpmin[pos][0]=d[pos];

    // Same bound as log2(n+1), evaluated once up front.
    const double topLevel=log(double(n+1))/log(2.0);
    for(int lvl=1;lvl<=topLevel;lvl++)
    {
        const int half=1<<(lvl-1);
        for(int pos=1;pos+(1<<lvl)-1<=n;pos++)
        {
            // A window of length 2^lvl is the union of its two halves.
            const int left=dpmin[pos][lvl-1];
            const int right=dpmin[pos+half][lvl-1];
            dpmin[pos][lvl]=min(left,right);
        }
    }
}
/// Minimum of an arbitrary interval.
/// Returns the minimum of d[x..y] (1-based, inclusive, x <= y) using the
/// sparse table dpmin, which must have been built by creat_dpmin().
int get_min(int x,int y)
{
    const int len=y-x+1;
    // k = floor(log2(len)), computed with integer shifts: this avoids the
    // rounding pitfalls of log()/log() and the undefined cast of log(0)
    // when the range is empty. dpmin has 25 levels, so k is capped at 24.
    int k=0;
    while(k<24&&(1<<(k+1))<=len) k++;
    // Two (possibly overlapping) windows of length 2^k cover [x..y].
    return min(dpmin[x][k],dpmin[y-(1<<k)+1][k]);
}
/// Suffix array + RMQ.
/// Returns the length of the longest common prefix of the suffixes starting
/// at text positions x and y. The two positions must be different: the
/// answer is the minimum of the height values strictly after the smaller
/// rank up to the larger rank, read from the sparse table dpmin (which main
/// builds over the height array).
int get_min_lcp(int x,int y)
{
    x=_rank[x],y=_rank[y];
    if(x>y) swap(x,y);
    x++;// height[k] relates ranks k-1 and k, so the range starts one later
    const int len=y-x+1;
    // k = floor(log2(len)) with integer shifts instead of floating-point
    // log()/log(), which is rounding-sensitive and undefined for len == 0.
    // dpmin has 25 levels, so k is capped at 24.
    int k=0;
    while(k<24&&(1<<(k+1))<=len) k++;
    return min(dpmin[x][k],dpmin[y-(1<<k)+1][k]);
}
/**
 * Reads the test cases from stdin and prints, one per line, the largest k
 * such that some (k,l)-repeat occurs as a substring of the input string.
 *
 * How the suffix array is used:
 * before calling DA every value of the text array must be greater than 0,
 * and a 0 character is appended after the text. With a text of length n the
 * array handed to DA therefore has n+1 entries, so DA is called with n+1,
 * while calheight is called with n.
 *
 * Search: for every period length len, only the positions j that are
 * multiples of len are probed. k = lcp(j, j+len) gives k/len + 1 repeats
 * starting at j; the repeat may actually start a little before j, which is
 * checked by shifting j back by the number of characters still missing for
 * one more full period.
 */
int main()
{
    int ci;
    if(scanf("%d",&ci)!=1) return 0;
    while(ci--)
    {
        if(scanf("%d",&n)!=1||n<0) break;
        for(int i=0;i<n;i++)
        {
            // One non-blank character per entry; a single-character read
            // cannot overrun a buffer the way an unbounded %s can.
            char ch;
            if(scanf(" %c",&ch)!=1) return 0;// truncated input
            a[i]=static_cast<unsigned char>(ch);
        }
        a[n]=0;// sentinel, smaller than every real character
        DA(a,sa,n+1,256);
        calheight(a,sa,n);
        for(int i=1;i<=n;i++) d[i]=height[i];
        creat_dpmin(n);

        // Any non-empty string is a (1,l)-repeat, so the answer is at least
        // 1 (the search loops below never run when n == 1).
        int ans=(n>0)?1:0;
        for(int len=1;len<n;len++)// period length
        {
            for(int j=0;j+len<n;j+=len)
            {
                const int k=get_min_lcp(j,j+len);
                int now=k/len+1;// repeats that start exactly at j
                // 'need' more characters would complete one more period:
                // move the start back by that many and test whether the
                // shifted suffixes agree on at least those characters.
                const int need=len-k%len;
                const int tj=j-need;
                if(tj>=0&&get_min_lcp(tj,tj+len)>=need) now++;
                if(now>ans) ans=now;
            }
        }
        printf("%d\n",ans);
    }
    return 0;
}