Maximum repetition substring
Description
The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R identical consecutive substrings. For example, the repetition number of "ababab" is 3 and that of "ababa" is 1. Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.
Input
The input consists of multiple test cases. Each test case contains exactly one line, which gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000. The last test case is followed by a line containing a '#'.
Output
For each test case, print a line containing the test case number (beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.
Sample Input
ccabababc
daabbccaa
#
Sample Output
Case 1: ababab
Case 2: aa
[Submit] [Go Back] [Status] [Discuss]
题目大意:给定一个字符串,求重复出现次数最多的连续重复子串。
题解:后缀数组+RMQ
对原串和反串分别求出后缀数组,然后RMQ预处理出height的区间最值以及区间的最小排名。
然后枚举连续重复子串的循环节长度L,求这个长度最多能连续重复出现几次。首先重复出现一次是肯定的,所以我们只考虑至少重复两次的情况。设原字符串为r,若某个循环节长度为L的子串S至少重复了两次,那么S一定包含了r[1],r[L+1],r[2*L+1],……中某相邻的两个。因此只需要看r[L*i+1]和r[L*(i+1)+1]这两个位置向后和向前最长能延伸到的长度(向后是两个后缀的最长公共前缀,向前是两个前缀的最长公共后缀)。记总长度为K,那么一共重复出现了K/L+1次(K/L向下取整)。因为我们需要知道子串的开始位置并且要求字典序最小,而求出来的最长串有可能整体前移/后移几位:设这对位置中靠前的那个位置为i,向前、向后延伸的长度分别为l和r,则开头可以落在区间 $[\,i-l,\ i-l+((l+r) \bmod L)\,]$ 内,所以我们对这一段求一下区间的最小排名。
#include<iostream>
#include<cstdio>
#include<algorithm>
#include<cstring>
#define N 100003
#define M 20
using namespace std;
// ---- Scratch state shared by both suffix-array builds -------------------
// len : length of the string currently being processed (1-indexed data).
// m   : current upper bound on radix-sort key values inside get_SA.
// n   : declared but not referenced by the code shown in this file.
// p   : general-purpose counter reused by get_SA (bucket fill, class count,
//       running LCP length).
int n,m,len,p=0;
// a : integer-coded text handed to get_SA; b : counting-sort buckets.
int a[N],b[N];
// xx/yy back the two key arrays; x and y are swapped between them by get_SA.
int xx[N],yy[N],*x,*y;
// Raw input, stored from index 1.
char s[N];

// ---- Forward string ------------------------------------------------------
// sa[r]     : start position of the suffix with rank r.
// rank[i]   : rank of the suffix starting at position i.
// height[r] : LCP of the suffixes ranked r-1 and r.
// st        : sparse table (range minimum) over height.
// NOTE(review): with `using namespace std;` the global name `rank` may be
// ambiguous with std::rank on toolchains whose headers pull in <type_traits>
// - confirm against the target compiler.
int sa[N],rank[N],height[N],st[M][N];
// l[i] : floor(log2(i)), filled in main; rt : sparse table (range minimum)
// over rank, filled by solve().
int l[N],rt[M][N];

// ---- Reversed string (same roles as the arrays above) -------------------
int sa1[N],rank1[N],height1[N],st1[M][N];
// Reset the per-test-case arrays before a new string is processed.
//
// Why each clear matters:
//  - b must be all zero because get_SA starts incrementing the buckets
//    before it ever clears them itself.
//  - rank1 must be zero beyond the current length: main looks up
//    rank1[len+1] (when the left checkpoint is position 1) and relies on it
//    being 0, while get_SA only writes entries 1..len.
//  - height/height1 entry 1 is never written by get_SA (the rank-1 suffix is
//    skipped), so it has to be 0 from here.
void init()
{
    memset(sa,0,sizeof(sa));
    memset(sa1,0,sizeof(sa1));
    memset(rank,0,sizeof(rank));
    memset(rank1,0,sizeof(rank1));
    memset(height,0,sizeof(height));
    // Size each clear with the array actually being cleared; the original
    // used sizeof(height) here, which only worked because both arrays
    // happen to have identical dimensions.
    memset(height1,0,sizeof(height1));
    memset(b,0,sizeof(b));
}
// Equality test used while renumbering classes in a doubling round.
// Returns 1 when positions i and j have the same (first, second) key pair in
// the previous round's keys (read through the global pointer y), 0 otherwise.
// The second key lives l positions further on; a position past the end of
// the string (index > len) is treated as the sentinel -1.
int cmp(int i,int j,int l)
{
    if (y[i]!=y[j]) return 0;
    const int second_i = (i+l<=len) ? y[i+l] : -1;
    const int second_j = (j+l<=len) ? y[j+l] : -1;
    return second_i==second_j ? 1 : 0;
}
// Builds, for the text currently stored in the global a[1..len]:
//   sa[r]     - start position of the suffix with rank r,
//   rank[i]   - rank of the suffix starting at i,
//   height[r] - LCP of the suffixes ranked r-1 and r (height[1] is left
//               untouched),
//   st        - sparse table of range minima over height.
// Uses the globals x, y, xx, yy, b, m and p as scratch space and expects the
// bucket array b to be all zero on entry.
void get_SA(int sa[N],int rank[N],int height[N],int st[M][N])
{
    // Round 0: counting sort on the single-character codes.
    x=xx; y=yy; m=30;
    for (int pos=1;pos<=len;++pos) {
        x[pos]=a[pos];
        ++b[x[pos]];
    }
    for (int key=1;key<=m;++key) b[key]+=b[key-1];
    for (int pos=len;pos>=1;--pos) {
        sa[b[x[pos]]]=pos;
        --b[x[pos]];
    }

    // Doubling rounds: sort by (key of i, key of i+k) with a radix sort.
    for (int k=1;k<=len;k<<=1){
        // y lists positions ordered by second key: those whose second half
        // falls off the end come first, then the rest in sa order.
        p=0;
        for (int pos=len-k+1;pos<=len;++pos) y[++p]=pos;
        for (int r=1;r<=len;++r) {
            if (sa[r]>k) y[++p]=sa[r]-k;
        }

        // Stable counting sort of that list by first key.
        for (int key=1;key<=m;++key) b[key]=0;
        for (int r=1;r<=len;++r) ++b[x[y[r]]];
        for (int key=1;key<=m;++key) b[key]+=b[key-1];
        for (int r=len;r>=1;--r) {
            const int pos=y[r];
            sa[b[x[pos]]]=pos;
            --b[x[pos]];
        }

        // Renumber classes: y now holds the old keys, x receives the new.
        swap(x,y);
        x[sa[1]]=1;
        p=2;
        for (int r=2;r<=len;++r) {
            if (cmp(sa[r],sa[r-1],k)) {
                x[sa[r]]=p-1;
            } else {
                x[sa[r]]=p;
                ++p;
            }
        }
        if (p>len) break;   // every suffix already has its own class
        m=p+1;
    }

    // Inverse permutation, then LCPs walking the text left to right and
    // carrying the previous match length minus one forward.
    p=0;
    for (int r=1;r<=len;++r) rank[sa[r]]=r;
    for (int pos=1;pos<=len;++pos) {
        if (rank[pos]!=1) {
            const int prev=sa[rank[pos]-1];
            while (pos+p<=len&&prev+p<=len&&a[pos+p]==a[prev+p]) ++p;
            height[rank[pos]]=p;
            if (p>0) --p;
        }
    }

    // Sparse table: st[lvl][j] = min(height[j .. j+2^lvl-1]).
    for (int r=1;r<=len;++r) st[0][r]=height[r];
    for (int lvl=1;lvl<=17;++lvl) {
        const int half=1<<(lvl-1);
        for (int j=1;j+(1<<lvl)-1<=len;++j) {
            const int left=st[lvl-1][j];
            const int right=st[lvl-1][j+half];
            st[lvl][j]=right<left?right:left;
        }
    }
}
int calc(int x,int y)
{
if (x>y) swap(x,y);
int k=l[y-x]; x++;
return min(st[k][x],st[k][y-(1<<k)+1]);
}
int calc1(int x,int y)
{
if (x>y) swap(x,y);
int k=l[y-x]; x++;
return min(st1[k][x],st1[k][y-(1<<k)+1]);
}
// Fills rt so that rt[lvl][j] is the smallest forward-string rank among the
// suffixes starting at positions j .. j+2^lvl-1 (positions 1..len).
void solve()
{
    int pos=1;
    while (pos<=len) {
        rt[0][pos]=rank[pos];
        ++pos;
    }
    for (int lvl=1;lvl<=17;++lvl) {
        const int span=1<<lvl;
        const int half=span>>1;
        for (int start=1;start+span-1<=len;++start) {
            const int left=rt[lvl-1][start];
            const int right=rt[lvl-1][start+half];
            rt[lvl][start]= right<left ? right : left;
        }
    }
}
// Smallest rank stored in rt over the inclusive position range [x, y],
// answered from two overlapping power-of-two windows. Expects x <= y.
int query(int x,int y)
{
    const int lvl = l[y-x+1];
    const int head = rt[lvl][x];
    const int tail = rt[lvl][y-(1<<lvl)+1];
    return tail<head ? tail : head;
}
int main()
{
freopen("a.in","r",stdin);
//freopen("my.out","w",stdout);
int T=0;
int j=0;
for (int i=1;i<=100000;i++){
if ((1<<(j+1))<=i) j++;
l[i]=j;
}
while (true) {
T++; init();
scanf("%s",s+1);
if (s[1]=='#') break;
len=strlen(s+1);
for (int i=1;i<=len;i++) a[i]=s[i]-'a'+1;
get_SA(sa,rank,height,st);
for (int i=1;i<=len;i++)
a[len-i+1]=s[i]-'a'+1;
memset(b,0,sizeof(b));
get_SA(sa1,rank1,height1,st1);
int ans=1; int ll=1; int rr=len; int mark=len;
solve();
ll=rr=sa[1]; mark=1;
for (int i=1;i<=len;i++){
for (int j=i+1;j<=len;j+=i) {
if (i+1>len) continue;
int pos1=j-i;
int len1=calc(rank[pos1],rank[j]);
int len2=calc1(rank1[len-pos1+2],rank1[len-j+2]);
int t=(len1+len2)/i+1;
if (t>=ans) {
int k=query(pos1-len2,pos1-len2+(len1+len2)%i);
//cout<<pos1-len2+1<<" "<<pos1-len2+1+(len1+len2)%i<<" "<<t<<" "<<k<<"!"<<endl;
if (k<mark&&t==ans||t>ans) {
mark=k;
ll=sa[k]; rr=sa[k]+t*i-1;
//cout<<ll<<" "<<rr<<endl;
}
ans=t;
}
}
}
printf("Case %d: ",T);
for (int i=ll;i<=rr;i++) printf("%c",s[i]);
printf("\n");
}
}