poj 3693 Maximum repetition substring

Maximum repetition substring
Time Limit: 1000MS Memory Limit: 65536K
Total Submissions: 6864 Accepted: 2049

Description

The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of "ababab" is 3 and "ababa" is 1.

Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.

Input

The input consists of multiple test cases. Each test case contains exactly one line, which
gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.

The last test case is followed by a line containing a '#'.

Output

For each test case, print a line containing the test case number( beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.

Sample Input

ccabababc
daabbccaa
#

Sample Output

Case 1: ababab
Case 2: aa

真是一道好题啊,看下罗神的分析:

先穷举长度L,然后求长度为L 的子串最多能连续出现几次。首先连续出现1 次是肯定可以的,所以这里只考虑至少2 次的情况。假设在原字符串中连续出现2 次,记这个子字符串为S,那么S 肯定包括了字符r[0], r[L], r[L*2],r[L*3], ……中的某相邻的两个。所以只须看字符r[L*i]和r[L*(i+1)]往前和往后各能匹配到多远,记这个总长度为K,那么这里连续出现了K/L+1 次。最后看最大值是多少?

穷举长度L 的时间是n,每次计算的时间是n/L。所以整个做法的时间复杂度是O(n/1+n/2+n/3+……+n/n)=O(nlogn)。

这个不是很好理解,你需要在纸上画出连续几段线段表示字符串的重复出现,然后每相邻两段之内必会有且仅有两条分界线,就是所谓的r[0], r[L], r[L*2],r[L*3], ……中相邻的两个。

一种很好理解的情况是,分界线刚好在线段左端点,那么很显然右侧能够匹配的决定了重复次数,也就是K/L+1;另一种情况分界线不在线段左端点,那么此时要将分界线左移,补上L-K%L这段,这样做保证一定能够取到答案。

上述两种情况均归结为:lcp查询问题,最后按照sa枚举,首次符合条件,必然是字典序最小的解。


代码:

#include<cstdio>
#include<iostream>
#define Maxn 100010
using namespace std;

// r      : the text as integers; main() fills r[1..len] and puts 0 at r[0] and r[n].
// sa     : suffix array produced by da(); sa[k] is the start of the k-th smallest suffix.
// rk     : inverse of sa, filled by calheight() for ranks 1..n-1.
// height : height[k] is the common-prefix length of suffixes sa[k-1] and sa[k] (calheight()).
int r[Maxn],sa[Maxn],rk[Maxn],height[Maxn];
// Scratch space for da(): wa/wb alternate as the rank array and the second-key
// order, rs holds the counting-sort buckets, wv caches the first key of each entry.
int wa[Maxn],wb[Maxn],rs[Maxn],wv[Maxn];
/*
 * Equality test used by the doubling step.
 * Returns 1 when positions a and b carry the same value in r AND the
 * positions l further on (a+l, b+l) also carry the same value; 0 otherwise.
 * r[a+l] / r[b+l] are only read when r[a]==r[b].
 */
int cmp(int *r,int a,int b,int l){
    if(r[a]!=r[b]){
        return 0;
    }
    return r[a+l]==r[b+l]?1:0;
}
void da(int n,int m){
    int i,j,p,*x=wa,*y=wb;
    for(i=0;i<m;i++) rs[i]=0;
    for(i=0;i<n;i++) rs[x[i]=r[i]]++;
    for(i=1;i<m;i++) rs[i]+=rs[i-1];
    for(i=n-1;i>=0;i--) sa[--rs[x[i]]]=i;
    for(j=1,p=1;p<n;j<<=1,m=p){
        for(p=0,i=n-j;i<n;i++) y[p++]=i;
        for(i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
        for(i=0;i<m;i++) rs[i]=0;
        for(i=0;i<n;i++) rs[wv[i]=x[y[i]]]++;
        for(i=1;i<m;i++) rs[i]+=rs[i-1];
        for(i=n-1;i>=0;i--) sa[--rs[wv[i]]]=y[i];
        swap(x,y);
        for(p=1,x[sa[0]]=0,i=1;i<n;i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
    }
}
void calheight(int n){
    int i,j,k=0;
    for(int i=1;i<n;i++) rk[sa[i]]=i;
    for(int i=1;i<n;height[rk[i++]]=k){
        if(k) k--;
        for(j=sa[rk[i]-1];r[i+k]==r[j+k];k++);
    }
}
// p[len]  : floor(log2(len)) for len >= 1 (p[0] is -1).
// d[i][k] : minimum of height[i .. i+2^k-1].
int p[Maxn],d[Maxn][20];
/*
 * Prepare the sparse table over height[1..n] so that rmq_ask() can return
 * range minima with two table look-ups.
 */
void rmq_init(int n){
    p[0]=-1;
    for(int len=1;len<=n;++len){
        // The logarithm grows by one exactly when len is a power of two.
        const bool power_of_two=(len&(len-1))==0;
        p[len]=power_of_two?p[len-1]+1:p[len-1];
    }
    for(int i=1;i<=n;++i){
        d[i][0]=height[i];
    }
    for(int lvl=1;lvl<=p[n];++lvl){
        const int half=1<<(lvl-1);
        for(int i=1;i+(1<<lvl)-1<=n;++i){
            d[i][lvl]=min(d[i][lvl-1],d[i+half][lvl-1]);
        }
    }
}
/*
 * Minimum of height[l..r] (requires l <= r), read from the table built by
 * rmq_init(): two blocks of length 2^k, one anchored at l and one ending at r.
 */
int rmq_ask(int l,int r){
    const int lvl=p[r-l+1];
    const int from_left=d[l][lvl];
    const int from_right=d[r-(1<<lvl)+1][lvl];
    return from_right<from_left?from_right:from_left;
}
/*
 * Length of the longest common prefix of the suffixes starting at text
 * positions a and b: the minimum height over the ranks strictly above the
 * smaller rank up to the larger one.  a and b must be different positions.
 */
int lcp(int a,int b){
    const int rank_a=rk[a];
    const int rank_b=rk[b];
    const int low=rank_a<rank_b?rank_a:rank_b;
    const int high=rank_a<rank_b?rank_b:rank_a;
    return rmq_ask(low+1,high);
}
char s[Maxn];
int q[Maxn];
int main()
{
    int n,i,j,k,ans,cas=1;
    bool flag;
    while(scanf("%s",s+1),s[1]!='#'){
        for(i=1;s[i];i++)
            r[i]=s[i];
        r[0]=r[n=i]=0;
        da(n,125);
        calheight(n);
        rmq_init(n);
        int tot=0,maxx=0;
        for(int l=1;l<n;l++){
            flag=false;
            for(j=1;j+l<n;j+=l){
                k=lcp(j,j+l);
                ans=k/l+1;
                if(k%l&&(k=j-l+k%l)>=1){
                    k=lcp(k,k+l);
                    ans=max(ans,k/l+1);
                }
                if(ans>=maxx) maxx=ans,flag=true;
            }
            if(flag) q[tot++]=l;
        }
        flag=true;
        for(i=1;i<n&&flag;i++){
            for(j=0;j<tot;j++)
                if(lcp(sa[i],sa[i]+q[j])/q[j]+1>=maxx) {flag=false;break;}
        }
        printf("Case %d: ",cas++);
        for(k=0;k<q[j]*maxx;k++)
            printf("%c",s[sa[i-1]+k]);
        puts("");
    }
	return 0;
}

同类题目spoj 687

687. Repeats

Problem code: REPEATS


A string s is called an (k,l)-repeat if s is obtained by concatenating k>=1 times some seed string t with length l>=1. For example, the string

s = abaabaabaaba

is a (4,3)-repeat with t = aba as its seed string. That is, the seed string t is 3 characters long, and the whole string s is obtained by repeating t 4 times.

Write a program for the following task: Your program is given a long string u consisting of characters ‘a’ and/or ‘b’ as input. Your program must find some (k,l)-repeat that occurs as substring within u with k as large as possible. For example, the input string

u = babbabaabaabaabab

contains the underlined (4,3)-repeat s starting at position 5. Since u contains no other contiguous substring with more than 4 repeats, your program must output the maximum k.

Input

In the first line of the input contains H- the number of test cases (H <= 20). H test cases follow. First line of each test cases is n - length of the input string (n <= 50000), The next n lines contain the input string, one character (either ‘a’ or ‘b’) per line, in order.

Output

For each test case, you should write exactly one integer k in a line - the repeat count that is maximized.

Example

Input:
1
17
b
a
b
b
a
b
a
a
b
a
a
b
a
a
b
a
b

Output:
4

代码:

#include<cstdio>
#include<iostream>
#define Maxn 50010
using namespace std;

// r      : the text as small integers; main() stores 1 for 'a', 2 for 'b' in
//          r[1..len] and 0 at r[0] and r[n].
// sa     : suffix array produced by da(); sa[k] is the start of the k-th smallest suffix.
// rk     : inverse of sa, filled by calheight() for ranks 1..n-1.
// height : height[k] is the common-prefix length of suffixes sa[k-1] and sa[k] (calheight()).
int r[Maxn],sa[Maxn],rk[Maxn],height[Maxn];
// Scratch space for da(): wa/wb alternate as the rank array and the second-key
// order, rs holds the counting-sort buckets, wv caches the first key of each entry.
int wa[Maxn],wb[Maxn],rs[Maxn],wv[Maxn];
/*
 * Pair comparison for the doubling step: yields 1 only if r[a]==r[b] and
 * r[a+l]==r[b+l], otherwise 0.  The second pair is not read when the first
 * pair already differs.
 */
int cmp(int *r,int a,int b,int l){
    const bool first_equal=r[a]==r[b];
    if(!first_equal) return 0;
    const bool second_equal=r[a+l]==r[b+l];
    return second_equal?1:0;
}
void da(int n,int m){
    int i,j,p,*x=wa,*y=wb;
    for(i=0;i<m;i++) rs[i]=0;
    for(i=0;i<n;i++) rs[x[i]=r[i]]++;
    for(i=1;i<m;i++) rs[i]+=rs[i-1];
    for(i=n-1;i>=0;i--) sa[--rs[x[i]]]=i;
    for(j=1,p=1;p<n;j<<=1,m=p){
        for(p=0,i=n-j;i<n;i++) y[p++]=i;
        for(i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
        for(i=0;i<m;i++) rs[i]=0;
        for(i=0;i<n;i++) rs[wv[i]=x[y[i]]]++;
        for(i=1;i<m;i++) rs[i]+=rs[i-1];
        for(i=n-1;i>=0;i--) sa[--rs[wv[i]]]=y[i];
        swap(x,y);
        for(p=1,x[sa[0]]=0,i=1;i<n;i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
    }
}
void calheight(int n){
    int i,j,k=0;
    for(int i=1;i<n;i++) rk[sa[i]]=i;
    for(int i=1;i<n;height[rk[i++]]=k){
        if(k) k--;
        for(j=sa[rk[i]-1];r[i+k]==r[j+k];k++);
    }
}
// p[len]  : floor(log2(len)) for len >= 1 (p[0] is -1).
// d[i][k] : minimum of height[i .. i+2^k-1].
int p[Maxn],d[Maxn][20];
/*
 * Build the logarithm table p[] and the sparse table d[][] over
 * height[1..n]; rmq_ask() relies on both.
 */
void rmq_init(int n){
    p[0]=-1;
    for(int len=1;len<=n;++len){
        // len & (len-1) is zero exactly for powers of two, where the log steps up.
        if((len&(len-1))!=0) p[len]=p[len-1];
        else p[len]=p[len-1]+1;
    }
    for(int i=1;i<=n;++i)
        d[i][0]=height[i];
    for(int lvl=1;lvl<=p[n];++lvl){
        const int span=1<<lvl;
        const int half=1<<(lvl-1);
        for(int i=1;i+span-1<=n;++i)
            d[i][lvl]=min(d[i][lvl-1],d[i+half][lvl-1]);
    }
}
/*
 * Range minimum of height[l..r] (l <= r) using the tables from rmq_init():
 * the answer is the smaller of the 2^k-long block starting at l and the
 * 2^k-long block ending at r, with k = p[r-l+1].
 */
int rmq_ask(int l,int r){
    const int lvl=p[r-l+1];
    const int head=d[l][lvl];
    const int tail=d[r-(1<<lvl)+1][lvl];
    if(tail<head) return tail;
    return head;
}
/*
 * Longest common prefix of the suffixes starting at text positions a and b
 * (a != b): the minimum of height[] over ranks (min rank + 1) .. max rank.
 */
int lcp(int a,int b){
    int upper=rk[a];
    int lower=rk[b];
    if(upper<lower){
        const int tmp=upper;
        upper=lower;
        lower=tmp;
    }
    return rmq_ask(lower+1,upper);
}
char s[Maxn];   // current string, stored 1-based
/*
 * SPOJ 687 (REPEATS) driver.  Reads the number of test cases, then for each
 * case the length n followed by n letters (one per line), and prints the
 * largest k such that some substring is a seed repeated k times.
 */
int main()
{
    int t;
    if(scanf("%d",&t)!=1) return 0;
    while(t--){
        int n;
        if(scanf("%d",&n)!=1) break;
        for(int i=1;i<=n;i++){
            // The leading blank skips the line break before each letter, so
            // "\r\n" line endings or trailing spaces do not shift the input.
            if(scanf(" %c",s+i)!=1) return 0;
            r[i]=s[i]-'a'+1;    // 'a' -> 1, 'b' -> 2; 0 is kept for the padding
        }
        r[0]=r[++n]=0;          // n becomes length+1; both ends are padded with 0
        da(n,3);
        calheight(n);
        rmq_init(n);
        // Every non-empty string is at least a (1,l)-repeat.  Starting from 1
        // also gives the right answer for a one-letter string, for which the
        // loops below never execute.
        int maxx=1;
        for(int l=1;l<n;l++)
            for(int i=1;i+l<n;i+=l){
                int k=lcp(i,i+l);
                int ans=k/l+1;
                // The run may start before the anchor: shift left by the
                // missing l-k%l symbols and measure again.
                if(k%l&&(k=i-l+k%l)>=1){
                    k=lcp(k,k+l);
                    ans=max(ans,k/l+1);
                }
                if(ans>maxx) maxx=ans;
            }
        printf("%d\n",maxx);
    }
    return 0;
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值