Maximum repetition substring + POJ + 后缀数组之求重复次数最多的连续重复子串

Maximum repetition substring
Time Limit: 1000MS Memory Limit: 65536K
Total Submissions: 6904 Accepted: 2065

Description

The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of "ababab" is 3 and "ababa" is 1.

Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.

Input

The input consists of multiple test cases. Each test case contains exactly one line, which gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.

The last test case is followed by a line containing a '#'.

Output

For each test case, print a line containing the test case number (beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.

Sample Input

ccabababc
daabbccaa
#

Sample Output

Case 1: ababab
Case 2: aa
解决方案:此题为论文例题,
1)先枚举重复子串的长度 l,对每个为 l 的倍数的位置 i,求后缀 i 与后缀 i+l 的最长公共前缀 k,则从 i 开始的重复次数为 ans=k/l+1。但重复子串的开头未必恰好在 i:若 k%l≠0,后面多出了 k%l 这一截,可把起点向前移 l-k%l 个位置到 t=i-(l-k%l),再求后缀 t 与后缀 t+l 的最长公共前缀,若它大于等于 k,则重复次数加 1。记录重复次数的最大值,以及取得该最大值的所有长度 l。
2)求两后缀的最长公共前缀可归为RMQ问题。
3)最后是字典序的问题:按后缀数组从前往后(即字典序从小到大)枚举后缀,找到第一个符合条件的后缀即可跳出循环。
code:
#include<algorithm>
#include<cstdio>
#include<cstring>
#include<iostream>
#include<utility>
using namespace std;
// Capacity shared by every global buffer declared with it.
const int nMax = 2000001;

// sa[k]: start index of the k-th smallest suffix (filled by da).
int sa[nMax];
// rank[p]: position of suffix p inside sa (filled by calHeight).
int rank[nMax];
// height[k]: common-prefix length of the suffixes sa[k-1] and sa[k]
// (filled by calHeight).
int height[nMax];

// Scratch buffers used by da: wa/wb hold the two alternating key arrays,
// wv the keys in second-key order, wd the counting-sort buckets.
int wa[nMax];
int wb[nMax];
int wv[nMax];
int wd[nMax];
// Tells whether positions a and b carry the same key pair in r:
// the key at the position itself and the key l entries further on.
// Returns 1 if both keys match, 0 otherwise. The second key is only read
// when the first one already matches.
int cmp(int *r, int a, int b, int l)
{
    if (r[a] != r[b])
    {
        return 0;
    }
    return (r[a + l] == r[b + l]) ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] into the global sa[] by prefix doubling.
//   r: sequence whose suffixes are sorted
//   n: total number of entries in r
//   m: character range; every r[i] is used as a bucket index in [0, m)
// Uses the globals wa, wb, wv and wd as scratch space.
// NOTE(review): cmp reads the key array at sa[i] + j; this presumably relies on
// r ending in a unique smallest terminator (main appends a 0) — confirm before
// calling it on other input.
void da(int *r, int n, int m)           //  doubling algorithm: r = sequence to sort, n = total length, m = character range
{
    int i, j, p, *x = wa, *y = wb, *t;
    // Round 0: counting sort of all positions by their single value r[i];
    // x[] keeps a copy of those values as the current keys.
    for(i = 0; i < m; i ++) wd[i] = 0;
    for(i = 0; i < n; i ++) wd[x[i]=r[i]] ++;
    for(i = 1; i < m; i ++) wd[i] += wd[i-1];
    for(i = n-1; i >= 0; i --) sa[-- wd[x[i]]] = i;
    // Each round doubles the offset j; it stops once p, the number of distinct
    // keys assigned in the round, reaches n. m is shrunk to p for the next round.
    for(j = 1, p = 1; p < n; j *= 2, m = p)
    {
        // y[] = positions ordered by their second key (the key j entries later):
        // positions i >= n-j have no second key and go first, the rest follow
        // in the order given by the current sa.
        for(p = 0, i = n-j; i < n; i ++) y[p ++] = i;
        for(i = 0; i < n; i ++) if(sa[i] >= j) y[p ++] = sa[i] - j;
        // Counting sort of y[] by first key x[y[i]]; filling from the back
        // keeps the second-key order among equal first keys.
        for(i = 0; i < n; i ++) wv[i] = x[y[i]];
        for(i = 0; i < m; i ++) wd[i] = 0;
        for(i = 0; i < n; i ++) wd[wv[i]] ++;
        for(i = 1; i < m; i ++) wd[i] += wd[i-1];
        for(i = n-1; i >= 0; i --) sa[-- wd[wv[i]]] = y[i];
        // Swap the key buffers (y now holds the old keys) and assign new keys
        // in sa order: neighbours share a key only when cmp reports that both
        // old keys (at the position and j further on) match.
        for(t = x, x = y, y = t, p = 1, x[sa[0]] = 0, i = 1; i < n; i ++)
        {
            x[sa[i]] = cmp(y, sa[i-1], sa[i], j) ? p - 1: p ++;
        }
    }
}

void calHeight(int *r, int n)            //  求height数组。
{
    int i, j, k = 0;
    for(i = 1; i <= n; i ++) rank[sa[i]] = i;
    for(i = 0; i < n; height[rank[i ++]] = k)
    {
        for(k ? k -- : 0, j = sa[rank[i]-1]; r[i+k] == r[j+k]; k ++);
    }
}
// Number of sparse-table levels. Level j describes windows of 2^j entries, and
// a window must fit inside a range shorter than nMax (2000001 < 2^21), so only
// levels 0..20 can ever be filled or queried. The previous size of 30 left
// roughly a third of the table unreachable.
const int kLogLevels = 21;
// dp[i][j]: minimum of height[i .. i + 2^j - 1] (filled by initRMQ_log).
int dp[nMax][kLogLevels];
// Log[v]: floor(log2(v)) for v >= 1, with Log[0] == -1 (filled by initRMQ_log).
int Log[nMax];
// Prepares range-minimum queries over height[1..len]:
//   dp[i][j] becomes the minimum of height[i .. i + 2^j - 1], and
//   Log[v]   becomes floor(log2(v)) for 1 <= v <= len, with Log[0] = -1.
void initRMQ_log(int len)
{
    // Level 0: every window of one entry is the entry itself.
    int idx = 1;
    while (idx <= len)
    {
        dp[idx][0] = height[idx];
        ++idx;
    }

    // Level lvl joins two adjacent windows of the level below.
    for (int lvl = 1; (1 << lvl) <= len; ++lvl)
    {
        const int half = 1 << (lvl - 1);
        const int lastStart = len - (1 << lvl) + 1;
        for (int start = 1; start <= lastStart; ++start)
        {
            const int left = dp[start][lvl - 1];
            const int right = dp[start + half][lvl - 1];
            dp[start][lvl] = (right < left) ? right : left;
        }
    }

    // The logarithm grows by one exactly at the powers of two.
    Log[0] = -1;
    for (int v = 1; v <= len; ++v)
    {
        const bool isPowerOfTwo = (v & (v - 1)) == 0;
        Log[v] = isPowerOfTwo ? Log[v - 1] + 1 : Log[v - 1];
    }
}

int lcp(int a,int b)
{

    a=rank[a],b=rank[b];
    if(a>b)
    {
        swap(a,b);
    }
    a++;
    int k=Log[b-a+1];
    return min(dp[a][k],dp[b-(1<<k)+1][k]);

}
// Current test case as read by main with scanf("%s").
char text[nMax];
// text converted to integers ('a' -> 1, 'b' -> 2, ...) followed by a 0
// terminator; this is the sequence main hands to da and calHeight.
int num[nMax];
// Period lengths l that reach the best repetition count found so far in main.
int save[nMax];
// Reads one lowercase string per test case until a token starting with '#'.
// For each string prints "Case <k>: " followed by the substring with the
// largest repetition number, taking the lexicographically smallest on ties.
//
// Steps per test case:
//   1. Build the suffix array, rank/height arrays and the RMQ table.
//   2. For every period length l, probe the positions 0, l, 2l, ...: the common
//      prefix k of the suffixes at j and j+l gives k/l+1 repetitions, plus one
//      more if shifting the start left by l - k%l still matches at least k.
//      Collect every l that reaches the best count.
//   3. Walk the suffixes in lexicographic order and take the first one that
//      really repeats `best` times with one of the collected periods.
int main()
{
    int caseNo = 0;
    while (scanf("%s", text) == 1)
    {
        if (text[0] == '#') break;

        const int len = static_cast<int>(strlen(text));
        for (int i = 0; i < len; i++)
        {
            num[i] = int(text[i] - 'a' + 1);
        }
        num[len] = 0;                 // unique smallest terminator for da
        da(num, len + 1, 30);
        calHeight(num, len);
        initRMQ_log(len);

        // Any single character is a substring repeated once with period 1.
        // Starting from that baseline also covers one-character inputs, for
        // which the loop below never runs.
        int best = 1;
        int cnt = 0;
        save[cnt++] = 1;

        for (int l = 1; l < len; l++)
        {
            for (int j = 0; j + l < len; j += l)
            {
                const int k = lcp(j, j + l);
                int reps = k / l + 1;
                // The repetition may start up to l-1 characters before j.
                const int shifted = j - (l - k % l);
                if (shifted >= 0 && k % l != 0 && lcp(shifted, shifted + l) >= k)
                {
                    reps++;
                }
                if (reps > best)
                {
                    best = reps;
                    cnt = 0;
                    save[cnt++] = l;
                }
                else if (reps == best && save[cnt - 1] != l)
                {
                    // l only grows, so comparing with the last entry is enough
                    // to store each period once.
                    save[cnt++] = l;
                }
            }
        }

        int st = 0, lo = 0;
        bool found = false;
        for (int i = 1; i <= len && !found; i++)
        {
            for (int j = 0; j < cnt; j++)
            {
                const int r = save[j];
                const int need = (best - 1) * r;
                // save[] is increasing, so once best*r characters no longer fit
                // behind sa[i] no later period fits either. This also keeps lcp
                // away from positions beyond the string, whose rank entries may
                // be left over from an earlier, longer test case.
                if (sa[i] + r + need > len) break;
                if (need == 0 || lcp(sa[i], sa[i] + r) >= need)
                {
                    st = sa[i];
                    lo = best * r;
                    found = true;
                    break;
                }
            }
        }

        printf("Case %d: ", ++caseNo);
        printf("%.*s\n", lo, text + st);
    }
    return 0;
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值