POJ 3693 Maximum repetition substring 后缀数组 暴力 rmq

题意:给出一个字符串,求出一个重复次数最多的且字典序最小的子串。

思路:前面已经给出了求重复次数最多的子串的方法,但是该怎样找出其中字典序最小的子串呢?

           可以想到,sa数组是按照字典序排列的,我们可以记录所有能达到最多重复次数的循环节长度。然后,从前往后枚举sa数组,对每个记录下来的长度l,如果后缀sa[i]与后缀sa[i]+l的lcp大于等于(最多重复次数-1)*l,那么从sa[i]开始、长度为(最多重复次数*l)的子串就是字典序最小的答案。

代码如下:

#include <cstdio>
#include <algorithm>
#include <cstring>

using namespace std;

// Empty placeholder type with no members. Its single instance `st` is not
// referenced anywhere else in the visible file; the sparse-table RMQ actually
// used by the solution lives inside DC3 (rmq_init / rmp_query).
struct ST{
} st;

/**
 * Suffix-array toolkit used to solve POJ 3693 (maximum repetition substring).
 *
 * Contents: DC3 suffix-array construction, the height (adjacent LCP) array,
 * a sparse table answering range-minimum queries over the heights, an
 * LCP-of-two-suffixes query built on top of it, and the problem solver.
 *
 * Expected call order for one text of length n:
 *     calc_sa(r, n, m) -> calc_height(r, n) -> rmq_init(n) -> lcp / solve
 *
 * The instance is very large (tens of MB of arrays), so it is meant to live
 * in static storage, as the global `solver` declared at the end does.
 */
struct DC3{
    static const int maxn =1001000;//three times of length
    int rank[maxn];   // rank[i] = index inside sa of the suffix starting at i, i in 0..n
    int sa[maxn];     // sa[0] is the sentinel suffix (position n); sa[1..n] are the real suffixes, sorted
    int height[maxn]; // height[i] = LCP of the suffixes sa[i-1] and sa[i], i in 1..n
    int wa[maxn],wb[maxn],wv[maxn],ws[maxn]; // scratch buffers for the radix sorts / merge
    static const int MAX = 200100;
    int p[MAX];       // p[i] = floor(log2(i)); p[0] = -1
    int d[MAX][20];   // sparse table: d[i][j] = min(height[i .. i + 2^j - 1])
    int cand[MAX];    // distinct period lengths that reach the best repetition count
    int text_len;     // text length handed to the most recent rmq_init call

    /**
     * Builds the log table and the sparse table over height[1..n].
     * Must be called after calc_height and before lcp / solve.
     * @param n text length (number of real suffixes); must be smaller than MAX
     */
    void rmq_init(int n){
        text_len = n;
        p[0] = -1;
        for(int i = 1; i <= n; ++i)
            p[i] = (i & (i-1)) ? p[i-1] : p[i-1]+1; // the log grows only at powers of two
        for(int i = 1; i <= n; ++i) d[i][0] = height[i];
        for(int j = 1; j <= p[n]; ++j)
            for(int i = 1; i + (1 << j) - 1 <= n; ++i)
                d[i][j] = std::min(d[i][j-1],d[i+(1<<(j-1))][j-1]);
    }

    /**
     * Minimum of height[l..r] (inclusive, 1-based indices into sa).
     * Precondition: 1 <= l <= r <= n; lcp() guarantees this for its own calls.
     */
    int rmp_query(int l, int r){
        int k = p[r - l + 1];
        return std::min(d[l][k],d[r - (1<<k) + 1][k]);
    }

    /**
     * Length of the longest common prefix of the suffixes starting at text
     * positions l and r.
     *
     * Positions outside [0, n) denote an empty or non-existent suffix and yield
     * 0; this keeps the query from indexing rank[] with stale entries left over
     * from a previous, longer text. Identical positions yield the full suffix
     * length instead of issuing an empty range query.
     */
    int lcp(int l, int r){//l,r is the start position of two suffixes
        if(l < 0 || r < 0 || l >= text_len || r >= text_len) return 0;
        if(l == r) return text_len - l;
        l = rank[l], r = rank[r];//we should turn them to the index in sa
        if(l > r) std::swap(l,r);
        // height[k] relates sa[k-1] and sa[k], so the range starts one past l.
        return rmp_query(l + 1,r);
    }

    // Position mapping between the original text and the recursive sub-problem
    // built from the positions with i % 3 != 0. Both macros read the local `tb`
    // of dc3 and are therefore only meaningful inside it.
    #define F(x) ((x)/3 + ((x)%3 == 1 ? 0:tb))
    #define G(x) ((x) < tb ? (x)*3+1 : ((x)-tb)*3 + 2)

    /** True when the three-symbol blocks starting at a and b are equal. */
    int c0(int *r, int a, int b){
        return r[a] == r[b] && r[a+1] == r[b+1] && r[a+2] == r[b+2];
    }

    /**
     * Merge comparator: true when suffix a (a % 3 == 0) sorts before suffix b,
     * where k = b % 3. wv[] holds the already known order of the sample
     * (i % 3 != 0) suffixes and is used to break ties.
     */
    int c12(int k, int *r, int a,int b){
        if (k == 2)
            return r[a] < r[b] || (r[a] == r[b] && c12(1,r,a+1,b+1));
        else
            return r[a] < r[b] || (r[a] == r[b] && wv[a+1] < wv[b+1]);
    }

    /**
     * Stable counting sort of the n indices in a[] by key r[a[i]], written
     * to b[]. Keys must lie in [0, m).
     */
    void radix_sort(int *r, int *a,int *b,int n,int m) {
        int i;
        for (i = 0; i < n; i++)    wv[i] = r[a[i]];
        for (i = 0; i < m; i++)    ws[i] = 0;
        for (i = 0; i < n; i++)    ws[wv[i]]++;
        for (i = 1; i < m; i++)    ws[i] += ws[i-1];
        for (i = n-1; i >= 0; i--) b[--ws[wv[i]]] = a[i];
        return ;
    }

    /**
     * DC3 suffix-array construction.
     * @param r  text of n symbols, each in [0, m); the storage behind r and sa
     *           is reused for the recursive sub-problem (r + n, sa + n), which
     *           is why the buffers must hold about three times the text length
     * @param sa output: suffix start positions in sorted order
     * @param n  number of symbols (including the sentinel appended by calc_sa)
     * @param m  alphabet size
     */
    void dc3(int *r,int *sa,int n, int m){
        int i,j,*rn = r + n, *san = sa + n;
        int ta = 0, tb = (n+1)/3,tbc = 0, p;
        r[n] = r[n+1] = 0; // padding so that triples near the end are well defined
        for(i = 0; i < n; i++)
            if(i%3 != 0) wa[tbc++] = i;

        // Sort the sample positions by their three-symbol blocks (LSD radix sort).
        radix_sort(r+2, wa, wb, tbc, m);
        radix_sort(r+1, wb, wa, tbc, m);
        radix_sort(  r, wa, wb, tbc, m);

        // Name the blocks: equal blocks share a name, p counts distinct names.
        for (p = 1, rn[F(wb[0])] = 0, i = 1; i < tbc; i++)
            rn[F(wb[i])] = c0(r,wb[i-1],wb[i]) ? p-1 : p++;

        // Names not unique yet -> recurse; otherwise the names are the order.
        if(p < tbc) dc3(rn, san, tbc, p);
        else
            for (i = 0; i < tbc; i++) san[rn[i]] = i;

        // Collect the i % 3 == 0 positions in the order of their successors.
        for(i = 0; i < tbc; i++)
            if (san[i] < tb) wb[ta++] = san[i]*3;

        if(n%3 == 1) wb[ta++] = n-1;

        radix_sort(r, wb, wa, ta, m);
        for(i = 0; i < tbc; i++)
            wv[wb[i]=G(san[i])] = i;

        // Merge the two sorted groups into the final suffix array.
        for(i = 0,j = 0,p = 0; i < ta && j < tbc; p++)
            sa[p] = c12(wb[j]%3,r,wa[i],wb[j]) ? wa[i++] : wb[j++];
        for( ; i < ta; p++) sa[p] = wa[i++];
        for ( ; j < tbc; p++) sa[p] = wb[j++];
        return ;
    }

    /**
     * Builds sa for the text r[0..n-1]. A zero sentinel is written to r[n], so
     * sa[0] == n afterwards and sa[1..n] hold the real suffixes.
     * @param r text; attention: 1 <= r[i] < m, buffer must hold about 3 * (n + 1) ints
     * @param n text length
     * @param m alphabet size (exclusive upper bound of the symbols)
     */
    void calc_sa(int *r, int n, int m){//attention: 1 <= r[i] <= m
        r[n] = 0;//add zero, length : n + 1
        dc3(r,sa,n+1,m);
    }

    /**
     * Fills rank[0..n] and height[1..n] from sa. Each step reuses the previous
     * match length minus one as its starting point; the zero sentinel at r[n]
     * stops every comparison.
     */
    void calc_height(int *r,int n){
        int i,j,k = 0;
        for (i = 0; i < n+1; i++)
            rank[sa[i]] = i;
        for (i = 0; i < n; height[rank[i++]] = k)//according to rank, only n times
            for (k ? k-- : 0, j = sa[rank[i]-1]; r[i+k] == r[j+k]; k++)
                ;
        return;
    }

    /** Debug helper: prints every suffix in sorted order, one per line. */
    void print(int * r, int n){
        for(int i = 1; i <= n; ++i){
            for(int j = sa[i]; j < n; ++j)
                putchar(r[j]);
            putchar('\n');
        }
    }

    /**
     * Locates the lexicographically smallest substring with the maximum number
     * of consecutive repetitions.
     *
     * Requires calc_sa, calc_height and rmq_init to have been run for a text of
     * length n.
     *
     * @param n   text length
     * @param pos output: start position of the answer
     * @param len output: length of the answer
     * @return false only when n <= 0 (no answer exists), true otherwise
     */
    bool find_repetition(int n, int &pos, int &len){
        pos = 0;
        len = 0;
        if(n <= 0) return false;

        int maxx = 0, cnt = 0;
        // Phase 1: for every period l, probe the positions 0, l, 2l, ... Any run
        // of period l with at least two repetitions covers one of them.
        for(int l = 1; l < n; ++l){
            for(int i = 0; i + l < n; i += l){
                int ans = lcp(i,i+l);
                // The run may begin before i; shift left by the missing part.
                int j = i - (l - ans % l);
                ans = ans / l + 1;
                if(j >= 0 && lcp(j,j+l) >= l) ans++;
                if(ans > maxx){
                    maxx = ans;
                    cnt = 0;
                    cand[cnt++] = l;
                }
                // l only grows, so duplicates of a period are always adjacent;
                // storing each period once bounds cnt by n - 1.
                else if(ans == maxx && cand[cnt-1] != l)
                    cand[cnt++] = l;
            }
        }

        // No repetition at all (this includes n == 1): the answer is the
        // smallest single character, i.e. the first symbol of the smallest suffix.
        if(maxx <= 1){
            pos = sa[1];
            len = 1;
            return true;
        }

        // Phase 2: walk the suffixes in sorted order; the first one that starts
        // a run with maxx repetitions of a candidate period is the answer.
        // Periods are stored in increasing order, so the shortest one wins.
        for(int i = 1; i <= n; ++i){
            for(int j = 0; j < cnt; ++j){
                int l = cand[j];
                if(sa[i] + l * maxx > n) continue; // the run would not fit
                if(lcp(sa[i],sa[i]+l) >= (maxx - 1) * l){
                    len = l * maxx;
                    pos = sa[i];
                    return true;
                }
            }
        }
        return false; // not reachable when the arrays are consistent with n
    }

    /**
     * Prints the answer for the text r[0..n-1] followed by a newline.
     * For n <= 0 only the newline is printed.
     */
    void solve(int *r,int n){
        int pos = 0, len = 0;
        if(find_repetition(n, pos, len))
            for(int i = pos; i < pos + len; ++i)
                putchar(r[i]);
        putchar('\n');
    }
} solver;

int T,N;            // not used by the program; kept so the file-level names stay available
char str[100010];   // one input token (at most 100009 characters plus the terminator)
// Integer copy of the text. DC3 reuses the space behind the text for its
// recursive sub-problems (r + n, ...), so the buffer must hold about three
// times the text length, not just the text itself.
int r[3 * 100010 + 64];

/**
 * Reads whitespace-separated strings until a token starting with '#' (or end
 * of input) and prints, for each one, "Case k: " followed by the
 * lexicographically smallest substring with the most consecutive repetitions.
 */
int main(void)
{
    //freopen("input.txt","r",stdin);
    int cas = 1;
    // Check scanf's result so that a missing '#' terminator ends the loop at
    // EOF instead of spinning forever; the field width keeps str from overflowing.
    while(scanf("%100009s",str) == 1 && str[0] != '#'){
        int len = static_cast<int>(strlen(str));
        // Go through unsigned char: a negative symbol would become a negative
        // index inside the radix sort.
        for(int i = 0; i < len; ++i)
            r[i] = static_cast<unsigned char>(str[i]);
        solver.calc_sa(r,len,256);
        solver.calc_height(r,len);
        solver.rmq_init(len);
        //solver.print(r,len);
        printf("Case %d: ",cas++);
        solver.solve(r,len);
    }
    return 0;
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值