ZOJ_3199 Longest Repeated Substring(后缀数组+ST表)

Longest Repeated Substring

Time Limit: 5000 ms
Memory Limit: 32768 KB
Problem Description

Write a program that takes a string and returns the length of the longest repeated substring. A repeated substring is a sequence of characters that is immediately followed by itself.

For example, given “Mississippi”, the longest repeated substring is “iss” or “ssi” (not “issi”).
Given “Massachusetts”, the longest repeated substring would be either “s” or “t”.
Given “Maine”, the longest repeated substring is “” (the empty string).

Input

The first line of the input contains a single integer T , the number of test cases.

Each of the following T lines contains exactly one string of lowercase characters.

The length of each string is at most 50000 characters.

Output

For each test case, print the length of the Longest Repeated Substring.

Sample Input

2
aaabcabc
ab

Sample Output

3
0

题意

定义连续重复子串为原串的一个子串,在该子串的后面,再次出现这个子串
(两个子串相邻,无重叠),求最长的连续重复子串。

题解:

首先利用后缀数组求出字典序相邻的两个后缀的最大公共前缀。即Height数组。

考虑从大到小枚举连续重复子串的长度。设当前枚举的长度为 len,则 s[1], s[1+len], s[1+2*len], …
这些位置中必然有一个落在目标子串内。因为目标子串在其后面紧接着重复一次,所以可以求后缀
s[1+k*len] 与 s[1+(k+1)*len] 的最长公共前缀,设其长度为 slen。
因为 s[1+(k+1)*len] 也属于目标子串的重复串,所以 slen 是两串从该位置起已经求出的公共部分的长度,
则只需要在位置 1+k*len 之前再匹配 len-slen 个字符即可。
于是再求后缀 s[1+k*len-(len-slen)] 与 s[1+(k+1)*len-(len-slen)] 的最长公共前缀即可。
若该 LCP 等于 len,则说明存在长度为 len 的目标子串,输出 len 即可。

求任意两后缀的LCP,可以通过Height数组和ST表求出。

#include<cstdio>
#include<iostream>
#include<cstdlib>
#include<cmath>
#include<algorithm>
#include<cstring>
#include<map>
#include<vector>
#include<queue>
#include<iterator>
// Debug helper: prints "<expression text> = <value>" to cout.
#define dbg(x) cout<<#x<<" = "<<x<<endl;
// Generic constants; none of them is referenced by the functions in this file.
#define INF 0x3f3f3f3f
#define LLINF 0x3f3f3f3f3f3f3f3f
#define eps 1e-6
 
using namespace std;
typedef long long LL;
typedef pair<int, int> P;
// Capacity of every per-position array declared below.
const int maxn = 100010;
const int mod = 998244353;
// N   - length of the current string; set by binary_sa() from strlen(s1+1).
// M   - largest key value the counting sort in QSORT() has to handle.
// rk  - rk[i] is the sort key / rank of the suffix starting at position i.
// tmp - bucket counters used by QSORT().
// tp  - scratch array: first the suffix start positions ordered by second
//       key (input order for QSORT()), then a copy of the previous ranks.
// sa  - sa[r] is the start position of the suffix placed at rank r.
// heg - heg[0][r] is written by GetHeight(); rows j >= 1 are filled by
//       init() and read by getmi() as a range-minimum table.
int N, M, rk[maxn], tmp[maxn], tp[maxn], sa[maxn], heg[20][maxn];
// Input string; main() reads it into s1+1, so characters start at index 1.
char s1[maxn];
// Forward declarations; the definitions follow main().
void init();
int solve();
void QSORT();
void GetHeight();
void binary_sa();
int getmi(int l, int r);

/**
 * Entry point.
 *
 * Reads the number of test cases T, then for each test case reads one string
 * into s1 starting at index 1, builds the suffix array (binary_sa), the
 * height array (GetHeight) and the sparse table (init), and prints the value
 * returned by solve() on its own line.
 *
 * @return 0 in all cases; malformed or truncated input ends the run early.
 */
int main()
{
    int T;
    // Without this check a failed read would leave T uninitialised.
    if(scanf("%d", &T) != 1)return 0;
    while(T-- > 0)
    {
        // The field width keeps the read inside s1: maxn bytes minus the
        // unused slot 0 and the terminating NUL. Update it if maxn changes.
        // A failed read must not fall through and reprocess the old string.
        if(scanf("%100008s", s1+1) != 1)break;
        binary_sa();
        GetHeight();
        init();
        printf("%d\n", solve());
    }
    return 0;
}

/**
 * Finds the longest substring of s1[1..N] that is immediately followed by a
 * copy of itself.
 *
 * Candidate lengths i are tried from N/2 downwards, so the first hit is the
 * maximum. For a fixed i, positions 1, 1+i, 1+2i, ... are sampled: any
 * occurrence of a length-i block followed by its copy must contain one of
 * them in its first half. At a sample j the common prefix of the suffixes at
 * j and j+i tells how far the repetition extends to the right; the window is
 * then shifted by the missing amount and re-checked.
 *
 * Requires rk and the sparse table behind getmi() to be built for the
 * current string.
 *
 * @return the length of the longest such substring, or 0 if none exists.
 */
int solve()
{
    for(int i=N/2;i>=1;i--){
        for(int j=1;j+i<=N;j+=i){
            const int forward = getmi(rk[j], rk[j+i]);
            // Characters still missing to the left of j. This is negative when
            // the match is longer than i, which moves the window to the right
            // so that the re-check yields exactly i.
            const int k = i-forward;
            const int start = j-k;
            // The string is 1-indexed: position 0 is not a suffix.
            if(start<1)continue;
            if(getmi(rk[start], rk[start+i]) == i)return i;
        }
    }
    return 0;
}

int getmi(int l, int r)
{
    if(l>r)swap(l, r);
    l++;
    int k = log(r-l+1)/log(2.0);
    return min(heg[k][l], heg[k][r-(1<<k)+1]);
}

/**
 * Builds the sparse table over heg[0][0..N].
 *
 * After the call, heg[j][i] is the minimum of heg[0][i .. i + 2^j - 1] for
 * every window that fits inside 0..N. Row 0 must already be filled
 * (GetHeight does this).
 */
void init()
{
    // Rows are only needed while a window of 2^j entries fits in 0..N;
    // heg has 20 rows.
    for(int j=1;j<20 && (1<<j)<=N+1;j++){
        // A window of length 2^j is the union of two windows of length
        // 2^(j-1); the second one starts half a window later.
        const int half = 1<<(j-1);
        for(int i=0;i+(1<<j)-1<=N;++i)
            heg[j][i] = min(heg[j-1][i], heg[j-1][i+half]);
    }
}

/**
 * Builds the suffix array of s1[1..N] by prefix doubling.
 *
 * Sets N from the string length. On return sa[1..N] lists the suffix start
 * positions in increasing lexicographic order and rk[p] is the rank (1..N)
 * of the suffix starting at p.
 *
 * Each round sorts suffixes by their first 2*w characters with a two-key
 * counting sort: the first key is the rank by the first w characters, the
 * second key is the rank of the suffix w positions further on, or "empty"
 * if that position lies past the end of the string.
 */
void binary_sa()
{
    N = static_cast<int>(strlen(s1+1));
    // Initial keys are raw byte values; going through unsigned char keeps
    // them non-negative so they are valid bucket indices.
    M = 255;
    for(int i=1;i<=N;i++){
        rk[i] = static_cast<unsigned char>(s1[i]);
        tp[i] = i;
    }
    QSORT();
    for(int w=1,q=0;q<N;w<<=1,M=q){
        q = 0;
        // Suffixes shorter than or equal to w have an empty second key and
        // therefore come first in second-key order.
        for(int i=1;i<=w;i++)tp[++q] = N-w+i;
        // The remaining ones follow in the order of their second half.
        for(int i=1;i<=N;i++)if(sa[i]>w)tp[++q] = sa[i]-w;
        QSORT();
        // Keep the previous ranks in tp; only positions 1..N are read below.
        memcpy(tp, rk, sizeof(int)*(N+1));
        rk[sa[1]] = q = 1;
        for(int i=2;i<=N;i++){
            const int cur = sa[i];
            const int prev = sa[i-1];
            // Positions past N have no rank. Using 0 explicitly avoids
            // reading ranks left in the array by an earlier, longer string.
            const int curSecond = (cur+w<=N) ? tp[cur+w] : 0;
            const int prevSecond = (prev+w<=N) ? tp[prev+w] : 0;
            rk[cur] = (tp[cur]==tp[prev] && curSecond==prevSecond) ? q : ++q;
        }
    }
}

/**
 * Counting sort used by binary_sa().
 *
 * Places the suffix positions tp[1..N] into sa[1..N] ordered by rk[] (keys
 * in 0..M). Positions with equal keys keep the relative order they have in
 * tp, because tp is consumed back to front while each bucket is filled from
 * its last slot.
 */
void QSORT()
{
    // Clear buckets 0..M.
    int key = 0;
    while(key <= M){
        tmp[key] = 0;
        ++key;
    }
    // Count how many positions carry each key.
    for(int pos = 1; pos <= N; ++pos){
        ++tmp[rk[pos]];
    }
    // Prefix sums: tmp[key] becomes the last slot of that key's bucket.
    for(key = 1; key <= M; ++key){
        tmp[key] = tmp[key] + tmp[key-1];
    }
    // Hand out slots from the back of each bucket.
    for(int idx = N; idx > 0; --idx){
        const int suffix = tp[idx];
        int &slot = tmp[rk[suffix]];
        sa[slot] = suffix;
        --slot;
    }
}

/**
 * Fills heg[0][1..N] with the height array.
 *
 * heg[0][r] becomes the length of the longest common prefix of the suffixes
 * at ranks r-1 and r; heg[0][0] and heg[0][1] are set to 0. Requires sa and
 * rk to be built for the current string.
 *
 * Suffixes are visited in text order, so the match length found for the
 * suffix at i-1, minus one, is a lower bound for the suffix at i. This keeps
 * the total number of character comparisons linear.
 */
void GetHeight()
{
    int k = 0;
    heg[0][0] = 0;
    for(int i=1;i<=N;i++){
        if(rk[i]==1){
            // The smallest suffix has no predecessor in sorted order.
            heg[0][1] = 0;
            k = 0;
            continue;
        }
        if(k)k--;
        const int j = sa[rk[i]-1];
        // i != j, so one side reaches the terminating NUL of s1 first and
        // the loop stops there.
        while(s1[i+k] == s1[j+k])k++;
        heg[0][rk[i]] = k;
    }
}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值