SPOJ - SUBLEX Lexicographical Substring Search（后缀自动机）

Weaver丶

于 2020-07-28 11:12:58 发布

阅读量152

点赞数

分类专栏： sam

本文链接：https://blog.csdn.net/qq_44641782/article/details/107631356

版权

sam 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

Little Daniel loves to play with strings! He always finds different ways to have fun with strings! Knowing that, his friend Kinan decided to test his skills so he gave him a string S and asked him Q questions of the form:

If all distinct substrings of string S were sorted lexicographically, which one will be the K-th smallest?

After knowing the huge number of questions Kinan will ask, Daniel figured out that he can’t do this alone. Daniel, of course, knows your exceptional programming skills, so he asked you to write him a program which given S will answer Kinan’s questions.

Example:

S = “aaa” (without quotes)
substrings of S are “a” , “a” , “a” , “aa” , “aa” , “aaa”. The sorted list of substrings will be:
“a”, “aa”, “aaa”.

Input
In the first line there is Kinan’s string S (with length no more than 90000 characters). It contains only small letters of English alphabet. The second line contains a single integer Q (Q <= 500) , the number of questions Daniel will be asked. In the next Q lines a single integer K is given (0 < K < 2^31).

Output
Output consists of Q lines, the i-th contains a string which is the answer to the i-th asked question.

Example
Input:

aaa
2
2
3

Output:

aa
aaa

Edited: Some input file contains garbage at the end. Do not process them.

题目大意：给定一个字符串和Q次询问，每次询问给出一个整数k，要求在该字符串所有互不相同的子串中，按字典序找出第k小的子串。

解题思路：因为题目要求的是不重复的子串，而在后缀自动机中，从初始状态出发的每一条转移路径都恰好对应一个互不相同的子串，所以我们可以直接在后缀自动机上找第k小的子串。我们先预处理出每个节点可以衍生出的不同子串的数量（包括该节点本身所表示的子串），然后就可以用类似主席树找第k小的操作，在后缀自动机上寻找第k小的子串。对于当前所在的节点，我们从’a’开始按字典序依次枚举转移：如果通过字符’a’到达的节点所能衍生的子串数量大于等于k，那么我们就沿字符’a’走到该节点，并将k减1（减去的这1个就是走到的节点本身所表示的子串），继续寻找；否则，将k减去以’a’衍生的子串数量，继续尝试后续的字符，以此类推。也就是说：进入某个节点之后，k表示的是除该节点本身以外的第k小，因为一个节点再通过其他字符衍生出去的字符串肯定比它本身大，所以如果k==0，就表示该节点表示的字符串就是我们要求的答案。
代码：

#pragma GCC optimize(2)
#include <bits/stdc++.h>
using namespace std;
//std::mt19937 rnd(233);
// Shorthand macros from a contest template; pp, ull, ls and rs are not
// referenced anywhere in the code shown in this file.
#define pp pair<int,int>
#define ull unsigned long long
// Child indices of node `root` in a heap-laid-out binary tree (2*root, 2*root+1).
#define ls root<<1
#define rs root<<1|1
//#define int long long
// 64-bit integer alias; used as the return type of SAM::getSubNum.
typedef long long ll;
// Sentinel values built from a repeated byte (0x3f / 0xc0); unused in the code shown.
// NOTE(review): NINF relies on 0xc0c0c0c0 converting to a negative 32-bit int.
const int inf = 0x3f3f3f3f;
const int NINF = 0xc0c0c0c0;
// Capacity of the input buffer; the automaton arrays are sized maxn<<1.
const int maxn = 1000008;
// Remaining template constants; none of them is used in the code shown.
const int Maxn = 1e7+7;
const double eps=1e-6;
const int mod=1e9+7;
// Spare character buffers; not referenced in the code shown (main reads into `s`).
char arr[maxn],str[maxn];
// Suffix automaton over a 26-letter alphabet starting at `base`.
// States are numbered from 1; state 1 is the initial (empty string) state and
// index 0 means "no state". The struct also carries several helper routines
// from a contest template (occurrence counts, longest common substring,
// k-th smallest distinct substring).
struct SAM{
    // fail: suffix link of each state; nxt: transitions; len: length of the
    // longest string of each state; id: number of states allocated so far;
    // last: state reached by the whole text inserted so far.
    int fail[maxn<<1],nxt[maxn<<1][30],len[maxn<<1],id=1,last=1;
    // dp/ans: scratch arrays of get_sub and llcm2; a/b: counting-sort buckets
    // and the resulting order (gettop); endpos: per-state counter whose meaning
    // depends on the routine that filled it (getendpos or dfs).
    int dp[maxn<<1],a[maxn<<1],b[maxn<<1],ans[maxn<<1],endpos[maxn<<1];
    char base='a';

    // Adds two non-negative counters, clamping the result to INT_MAX so the
    // sum can never overflow.
    static int sat_add(int lhs,int rhs){
        const long long sum=static_cast<long long>(lhs)+rhs;
        return sum>INT_MAX?INT_MAX:static_cast<int>(sum);
    }

    // Resets every state allocated so far and returns to the one-state automaton.
    void init(){
        for(int i=1;i<=id;i++){
            len[i]=fail[i]=0;
            endpos[i]=0;
            // Clear only this state's transition row; rows above `id` were
            // never written, so wiping the whole table per state is wasted work.
            std::memset(nxt[i],0,sizeof nxt[i]);
            dp[i]=0;
            a[i]=b[i]=0;
        }
        id=last=1;
    }

    // Seeds ans[] with len[] so that llcm2 can take running minima over it.
    void init_llcm2_ans(){
        for(int i=1;i<=id;i++)ans[i]=len[i];
    }

    // Zeroes endpos[] for every allocated state.
    void init_endpos(){
        for(int i=1;i<=id;i++)endpos[i]=0;
    }

    // Extends the automaton by one character (standard online construction).
    void add(char c){
        const int x=c-base;
        const int now=++id;
        endpos[now]=1;              // new non-clone state: one end position
        len[now]=len[last]+1;
        int p=last;
        // Walk the suffix links, adding the missing transition on x.
        for(;p && !nxt[p][x];p=fail[p])
            nxt[p][x]=now;
        if(!p){
            fail[now]=1;
        }
        else{
            const int q=nxt[p][x];
            if(len[q]==len[p]+1){
                fail[now]=q;
            }
            else{
                // q also holds longer strings: split off a clone of length len[p]+1.
                const int nq=++id;
                len[nq]=len[p]+1;
                fail[nq]=fail[q];
                std::memcpy(nxt[nq],nxt[q],sizeof nxt[q]);
                for(;p && nxt[p][x]==q;p=fail[p])
                    nxt[p][x]=nq;
                fail[q]=fail[now]=nq;
            }
        }
        last=now;
    }

    // Accumulates endpos[] along suffix links, visiting states in the order
    // stored in b[] from longest to shortest. gettop() must have filled b[] first.
    void getendpos(){
        for(int i=id;i;i--){
            const int e=b[i];
            endpos[fail[e]]+=endpos[e];
        }
    }

    // For every length 1..Len prints the largest endpos[] value among the
    // states whose len[] equals that length, one value per line.
    void get_sub(int Len){
        for(int i=1;i<=id;i++){
            dp[i]=0;
        }
        for(int i=1;i<=id;i++){
            const int k=len[i];
            dp[k]=std::max(dp[k],endpos[i]);
        }
        for(int i=1;i<=Len;i++){
            std::printf("%d\n",dp[i]);
        }
    }

    // Returns the number of distinct non-empty substrings: the sum of
    // len[i]-len[fail[i]] over all states except the initial one.
    long long getSubNum(){
        long long total=0;
        for(int i=2;i<=id;i++)total+=len[i]-len[fail[i]];
        return total;
    }

    // Counting sort of the states by len[]; afterwards b[1..id] lists the
    // states in non-decreasing len order. Len must be at least the largest len[].
    void gettop(int Len){
        // Start from empty buckets so the routine can be called repeatedly.
        const int top=std::max(id,Len);
        for(int i=0;i<=top;i++)a[i]=0;
        for(int i=1;i<=id;i++)a[len[i]]++;
        for(int i=1;i<=Len;i++)a[i]+=a[i-1];
        for(int i=1;i<=id;i++)b[a[len[i]]--]=i;
    }

    // Returns the length of the longest substring of the NUL-terminated
    // `str` that also occurs in the indexed text. `l` is not used.
    int llcm(char str[],int l){
        int res=0,cnt=0,now=1;
        for(int i=0;str[i]!='\0';i++){
            const int c=str[i]-base;
            if(nxt[now][c]){
                cnt++;
                now=nxt[now][c];
            }
            else{
                // Drop to shorter suffixes until one can be extended by c.
                while(now && !nxt[now][c])now=fail[now];
                if(!now) cnt=0,now=1;
                else cnt=len[now]+1,now=nxt[now][c];
            }
            res=std::max(res,cnt);
        }
        return res;
    }

    // Matches s[0..Len) against the automaton and lowers ans[i] to the longest
    // match found for state i in this string (multi-string common substring).
    // Requires init_llcm2_ans() once and gettop() before the first call.
    void llcm2(char s[],int Len){
        for(int i=1;i<=id;i++) dp[i]=0;
        int cnt=0,now=1;
        for(int i=0;i<Len;i++){
            const int c=s[i]-base;
            if(nxt[now][c]){
                cnt++;
                now=nxt[now][c];
            }
            else{
                while(now && !nxt[now][c]) now=fail[now];
                if(!now) cnt=0,now=1;
                else cnt=len[now]+1,now=nxt[now][c];
            }
            dp[now]=std::max(dp[now],cnt);
        }
        // Push each match length up the suffix links, capped by the target's len.
        for(int i=id;i;i--){
            const int e=b[i];
            dp[fail[e]]=std::max(dp[fail[e]],std::min(dp[e],len[fail[e]]));
        }
        for(int i=1;i<=id;i++) ans[i]=std::min(ans[i],dp[i]);
    }

    // Returns the largest value left in ans[] after the llcm2() calls.
    int get_llcm2_ans(){
        int cnt=0;
        for(int i=1;i<=id;i++) cnt=std::max(cnt,ans[i]);
        return cnt;
    }

    // Fills endpos[] with the number of distinct strings readable from each
    // state reachable from x (the empty continuation counts as one) and
    // returns endpos[x]. A non-zero endpos[] entry is treated as already
    // computed, so init_endpos() must be called first (add() leaves 1 in
    // every non-clone state).
    // Counts saturate at INT_MAX instead of overflowing: a text of 90000
    // characters can have more than 2^31 distinct substrings. Saturation keeps
    // find() exact for every k that fits in an int.
    // The traversal uses an explicit stack because the automaton can be as
    // deep as the text is long.
    int dfs(int x){
        if(endpos[x])return endpos[x];
        std::vector<std::pair<int,int> > pending;   // (state, next letter to try)
        endpos[x]=1;
        pending.push_back(std::make_pair(x,0));
        while(!pending.empty()){
            const int state=pending.back().first;
            const int letter=pending.back().second;
            if(letter==26){
                // State finished: hand its total to the state that opened it.
                pending.pop_back();
                if(!pending.empty()){
                    const int parent=pending.back().first;
                    endpos[parent]=sat_add(endpos[parent],endpos[state]);
                }
                continue;
            }
            ++pending.back().second;
            const int child=nxt[state][letter];
            if(!child)continue;
            if(endpos[child]){
                endpos[state]=sat_add(endpos[state],endpos[child]);
                continue;
            }
            endpos[child]=1;
            pending.push_back(std::make_pair(child,0));
        }
        return endpos[x];
    }

    // Returns the k-th (1-based) smallest distinct substring in lexicographic
    // order, using the counts produced by dfs(1). Returns an empty string when
    // k is not positive or exceeds the number of distinct substrings.
    std::string find(int k){
        std::string res;
        int now=1;
        while(k>0){
            bool descended=false;
            for(int i=0;i<26;i++){
                const int child=nxt[now][i];
                if(!child)continue;
                if(k<=endpos[child]){
                    // The answer starts with this letter; one unit of k is
                    // consumed by the string that ends exactly at `child`.
                    res+=static_cast<char>(base+i);
                    k-=1;
                    now=child;
                    descended=true;
                    break;
                }
                // Skip every string that goes through this smaller letter.
                k-=endpos[child];
            }
            if(!descended)return std::string();   // k is out of range
        }
        return res;
    }
}sam;
// Buffer for the input string.
char s[maxn];
// Reads the text, builds its suffix automaton, then answers each query k with
// the k-th smallest distinct substring, one per line.
signed main(){
#ifndef ONLINE_JUDGE
    //freopen("in.in","r",stdin);
    //freopen("out.out","w",stdout);
#endif
    sam.init();
    // %s expects a char*; passing &s (a pointer to the whole array) is a type mismatch.
    if(scanf("%s",s)!=1)return 0;
    for(int i=0;s[i]!='\0';i++)sam.add(s[i]);
    // add() leaves 1 in endpos[] of every new state; dfs() needs zeros to start from.
    sam.init_endpos();
    sam.dfs(1);
    int T;
    if(scanf("%d",&T)!=1)return 0;
    while(T-->0){
        int k;
        // The statement warns about garbage at the end of some inputs:
        // stop at the first query that cannot be parsed.
        if(scanf("%d",&k)!=1)break;
        // '\n' instead of endl: no flush per answer.
        printf("%s\n",sam.find(k).c_str());
    }
    return 0;
}