后缀数组(板子整理)

一些说明

rk[i]:  下标位置在i的后缀的排名 

sa[i]: 后缀排名第i的下标位置  
ht[i]:  排名第i和排名第i-1的LCP长度 

基数排序

代码中的--(cnt[rk[tmp[i]]]--)主要是为了解决值相同的情形

如果两个值相同,则先遍历到的位置占据靠后的位置,后遍历到的位置占据靠前的位置

如果没有值相同的情况,不减也可以

inline void base_sort(){
    // Stable counting sort: places the entries of tmp[1..n] into sa[1..n],
    // ordered by their key rk[tmp[pos]]; keys lie in [0, mx].
    memset(cnt,0,(mx+1)*sizeof(cnt[0]));
    for(int pos=1;pos<=n;pos++)cnt[rk[pos]]++;
    // Prefix sums: cnt[key] becomes the last slot reserved for that key.
    for(int key=1;key<=mx;key++)cnt[key]+=cnt[key-1];
    // Walk tmp backwards so that, among equal keys, the entry seen first takes
    // the later slot and the original relative order inside tmp is kept.
    for(int pos=n;pos>=1;pos--){
        const int suffix=tmp[pos];
        sa[cnt[rk[suffix]]--]=suffix;
    }
}

SAIS板子

参考来源

还在写倍增后缀数组? SA-IS算法了解一下~ - shadowice1984 的博客 - 洛谷博客

提交记录 #309676 - Universal Online Judge

Tip

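这份板子的注释提到:需要保证 str[n] 是全串中唯一且字典序最小的字符(即在串末尾加一个小于所有字母的哨兵字符),一开始有点不明所以

这是 SA-IS 算法本身的要求,而不是 uoj35 这道题的要求;没有显式加这个字符也把 gym 题通过了,是因为 suffixArray 里的 ++n 把 ss[n] 也算进了串中,而 ss 属于全局对象、ss[n] 初始为 0,小于所有字母,相当于隐式加上了哨兵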

下标和新倍增板子下标略有不同,详见PR()函数

题目

Problem - I - Codeforces

#include<bits/stdc++.h>
//#include<iostream>
using namespace std;
#define rep(i,a,b) for(int i=(a);i<=(b);++i)
#define per(i,a,b) for(int i=(a);i>=(b);--i)
typedef long long ll;
typedef double db;
typedef pair<int,int> P;
#define fi first
#define se second
#define pb push_back
#define dbg(x) cerr<<(#x)<<":"<<x<<" ";
#define dbg2(x) cerr<<(#x)<<":"<<x<<endl;
#define SZ(a) (int)(a.size())
#define sci(a) scanf("%d",&(a))
#define pt(a) printf("%d",a);
#define pte(a) printf("%d\n",a)
#define ptlle(a) printf("%lld\n",a)
#define debug(...) fprintf(stderr, __VA_ARGS__)
typedef unsigned ui;
typedef long long ll;
const int N=4e6+10;
// par: disjoint-set parent links, indexed by string number.
// sz / ed: per-string length and index of its last character in the joined text
// (both filled in by SuffixArray::solve).
int par[N],sz[N],ed[N];
// Not referenced by the code visible in this program.
bool used[N];
// Returns the representative of the set containing x and re-points every
// node on the walked path directly at that representative.
int find(int x){
	int root=x;
	while(par[root]!=root)root=par[root];
	while(par[x]!=root){
		const int next=par[x];
		par[x]=root;
		x=next;
	}
	return root;
}
// Solver built around an SA-IS suffix array.
// solve() reads m strings, joins them into ss with a '#' after each one, builds the
// suffix array / rank / LCP arrays of the joined text, and then unions the strings
// (via the global par[] / find) over adjacent suffix pairs taken in decreasing order
// of their clamped LCP, returning the sum of the LCP values of the accepted pairs.
struct SuffixArray{
    // ss: joined text; tt: scratch buffer for one input string.
    char ss[N],tt[N];
    // to[pos]: 1-based number of the string that owns position pos of ss.
    // '#' positions are never assigned and stay 0 for the global instance.
    int to[N];
    // id[i]: (clamped LCP, rank i) pairs that solve() sorts in descending order.
    P id[N];
    // n: length of the joined text. sa / rk / ht: suffix array, rank and LCP arrays.
    // s, t, p, cnt, cur: working storage for sais().
    int n,sa[N], rk[N], ht[N], s[N<<1], t[N<<1], p[N], cnt[N], cur[N];
    // Bucket helpers used by inducedSort: pushS fills a bucket from its tail,
    // pushL fills it from its head. cur[c] is the current slot of bucket c.
    #define pushS(x) sa[cur[s[x]]--] = x
    #define pushL(x) sa[cur[s[x]]++] = x
    // Induced sort. Expands inside sais() and uses its locals n, m, s, t, n1:
    // seeds the buckets with the n1 positions listed in v, then induces the
    // positions with t[]==1 in a left-to-right scan and the positions with
    // t[]==0 in a right-to-left scan. (No comments inside the macro body: a
    // '//' comment before a line continuation would swallow the next line.)
    #define inducedSort(v) \
	    fill_n(sa, n, -1); fill_n(cnt, m, 0);                                     \
	    for (int i = 0; i < n; i++) cnt[s[i]]++;                                  \
	    for (int i = 1; i < m; i++) cnt[i] += cnt[i-1];                           \
	    for (int i = 0; i < m; i++) cur[i] = cnt[i]-1;                            \
	    for (int i = n1-1; ~i; i--) pushS(v[i]);                                  \
	    for (int i = 1; i < m; i++) cur[i] = cnt[i-1];                            \
	    for (int i = 0; i < n; i++) if (sa[i] > 0 &&  t[sa[i]-1]) pushL(sa[i]-1); \
	    for (int i = 0; i < m; i++) cur[i] = cnt[i]-1;                            \
	    for (int i = n-1;  ~i; i--) if (sa[i] > 0 && !t[sa[i]-1]) pushS(sa[i]-1);
    // Recursive SA-IS step: fills the member sa[0..n) with the suffix order of
    // s[0..n), whose values lie in [0, m).
    //   t: type flags (1 when suffix i is larger than suffix i+1, else 0)
    //   p: positions i with t[i-1]==1 and t[i]==0 (LMS positions), n1 of them
    // The parameters shadow the members of the same name; the recursion works on
    // the tails s+n, t+n and p+n1 of the same buffers.
    void sais(int n, int m, int *s, int *t, int *p) {
        int n1 = t[n-1] = 0, ch = rk[0] = -1, *s1 = s+n;
        // Classify every position, scanning from the right.
        for (int i = n-2; ~i; i--) t[i] = s[i] == s[i+1] ? t[i+1] : s[i] > s[i+1];
        // Collect LMS positions into p; rk[i] is the index in p, or -1 otherwise.
        for (int i = 1; i < n; i++) rk[i] = t[i-1] && !t[i] ? (p[n1] = i, n1++) : -1;
        inducedSort(p);
        // Name the LMS substrings in sorted order: a new name (ch++) is issued when
        // the substring differs in length or in any (character, type) pair from the
        // previously named one. The names form the reduced string s1.
        for (int i = 0, x, y; i < n; i++) if (~(x = rk[sa[i]])) {
            if (ch < 1 || p[x+1] - p[x] != p[y+1] - p[y]) ch++;
            else for (int j = p[x], k = p[y]; j <= p[x+1]; j++, k++)
                if ((s[j]<<1|t[j]) != (s[k]<<1|t[k])) {ch++; break;}
            s1[y = x] = ch;
        }
        // Names not all distinct: sort the reduced string recursively.
        // Otherwise the names already are the ranks.
        if (ch+1 < n1) sais(n1, ch+1, s1, t+n, p+n1);
        else for (int i = 0; i < n1; i++) sa[s1[i]] = i;
        // Translate the sorted reduced indices back to text positions and induce
        // the final order from them.
        for (int i = 0; i < n1; i++) s1[i] = p[sa[i]];
        inducedSort(s1);
    }
    // Compresses str[0..n) to dense values written into the member s[] (the
    // smallest distinct character becomes 0) and returns the number of distinct
    // characters. rk[] is used as scratch, indexed by character value.
    template<typename T>
    int mapCharToInt(int n, const T *str) {
        int m = *max_element(str, str+n);
        fill_n(rk, m+1, 0);
        for (int i = 0; i < n; i++) rk[str[i]] = 1;
        for (int i = 0; i < m; i++) rk[i+1] += rk[i];
        for (int i = 0; i < n; i++) s[i] = rk[str[i]] - 1;
        return rk[m];
    }
    // Ensure that str[n] is the unique lexicographically smallest character in str.
    // Builds sa, rk and ht for str. The parameter n is a local copy that is
    // incremented so that str[n] is part of the sorted text; the member n is not
    // changed. solve() passes ss, whose element ss[n] is never written.
    template<typename T>
    void suffixArray(int n, const T *str) {
    	//s[n++]='a'-1;
        int m = mapCharToInt(++n, str);
        sais(n, m, s, t, p);
        for (int i = 0; i < n; i++) rk[sa[i]] = i;
        // LCP array: ht[r] is the common-prefix length of the suffixes ranked r-1
        // and r. h is carried over from the previous position, reduced by one.
        for (int i = 0, h = ht[0] = 0; i < n-1; i++) {
            int j = sa[rk[i]-1];
            while (i+h < n && j+h < n && s[i+h] == s[j+h]) h++;
            // Intentional assignment: store h, then decrement it if non-zero.
            if (ht[rk[i]] = h) h--;
        }
    }
    // Debug dump of rk, sa (with the suffix text) and ht.
    inline void PR(){
        string p(ss);
        for(int i=0;i<n;++i)//i∈[0,n) rank[i]∈[1,n]
        printf("Rank[%d]:%d\n",i,rk[i]);
        for(int i=0;i<=n;++i){//i∈[1,n] sa[i]∈[0,n)
            printf("sa[%d]:%d ",i,sa[i]);
            cout<<p.substr(sa[i])<<endl;
        }
        for(int i=1;i<=n;++i)//i∈[1,n] ht[1]=0
        printf("ht[%d]:%d\n",i,ht[i]);
    }
    // Reads the input, builds the suffix array and returns the accumulated answer.
    ll solve(){
    	int m;
        sci(m);
        // Append every string to ss, recording the owner of each character and the
        // index of each string's last character, then a '#' separator.
        rep(i,1,m){
        	par[i]=i;
        	scanf("%s",tt);
        	sz[i]=strlen(tt);
        	int &x=sz[i];
        	rep(j,0,x-1){
        		ss[n]=tt[j];
        		to[n++]=i;
        	}
            ed[i]=n-1;
        	ss[n++]='#';
        }
        suffixArray(n, ss);
        //PR();
        // For every adjacent pair of ranks, clamp the LCP to the number of characters
        // left in each suffix's own string. Pairs that involve a '#' position or a
        // position with no owner are left unclamped; the second loop skips them too.
        rep(i,1,n){
        	id[i]=P(ht[i],i);
        	int p=id[i].second,x=sa[p],y=sa[p-1];
            // v is not used in this loop.
            int px=to[x],py=to[y],v=id[i].first;
            if(ss[x]=='#' || ss[y]=='#')continue;
            if(px==0 || py==0)continue;
            int ux=ed[px]-x+1,uy=ed[py]-y+1;
            id[i].first=min(id[i].first,ux);
            id[i].first=min(id[i].first,uy);
            //printf("i1:%d p:%d x:%d y:%d px:%d py:%d v:%d w:%d\n",i,p,x,y,px,py,v,w);
        }
        // Largest clamped LCP first.
        sort(id+1,id+n+1,greater<P>());
        ll ans=0;
        int cnt=0;
        // Union the owners of each pair whenever they are still in different sets,
        // adding the pair's value to the answer.
        rep(i,1,n){
            int p=id[i].second,x=sa[p],y=sa[p-1];
            int px=to[x],py=to[y],v=id[i].first;
            if(ss[x]=='#' || ss[y]=='#')continue;
            if(px==0 || py==0)continue;
        	if(px==py)continue;
        	int pu=find(px),pv=find(py);
        	if(pv==pu)continue;
        	//printf("i2:%d p:%d x:%d y:%d px:%d py:%d v:%d\n",i,p,x,y,px,py,v);
        	par[pv]=pu;
        	cnt++;
        	ans+=v;
        }
        //printf("cnt:%d\n",cnt);
        // Exactly m-1 unions are expected, i.e. all strings end up in one set.
        assert(cnt==m-1);
        return ans;
    }
}sa;
// Entry point: runs the solver once and prints the total it returns on one line.
int main(){
    const long long total=sa.solve();
    printf("%lld\n",total);
    return 0;
}
/*
7
jia
ran
jin
tian
chi
shen
me

2-4 ans=2
3-6
6-2
3-1
1-5
6-7

*/

新倍增板子

2023.1.27更新,整理了一个常数更小的倍增板子

Codeforces Round #846 (Div. 2), problem: (G) Delicious Dessert

这个题原来的倍增板子2.5s,现在的倍增板子1s

#include<iostream>
#include<cstring>
#include<cstdio>
#include<vector>
using namespace std;
typedef long long ll;
// File-scope capacity constant. SuffixArray below declares its own static member
// of the same name and value, and its member declarations resolve to that one.
const int maxn=1e6+10;
// Prefix-doubling suffix array over the 1-based text s[1..n].
//   rk[i]: rank of the suffix that starts at position i
//   sa[r]: start position of the suffix with rank r
//   ht[r]: length of the longest common prefix of the suffixes ranked r-1 and r
// rk and sa are inverse permutations; indices and values of rk, sa, ht are in [1,n].
// Usage: fill s[1..n] and n, then call suffix_sort() followed by calc_ht().
// Capacity: n must not exceed maxn-2, because index n+1 is used as a sentinel.
// The object may be reused for several strings without clearing it.
struct SuffixArray{
    typedef long long ll;
    static const int maxn=1000000+10;
    char s[maxn];
    int cnt[maxn],mx,n,rk[maxn],sa[maxn],tmp[maxn],ht[maxn];
    // Stable counting sort of tmp[1..n] into sa[1..n] by key rk[tmp[i]] in [0,mx].
    inline void base_sort(){
        std::memset(cnt,0,sizeof(*cnt)*(mx+1));
        for(int i=1;i<=n;++i)++cnt[rk[i]];
        for(int i=1;i<=mx;++i)cnt[i]+=cnt[i-1];
        // Backward scan with post-decrement keeps equal keys in tmp order.
        for(int i=n;i;--i)sa[cnt[rk[tmp[i]]]--]=tmp[i];
    }
    // Builds sa and rk for s[1..n].
    inline void suffix_sort(){
        mx=0;
        // Sentinels. The comparison below may read tmp[] at index n+1 and needs a
        // rank of 0 there ("past the end"). rk and tmp are swapped every round and
        // neither is ever written at n+1, so zeroing both once is enough. Without
        // this, a previous run on a longer string leaves stale ranks behind.
        sa[0]=0;
        rk[n+1]=tmp[n+1]=0;
        // Initial keys are the byte values; the cast keeps bytes >= 0x80
        // non-negative on platforms where char is signed.
        for(int i=1;i<=n;++i)mx=std::max(mx,rk[i]=static_cast<unsigned char>(s[i])),tmp[i]=i;
        base_sort();
        // dif: number of distinct ranks after the round; done once it reaches n.
        for(int len=1,dif=0;dif<n;len<<=1,mx=dif){
            int p=0;
            // Order by second key (rank of the suffix len further right): suffixes
            // with no second half come first, the rest follow the current sa order.
            for(int i=n-len+1;i<=n;++i)tmp[++p]=i;
            for(int i=1;i<=n;++i)
                if(sa[i]>len)
                    tmp[++p]=sa[i]-len;
            // Stable sort by first key finishes the sort on (first, second).
            base_sort();
            std::swap(rk,tmp);   // tmp now holds the ranks of the previous round
            rk[sa[1]]=dif=1;
            for(int i=2;i<=n;++i){
                // The second halves are only compared when the first halves are
                // equal, which implies both suffixes have at least len characters,
                // so the indices never exceed n+1.
                if(tmp[sa[i-1]]!=tmp[sa[i]]||tmp[sa[i-1]+len]!=tmp[sa[i]+len])++dif;
                rk[sa[i]]=dif;
            }
        }
    }
    // Builds ht from sa and rk, processing suffixes in text order and reusing the
    // previous match length minus one as the starting point.
    inline void calc_ht(){
        for(int i=1,h=0;i<=n;++i){
            if(rk[i]==1){
                // The smallest suffix has no predecessor; do not touch sa[0] / s[0].
                ht[1]=0;
                h=0;
                continue;
            }
            if(h)--h;
            int j=sa[rk[i]-1];
            // Explicit bounds instead of relying on a terminator at s[n+1].
            while(i+h<=n&&j+h<=n&&s[i+h]==s[j+h])++h;
            ht[rk[i]]=h;
        }
    }
    // Debug dump of rk, sa (with the suffix text) and ht.
    inline void PR(){
        std::string p(s+1,s+1+n);
        for(int i=1;i<=n;++i)
        std::printf("Rank[%d]:%d\n",i,rk[i]);
        for(int i=1;i<=n;++i){
            std::printf("sa[%d]:%d ",i,sa[i]);
            std::cout<<p.substr(sa[i]-1)<<std::endl;
        }
        for(int i=1;i<=n;++i)
        std::printf("ht[%d]:%d\n",i,ht[i]);
    }
    // Problem-specific driver; intentionally left empty in the template.
    void solve(){
        
    }
}sa;
int main(){
    sa.solve();
    return 0; 
} 

原倍增板子

#include<iostream>
#include<cstring>
#include<cstdio>
using namespace std;
// T: number of test cases, read in main. ans is not referenced by the visible code
// of this program.
int T,ans;
// Prefix-doubling suffix array over the 0-based text s[0..n).
//   Rank[i]:   rank of the suffix that starts at position i
//   sa[r]:     start position of the suffix with rank r
//   height[r]: longest-common-prefix length of the suffixes ranked r-1 and r
// Rank and sa are inverse permutations with values in [0,n-1].
// Usage: init() (or fill s and n), then get_sa(), then get_height().
struct SuffixArray
{
	#define N 50005
	char s[N];
	int n,m;
	// x / y point at X / Y and are swapped every round: x holds the current
	// ranks, y holds the order by second key (and, after the swap, the old ranks).
	int *x,*y,X[N],Y[N],c[N],sa[N],height[N],Rank[N];
	// Zeroes every work array; s and n are left untouched.
	void clear()
	{
	    std::memset(X,0,sizeof(X));std::memset(Y,0,sizeof(Y));std::memset(c,0,sizeof(c));
	    std::memset(sa,0,sizeof(sa));std::memset(height,0,sizeof(height));std::memset(Rank,0,sizeof(Rank));
	}
	// Reads one whitespace-delimited token into s and records its length.
	void init()
	{
		std::scanf("%s",s);
		n=std::strlen(s);
	}
	// Builds sa for s[0..n). _m is the alphabet size; characters are mapped with
	// s[i]-'a', so every character must lie in ['a', 'a'+_m).
	void get_sa(int _m=30)
	{
		m=_m;
	    x=X,y=Y;
	    // Round 0: counting sort by single character.
	    for (int i=0;i<m;++i) c[i]=0;
	    for (int i=0;i<n;++i) x[i]=s[i]-'a',++c[x[i]];
	    for (int i=1;i<m;++i) c[i]+=c[i-1];
	    for (int i=n-1;i>=0;--i) sa[--c[x[i]]]=i;
	
	    for (int k=1;k<=n;k<<=1)
	    {
	        // Order by second key: suffixes without a second half first, then the
	        // rest in current sa order shifted left by k.
	        int p=0;
	        for (int i=n-k;i<n;++i) y[p++]=i;
	        for (int i=0;i<n;++i) if (sa[i]>=k) y[p++]=sa[i]-k;
	
	        // Stable counting sort by first key.
	        for (int i=0;i<m;++i) c[i]=0;
	        for (int i=0;i<n;++i) ++c[x[y[i]]];
	        for (int i=1;i<m;++i) c[i]+=c[i-1];
	        for (int i=n-1;i>=0;--i) sa[--c[x[y[i]]]]=y[i];
	
	        // Re-rank: equal (first, second) pairs share a rank; a missing second
	        // half compares as -1.
	        std::swap(x,y);
	        p=1;x[sa[0]]=0;
	        for (int i=1;i<n;++i)
	            x[sa[i]]=y[sa[i-1]]==y[sa[i]]&&((sa[i-1]+k<n?y[sa[i-1]+k]:-1)==(sa[i]+k<n?y[sa[i]+k]:-1))?p-1:p++;
	        // p is the number of distinct ranks and can never exceed n, so the
	        // former test "p>n" never fired and every doubling round always ran.
	        // Once all n ranks are distinct the order is final.
	        if (p>=n) break;
	        m=p;
	    }
	}
	// Builds Rank and height from sa, walking the text left to right and reusing
	// the previous match length minus one.
	void get_height()
	{
	    for (int i=0;i<n;++i) Rank[sa[i]]=i;
	    int k=0;height[0]=0;
	    for (int i=0;i<n;++i)
	    {
	        if (!Rank[i]) continue;
	        if (k) --k;
	        int j=sa[Rank[i]-1];
	        // Must compare the original text s, not the rank arrays X / x.
	        while (i+k<n&&j+k<n&&s[i+k]==s[j+k]) ++k;
	        height[Rank[i]]=k;
	    }
	}
	// Debug dump of Rank, sa (with the suffix text) and height.
	void PR()
	{
		std::string p(s);
		for(int i=0;i<n;++i)
		std::printf("Rank[%d]:%d\n",i,Rank[i]);
		for(int i=0;i<n;++i)
		{
		 	std::printf("sa[%d]:%d ",i,sa[i]);
		 	std::cout<<p.substr(sa[i])<<std::endl;
		}
		for(int i=0;i<n;++i)
		std::printf("height[%d]:%d\n",i,height[i]);
	}
}sa;
// Entry point: reads the number of test cases, then for each case reads a string,
// builds its suffix and height arrays and dumps them.
int main()
{
    scanf("%d",&T);
    for (;T--;)
    {
        sa.clear();
        sa.init();
        sa.get_sa();
        sa.get_height();
        sa.PR();
    }
    return 0;
}

经典问题(2024.7.13更新)

后缀数组+RMQ+二分(询问每个串在原串的出现次数,可离线)
#include<bits/stdc++.h>
#include<iostream>
using namespace std;
#define rep(i,a,b) for(int i=(a);i<=(b);++i)
#define per(i,a,b) for(int i=(a);i>=(b);--i)
typedef long long ll;
typedef double db;
typedef pair<int,int> P;
#define fi first
#define se second
#define pb push_back
#define dbg(x) cerr<<(#x)<<":"<<x<<" ";
#define dbg2(x) cerr<<(#x)<<":"<<x<<endl;
#define SZ(a) (int)(a.size())
#define sci(a) scanf("%d",&(a))
#define pt(a) printf("%d",a);
#define pte(a) printf("%d\n",a)
#define ptlle(a) printf("%lld\n",a)
#define debug(...) fprintf(stderr, __VA_ARGS__)
typedef unsigned ui;
typedef long long ll;
// N: capacity of the joined text; M: number of levels of the sparse table.
const int N=2e6+10,M=21;
// sz[i] / st[i]: length and start offset (in the joined text) of the i-th string.
// dp[i][j]: minimum of ht over the 2^j ranks starting at i (built by ST()).
// lg[i]: floor(log2(i)). sum[i]: number of ranks in [1,i] whose suffix starts
// inside the first string.
int sz[N],st[N],dp[N][M],lg[N],sum[N];   
// Solver built around an SA-IS suffix array plus a sparse table over the LCP array.
// solve() reads a text and m query strings, joins them into ss with a '#' after each,
// and for every query prints how many suffixes starting inside the text share at
// least the query's full length as a common prefix with the query's own suffix.
struct SuffixArray{
    // ss: joined text; tt: scratch buffer for one input string.
    char ss[N],tt[N];
    // n: length of the joined text. sa / rk / ht: suffix array, rank and LCP arrays.
    // s, t, p, cnt, cur: working storage for sais().
    int n,sa[N], rk[N], ht[N], s[N<<1], t[N<<1], p[N], cnt[N], cur[N];
    // Bucket helpers used by inducedSort: pushS fills a bucket from its tail,
    // pushL fills it from its head. cur[c] is the current slot of bucket c.
    #define pushS(x) sa[cur[s[x]]--] = x
    #define pushL(x) sa[cur[s[x]]++] = x
    // Induced sort. Expands inside sais() and uses its locals n, m, s, t, n1:
    // seeds the buckets with the n1 positions listed in v, then induces the
    // positions with t[]==1 in a left-to-right scan and the positions with
    // t[]==0 in a right-to-left scan. (No comments inside the macro body: a
    // '//' comment before a line continuation would swallow the next line.)
    #define inducedSort(v) \
	    fill_n(sa, n, -1); fill_n(cnt, m, 0);                                     \
	    for (int i = 0; i < n; i++) cnt[s[i]]++;                                  \
	    for (int i = 1; i < m; i++) cnt[i] += cnt[i-1];                           \
	    for (int i = 0; i < m; i++) cur[i] = cnt[i]-1;                            \
	    for (int i = n1-1; ~i; i--) pushS(v[i]);                                  \
	    for (int i = 1; i < m; i++) cur[i] = cnt[i-1];                            \
	    for (int i = 0; i < n; i++) if (sa[i] > 0 &&  t[sa[i]-1]) pushL(sa[i]-1); \
	    for (int i = 0; i < m; i++) cur[i] = cnt[i]-1;                            \
	    for (int i = n-1;  ~i; i--) if (sa[i] > 0 && !t[sa[i]-1]) pushS(sa[i]-1);
    // Recursive SA-IS step: fills the member sa[0..n) with the suffix order of
    // s[0..n), whose values lie in [0, m).
    //   t: type flags (1 when suffix i is larger than suffix i+1, else 0)
    //   p: positions i with t[i-1]==1 and t[i]==0 (LMS positions), n1 of them
    // The parameters shadow the members of the same name; the recursion works on
    // the tails s+n, t+n and p+n1 of the same buffers.
    void sais(int n, int m, int *s, int *t, int *p) {
        int n1 = t[n-1] = 0, ch = rk[0] = -1, *s1 = s+n;
        // Classify every position, scanning from the right.
        for (int i = n-2; ~i; i--) t[i] = s[i] == s[i+1] ? t[i+1] : s[i] > s[i+1];
        // Collect LMS positions into p; rk[i] is the index in p, or -1 otherwise.
        for (int i = 1; i < n; i++) rk[i] = t[i-1] && !t[i] ? (p[n1] = i, n1++) : -1;
        inducedSort(p);
        // Name the LMS substrings in sorted order: a new name (ch++) is issued when
        // the substring differs in length or in any (character, type) pair from the
        // previously named one. The names form the reduced string s1.
        for (int i = 0, x, y; i < n; i++) if (~(x = rk[sa[i]])) {
            if (ch < 1 || p[x+1] - p[x] != p[y+1] - p[y]) ch++;
            else for (int j = p[x], k = p[y]; j <= p[x+1]; j++, k++)
                if ((s[j]<<1|t[j]) != (s[k]<<1|t[k])) {ch++; break;}
            s1[y = x] = ch;
        }
        // Names not all distinct: sort the reduced string recursively.
        // Otherwise the names already are the ranks.
        if (ch+1 < n1) sais(n1, ch+1, s1, t+n, p+n1);
        else for (int i = 0; i < n1; i++) sa[s1[i]] = i;
        // Translate the sorted reduced indices back to text positions and induce
        // the final order from them.
        for (int i = 0; i < n1; i++) s1[i] = p[sa[i]];
        inducedSort(s1);
    }
    // Compresses str[0..n) to dense values written into the member s[] (the
    // smallest distinct character becomes 0) and returns the number of distinct
    // characters. rk[] is used as scratch, indexed by character value.
    template<typename T>
    int mapCharToInt(int n, const T *str) {
        int m = *max_element(str, str+n);
        fill_n(rk, m+1, 0);
        for (int i = 0; i < n; i++) rk[str[i]] = 1;
        for (int i = 0; i < m; i++) rk[i+1] += rk[i];
        for (int i = 0; i < n; i++) s[i] = rk[str[i]] - 1;
        return rk[m];
    }
    // Ensure that str[n] is the unique lexicographically smallest character in str.
    // Builds sa, rk and ht for str. The parameter n is a local copy that is
    // incremented so that str[n] is part of the sorted text; the member n is not
    // changed. solve() passes ss, whose element ss[n] is never written.
    template<typename T>
    void suffixArray(int n, const T *str) {
    	//s[n++]='a'-1;
        int m = mapCharToInt(++n, str);
        sais(n, m, s, t, p);
        for (int i = 0; i < n; i++) rk[sa[i]] = i;
        // LCP array: ht[r] is the common-prefix length of the suffixes ranked r-1
        // and r. h is carried over from the previous position, reduced by one.
        for (int i = 0, h = ht[0] = 0; i < n-1; i++) {
            int j = sa[rk[i]-1];
            while (i+h < n && j+h < n && s[i+h] == s[j+h]) h++;
            // Intentional assignment: store h, then decrement it if non-zero.
            if (ht[rk[i]] = h) h--;
        }
    }
    // Debug dump of rk, sa (with the suffix text) and ht.
    inline void PR(){
        string p(ss);
        for(int i=0;i<n;++i)//i∈[0,n) rank[i]∈[1,n]
        printf("Rank[%d]:%d\n",i,rk[i]);
        for(int i=0;i<=n;++i){//i∈[1,n] sa[i]∈[0,n)
            printf("sa[%d]:%d ",i,sa[i]);
            cout<<p.substr(sa[i])<<endl;
        }
        for(int i=1;i<=n;++i)//i∈[1,n] ht[1]=0
        printf("ht[%d]:%d\n",i,ht[i]);
    }
    // Builds the sparse table over ht[1..n]: lg[] holds floor(log2) values and
    // dp[i][j] the minimum of ht over the 2^j entries starting at i.
    inline void ST(int n){
        for(int i=2;i<=n;++i)lg[i]=lg[i>>1]+1;   
        for(int i=1;i<=n;++i)dp[i][0]=ht[i];
        for(int j=1;(1<<j)<=n;++j){
            for(int i=1;i+(1<<j)-1<=n;++i){
                dp[i][j]=min(dp[i][j-1],dp[i+(1<<(j-1))][j-1]);
            }
        } 
    }
    // Minimum of ht[l..r] (requires l<=r), from two overlapping power-of-two blocks.
    inline int RMQ(int l,int r){
        int k=lg[r-l+1];
        return min(dp[l][k],dp[r-(1<<k)+1][k]); 
    }
    // Reads the text and the queries, builds all tables and prints one count per query.
    void solve(){
        // String 1 is the text; it starts at offset 0 of ss.
        scanf("%s",tt);
        st[1]=0;
        sz[1]=strlen(tt);
        int &x=sz[1];
        rep(j,0,x-1){
            ss[n++]=tt[j];
        }
        ss[n++]='#';
        int m;
        sci(m);
        // Strings 2..m+1 are the queries, appended after the text.
        rep(i,2,m+1){
            scanf("%s",tt);
            st[i]=n;
            sz[i]=strlen(tt);
            int &x=sz[i];
            rep(j,0,x-1){
                ss[n++]=tt[j];
            }
            ss[n++]='#';
        }
        suffixArray(n, ss);
        //PR();
        ST(n);
        // sum[i]: how many of the ranks 1..i belong to suffixes starting in the text.
        rep(i,1,n)sum[i]=sum[i-1]+(sa[i]<sz[1]);
        rep(i,2,m+1){
            // p: rank of the suffix that starts at the query's first character.
            int p=rk[st[i]];
            int l=1,r=p,L,R;
            // Leftmost rank L whose suffix shares at least sz[i] characters with
            // rank p: binary search on min(ht[mid..p]) >= sz[i].
            while(l<=r){
                int mid=l+(r-l)/2;
                if(RMQ(mid,p)>=sz[i])r=mid-1;
                else l=mid+1;
            }
            L=l-1;
            l=p+1,r=n;
            //printf("l:%d r:%d\n",l,r);
            // Rightmost rank R with the same property: binary search on
            // min(ht[p+1..mid]) >= sz[i].
            while(l<=r){
                int mid=l+(r-l)/2;
                //printf("mid:%d rmq:%d\n",mid,RMQ(p+1,mid));
                if(RMQ(p+1,mid)>=sz[i]){
                    l=mid+1;
                }
                else r=mid-1;
            }
            R=r;
            //printf("i:%d st:%d p:%d sz:%d L:%d R:%d\n",i,st[i],p,sz[i],L,R);
            // Text suffixes among the ranks L..R.
            printf("%d\n",sum[R]-sum[L-1]);
        }
        // rep(i,2,m+1){
        // }
    }
}sa;
int main(){
    sa.solve();
    return 0; 
}

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Code92007

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值