一些说明
rk[i]: 下标位置在i的后缀的排名
sa[i]: 后缀排名第i的下标位置
ht[i]: 排名第i的后缀与排名第i-1的后缀的最长公共前缀(LCP)长度
基数排序
代码中的后置自减(即 cnt[rk[tmp[i]]]-- 里的 --)主要是为了处理关键字相同的情形
如果两个关键字相同,则先遍历到的元素占据靠后的位置,后遍历到的元素占据靠前的位置;由于是从 i=n 倒序遍历,关键字相同的元素仍保持它们在 tmp 中的相对顺序(即排序是稳定的)
如果没有关键字相同的情况,不减也可以
// Counting sort of the suffixes listed in tmp[1..n] by their key rk[.], writing
// the sorted order into sa[1..n]. Keys are expected to lie in [0, mx].
// Elements with equal keys keep the relative order they have in tmp.
inline void base_sort(){
    memset(cnt,0,sizeof(*cnt)*(mx+1));
    // Histogram of the keys.
    int pos=1;
    while(pos<=n){
        cnt[rk[pos]]+=1;
        ++pos;
    }
    // Prefix sums: cnt[k] becomes the last slot of bucket k.
    for(int key=1;key<=mx;++key){
        cnt[key]=cnt[key]+cnt[key-1];
    }
    // Walk tmp backwards and fill every bucket from its end; the post-decrement
    // moves the bucket cursor so equal keys land in distinct slots.
    for(int idx=n;idx!=0;--idx){
        const auto suffix=tmp[idx];
        auto &slot=cnt[rk[suffix]];
        sa[slot]=suffix;
        --slot;
    }
}
SAIS板子
参考来源
还在写倍增后缀数组? SA-IS算法了解一下~ - shadowice1984 的博客 - 洛谷博客
提交记录 #309676 - Universal Online Judge
Tip
这份板子提到需要在串的最后加一个字典序小于所有字母的字符,一开始有点不明所以
不太确定是uoj35需要这么做还是SAIS需要这么做,没有手动加这个字符也把gym题通过了(从下面的代码看,suffixArray()里的++n已经把str[n]处的'\0'结束符当作这个最小字符一并参与排序了)
下标和新倍增板子下标略有不同,详见PR()函数
题目
#include<bits/stdc++.h>
//#include<iostream>
using namespace std;
#define rep(i,a,b) for(int i=(a);i<=(b);++i)
#define per(i,a,b) for(int i=(a);i>=(b);--i)
typedef long long ll;
typedef double db;
typedef pair<int,int> P;
#define fi first
#define se second
#define pb push_back
#define dbg(x) cerr<<(#x)<<":"<<x<<" ";
#define dbg2(x) cerr<<(#x)<<":"<<x<<endl;
#define SZ(a) (int)(a.size())
#define sci(a) scanf("%d",&(a))
#define pt(a) printf("%d",a);
#define pte(a) printf("%d\n",a)
#define ptlle(a) printf("%lld\n",a)
#define debug(...) fprintf(stderr, __VA_ARGS__)
typedef unsigned ui;
typedef long long ll;
// Capacity shared by every fixed-size array of this program.
const int N=4e6+10;
// par: disjoint-set parent links; sz[i]: length of the i-th input string;
// ed[i]: index (in the concatenated text) of the last character of string i.
int par[N],sz[N],ed[N];
// Not referenced by the code visible in this file.
bool used[N];
// Disjoint-set lookup with full path compression: returns the representative
// of x and re-points every node on the walked path directly at it.
int find(int x){
    // First pass: locate the representative.
    int root=x;
    while(par[root]!=root){
        root=par[root];
    }
    // Second pass: compress the path.
    while(x!=root){
        const int next=par[x];
        par[x]=root;
        x=next;
    }
    return root;
}
// Suffix array built with SA-IS, bundled with the solver for this problem.
// After suffixArray(len, str): sa[0..len] lists suffix start indices in rank
// order (the extra entry is the terminator suffix at index len), rk[] is the
// inverse of sa[], and ht[r] is the LCP length of the suffixes ranked r and r-1.
struct SuffixArray{
// ss: all input strings concatenated, each followed by '#'; tt: read buffer for one string.
char ss[N],tt[N];
// to[pos]: 1-based index of the input string owning position pos of ss.
// '#' positions are never assigned; NOTE(review): presumably they stay 0 because
// the only instance of this struct is a global object - confirm before reusing.
int to[N];
// id[r]: (edge weight, rank r) pairs that solve() sorts in descending order.
P id[N];
// n: number of characters stored in ss. s: integer-coded text (deeper recursion
// levels are appended after the current one). t: per-position type flag, 1 when
// suffix i is larger than suffix i+1. p: start positions of the LMS substrings.
// cnt: cumulative bucket sizes. cur: moving insertion cursor of each bucket.
int n,sa[N], rk[N], ht[N], s[N<<1], t[N<<1], p[N], cnt[N], cur[N];
// pushS / pushL: store suffix x at the tail / head cursor of the bucket of its
// first symbol and move that cursor inwards.
#define pushS(x) sa[cur[s[x]]--] = x
#define pushL(x) sa[cur[s[x]]++] = x
// inducedSort(v): clears sa, rebuilds the bucket boundaries, seeds the n1
// suffixes listed in v at their bucket tails, then induces the positions of
// the t==1 suffixes left-to-right and of the t==0 suffixes right-to-left.
// Expands in a scope that provides n, m, n1, s, t, sa, cnt and cur.
#define inducedSort(v) \
fill_n(sa, n, -1); fill_n(cnt, m, 0); \
for (int i = 0; i < n; i++) cnt[s[i]]++; \
for (int i = 1; i < m; i++) cnt[i] += cnt[i-1]; \
for (int i = 0; i < m; i++) cur[i] = cnt[i]-1; \
for (int i = n1-1; ~i; i--) pushS(v[i]); \
for (int i = 1; i < m; i++) cur[i] = cnt[i-1]; \
for (int i = 0; i < n; i++) if (sa[i] > 0 && t[sa[i]-1]) pushL(sa[i]-1); \
for (int i = 0; i < m; i++) cur[i] = cnt[i]-1; \
for (int i = n-1; ~i; i--) if (sa[i] > 0 && !t[sa[i]-1]) pushS(sa[i]-1);
// Recursive SA-IS core.
// n: length of s including the final sentinel; m: alphabet size (symbols in [0,m));
// s: text; t, p: scratch space for the type flags and LMS positions of this level.
// The result is written to the member array sa[0..n).
void sais(int n, int m, int *s, int *t, int *p) {
// n1 counts LMS positions, ch is the last name handed out, s1 is the reduced string.
int n1 = t[n-1] = 0, ch = rk[0] = -1, *s1 = s+n;
// Classify suffixes from right to left; equal neighbours inherit the type.
for (int i = n-2; ~i; i--) t[i] = s[i] == s[i+1] ? t[i+1] : s[i] > s[i+1];
// Record LMS positions (t==1 followed by t==0); rk[i] is the LMS index or -1.
for (int i = 1; i < n; i++) rk[i] = t[i-1] && !t[i] ? (p[n1] = i, n1++) : -1;
inducedSort(p);
// Name the LMS substrings in sorted order; a new name is issued whenever the
// substring differs from the previous one (y is the previous LMS index).
for (int i = 0, x, y; i < n; i++) if (~(x = rk[sa[i]])) {
// The ch < 1 test also keeps y from being read before it is first assigned.
if (ch < 1 || p[x+1] - p[x] != p[y+1] - p[y]) ch++;
else for (int j = p[x], k = p[y]; j <= p[x+1]; j++, k++)
if ((s[j]<<1|t[j]) != (s[k]<<1|t[k])) {ch++; break;}
s1[y = x] = ch;
}
// Names not yet unique: sort the reduced string recursively; otherwise the
// names already give the order directly.
if (ch+1 < n1) sais(n1, ch+1, s1, t+n, p+n1);
else for (int i = 0; i < n1; i++) sa[s1[i]] = i;
// Translate the sorted LMS indices back to text positions and induce the final order.
for (int i = 0; i < n1; i++) s1[i] = p[sa[i]];
inducedSort(s1);
}
// Compresses the values of str[0..n) to dense integers written to s[0..n)
// (smallest value -> 0) and returns the number of distinct values.
// rk is used as scratch space indexed by the raw value, so values must be
// non-negative and smaller than the size of rk.
template<typename T>
int mapCharToInt(int n, const T *str) {
int m = *max_element(str, str+n);
fill_n(rk, m+1, 0);
for (int i = 0; i < n; i++) rk[str[i]] = 1;
for (int i = 0; i < m; i++) rk[i+1] += rk[i];
for (int i = 0; i < n; i++) s[i] = rk[str[i]] - 1;
return rk[m];
}
// Ensure that str[n] is the unique lexicographically smallest character in str.
// Builds sa, rk and ht for str[0..n]. The parameter n shadows the member n and
// is incremented locally so that str[n] takes part in the sort as the sentinel.
template<typename T>
void suffixArray(int n, const T *str) {
//s[n++]='a'-1;
int m = mapCharToInt(++n, str);
sais(n, m, s, t, p);
for (int i = 0; i < n; i++) rk[sa[i]] = i;
// LCP computation in text order, reusing h-1 from the previous position.
// The sentinel position n-1 is skipped, so rk[i]-1 is expected to be >= 0.
for (int i = 0, h = ht[0] = 0; i < n-1; i++) {
int j = sa[rk[i]-1];
while (i+h < n && j+h < n && s[i+h] == s[j+h]) h++;
// Intentional assignment: store h, then decrement it only when non-zero.
if (ht[rk[i]] = h) h--;
}
}
// Debug helper: prints rk, sa (with the suffix text) and ht.
inline void PR(){
string p(ss);
for(int i=0;i<n;++i)//i∈[0,n) rank[i]∈[1,n]
printf("Rank[%d]:%d\n",i,rk[i]);
for(int i=0;i<=n;++i){// sa[0] is the sentinel suffix; for i∈[1,n], sa[i]∈[0,n)
printf("sa[%d]:%d ",i,sa[i]);
cout<<p.substr(sa[i])<<endl;
}
for(int i=1;i<=n;++i)//i∈[1,n] ht[1]=0
printf("ht[%d]:%d\n",i,ht[i]);
}
// Reads m strings from stdin, joins them with '#' separators, builds the suffix
// array and then merges the strings Kruskal-style: every pair of rank-adjacent
// suffixes is an edge whose weight is their LCP clamped to the characters left
// in each owning string. Edges are taken in descending weight order and the
// weights of the edges that join two different components are summed.
// Asserts that exactly m-1 merges happened and returns that sum.
ll solve(){
int m;
sci(m);
rep(i,1,m){
par[i]=i;
scanf("%s",tt);
sz[i]=strlen(tt);
int &x=sz[i];
rep(j,0,x-1){
ss[n]=tt[j];
to[n++]=i;
}
ed[i]=n-1;
ss[n++]='#';
}
suffixArray(n, ss);
//PR();
// Pass 1: compute the clamped weight of every adjacent pair (rank i, rank i-1).
rep(i,1,n){
id[i]=P(ht[i],i);
int p=id[i].second,x=sa[p],y=sa[p-1];
// v is assigned here but not used in this loop.
int px=to[x],py=to[y],v=id[i].first;
// Pairs touching a separator or the terminator keep their raw ht value;
// the second pass skips them with the same tests.
if(ss[x]=='#' || ss[y]=='#')continue;
if(px==0 || py==0)continue;
// ux / uy: characters from x / y up to the end of the owning string.
int ux=ed[px]-x+1,uy=ed[py]-y+1;
id[i].first=min(id[i].first,ux);
id[i].first=min(id[i].first,uy);
//printf("i1:%d p:%d x:%d y:%d px:%d py:%d v:%d w:%d\n",i,p,x,y,px,py,v,w);
}
sort(id+1,id+n+1,greater<P>());
ll ans=0;
int cnt=0;
// Pass 2: heaviest edges first, union the owning strings.
rep(i,1,n){
int p=id[i].second,x=sa[p],y=sa[p-1];
int px=to[x],py=to[y],v=id[i].first;
if(ss[x]=='#' || ss[y]=='#')continue;
if(px==0 || py==0)continue;
if(px==py)continue;
int pu=find(px),pv=find(py);
if(pv==pu)continue;
//printf("i2:%d p:%d x:%d y:%d px:%d py:%d v:%d\n",i,p,x,y,px,py,v);
par[pv]=pu;
cnt++;
ans+=v;
}
//printf("cnt:%d\n",cnt);
assert(cnt==m-1);
return ans;
}
}sa;
// Entry point: runs the solver on stdin and prints the resulting total.
int main(){
    const long long total=sa.solve();
    printf("%lld\n",total);
    return 0;
}
/*
7
jia
ran
jin
tian
chi
shen
me
2-4 ans=2
3-6
6-2
3-1
1-5
6-7
*/
新倍增板子
2023.1.27更新,整理了一个常数更小的倍增板子
Codeforces Round #846 (Div. 2), problem: (G) Delicious Dessert
这个题原来的倍增板子2.5s,现在的倍增板子1s
#include<iostream>
#include<cstring>
#include<cstdio>
#include<vector>
using namespace std;
typedef long long ll;
// File-scope array capacity. The struct below declares a static member with the
// same name and value, which is the one its own arrays use.
const int maxn=1e6+10;
// Prefix-doubling suffix array over a 1-based string.
//
// Usage: store the text in s[1..n] (e.g. scanf("%s", s+1)), set n, then call
// suffix_sort() followed by calc_ht().
//   rk[i]: rank of the suffix starting at index i
//   sa[i]: start index of the suffix whose rank is i
//   ht[i]: LCP length of the suffixes ranked i and i-1 (ht[1] = 0)
// rk and sa are inverse permutations; their indices and values lie in [1,n].
// Requires n <= maxn-2 because slot n+1 of rk/tmp serves as a zero sentinel.
// The object can be reused for several strings without clearing it.
struct SuffixArray{
    typedef long long ll;
    static const int maxn=1000000+10;
    char s[maxn];
    int cnt[maxn],mx,n,rk[maxn],sa[maxn],tmp[maxn],ht[maxn];

    // Counting sort of the suffixes listed in tmp[1..n] by key rk[.] (keys in
    // [0,mx]) into sa[1..n]. Equal keys keep their relative order from tmp.
    inline void base_sort(){
        std::memset(cnt,0,sizeof(*cnt)*(mx+1));
        for(int i=1;i<=n;++i)++cnt[rk[i]];
        for(int i=1;i<=mx;++i)cnt[i]+=cnt[i-1];
        // The post-decrement gives equal keys distinct slots, filled from the bucket end.
        for(int i=n;i;--i)sa[cnt[rk[tmp[i]]]--]=tmp[i];
    }

    // Builds sa[1..n] and rk[1..n] for s[1..n].
    inline void suffix_sort(){
        // Slot n+1 is read as "rank of the empty suffix" during comparisons.
        // Reset it in both buffers so a previous, longer input cannot leak in.
        rk[n+1]=tmp[n+1]=0;
        mx=0;
        for(int i=1;i<=n;++i){
            // Go through unsigned char so bytes >= 0x80 never become negative bucket indices.
            rk[i]=static_cast<unsigned char>(s[i]);
            mx=std::max(mx,rk[i]);
            tmp[i]=i;
        }
        base_sort();
        // Each round sorts by the first 2*len characters; dif is the number of
        // distinct ranks and the loop stops once all n suffixes are separated.
        for(int len=1,dif=0;dif<n;len<<=1,mx=dif){
            int p=0;
            // Order by second key: suffixes shorter than len have an empty second half ...
            for(int i=n-len+1;i<=n;++i)tmp[++p]=i;
            // ... followed by the others in the order of their second half.
            for(int i=1;i<=n;++i)
                if(sa[i]>len)
                    tmp[++p]=sa[i]-len;
            // Stable sort by first key completes the (first, second) ordering.
            base_sort();
            // Move the old ranks into tmp. Only slots 1..n+1 are ever read, so
            // swapping that prefix avoids touching all maxn elements every round.
            for(int i=1;i<=n+1;++i)std::swap(rk[i],tmp[i]);
            rk[sa[1]]=dif=1;
            for(int i=2;i<=n;++i){
                // The second comparison only runs when the first halves are equal,
                // which implies both suffixes have at least len characters, so the
                // index sa[.]+len never exceeds n+1.
                if(tmp[sa[i-1]]!=tmp[sa[i]]||tmp[sa[i-1]+len]!=tmp[sa[i]+len])++dif;
                rk[sa[i]]=dif;
            }
        }
    }

    // Fills ht[1..n] from sa/rk, walking the text in index order and reusing
    // h-1 from the previous position as the starting match length.
    inline void calc_ht(){
        for(int i=1,h=0;i<=n;++i){
            if(rk[i]==1){
                // The smallest suffix has no predecessor in rank order.
                ht[1]=0;
                h=0;
                continue;
            }
            if(h)--h;
            int j=sa[rk[i]-1];
            // Explicit bounds instead of relying on zero bytes around the text.
            while(i+h<=n&&j+h<=n&&s[i+h]==s[j+h])++h;
            ht[rk[i]]=h;
        }
    }

    // Debug helper: prints rk, sa (with the suffix text) and ht.
    // Expects s+1 to be a NUL-terminated string.
    inline void PR(){
        std::string p(s+1);
        for(int i=1;i<=n;++i)
            std::printf("Rank[%d]:%d\n",i,rk[i]);
        for(int i=1;i<=n;++i){
            std::printf("sa[%d]:%d ",i,sa[i]);
            std::cout<<p.substr(sa[i]-1)<<std::endl;
        }
        for(int i=1;i<=n;++i)
            std::printf("ht[%d]:%d\n",i,ht[i]);
    }

    // Problem-specific hook; intentionally left empty in the template.
    void solve(){
    }
}sa;
// Entry point: hands control to the solve() hook of the global suffix-array
// object. Falling off the end of main yields exit status 0.
int main(){
    sa.solve();
}
原倍增板子
#include<iostream>
#include<cstring>
#include<cstdio>
using namespace std;
// T: number of test cases, read in main(). ans is not referenced by the code visible in this file.
int T,ans;
// Prefix-doubling suffix array over a 0-based string.
//
// Usage: clear(); init(); get_sa(); get_height();
//   Rank[i]:   rank of the suffix starting at index i
//   sa[i]:     start index of the suffix whose rank is i
//   height[i]: LCP length of the suffixes ranked i and i-1 (height[0] = 0)
// Rank and sa are inverse permutations with indices and values in [0,n-1].
struct SuffixArray
{
    // Array capacity. Kept as a macro (visible to the rest of the file) as in
    // the original template.
    #define N 50005
    char s[N];
    int n,m;
    // x / y point at the current and previous rank buffers (X and Y); c holds bucket counts.
    int *x,*y,X[N],Y[N],c[N],sa[N],height[N],Rank[N];

    // Zeroes every work array so the object can be reused for another test case.
    void clear()
    {
        std::memset(X,0,sizeof(X));
        std::memset(Y,0,sizeof(Y));
        std::memset(c,0,sizeof(c));
        std::memset(sa,0,sizeof(sa));
        std::memset(height,0,sizeof(height));
        std::memset(Rank,0,sizeof(Rank));
    }

    // Reads one whitespace-delimited token from stdin into s and sets n.
    // The read is unbounded: the token must be shorter than N characters.
    void init()
    {
        std::scanf("%s",s);
        n=std::strlen(s);
    }

    // Builds sa[0..n).
    // _m: alphabet size; characters are mapped with s[i]-'a', so every
    // character must lie in ['a', 'a'+_m).
    void get_sa(int _m=30)
    {
        m=_m;
        x=X,y=Y;
        // Initial counting sort by the first character.
        for (int i=0;i<m;++i) c[i]=0;
        for (int i=0;i<n;++i) x[i]=s[i]-'a',++c[x[i]];
        for (int i=1;i<m;++i) c[i]+=c[i-1];
        for (int i=n-1;i>=0;--i) sa[--c[x[i]]]=i;
        for (int k=1;k<=n;k<<=1)
        {
            int p=0;
            // Order by second key: suffixes without a second half come first ...
            for (int i=n-k;i<n;++i) y[p++]=i;
            // ... followed by the others in the order of their second half.
            for (int i=0;i<n;++i) if (sa[i]>=k) y[p++]=sa[i]-k;
            // Stable counting sort by first key.
            for (int i=0;i<m;++i) c[i]=0;
            for (int i=0;i<n;++i) ++c[x[y[i]]];
            for (int i=1;i<m;++i) c[i]+=c[i-1];
            for (int i=n-1;i>=0;--i) sa[--c[x[y[i]]]]=y[i];
            // y now holds the old ranks; recompute ranks over 2k characters into x.
            std::swap(x,y);
            p=1;x[sa[0]]=0;
            for (int i=1;i<n;++i)
                x[sa[i]]=y[sa[i-1]]==y[sa[i]]&&((sa[i-1]+k<n?y[sa[i-1]+k]:-1)==(sa[i]+k<n?y[sa[i]+k]:-1))?p-1:p++;
            // p is the number of distinct ranks and never exceeds n, so the
            // order is final as soon as p reaches n.
            if (p>=n) break;
            m=p;
        }
    }

    // Fills Rank[] and height[] from sa, walking the text in index order and
    // reusing k-1 from the previous position as the starting match length.
    void get_height()
    {
        for (int i=0;i<n;++i) Rank[sa[i]]=i;
        int k=0;height[0]=0;
        for (int i=0;i<n;++i)
        {
            if (!Rank[i]) continue;
            if (k) --k;
            int j=sa[Rank[i]-1];
            // Compare the original text s, not the rank buffers X/x.
            while (i+k<n&&j+k<n&&s[i+k]==s[j+k]) ++k;
            height[Rank[i]]=k;
        }
    }

    // Debug helper: prints Rank, sa (with the suffix text) and height.
    void PR()
    {
        std::string p(s);
        for(int i=0;i<n;++i)
            std::printf("Rank[%d]:%d\n",i,Rank[i]);
        for(int i=0;i<n;++i)
        {
            std::printf("sa[%d]:%d ",i,sa[i]);
            std::cout<<p.substr(sa[i])<<std::endl;
        }
        for(int i=0;i<n;++i)
            std::printf("height[%d]:%d\n",i,height[i]);
    }
}sa;
// Entry point: reads the number of test cases, then for each one resets the
// suffix-array object, reads a string, builds sa/height and prints them.
int main()
{
    scanf("%d",&T);
    for (;T--;)
    {
        sa.clear();
        sa.init();
        sa.get_sa();
        sa.get_height();
        sa.PR();
    }
    return 0;
}
经典问题(2024.7.13更新)
后缀数组+RMQ+二分(询问每个串在原串的出现次数,可离线)
#include<bits/stdc++.h>
#include<iostream>
using namespace std;
#define rep(i,a,b) for(int i=(a);i<=(b);++i)
#define per(i,a,b) for(int i=(a);i>=(b);--i)
typedef long long ll;
typedef double db;
typedef pair<int,int> P;
#define fi first
#define se second
#define pb push_back
#define dbg(x) cerr<<(#x)<<":"<<x<<" ";
#define dbg2(x) cerr<<(#x)<<":"<<x<<endl;
#define SZ(a) (int)(a.size())
#define sci(a) scanf("%d",&(a))
#define pt(a) printf("%d",a);
#define pte(a) printf("%d\n",a)
#define ptlle(a) printf("%lld\n",a)
#define debug(...) fprintf(stderr, __VA_ARGS__)
typedef unsigned ui;
typedef long long ll;
// N: capacity of the fixed-size arrays; M: number of levels in the sparse table dp.
const int N=2e6+10,M=21;
// sz[i]: length of the i-th string (1 = the text, 2..m+1 = the queries);
// st[i]: index in the concatenated text where string i starts;
// dp[i][j]: minimum of ht over the 2^j ranks starting at rank i;
// lg[i]: floor(log2(i)); sum[r]: number of ranks in [1,r] whose suffix starts
// inside the first string.
int sz[N],st[N],dp[N][M],lg[N],sum[N];
// Suffix array built with SA-IS plus a sparse table over ht, used to count how
// often each query string occurs in the first string.
// After suffixArray(len, str): sa[0..len] lists suffix start indices in rank
// order (the extra entry is the terminator suffix at index len), rk[] is the
// inverse of sa[], and ht[r] is the LCP length of the suffixes ranked r and r-1.
struct SuffixArray{
// ss: text and queries concatenated, each followed by '#'; tt: read buffer for one string.
char ss[N],tt[N];
// n: number of characters stored in ss. s: integer-coded text (deeper recursion
// levels are appended after the current one). t: per-position type flag, 1 when
// suffix i is larger than suffix i+1. p: start positions of the LMS substrings.
// cnt: cumulative bucket sizes. cur: moving insertion cursor of each bucket.
int n,sa[N], rk[N], ht[N], s[N<<1], t[N<<1], p[N], cnt[N], cur[N];
// pushS / pushL: store suffix x at the tail / head cursor of the bucket of its
// first symbol and move that cursor inwards.
#define pushS(x) sa[cur[s[x]]--] = x
#define pushL(x) sa[cur[s[x]]++] = x
// inducedSort(v): clears sa, rebuilds the bucket boundaries, seeds the n1
// suffixes listed in v at their bucket tails, then induces the positions of
// the t==1 suffixes left-to-right and of the t==0 suffixes right-to-left.
// Expands in a scope that provides n, m, n1, s, t, sa, cnt and cur.
#define inducedSort(v) \
fill_n(sa, n, -1); fill_n(cnt, m, 0); \
for (int i = 0; i < n; i++) cnt[s[i]]++; \
for (int i = 1; i < m; i++) cnt[i] += cnt[i-1]; \
for (int i = 0; i < m; i++) cur[i] = cnt[i]-1; \
for (int i = n1-1; ~i; i--) pushS(v[i]); \
for (int i = 1; i < m; i++) cur[i] = cnt[i-1]; \
for (int i = 0; i < n; i++) if (sa[i] > 0 && t[sa[i]-1]) pushL(sa[i]-1); \
for (int i = 0; i < m; i++) cur[i] = cnt[i]-1; \
for (int i = n-1; ~i; i--) if (sa[i] > 0 && !t[sa[i]-1]) pushS(sa[i]-1);
// Recursive SA-IS core.
// n: length of s including the final sentinel; m: alphabet size (symbols in [0,m));
// s: text; t, p: scratch space for the type flags and LMS positions of this level.
// The result is written to the member array sa[0..n).
void sais(int n, int m, int *s, int *t, int *p) {
// n1 counts LMS positions, ch is the last name handed out, s1 is the reduced string.
int n1 = t[n-1] = 0, ch = rk[0] = -1, *s1 = s+n;
// Classify suffixes from right to left; equal neighbours inherit the type.
for (int i = n-2; ~i; i--) t[i] = s[i] == s[i+1] ? t[i+1] : s[i] > s[i+1];
// Record LMS positions (t==1 followed by t==0); rk[i] is the LMS index or -1.
for (int i = 1; i < n; i++) rk[i] = t[i-1] && !t[i] ? (p[n1] = i, n1++) : -1;
inducedSort(p);
// Name the LMS substrings in sorted order; a new name is issued whenever the
// substring differs from the previous one (y is the previous LMS index).
for (int i = 0, x, y; i < n; i++) if (~(x = rk[sa[i]])) {
// The ch < 1 test also keeps y from being read before it is first assigned.
if (ch < 1 || p[x+1] - p[x] != p[y+1] - p[y]) ch++;
else for (int j = p[x], k = p[y]; j <= p[x+1]; j++, k++)
if ((s[j]<<1|t[j]) != (s[k]<<1|t[k])) {ch++; break;}
s1[y = x] = ch;
}
// Names not yet unique: sort the reduced string recursively; otherwise the
// names already give the order directly.
if (ch+1 < n1) sais(n1, ch+1, s1, t+n, p+n1);
else for (int i = 0; i < n1; i++) sa[s1[i]] = i;
// Translate the sorted LMS indices back to text positions and induce the final order.
for (int i = 0; i < n1; i++) s1[i] = p[sa[i]];
inducedSort(s1);
}
// Compresses the values of str[0..n) to dense integers written to s[0..n)
// (smallest value -> 0) and returns the number of distinct values.
// rk is used as scratch space indexed by the raw value, so values must be
// non-negative and smaller than the size of rk.
template<typename T>
int mapCharToInt(int n, const T *str) {
int m = *max_element(str, str+n);
fill_n(rk, m+1, 0);
for (int i = 0; i < n; i++) rk[str[i]] = 1;
for (int i = 0; i < m; i++) rk[i+1] += rk[i];
for (int i = 0; i < n; i++) s[i] = rk[str[i]] - 1;
return rk[m];
}
// Ensure that str[n] is the unique lexicographically smallest character in str.
// Builds sa, rk and ht for str[0..n]. The parameter n shadows the member n and
// is incremented locally so that str[n] takes part in the sort as the sentinel.
template<typename T>
void suffixArray(int n, const T *str) {
//s[n++]='a'-1;
int m = mapCharToInt(++n, str);
sais(n, m, s, t, p);
for (int i = 0; i < n; i++) rk[sa[i]] = i;
// LCP computation in text order, reusing h-1 from the previous position.
// The sentinel position n-1 is skipped, so rk[i]-1 is expected to be >= 0.
for (int i = 0, h = ht[0] = 0; i < n-1; i++) {
int j = sa[rk[i]-1];
while (i+h < n && j+h < n && s[i+h] == s[j+h]) h++;
// Intentional assignment: store h, then decrement it only when non-zero.
if (ht[rk[i]] = h) h--;
}
}
// Debug helper: prints rk, sa (with the suffix text) and ht.
inline void PR(){
string p(ss);
for(int i=0;i<n;++i)//i∈[0,n) rank[i]∈[1,n]
printf("Rank[%d]:%d\n",i,rk[i]);
for(int i=0;i<=n;++i){// sa[0] is the sentinel suffix; for i∈[1,n], sa[i]∈[0,n)
printf("sa[%d]:%d ",i,sa[i]);
cout<<p.substr(sa[i])<<endl;
}
for(int i=1;i<=n;++i)//i∈[1,n] ht[1]=0
printf("ht[%d]:%d\n",i,ht[i]);
}
// Builds the sparse table over ht[1..n]: lg[i] = floor(log2(i)) and
// dp[i][j] = min(ht[i .. i+2^j-1]). The parameter n shadows the member n.
inline void ST(int n){
for(int i=2;i<=n;++i)lg[i]=lg[i>>1]+1;
for(int i=1;i<=n;++i)dp[i][0]=ht[i];
for(int j=1;(1<<j)<=n;++j){
for(int i=1;i+(1<<j)-1<=n;++i){
dp[i][j]=min(dp[i][j-1],dp[i+(1<<(j-1))][j-1]);
}
}
}
// Minimum of ht over the ranks [l,r]; requires l <= r and a prior call to ST().
inline int RMQ(int l,int r){
int k=lg[r-l+1];
return min(dp[l][k],dp[r-(1<<k)+1][k]);
}
// Reads the text, then m query strings, from stdin. Everything is concatenated
// with '#' separators into ss and one suffix array is built over it. For each
// query the maximal rank interval [L,R] around the query's own suffix whose
// members share at least sz[i] leading characters with it is found by two
// binary searches over RMQ, and the number of ranks in that interval whose
// suffix starts inside the text is printed.
void solve(){
scanf("%s",tt);
st[1]=0;
sz[1]=strlen(tt);
int &x=sz[1];
rep(j,0,x-1){
ss[n++]=tt[j];
}
ss[n++]='#';
int m;
sci(m);
rep(i,2,m+1){
scanf("%s",tt);
st[i]=n;
sz[i]=strlen(tt);
int &x=sz[i];
rep(j,0,x-1){
ss[n++]=tt[j];
}
ss[n++]='#';
}
suffixArray(n, ss);
//PR();
ST(n);
// sum[r]: how many of the ranks 1..r belong to suffixes starting inside the text.
rep(i,1,n)sum[i]=sum[i-1]+(sa[i]<sz[1]);
rep(i,2,m+1){
// p: rank of the suffix that starts at the beginning of query i.
int p=rk[st[i]];
int l=1,r=p,L,R;
// Smallest mid with min(ht[mid..p]) >= sz[i]; ends at p+1 when there is none.
while(l<=r){
int mid=l+(r-l)/2;
if(RMQ(mid,p)>=sz[i])r=mid-1;
else l=mid+1;
}
// ht[mid] links rank mid to rank mid-1, so the interval starts one rank earlier.
L=l-1;
l=p+1,r=n;
//printf("l:%d r:%d\n",l,r);
// Largest mid with min(ht[p+1..mid]) >= sz[i]; r stays at p when there is none.
while(l<=r){
int mid=l+(r-l)/2;
//printf("mid:%d rmq:%d\n",mid,RMQ(p+1,mid));
if(RMQ(p+1,mid)>=sz[i]){
l=mid+1;
}
else r=mid-1;
}
R=r;
//printf("i:%d st:%d p:%d sz:%d L:%d R:%d\n",i,st[i],p,sz[i],L,R);
printf("%d\n",sum[R]-sum[L-1]);
}
// rep(i,2,m+1){
// }
}
}sa;
// Entry point: the global suffix-array object reads the input and prints one
// count per query. Falling off the end of main yields exit status 0.
int main(){
    sa.solve();
}