后缀数组(SA)
sa[i] 排名为i的后缀串的起始点下标
rk[i] 以第i个为起始点往后形成的后缀串的排名
height[i] 排名第i个和排名第i-1个后缀串的最长公共前缀长度
LCP(Longest Common Prefix)是指后缀数组中相邻两个后缀的最长公共前缀的长度,实际可求后缀数组中排名i到j区间内的最长公共前缀。
基数排序
拓展应用
参考论文
最长公共前缀
不同子串的数目
$$\frac{n(n+1)}{2}-\sum_{i=0}^{n-1}\mathrm{Height}[\mathrm{rak}[i]]$$
最长回文子串 O(n)
连续重复子串
最长公共子串 O(A+B)
基本过程
lcp
引自lcp
- 对任意i<=k<=j,有LCP(i,j) = min(LCP(i,k),LCP(k,j))
- LCP(i,j)=min(LCP(k-1,k))(i<k<=j)
- h[i]>=h[i-1]-1,其中 h[i]=height[rk[i]](即起点为i的后缀与排名在它前一位的后缀的最长公共前缀长度)
模板
模板1
#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;
// Capacity shared by every buffer declared below.
const int MAX=1e6+5;
// n: length of the text stored in s[1..n] (set in main).
// m: largest key value the counting sort has to provide a bucket for.
int n,m;
// tax: counting-sort buckets.
// rak, tp: the two work buffers main hands to get_sa.
// sa: the suffix array, filled for indices 1..n.
int tax[MAX],rak[MAX],tp[MAX],sa[MAX];
// Input text; main reads it starting at index 1.
char s[MAX];
// Counting sort used by get_sa.
// Writes the entries of b[1..n] into sa[1..n], ordered by the key a[b[i]].
// Entries with equal keys keep the relative order they had in b.
// Keys are expected to lie in 0..m; tax is the bucket array.
void sort(int a[],int b[]){
    // Empty the buckets for every key value 0..m.
    int key=0;
    while(key<=m)tax[key++]=0;
    // Count how often each key occurs among positions 1..n.
    for(int pos=1;pos<=n;++pos)++tax[a[pos]];
    // Turn the counts into "last slot for this key" via prefix sums.
    for(key=1;key<=m;++key)tax[key]=tax[key-1]+tax[key];
    // Walk b backwards so that equal keys are placed back-to-front.
    for(int idx=n;idx>0;--idx){
        const int start=b[idx];
        int &slot=tax[a[start]];
        sa[slot]=start;
        --slot;
    }
}
// Returns true when positions a and b carry the same pair of keys in r:
// the key at the position itself and the key k entries further on.
// The second pair is only inspected when the first pair already matches.
bool comp(int r[],int a,int b,int k){
    if(r[a]!=r[b])return false;
    return r[a+k]==r[b+k];
}
// Builds the suffix array of s[1..n] into the global sa[1..n] by prefix
// doubling with counting sort.
//   a, b : two work buffers; both are read at index n+1, so each must hold
//          at least n+2 ints. Their roles (current rank / second-key order)
//          are swapped every round.
// Uses the globals n, m, s, sa and the helpers sort() and comp().
void get_sa(int a[],int b[]){
    // Start from an empty alphabet so a repeated call does not inherit the
    // bucket range of a previous run.
    m=0;
    for(int i=1;i<=n;i++){
        // Go through unsigned char: every character of a C string then maps
        // to a key in 1..255. Key 0 stays reserved for "past the end of the
        // text", and no character can produce a negative bucket index.
        a[i]=static_cast<unsigned char>(s[i]);
        if(a[i]>m)m=a[i];
        b[i]=i;
    }
    // comp() looks one step past the text; that slot must read as rank 0.
    a[n+1]=b[n+1]=0;
    sort(a,b);
    // j is the length already sorted on; p counts the distinct ranks and
    // becomes the bucket range (m) of the next round.
    for(int p=0,j=1;p<n;j<<=1,m=p){
        p=0;
        // The last j suffixes have an empty second half, so they come first.
        for(int i=1;i<=j;i++)b[++p]=n-j+i;
        // The rest are listed in the order of their second half.
        for(int i=1;i<=n;i++)if(sa[i]>j)b[++p]=sa[i]-j;
        sort(a,b);
        // Old ranks move to b; a receives the ranks for length 2*j.
        int *t=a;a=b;b=t;
        a[sa[1]]=p=1;
        for(int i=2;i<=n;i++)
            a[sa[i]]=comp(b,sa[i],sa[i-1],j)?p:++p;
    }
}
// Reads one whitespace-delimited token into s[1..], builds its suffix array
// and prints sa[1..n], each value followed by a single space.
int main(){
    scanf("%s",s+1);
    n=strlen(s+1);
    get_sa(rak,tp);
    int idx=1;
    while(idx<=n){
        printf("%d ",sa[idx]);
        ++idx;
    }
}
模板2
#include <bits/stdc++.h>
using namespace std;
// Length of the text held in a[1..n] (set in main).
int n;
// sa: suffix array filled by SA() for indices 1..n.
// x:  current sort key of each start position (main seeds it with the character codes).
// c:  counting-sort buckets.
// y:  scratch buffer: second-key order during a round, then the new ranks before the swap with x.
int sa[150], x[150], c[150], y[150];
// Input text; main reads it starting at index 1.
char a[150];
// Builds the suffix array of the text whose keys are in x[1..n], writing the
// result to sa[1..n]. Works by prefix doubling; each round is one counting
// sort over the buckets in c. On entry x[i] must lie in 0..128.
inline void SA() {
  int alphabet = 128;  // largest key value that can currently occur in x

  // Round zero: order the start positions by their single-character key.
  for (int v = 0; v <= alphabet; ++v) c[v] = 0;
  for (int pos = 1; pos <= n; ++pos) ++c[x[pos]];
  for (int v = 1; v <= alphabet; ++v) c[v] += c[v - 1];
  for (int pos = n; pos >= 1; --pos) {
    sa[c[x[pos]]] = pos;
    --c[x[pos]];
  }

  int span = 1;  // length of the prefix the keys in x already describe
  while (span <= n) {
    int cnt = 0;
    // Suffixes shorter than span + 1 have no second half: they go first.
    for (int pos = n; pos > n - span; --pos) y[++cnt] = pos;
    // Everyone else follows in the order of the suffix span steps later.
    for (int r = 1; r <= n; ++r) {
      if (sa[r] > span) y[++cnt] = sa[r] - span;
    }

    // Counting sort on the first key, visiting y back to front so the
    // second-key order is kept inside each bucket.
    for (int v = 0; v <= alphabet; ++v) c[v] = 0;
    for (int pos = 1; pos <= n; ++pos) ++c[x[pos]];
    for (int v = 1; v <= alphabet; ++v) c[v] += c[v - 1];
    for (int r = n; r >= 1; --r) {
      const int start = y[r];
      sa[c[x[start]]] = start;
      --c[x[start]];
    }

    // y is free again: fill it with the keys for prefix length 2 * span.
    cnt = 1;
    y[sa[1]] = 1;
    for (int r = 2; r <= n; ++r) {
      const int cur = sa[r];
      const int prev = sa[r - 1];
      // -1 stands for "second half lies past the end of the text".
      const int curTail = (cur + span > n) ? -1 : x[cur + span];
      const int prevTail = (prev + span > n) ? -1 : x[prev + span];
      const bool same = (x[cur] == x[prev]) && (curTail == prevTail);
      if (!same) ++cnt;
      y[cur] = cnt;
    }

    swap(x, y);
    alphabet = cnt;
    span <<= 1;
  }
}
// Reads one whitespace-delimited token, builds its suffix array with SA()
// and prints sa[1..n], each value followed by a single space.
// NOTE(review): SA() only provides buckets for keys 0..128, so the input is
// assumed to be 7-bit ASCII — confirm before feeding other data.
int main() {
  // a has 150 bytes and the text starts at a[1]; 148 characters plus the
  // terminating NUL is the most that fits.
  if (scanf("%148s", a + 1) != 1) return 0;
  n = static_cast<int>(strlen(a + 1));
  for (int i = 1; i <= n; i++) x[i] = a[i];
  SA();
  // Separate the indices; without a delimiter multi-digit values run together.
  for (int i = 1; i <= n; i++) printf("%d ", sa[i]);
  return 0;
}
lcp
#include<cstdio>
#include<cstring>
#include<cmath>
#include<algorithm>
using namespace std;
// maxl: base capacity of the buffers below; maxt: base size of the second dimension of RMQ.
const int maxl=200000,maxt=18;
// len: length of the text processed by make_SA.
// SA:  suffix array (rank -> start position); rk: its inverse (start position -> rank).
// t:   scratch buffer used by make_SA; ha: counting-sort buckets.
// H:   H[r] is the common-prefix length of the suffixes ranked r-1 and r (filled by make_H).
// RMQ: table built in make_H; RMQ[i][j] is the minimum of H over the 2^j ranks starting at i.
int len,SA[maxl+5],rk[maxl+5],t[maxl+5],ha[maxl+5],H[maxl+5],RMQ[maxl+5][maxt+5];
// Text buffer; main reads the input into it starting at index 1.
char now[maxl+5];
// Builds the suffix array of the C string stored at s[1..] by prefix doubling.
// Results go to the globals: len (text length), SA[1..len] (rank -> start)
// and rk[1..len] (start -> rank). t and ha are used as scratch space.
// The text must be short enough that index len+1 is valid in rk and t.
void make_SA(char* s)
{
    int MAX=0;len=strlen(s+1);
    memset(ha,0,sizeof(ha));
    for (int i=1;i<=len;i++)
    {
        // Use the unsigned character code as the first key so that bytes
        // >= 0x80 cannot turn into a negative bucket index.
        rk[i]=static_cast<unsigned char>(s[i]);
        ha[rk[i]]++;
        if (rk[i]>MAX) MAX=rk[i];
    }
    // The rank comparison below looks one slot past the text; make sure it
    // reads as rank 0 even if an earlier, longer text left data there.
    rk[len+1]=0;
    for (int i=1;i<=MAX;i++) ha[i]+=ha[i-1];
    for (int i=len;i>=1;i--) SA[ha[rk[i]]--]=i;
    for (int k=1;k<=len;k<<=1)
    {
        int p=0;
        // Suffixes in the last k positions have an empty second half: first.
        for (int i=len-k+1;i<=len;i++) t[++p]=i;
        // The others follow in the order of the suffix k characters later.
        for (int i=1;i<=len;i++) if (SA[i]>k) t[++p]=SA[i]-k;
        // Counting sort on the first key, keeping the second-key order.
        memset(ha,0,sizeof(ha));
        for (int i=1;i<=len;i++) ha[rk[t[i]]]++;
        for (int i=1;i<=MAX;i++) ha[i]+=ha[i-1];
        for (int i=len;i>=1;i--) SA[ha[rk[t[i]]]--]=t[i];
        // Keep the old ranks in t while rk receives the ranks for length 2k.
        memcpy(t,rk,sizeof(t));
        p=1;rk[SA[1]]=1;
        for (int i=2;i<=len;i++)
            if (t[SA[i-1]]==t[SA[i]]&&t[SA[i-1]+k]==t[SA[i]+k]) rk[SA[i]]=p; else rk[SA[i]]=++p;
        // All ranks distinct: the order is final.
        if (p==len) break;
        MAX=p;
    }
}
// Fills H[1..len] and the table RMQ from the suffix array in SA / rk.
// H[r] is the length of the common prefix of the suffixes ranked r-1 and r;
// RMQ[i][j] is the minimum of H over the 2^j consecutive ranks starting at i.
// Characters are compared in the global buffer now, so now must hold the text
// that make_SA was run on.
void make_H()
{
    // Nothing to compute for an empty text (also keeps the log below defined).
    if (len<=0) return;
    int k=0;
    for (int i=1;i<=len;i++)
    {
        // Going from suffix i-1 to suffix i loses at most one matching character.
        if (k) k--;
        if (rk[i]==1)
        {
            // The smallest suffix has no predecessor; record 0 explicitly so
            // no stale value is left in these slots.
            RMQ[1][0]=H[1]=0;
            continue;
        }
        int j=SA[rk[i]-1];
        while (now[i+k]==now[j+k]) k++;
        RMQ[rk[i]][0]=H[rk[i]]=k;
    }
    // Largest ln with 2^ln <= len, computed on integers.
    int ln=0;
    while ((1<<(ln+1))<=len) ln++;
    for (int j=1;j<=ln;j++)
        for (int i=1;i<=len-(1<<j)+1;i++)
            RMQ[i][j]=min(RMQ[i][j-1],RMQ[i+(1<<(j-1))][j-1]);
}
int LCP(int x,int y) //求后缀x和后缀y的LCP
{
x=SA[rk[x]];y=SA[rk[y]];if (x>y) swap(x,y);x++;int j=log2(y-x+1);
return min(RMQ[x][j],RMQ[y-(1<<j)+1][j]);
}
// Reports whether ch terminates a line: line feed (10), carriage return (13),
// or a value that compares equal to EOF.
bool Eoln(char ch)
{
    switch (ch)
    {
        case 10:
        case 13:
            return true;
        default:
            return ch==EOF;
    }
}
// Reads one line from stdin into s[1..] and NUL-terminates it; s[0] is left
// untouched. The line ends at LF, CR or end of input, and the terminator is
// consumed but not stored.
// Returns 2 when the input is already at end-of-file, 0 otherwise.
// The caller must supply a buffer large enough for the line; no bound is
// checked here.
int reads(char* s)
{
    // Keep the result of getchar() in an int: only then can EOF be told apart
    // from a data byte of value 0xFF, whatever the signedness of char.
    int c=getchar();
    if (c==EOF) return 2;
    int pos=0;
    while (c!=EOF&&c!=10&&c!=13)
    {
        s[++pos]=static_cast<char>(c);
        c=getchar();
    }
    s[pos+1]=0;
    return 0;
}
// Writes x to stdout in decimal.
//   x  : value to print; negative values get a leading '-'.
//   fl : false for the outermost call; the recursion passes true so that a
//        remaining 0 prints nothing and the sign is written only once.
void writei(int x,bool fl=false)
{
    if (x==0) {if (fl==false) putchar('0');return;}
    if (x<0&&fl==false) putchar('-');
    // Division truncates toward zero, so the higher digits keep the sign of x.
    writei(x/10,true);
    // Take the magnitude of the remainder instead of negating x itself, which
    // would overflow for the most negative int.
    int digit=x%10;
    if (digit<0) digit=-digit;
    putchar(digit+48);
}
// Redirects stdin/stdout to LCP.in / LCP.out, reads one line of text into
// now[1..], then builds the suffix array and the H / RMQ tables for it.
int main()
{
    // Route the standard streams through the task files.
    freopen("LCP.in","r",stdin);
    freopen("LCP.out","w",stdout);

    // Load the text.
    reads(now);

    // Preprocess it.
    make_SA(now);
    make_H();

    return 0;
}