New Distinct Substrings(后缀数组)
给定一个字符串,求不相同的子串的个数。\(n<=50005\)。
显然,任何一个子串一定是后缀上的前缀。先(按套路)把后缀排好序,对于当前的后缀\(S_i\),显然他有\(n-sa[i]\)个前缀。其中,有\(height[i]\)个前缀字符串在编号比它小的后缀中出现过,因此它对答案的贡献是\(n-sa[i]-height[i]\)。
#include <cstdio>
#include <cstring>
using namespace std;
const int maxn=50005;
int T, n, m=maxn;
char s[maxn];
bool cmp(int *r, int a, int b, int j){
return r[a]==r[b]&&r[a+j]==r[b+j]; }
int *x, *y, *t, wa[maxn], wb[maxn], ws[maxn], sa[maxn], wv[maxn], ht[maxn];
void Suffixsort(char *r){
int i, j, p=0; x=wa, y=wb; m=maxn;
for (i=0; i<m; ++i) ws[i]=0;
for (i=0; i<n; ++i) ++ws[x[i]=r[i]];
for (i=1; i<m; ++i) ws[i]+=ws[i-1];
for (i=0; i<n; ++i) sa[--ws[r[i]]]=i;
for (j=1; p<n&&j<n; j<<=1, m=p+1){
for (p=0, i=n-j; i<n; ++i) y[p++]=i;
for (i=0; i<n; ++i) if (sa[i]>=j) y[p++]=sa[i]-j;
for (i=0; i<n; ++i) wv[i]=x[y[i]];
for (i=0; i<m; ++i) ws[i]=0;
for (i=0; i<n; ++i) ++ws[x[i]];
for (i=1; i<m; ++i) ws[i]+=ws[i-1];
for (i=n-1; i>0; --i) sa[--ws[wv[i]]]=y[i]; //这句话依然是背下来的
t=x; x=y; y=t; x[sa[0]]=1;
for (p=1, i=1; i<n; ++i)
x[sa[i]]=cmp(y, sa[i-1], sa[i], j)?p:++p; //+1
}
memset(ht, 0, sizeof(ht));
for (int i=0; i<n; ++i) --x[i]; p=0;
for (int i=0; i<n; ht[x[i++]]=p){
if (!x[i]) continue;
for (p?p--:0, j=sa[x[i]-1]; r[i+p]==r[j+p]&&i+p<n; ++p);
} ht[0]=0;
return;
}
int main(){
scanf("%d", &T); int ans;
while (T--) {
scanf("%s", s); n=strlen(s);
Suffixsort(s); ans=0;
//for (int i=0; i<n; ++i) printf("%d ", sa[i]); puts("");
//for (int i=1; i<n; ++i) printf("%d ", ht[i]);
for (int i=0; i<n; ++i)
ans+=n-sa[i]-ht[i];
printf("%d\n", ans);
}
return 0;
}