后缀数组求不同的子串数。。
每一个子串都是某个后缀的前缀,每加入一个后缀都会增加 n-sa[ i ] 个子串,但是有h[ i ]个子串会是重复的,所以对每增加的一个后缀会产生 n-sa[ i ]-h[ i ]个不同的子串
Time Limit: 1000MS | Memory Limit: Unknown | 64bit IO Format: %lld & %llu |
Description
Given a string, we need to find the total number of its distinct substrings.
Input
T- number of test cases. T<=20;
Each test case consists of one string, whose length is <= 1000
Output
For each test case output one number saying the number of distinct substrings.
Example
Sample Input:
2
CCCCC
ABABA
Sample Output:
5
9
Explanation for the testcase with string ABABA:
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.
Source
#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
using namespace std;
const int maxn=10000;
int sa[maxn],rank[maxn],rank2[maxn],h[maxn],c[maxn],*x,*y,ans[maxn],n;
char str[maxn];
bool cmp(int* r,int a,int b,int l,int n)
{
if(r[a]==r[b]&&a+l<n&&b+l<n&&r[a+l]==r[b+l]) return true;
return false;
}
void radix_sort(int n,int sz)
{
for(int i=0;i<sz;i++) c[i]=0;
for(int i=0;i<n;i++) c[x[y[i]]]++;
for(int i=1;i<sz;i++) c[i]+=c[i-1];
for(int i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
}
void get_sa(char c[],int n,int sz=128)
{
x=rank,y=rank2;
for(int i=0;i<n;i++) x[i]=c[i],y[i]=i;
radix_sort(n,sz);
for(int len=1;len<n;len<<=1)
{
int yid=0;
for(int i=n-len;i<n;i++) y[yid++]=i;
for(int i=0;i<n;i++) if(sa[i]>=len) y[yid++]=sa[i]-len;
radix_sort(n,sz);
swap(x,y);
x[sa[0]]=yid=0;
for(int i=1;i<n;i++)
{
x[sa[i]]=cmp(y,sa[i],sa[i-1],len,n)?yid:++yid;
}
sz=yid+1;
if(sz>=n) break;
}
for(int i=0;i<n;i++) rank[i]=x[i];
}
void get_h(char str[],int n)
{
int k=0;h[0]=0;
for(int i=0;i<n;i++)
{
if(rank[i]==0) continue;
k=max(k-1,0);
int j=sa[rank[i]-1];
while(i+k<n&&j+k<n&&str[i+k]==str[j+k]) k++;
h[rank[i]]=k;
}
}
int main()
{
int T_T;
scanf("%d",&T_T);
while(T_T--)
{
scanf("%s",str);
n=strlen(str);
get_sa(str,n); get_h(str,n);
int ans=0;
for(int i=0;i<n;i++)
{
ans+=n-sa[i]-h[i];
}
printf("%d\n",ans);
}
return 0;
}