题意:
求一个字符串中不同子串的个数。
思路:
后缀数组算法:
每一个子串都是某一个后缀的前缀,那么问题转换为 求 所有后缀的不同前缀个数。
如果按照后缀数组 sa[0], sa[1], sa[2], ..., sa[k] 的顺序依次访问各个后缀,
那么访问 sa[k] 时便会新加入 n - sa[k] 个前缀(下标从 0 开始;若下标从 1 开始,则为 n - sa[k] + 1 个)。
这些前缀中有 height[k] 个(即该后缀与排名相邻的前一个后缀的最长公共前缀长度)与之前统计过的重复,减去即可。
#include <cstdio>
#include <cstring>
#include <algorithm>
using namespace std;
// Array capacity: the longest allowed string (1000 characters) plus slack for
// the terminating NUL and the extra "empty suffix" slot at index n.
const int maxn = 1010;

// Length of the current string.
int n;
// Current comparison width used by cmp() during prefix doubling.
int k;

// Rank[i]: rank of the suffix starting at i (also reused as the inverse of sa).
int Rank[maxn];
// The input string (NUL-terminated).
char s[maxn];
// Scratch buffer holding the next round's ranks while build_sa() runs.
int tmp[maxn];
// sa[r]: start index of the suffix with rank r (index n is the empty suffix).
int sa[maxn];
// lcp[r]: longest common prefix length of the suffixes sa[r] and sa[r + 1].
int lcp[maxn];
// Strict-weak ordering on suffix start positions for the current doubling
// width k: compares the key pair (Rank[pos], Rank[pos + k]).
// A position whose second half would start beyond index n has no second key;
// it is given -1 so that it sorts before every real rank.
// Reads the file-level Rank, k and n.
bool cmp(int i,int j){
    if (Rank[i] == Rank[j]) {
        // First halves tie: decide on the rank of the half that follows.
        const int second_i = (i + k > n) ? -1 : Rank[i + k];
        const int second_j = (j + k > n) ? -1 : Rank[j + k];
        return second_i < second_j;
    }
    return Rank[i] < Rank[j];
}
// Builds the suffix array of s[0..n-1] by prefix doubling.
//
// s  - the input string; the file-level n must already hold its length.
// sa - output array with room for n + 1 entries. On return sa[r] is the start
//      index of the suffix of rank r; the empty suffix (start index n) is
//      included and always lands at sa[0].
//
// Uses the file-level Rank, tmp and k as working storage. Each round is one
// std::sort with cmp, and k doubles every round, so the total cost is
// O(n log^2 n) comparisons.
void build_sa(char* s,int * sa){
    for (int i = 0; i <= n; ++i){
        sa[i] = i;
        // Initial rank is the character code. Widen through unsigned char:
        // where plain char is signed, bytes >= 0x80 would otherwise produce
        // ranks <= -1 and collide with (or sort below) the -1 sentinel that
        // must keep the empty suffix strictly first.
        Rank[i] = i < n ? static_cast<unsigned char>(s[i]) : -1;
    }
    for (k = 1; k <= n; k *= 2){
        // Sort by the first 2k characters using the ranks for the first k.
        std::sort(sa, sa+n+1, cmp);

        // Re-rank: neighbouring suffixes get the same rank unless cmp says
        // the earlier one is strictly smaller.
        tmp[sa[0] ] = 0;
        for (int i = 1; i <= n; ++i){
            tmp[sa[i] ] = tmp[sa[i-1] ] + (cmp(sa[i-1], sa[i]) ? 1:0);
        }
        for (int i = 0; i <= n; ++i) {
            Rank[i] = tmp[i];
        }
    }
}
// Fills lcp from a finished suffix array.
//
// s   - the string the suffix array was built from (length in file-level n).
// sa  - suffix array with n + 1 entries, as produced by build_sa.
// lcp - output: for every rank r that is written, lcp[r] is the length of the
//       longest common prefix of the suffixes starting at sa[r] and sa[r + 1].
//
// Overwrites the file-level Rank with the inverse permutation of sa.
// The running match length is carried from one start position to the next
// and only decreased by one, so the scan does not restart from zero each time.
void get_height(char* s, int* sa, int* lcp){
    // Invert the suffix array: Rank[start] = position of that suffix in sa.
    for (int pos = 0; pos <= n; ++pos) {
        Rank[sa[pos]] = pos;
    }

    lcp[0] = 0;
    int matched = 0;
    for (int start = 0; start < n; ++start) {
        const int slot = Rank[start] - 1;   // rank of the predecessor in sa
        const int prev = sa[slot];          // start index of that predecessor

        // Dropping the first character loses at most one matched character.
        if (matched > 0) {
            --matched;
        }
        while (prev + matched < n && start + matched < n
               && s[prev + matched] == s[start + matched]) {
            ++matched;
        }
        lcp[slot] = matched;
    }
}
int main(){
int T;
scanf("%d",&T);
while(T--){
memset(tmp,0,sizeof tmp);
memset(sa,0,sizeof sa);
memset(lcp,0,sizeof lcp);
memset(Rank,0,sizeof Rank);
// memset(tmp,0,sizeof tmp);
scanf("%s",s);
n = strlen(s);
build_sa(s, sa);
get_height(s,sa,lcp);
int ans = 0;
// for (int i = 1; i <= n; ++i) printf("%d %d\n", sa[i],lcp[i]);
for (int i = 1; i <= n; ++i){
ans += n-sa[i];
ans -= lcp[i];
}
printf("%d\n",ans);
}
return 0;
}
DISUBSTR - Distinct Substrings
Given a string, we need to find the total number of its distinct substrings.
Input
T - the number of test cases (T <= 20).
Each test case consists of one string whose length is <= 1000.
Output
For each test case output one number saying the number of distinct substrings.
Example
Sample Input:
2
CCCCC
ABABA
Sample Output:
5
9
Explanation for the testcase with string ABABA:
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.