Given a string, we need to find the total number of its distinct substrings.
Input
T- number of test cases. T<=20;
Each test case consists of one string, whose length is <= 1000
Output
For each test case output one number saying the number of distinct substrings.
Example
Sample Input:
2
CCCCC
ABABA
Sample Output:
5
9
Explanation for the testcase with string ABABA:
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.
题目解释的很清楚,样例解释也非常清楚
就是在一个字符串中求不相同的子串的个数
分析:
每一个子串一定是某个后缀的前缀,那么原问题等价于求所有后缀之间不相同的前缀的个数。如果所有的后缀按照
suffix(sa[1]),suffix(sa[2]),......,suffix(sa[n])的顺序计算,不难发现,对于每一次新加进来的后缀suffix(sa[k]),它将产生
n - sa[k] + 1个新的前缀。但是其中有height[k]个是和前面的字符串中的前缀是相同的。所以suffix(sa[k])将“贡献”出
n - sa[k] + 1 - height[k]个不同的子串,累加后便是原问题的答案。(这里sa[k]按从1开始的位置计算;下面的代码中位置从0开始编号,因此对应的式子是len - sa[i] - height[i]。)求出后缀数组和height数组之后,累加这一步的时间复杂度为O(n);用倍增法构造后缀数组本身需要O(n log n)。
代码:
#include <cstdio>
#include <iostream>
#include <cstring>
using namespace std;
// Capacity of every global working array below.
const int maxn = 100000 + 10;
// sa[i]     : start index of the suffix placed at sorted position i (filled by get_sa).
// rankk[p]  : sorted position of the suffix starting at p, i.e. rankk[sa[i]] == i (filled by get_height).
// height[i] : length of the common prefix of the suffixes at sorted positions i-1 and i (filled by get_height).
int sa[maxn],rankk[maxn],height[maxn];
// Scratch space for get_sa: wa/wb hold the current and next rank/ordering arrays (they are swapped
// every round), Ws holds the counting-sort bucket counts, wv holds the sort key of each entry.
int wa[maxn],wb[maxn],Ws[maxn],wv[maxn];
// Raw input token read by main.
char str[maxn];
// Tells whether positions a and b carry the same key pair in r:
// the values at a and b themselves, and the values l steps further on.
// Returns 1 when both pairs match, 0 otherwise. The second pair is only
// inspected when the first pair already matches.
int cmp (int *r,int a,int b,int l){
    if (r[a] != r[b]) {
        return 0;
    }
    return (r[a + l] == r[b + l]) ? 1 : 0;
}
/*
 * Builds the suffix array of r[0..n-1] into the global sa[] using prefix
 * doubling: suffixes are first ordered by one symbol, then each round doubles
 * the compared prefix length j by counting-sorting on (rank, rank + j) pairs.
 *
 *   r : input symbols; every r[i] must lie in [0, m) because it is used
 *       directly as a bucket index into Ws.
 *   n : number of symbols in r. main passes the text plus a trailing 0, so the
 *       last symbol is smaller than every real character.
 *       NOTE(review): the rank comparison through cmp() appears to rely on that
 *       unique smallest terminator to stay inside the first n entries.
 *   m : exclusive upper bound on the symbol values (bucket count for round 0).
 *
 * Uses the global scratch arrays wa, wb, Ws and wv; the result is left in sa.
 */
void get_sa (int *r,int n,int m){
    int i, j, p, *x = wa, *y = wb, *t;

    // Round 0: counting sort of all suffixes by their first symbol.
    for (i = 0; i < m; i++) Ws[i] = 0;
    for (i = 0; i < n; i++) Ws[x[i] = r[i]]++;
    for (i = 1; i < m; i++) Ws[i] += Ws[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--Ws[x[i]]] = i;

    // Each round sorts by the first 2*j symbols; stop once all n ranks differ (p == n).
    for (j = 1, p = 1; p < n; j *= 2, m = p){
        // y = suffix starts ordered by the second key (rank at position + j).
        // The last j suffixes have no second key and therefore come first.
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) if (sa[i] >= j) y[p++] = sa[i] - j;

        // Stable counting sort of y by the first key (current rank x[]).
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) Ws[i] = 0;
        for (i = 0; i < n; i++) Ws[wv[i]]++;
        // Start at 1: bucket 0 has no predecessor, and i = 0 would read Ws[-1].
        for (i = 1; i < m; i++) Ws[i] += Ws[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--Ws[wv[i]]] = y[i];

        // Swap the rank buffers and assign new ranks: neighbours in sa with an
        // identical (first key, second key) pair share a rank.
        t = x;
        x = y;
        y = t;
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++){
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
        }
    }
}
/*
 * Fills the global rankk[] and height[] arrays from the already computed sa[].
 *
 *   r : the symbol array that was handed to get_sa (text followed by its terminator).
 *   n : text length without the terminator; sorted positions 1..n are processed.
 *
 * rankk[p] becomes the sorted position of the suffix starting at p, and
 * height[rankk[p]] the number of leading symbols that suffix shares with the
 * suffix one position earlier in sa. The match length is carried over from one
 * start index to the next (minus one) instead of being recomputed from zero.
 */
void get_height(int *r,int n){
    for (int pos = 1; pos <= n; ++pos) {
        rankk[sa[pos]] = pos;
    }

    int matched = 0;
    for (int start = 0; start < n; ++start) {
        // Dropping the first symbol of the previous suffix loses at most one matching symbol.
        if (matched != 0) {
            --matched;
        }
        const int prev = sa[rankk[start] - 1];
        while (r[start + matched] == r[prev + matched]) {
            ++matched;
        }
        height[rankk[start]] = matched;
    }
}
// Integer copy of the input string, followed by a 0 terminator, that main passes to get_sa and get_height.
int a[maxn];
/*
 * Reads the number of test cases T, then one whitespace-delimited string per
 * test case, and prints the number of distinct substrings of each string.
 *
 * For every sorted suffix position i in 1..len, the suffix contributes
 * (len - sa[i]) prefixes, of which height[i] already occurred in the previous
 * suffix, so the answer is the sum of len - sa[i] - height[i].
 */
int main (){
    int T;
    // Bail out on a missing or malformed count instead of looping on an uninitialised T.
    if (scanf("%d",&T) != 1) return 0;

    while (T-- > 0){
        // The field width keeps the token inside str (maxn = 100010 bytes);
        // %s skips leading whitespace on its own, so no separate getchar() is needed.
        if (scanf("%100000s",str) != 1) break;
        int len = static_cast<int>(strlen(str));

        // Convert through unsigned char so bytes >= 128 never become negative bucket indices.
        for (int i = 0; i < len; i++){
            a[i] = static_cast<unsigned char>(str[i]);
        }
        // Terminator smaller than every real character; it always sorts to sa[0].
        a[len] = 0;

        const int m = 256;   // symbol values are 0..255
        get_sa(a,len + 1,m);
        get_height(a,len);

        // Up to len*(len+1)/2 substrings: exceeds int for the lengths the buffers allow.
        long long ans = 0;
        for (int i = 1; i <= len; i++){
            ans += len - sa[i] - height[i];
        }
        printf("%lld\n",ans);
    }
    return 0;
}
相对于其它的后缀数组应用,自我感觉这个还算简单。。。