题目链接: SPOJ - SUBST1 New Distinct Substrings
题目大意
给定一个字符串, 求它的不同(互不重复)子串的个数
思路
先求出所有子串的个数 len*(len+1)/2, 再减去重复的个数。重复的个数可以通过高度数组求出: 对于后缀数组中排名相邻的两个后缀, 如果它们的最长公共前缀长度为 lcp[i], 那么它们贡献了 lcp[i] 个重复子串, 所以只要从总数中减去高度数组所有值之和就好了
注意要用 long long, 否则 len*(len+1) 这个中间结果会溢出 int
代码
#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
using namespace std;
// Capacity of every fixed-size array in this file (input buffer and index arrays).
const int maxn = 5e4+100;
// Shared state for the suffix-array routines in this file:
//   rk  - sort key of each start position: the raw character before the first
//         doubling round, the rank among suffixes afterwards; construct_lcp
//         rewrites it as the inverse of sa.
//   tmp - scratch array holding the ranks for the next doubling round.
//   sa  - start positions of the suffixes (including the empty suffix at n)
//         in sorted order.
//   k   - offset of the second comparison key used by cmp_sa in the current round.
//   lcp - lcp[r] is the common-prefix length of the suffixes sa[r] and sa[r + 1].
//   n   - length of the string currently being processed (set in main).
int rk[maxn], tmp[maxn], sa[maxn], k, lcp[maxn], n;
// Ordering predicate for one prefix-doubling round.
// Position i sorts before position j when the pair (rk[i], rk[i + k]) is
// smaller than (rk[j], rk[j + k]); a second key that would lie past index n
// counts as -1. Reads the globals rk, k and n.
bool cmp_sa(int i, int j)
{
    const int first_i = rk[i];
    const int first_j = rk[j];
    if (first_i == first_j)
    {
        // Tie on the first key: fall back to the key k positions further on.
        const int second_i = (i + k > n) ? -1 : rk[i + k];
        const int second_j = (j + k > n) ? -1 : rk[j + k];
        return second_i < second_j;
    }
    return first_i < first_j;
}
// Builds the suffix array of s[0..n) into the global `sa` by prefix doubling.
//
// The global `n` must already hold the length of `s`. The empty suffix at
// index n is treated as a real entry, so sa[0..n] and rk[0..n] are written;
// on return sa[0] == n and rk[p] is the position of suffix p inside sa.
// `tmp` and `k` are scratch state (k is the key offset read by cmp_sa) and
// carry no meaningful value for the caller afterwards.
//
// Each round costs one std::sort, and the key width doubles every round.
//
// @param s  the text; only s[0..n) is read.
void construct_sa(char *s)
{
    // Round 0: order suffixes by their first character only.
    for (int i = 0; i < n; ++i)
    {
        sa[i] = i;
        // Go through unsigned char: where plain char is signed, bytes >= 0x80
        // would otherwise become keys below the -1 sentinel, the empty suffix
        // would no longer sort first, and construct_lcp would index sa[-1].
        rk[i] = static_cast<unsigned char>(s[i]);
    }
    // The empty suffix gets a key smaller than any character.
    sa[n] = n;
    rk[n] = -1;

    for (k = 1; k <= n; k *= 2)
    {
        // Sort by the first 2k characters using the ranks of the first k.
        std::sort(sa, sa + n + 1, cmp_sa);

        // Re-rank: a suffix gets a new rank only if it is strictly greater
        // than its predecessor under the current comparison.
        tmp[sa[0]] = 0;
        for (int i = 1; i <= n; ++i)
            tmp[sa[i]] = tmp[sa[i - 1]] + (cmp_sa(sa[i - 1], sa[i]) ? 1 : 0);
        std::copy(tmp, tmp + n + 1, rk);

        // All n + 1 suffixes already have distinct ranks: further rounds
        // would reproduce exactly the same order, so stop early.
        if (rk[sa[n]] == n) break;
    }
}
// Fills the global `lcp` from the global `sa` for the text s[0..n).
//
// First overwrites rk with the inverse of sa (rk[sa[p]] = p), then walks the
// start positions 0..n-1 in text order. For each one it compares the suffix
// with the suffix stored immediately before it in sa and stores the number of
// matching leading characters in lcp[rk[start] - 1]. The match length carried
// from the previous start position, minus one, is reused as the starting point
// of the comparison.
//
// @param s  the text; only s[0..n) is read.
void construct_lcp(char *s)
{
    // Invert the suffix array so rk maps a start position to its slot in sa.
    for (int pos = 0; pos <= n; ++pos)
        rk[sa[pos]] = pos;

    lcp[0] = 0;
    int match = 0;
    for (int start = 0; start < n; ++start)
    {
        const int slot = rk[start] - 1;   // slot of the preceding suffix in sa
        const int prev = sa[slot];
        if (match > 0)
            --match;
        // Extend the match while both suffixes have characters left and agree.
        while (prev + match < n && start + match < n
               && s[prev + match] == s[start + match])
            ++match;
        lcp[slot] = match;
    }
}
// Input buffer for the string of the current test case (filled by scanf in main).
char s[maxn];
// Reads a test-case count, then one whitespace-delimited string per case, and
// prints the number of distinct substrings of each string on its own line.
//
// The count is n*(n+1)/2 (all substrings) minus lcp[1] + ... + lcp[n-1], the
// common-prefix lengths of neighbouring non-empty suffixes in sorted order.
// Stops quietly when the input ends early or is malformed.
int main()
{
    int T = 0;
    if (scanf("%d", &T) != 1) return 0;   // no test-case count: nothing to do

    // Build "%<capacity-1>s" so an over-long token cannot overflow the buffer.
    char fmt[32];
    snprintf(fmt, sizeof fmt, "%%%zus", sizeof s - 1);

    // `T-- > 0` also terminates immediately for a negative count.
    while (T-- > 0)
    {
        if (scanf(fmt, s) != 1) break;    // input ended before T strings were read
        n = static_cast<int>(strlen(s));
        construct_sa(s);
        construct_lcp(s);

        // Widen before multiplying: n*(n+1) does not fit in a 32-bit int
        // for the largest inputs.
        long long cnt = (1 + static_cast<long long>(n)) * n / 2;
        for (int i = 1; i < n; ++i) cnt -= lcp[i];

        // '\n' instead of endl: avoids flushing the stream once per test case.
        std::cout << cnt << '\n';
    }
    return 0;
}