我们定义两个字符串的相似度为它们最长公共前缀的长度。例如,“abc” 与 “abd” 的相似度为 2,“aaa” 与 “aaab” 的相似度为 3。
给出一个字符串 S,计算 S 与它的所有后缀的相似度之和。例如:S = “ababaa”,所有后缀及其与 S 的相似度为:
ababaa 6
babaa 0
abaa 3
baa 0
aa 1
a 1
S同所有后缀的相似度的和 = 6 + 0 + 3 + 0 + 1 + 1 = 11
输入
输入一个字符串S(1 <= L <= 1000000),L为字符串S的长度,且S由a-z的小写字母组成。
输出
输出S同所有后缀的相似度的和。
输入样例
ababaa
输出样例
11
这是一道后缀数组模板题:倍增算法的复杂度为 O(n log n),会超时;需要 O(n) 的 DC3 算法才能卡过去。(正解是 exKMP 算法,但是我不会。)
倍增算法
#include <bits/stdc++.h>
using namespace std;
// Capacity of every buffer: the maximum string length (1e6) plus a little slack,
// since the string and all tables are indexed from 1.
const int maxn = 1e6 + 5;
// Input string; main reads it at s + 1, so the characters live in s[1..n].
char s[maxn];
// height[r]: common-prefix length of the suffixes ranked r and r - 1 (filled by get_Height).
// sa[r]:     start position of the suffix with rank r (filled by ssort).
// tx:        bucket counters for the counting sort in rsort.
// rak[i]:    rank of the suffix that starts at position i (filled by ssort).
int height[maxn], sa[maxn], tx[maxn], rak[maxn];
// One sortable record per suffix: x is the primary key, y the secondary key,
// and id the start position of the suffix the record describes.
// a is the working array; b is the scratch array used between the two rsort passes.
struct node {
int x, y, id;
}a[maxn], b[maxn];
// n: length of s.
// m: largest key value for which rsort clears and accumulates buckets.
// p: number of distinct (x, y) pairs found by the most recent ssort.
int n, m, p;
// Two-pass radix sort of a[1..n] by the key pair (x, y): first distribute by
// the secondary key y into b, then stably distribute by the primary key x
// back into a. tx holds the bucket counters for keys 1..m.
void rsort() {
    // Zero the buckets for keys 1..m. Bucket 0 is deliberately left alone:
    // pass 1 increments and decrements it the same number of times.
    const auto clearBuckets = [] {
        for (int key = 1; key <= m; ++key) {
            tx[key] = 0;
        }
    };
    // Turn per-key counts into the index of the last slot of each bucket.
    const auto toEndOffsets = [] {
        for (int key = 1; key <= m; ++key) {
            tx[key] += tx[key - 1];
        }
    };

    // Pass 1 (a -> b): order by y. Stability is not needed for the first pass.
    clearBuckets();
    for (int idx = 1; idx <= n; ++idx) {
        ++tx[a[idx].y];
    }
    toEndOffsets();
    for (int idx = 1; idx <= n; ++idx) {
        const int slot = tx[a[idx].y]--;
        b[slot] = a[idx];
    }

    // Pass 2 (b -> a): order by x. Walking b backwards while filling each
    // bucket from its end keeps the y-order established by pass 1.
    clearBuckets();
    for (int idx = 1; idx <= n; ++idx) {
        ++tx[b[idx].x];
    }
    toEndOffsets();
    for (int idx = n; idx >= 1; --idx) {
        const int slot = tx[b[idx].x]--;
        a[slot] = b[idx];
    }
}
// Sorts the records with rsort, assigns dense ranks to equal (x, y) pairs,
// and then rebuilds a[] in position order so the next doubling round can use it.
// On return: rak[i] is the rank of suffix i, p and m are the number of
// distinct ranks, a[i] = {rak[i], 0, i}, and sa[rak[i]] = i.
void ssort() {
    rsort();

    // Walk the sorted records; a new rank starts whenever the key pair changes.
    // a[0] is never written, so the first record always opens rank 1.
    p = 0;
    for (int idx = 1; idx <= n; ++idx) {
        const bool sameAsPrevious =
            a[idx].x == a[idx - 1].x && a[idx].y == a[idx - 1].y;
        if (!sameAsPrevious) {
            p++;
        }
        rak[a[idx].id] = p;
    }

    // Re-seed the records by text position for the next round.
    for (int pos = 1; pos <= n; ++pos) {
        const int rankHere = rak[pos];
        sa[rankHere] = pos;
        a[pos].id = pos;
        a[pos].x = rankHere;
        a[pos].y = 0;
    }

    m = p;
}
// Builds rak[] and sa[] for s[1..n] by prefix doubling: each round ranks the
// suffixes by (rank of suffix i, rank of suffix i + step) and stops as soon
// as all n ranks are distinct.
void solve() {
    // Initial keys are the character codes, so 127 covers every bucket needed.
    m = 127;
    for (int pos = 1; pos <= n; ++pos) {
        a[pos].id = pos;
        a[pos].y = s[pos];
        a[pos].x = a[pos].y;
    }
    ssort();

    int step = 1;
    while (step <= n) {
        // Suffixes shorter than step keep y == 0 (set by ssort), which sorts first.
        for (int pos = 1; pos <= n - step; ++pos) {
            a[pos].y = a[pos + step].x;
        }
        ssort();
        if (p == n) {
            return;
        }
        step <<= 1;
    }
}
// Fills height[rak[i]] with the common-prefix length of suffix i and the
// suffix ranked immediately before it, visiting suffixes in text order and
// carrying the previous match length (minus one) forward.
void get_Height() {
    int matched = 0;
    for (int pos = 1; pos <= n; ++pos) {
        if (matched != 0) {
            --matched;
        }
        // Start of the suffix ranked just before suffix pos (sa[0] when pos has rank 1).
        const int neighbour = sa[rak[pos] - 1];
        for (; pos + matched <= n && neighbour + matched <= n; ++matched) {
            if (s[pos + matched] != s[neighbour + matched]) {
                break;
            }
        }
        height[rak[pos]] = matched;
    }
}
// Reads S, builds its suffix array and height table, and prints the sum of
// the common-prefix lengths between S and each of its suffixes.
int main() {
    scanf("%s", s + 1);
    n = strlen(s + 1);
    solve();
    get_Height();

    // Rank of the whole string; every other suffix is reached by walking away
    // from this rank while tracking the minimum height seen on the way.
    const int ownRank = rak[1];
    long long ans = 0;

    // Suffixes ranked after S.
    int mm = 1e9;
    for (int r = ownRank + 1; r <= n; ++r) {
        if (height[r] < mm) {
            mm = height[r];
        }
        ans += mm;
    }

    // Suffixes ranked before S.
    mm = 1e9;
    int r = ownRank;
    while (r >= 1) {
        if (height[r] < mm) {
            mm = height[r];
        }
        ans += mm;
        --r;
    }

    // S matches itself on all n characters.
    cout << ans + n << endl;
    return 0;
}
DC3算法
#pragma GCC optimize(2)
#pragma GCC optimize(3)
#pragma GCC optimize(4)
#include<bits/stdc++.h>
// Every later use of the identifier `rank` is rewritten to `Rank`.
// NOTE(review): presumably this avoids a clash with std::rank under `using namespace std` — confirm.
#define rank Rank
// F maps a text position x (with x % 3 != 0) to its index in the reduced string:
// positions with x % 3 == 1 go to x / 3, all others go to x / 3 + tb.
// Expects a variable named `tb` to be in scope where the macro is used.
#define F(x) ((x)/3+((x)%3==1?0:tb))
// G maps a reduced-string index back to a text position: indices below tb
// become 3 * x + 1, the rest become 3 * (x - tb) + 2. Also reads `tb` from the use site.
#define G(x) ((x)<tb?(x)*3+1:((x)-tb)*3+2)
using namespace std;
// Buffer capacity: three times the maximum input length plus slack.
// get_sa_dc3 recurses on r + n and sa + n, so r and sa need room beyond the top-level length.
const int maxn=1e6*3+5;
// Length of the input string.
int n;
// Raw input string, read 0-indexed by main.
char s[maxn];
// sa[i]:     start position of the i-th smallest suffix (written by get_sa_dc3).
// rank[...]: inverse of sa, filled by get_height (the identifier is renamed to Rank by a macro).
// height[i]: common-prefix length of the suffixes sa[i - 1] and sa[i], filled by get_height.
int sa[maxn],rank[maxn],height[maxn];
// r:      integer copy of the string that is handed to get_sa_dc3.
// wa, wb: position lists used as input/output of the counting sort and the merge.
// wv:     per-call sort keys inside sort(); ranks of the sampled suffixes during the merge.
// wws:    bucket counters for sort().
int r[maxn],wa[maxn],wb[maxn],wv[maxn],wws[maxn];
// Stable counting sort of the n indices in a[] by the key r[a[i]], written to b[].
//   r : key table; every key that is looked up must lie in [0, m)
//   a : input list of indices into r
//   b : output list, receives the same indices in non-decreasing key order
//   n : number of indices
//   m : number of buckets
// Uses the file-level scratch arrays wv (cached keys) and wws (bucket counters).
void sort(int *r,int *a,int *b,int n,int m)
{
    // Cache each element's key so the later passes do a single lookup.
    for (int idx = 0; idx < n; ++idx) {
        wv[idx] = r[a[idx]];
    }

    int key = 0;
    while (key < m) {
        wws[key++] = 0;
    }
    for (int idx = 0; idx < n; ++idx) {
        ++wws[wv[idx]];
    }
    // After this loop wws[k] is one past the last slot of bucket k.
    for (key = 1; key < m; ++key) {
        wws[key] += wws[key - 1];
    }

    // Fill each bucket from its end while scanning backwards: keeps equal keys in input order.
    int idx = n;
    while (idx-- > 0) {
        const int slot = --wws[wv[idx]];
        b[slot] = a[idx];
    }
}
int c0(int *r,int a,int b)
{
return r[a]==r[b]&&r[a+1]==r[b+1]&&r[a+2]==r[b+2];
}
// Ordering test used while merging: returns 1 when the suffix at position a
// sorts before the suffix at position b, and 0 otherwise.
//   k == 2 : compare r[a] with r[b], then r[a+1] with r[b+1], then wv[a+2] with wv[b+2]
//   else   : compare r[a] with r[b], then wv[a+1] with wv[b+1]
// wv is the file-level table consulted once the leading values tie.
int c12(int k,int *r,int a,int b)
{
    if (r[a] != r[b]) {
        return r[a] < r[b];
    }
    // Leading values tie: either step one position forward or fall back to wv.
    if (k == 2) {
        return c12(1, r, a + 1, b + 1);
    }
    return wv[a + 1] < wv[b + 1];
}
// Builds the suffix array of r[0..n-1] into sa[0..n-1] by sorting the suffixes
// that start at positions i with i % 3 != 0 first (recursively), then the
// positions with i % 3 == 0, and finally merging the two sorted lists.
//   r  : input sequence; values index the counting-sort buckets, so they must lie in [0, m).
//        r[n] and r[n+1] are overwritten, and the region starting at r + n is reused
//        as the input of the recursive call.
//   sa : output; the region starting at sa + n is reused as the recursive call's output.
//   n  : number of elements of r to sort.
//   m  : number of buckets passed to sort().
// Uses the file-level scratch arrays wa, wb, wv and the F/G macros (which read the local tb).
// NOTE(review): rn aliases r[n..], so the zero padding written below can be overwritten
// while r is still being compared; this looks safe only if the last element of r is
// unique (main passes a 0 terminator as the last element) — confirm.
void get_sa_dc3(int *r,int *sa,int n,int m)
{
// rn/san: input and output of the recursive call; ta: count of i % 3 == 0 positions collected;
// tb: offset used by F/G to separate the two residue classes; tbc: count of i % 3 != 0 positions.
int i,j,*rn=r+n,*san=sa+n,ta=0,tb=(n+1)/3,tbc=0,p;
// Pad so that reading two elements past the end yields 0.
r[n]=r[n+1]=0;
// Collect every position with i % 3 != 0.
for(i=0; i<n; i++) if(i%3!=0) wa[tbc++]=i;
// Three stable passes keyed by r[i+2], r[i+1], r[i]: wb ends up ordered by the triple at each position.
sort(r+2,wa,wb,tbc,m);
sort(r+1,wb,wa,tbc,m);
sort(r,wa,wb,tbc,m);
// Name the triples: equal neighbouring triples share a name, otherwise the next name p is used.
// The name of position x is stored at rn[F(x)]; p ends as the number of distinct names.
for(p=1,rn[F(wb[0])]=0,i=1; i<tbc; i++) rn[F(wb[i])]=c0(r,wb[i-1],wb[i])?p-1:p++;
// Names not all distinct: sort the reduced sequence recursively. Otherwise the names
// already are the final order, so san is just the inverse of rn.
if(p<tbc) get_sa_dc3(rn,san,tbc,p);
else for(i=0; i<tbc; i++) san[rn[i]]=i;
// For each sorted entry below tb (a position 3*k + 1 per G), emit the position 3*k just before it.
for(i=0; i<tbc; i++) if(san[i]<tb) wb[ta++]=san[i]*3;
// When n % 3 == 1 the last position n-1 is a multiple of 3 and is appended explicitly.
if(n%3==1) wb[ta++]=n-1;
// Stable sort of those positions by their first value; ties keep the order established above.
sort(r,wb,wa,ta,m);
// Convert sorted reduced indices back to text positions (wb) and record each one's rank in wv.
for(i=0; i<tbc; i++) wv[wb[i]=G(san[i])]=i;
// Merge the two sorted lists (wa: positions divisible by 3, wb: the others) into sa.
for(i=0,j=0,p=0; i<ta && j<tbc; p++) sa[p]=c12(wb[j]%3,r,wa[i],wb[j])?wa[i++]:wb[j++];
// Copy whatever remains of either list.
for(; i<ta; p++) sa[p]=wa[i++];
for(; j<tbc; p++) sa[p]=wb[j++];
return;
}
// Fills the file-level rank[] and height[] tables from a finished suffix array.
//   r  : the sequence the suffix array was built from; the matching loop has no
//        bounds check and stops only at the first differing pair of values
//   sa : suffix array; entries sa[1..n] are inverted into rank[], and sa[rank - 1] is read
//   n  : number of suffixes to process (positions 0..n-1)
// height[rank[i]] receives the common-prefix length of suffix i and the suffix
// placed immediately before it in sa.
void get_height(int *r, int *sa, int n)
{
    for (int slot = 1; slot <= n; ++slot) {
        rank[sa[slot]] = slot;
    }

    int matched = 0;
    for (int pos = 0; pos < n; ++pos) {
        // The match length can shrink by at most one when moving to the next position.
        if (matched) {
            matched--;
        }
        const int neighbour = sa[rank[pos] - 1];
        while (r[pos + matched] == r[neighbour + matched]) {
            ++matched;
        }
        height[rank[pos]] = matched;
    }
}
// Reads S, builds its suffix array with DC3 plus the height table, and prints
// the sum over all suffixes of the common-prefix length shared with S.
int main()
{
    scanf("%s", s);
    n = strlen(s);

    // Number of counting-sort buckets: every character code (and the 0
    // terminator appended below) is smaller than 128.
    const int alphabet = 128;

    for (int i = 0; i < n; i++) r[i] = s[i];
    // Terminator that is smaller than every letter; it is sorted as one extra
    // suffix, which is why the length passed on is n + 1.
    r[n] = 0;
    get_sa_dc3(r, sa, n + 1, alphabet);
    get_height(r, sa, n);

    // The common prefix of S with the suffix at sorted slot i is the minimum
    // of the height entries between the two slots, so walk outwards from the
    // slot of S in both directions while keeping a running minimum.
    long long res = 0;
    int mm = 1e9;
    // Only slots 1..n hold real suffixes; the original bound of 3 * n merely
    // added zeros from the untouched tail of height[].
    for (int i = rank[0] + 1; i <= n; i++) {
        mm = min(mm, height[i]);
        res += mm;
    }
    mm = 1e9;
    // Stop at slot 1: slot 0 belongs to the terminator and contributes nothing.
    for (int i = rank[0]; i >= 1; i--) {
        mm = min(mm, height[i]);
        res += mm;
    }

    // S matches itself on all n characters.
    printf("%lld\n", res + n);
    return 0;
}