解法一:后缀数组+set
国家集训队论文《后缀数组——处理字符串的有力工具》中介绍过,长度为 $n$ 的字符串不同的非空子串个数,等于
$$\sum_{i=1}^{n}\bigl(n - sa[i] + 1 - height[i]\bigr)$$
由于 $\sum_{i=1}^{n} sa[i] = \frac{n(n+1)}{2}$,因此整理式子得到
$$\frac{n(n+1)}{2} - \sum_{i=1}^{n} height[i]$$
因此考虑怎样动态地维护 $height$ 以及 $height$ 的和。为了方便考虑,假定每次不是从 $S$ 的末尾加入字符(因为这样所有的后缀都会变),而是从 $S$ 的开头(最左端)加入字符(把整个串翻转之后,本质不同的子串个数不变,所以在开头加入和在末尾加入是等价的)。
首先一次加入所有字符,求出后缀数组。然后用一个set,以字典序为关键字维护后缀。
而对于任意一个 $1 \le x \le n$,子串 $[x,n]$ 的后缀数组 $SA'$,和全串的后缀数组 $SA$,任意一对 $x \le i, j \le n$ 且 $rank'[i] + 1 = rank'[j]$(即后缀 $i$ 在 $SA'$ 中恰好排在后缀 $j$ 的前一位),有:
$$height'[j] = \min_{k = rank[i] + 1}^{rank[j]} height[k]$$
同时,在 set 中插入一个后缀,最多只会有 $2$ 个后缀的 $height'$ 值被更改(即刚刚插入的后缀和它在 set 中的后继)。这样就能在 set 上统计贡献,得到 $height'$ 的和。
代码:
#include <set>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <algorithm>
using namespace std;
// Reads the next decimal integer from stdin.
// Skips every character that is neither a digit nor '-', then parses an
// optional leading '-' followed by digits. The first character after the
// number is consumed and discarded.
// Returns 0 when end of input is reached before any digit or '-' is found.
inline int read() {
    // getchar() returns int so that EOF is distinguishable from every byte;
    // storing it in a char made the skip loop spin forever at end of input.
    int c = getchar();
    while (c != EOF && c != '-' && (c < '0' || c > '9')) c = getchar();
    if (c == EOF) return 0;

    const bool negative = (c == '-');
    int res = negative ? 0 : c - '0';
    while ((c = getchar()) >= '0' && c <= '9')
        res = res * 10 + (c - '0');
    return negative ? -res : res;
}
typedef long long ll;

// N bounds every per-position array; LogN bounds the number of sparse-table levels.
const int N = 1e5 + 5;
const int LogN = 21;

int n;              // number of values read by main
int s[N];           // the sequence (1-based), stored reversed and coordinate-compressed by main
int sa[N];          // sa[r]: start position of the suffix with rank r
int rank[N];        // rank[p]: rank of the suffix starting at p
int height[N];      // height[r]: LCP of the suffixes with ranks r-1 and r
int RMQ[N][LogN];   // sparse table of range minima over height
int Log[N];         // Log[v]: floor(log2(v)), with Log[0] = -1
int w[N];           // counting-sort buckets used while building sa
int a[N];           // sorted copy of the input used for coordinate compression
ll ans;             // running sum of LCPs between neighbouring inserted suffixes
void buildSA() {
int i, j, k, m = n; int *x = rank, *y = height;
for (i = 1; i <= n; i++) w[x[i] = s[i]]++;
for (i = 2; i <= m; i++) w[i] += w[i - 1];
for (i = 1; i <= n; i++) sa[w[x[i]]--] = i;
for (k = 1; k < n; k <<= 1, swap(x, y)) {
int tt = 0; for (i = n - k + 1; i <= n; i++) y[++tt] = i;
for (i = 1; i <= n; i++) if (sa[i] > k) y[++tt] = sa[i] - k;
memset(w, 0, sizeof(w));
for (i = 1; i <= n; i++) w[x[i]]++;
for (i = 2; i <= m; i++) w[i] += w[i - 1];
for (i = n; i; i--) sa[w[x[y[i]]]--] = y[i];
m = 0; for (i = 1; i <= n; i++) {
int u = sa[i], v = sa[i - 1];
y[u] = x[u] != x[v] || x[u + k] != x[v + k] ? ++m : m;
}
if (m == n) break;
}
if (y != rank) copy(y, y + n + 1, rank);
for (i = 1, k = height[1] = 0; i <= n; i++) {
if (k) k--; j = sa[rank[i] - 1];
while (s[i + k] == s[j + k]) k++;
height[rank[i]] = k;
}
Log[0] = -1; for (i = 1; i <= n; i++) Log[i] = Log[i >> 1] + 1;
for (i = 1; i <= n; i++) RMQ[i][0] = height[i];
for (j = 1; j <= 19; j++)
for (i = 1; i + (1 << j) - 1 <= n; i++)
RMQ[i][j] = min(RMQ[i][j - 1], RMQ[i + (1 << j - 1)][j - 1]);
}
// Minimum of height[l..r] (inclusive), answered from the RMQ sparse table by
// combining the two power-of-two windows that cover the range.
// Requires l <= r: for an empty range Log[0] is -1 and the lookup is invalid.
int query(int l, int r) {
    const int level = Log[r - l + 1];
    const int leftMin = RMQ[l][level];
    const int rightMin = RMQ[r - (1 << level) + 1][level];
    return rightMin < leftMin ? rightMin : leftMin;
}
struct comp {
inline bool operator () (const int &a, const int &b) {
return rank[a] < rank[b];
}
};
set<int, comp> cyx;
set<int, comp>::iterator it;
// Returns the first element of cyx and leaves `it` pointing at it.
// The set must not be empty.
int mins() {
    return *(it = cyx.begin());
}
// Returns the last element of cyx and leaves `it` pointing at it.
// The set must not be empty.
int maxs() {
    it = cyx.end();
    --it;
    return *it;
}
// Returns the element of cyx immediately before x, or -1 when the lookup
// lands on the first element. `it` is left on the returned element
// (or on begin() when -1 is returned).
int pre(int x) {
    it = cyx.find(x);
    if (it == cyx.begin()) return -1;
    --it;
    return *it;
}
// Returns the element of cyx immediately after x, or -1 when x is the last
// element or is not in the set at all.
int suf(int x) {
    it = cyx.find(x);
    // find() yields end() for a missing key; incrementing end() is undefined.
    if (it == cyx.end()) return -1;
    ++it;
    return it == cyx.end() ? -1 : *it;
}
void ins(int x) {
cyx.insert(x); int l = pre(x), r = suf(x);
if (l != -1) ans += query(rank[l] + 1, rank[x]);
if (r != -1) ans += query(rank[x] + 1, rank[r]);
if (l != -1 && r != -1) ans -= query(rank[l] + 1, rank[r]);
}
// Reads n and then n values, stores them reversed and coordinate-compressed,
// builds the suffix structures once, then inserts suffixes from the shortest
// to the longest. After each insertion it prints len*(len+1)/2 - ans, where
// len is the number of suffixes inserted so far.
int main() {
    n = read();
    // Fill s from the back so that the first value read ends up at s[n].
    for (int pos = n; pos != 0; --pos) s[pos] = a[pos] = read();

    // Coordinate compression: replace every value by its 1-based index among
    // the sorted distinct values.
    std::sort(a + 1, a + n + 1);
    const int m = std::unique(a + 1, a + n + 1) - a - 1;
    for (int pos = 1; pos <= n; ++pos)
        s[pos] = std::lower_bound(a + 1, a + m + 1, s[pos]) - a;

    buildSA();
    for (int pos = n; pos != 0; --pos) {
        ins(pos);
        const int len = n - pos + 1;
        printf("%lld\n", (1ll * len * (len + 1) >> 1) - ans);
    }
    return 0;
}
解法二:后缀自动机
由于此题是不断往后添加字符,因此可以使用后缀自动机在线处理,边构建边统计答案。
在后缀自动机中,一个状态 $s$ 能表示出的本质不同的子串数量为:
$$maxl[s] - maxl[fa[s]]$$
其中 $maxl[s]$ 是状态 $s$ 能表示的最长子串的长度,$fa[s]$ 是 $s$ 在 Parent 树上的父亲。
又由于每一次添加字符之后 Parent 树上最多只有 $3$ 条边被改变,因此可以在 Parent 树上统计答案。注意字符集会很大,因此要用 map 存转移。
代码:
#include <map>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <algorithm>
using namespace std;
// Reads the next decimal integer from stdin.
// Skips every character that is neither a digit nor '-', then parses an
// optional leading '-' followed by digits. The first character after the
// number is consumed and discarded.
// Returns 0 when end of input is reached before any digit or '-' is found.
inline int read() {
    // getchar() returns int so that EOF is distinguishable from every byte;
    // storing it in a char made the skip loop spin forever at end of input.
    int c = getchar();
    while (c != EOF && c != '-' && (c < '0' || c > '9')) c = getchar();
    if (c == EOF) return 0;

    const bool negative = (c == '-');
    int res = negative ? 0 : c - '0';
    while ((c = getchar()) >= '0' && c <= '9')
        res = res * 10 + (c - '0');
    return negative ? -res : res;
}
typedef long long ll;

// Capacity of the per-state arrays of the automaton.
const int N = 2e5 + 5;

int n;                      // number of values read by main
int fa[N];                  // fa[v]: parent of state v in the Parent tree
int maxl[N];                // maxl[v]: length stored for state v
int QAQ;                    // index of the most recently allocated state
int lst;                    // state created for the most recently added symbol
std::map<int, int> go[N];   // go[v][c]: target of the transition from v on symbol c
ll ans;                     // running total updated by ins
// Appends symbol x to the suffix automaton and keeps `ans` equal to the sum
// of maxl[v] - maxl[fa[v]] over the states whose parent edge exists.
// State 1 is used as the root (parent of a state with no matching ancestor).
void ins(int x) {
    int p = lst;
    const int cur = ++QAQ;
    lst = cur;
    maxl[cur] = maxl[p] + 1;

    // Walk up the parent links, adding the missing x-transition to cur.
    while (p && !go[p][x]) {
        go[p][x] = cur;
        p = fa[p];
    }

    // No ancestor has an x-transition: cur hangs directly under the root.
    if (!p) {
        fa[cur] = 1;
        ans += maxl[cur];
        return;
    }

    const int q = go[p][x];
    // q already has exactly the required length: reuse it as cur's parent.
    if (maxl[p] + 1 == maxl[q]) {
        fa[cur] = q;
        ans += maxl[cur] - maxl[q];
        return;
    }

    // Otherwise split q: the clone takes q's transitions and old parent.
    const int clone = ++QAQ;
    fa[clone] = fa[q];
    maxl[clone] = maxl[p] + 1;
    go[clone] = go[q];

    // Remove q's old parent edge from the total before re-parenting it ...
    ans -= maxl[q] - maxl[fa[q]];
    fa[q] = clone;
    fa[cur] = clone;
    // ... then add the three edges that now exist: clone, cur and q.
    ans += maxl[clone] - maxl[fa[clone]];
    ans += maxl[cur] - maxl[clone];
    ans += maxl[q] - maxl[clone];

    // Redirect the x-transitions that still point at q to the clone.
    while (p && go[p][x] == q) {
        go[p][x] = clone;
        p = fa[p];
    }
}
// Reads n and then n symbols; after adding each symbol to the automaton,
// prints the current value of `ans` on its own line.
int main() {
    n = read();
    // State 1 is the root and also the initial "last" state.
    QAQ = lst = 1;
    for (int step = 1; step <= n; ++step) {
        const int x = read();
        ins(x);
        printf("%lld\n", ans);
    }
    return 0;
}