【思路要点】
- 首先在后缀 $Trie$ 上考虑本题,考虑枚举不同的位置在后缀 $Trie$ 上对应的节点,则后接的字符串应当对应相同。
- 那么,维护各节点 $i$ 子树中长度为 $M$ 的字符串的后 $M-depth_i$ 位的哈希值,则后接字符串对应相同即为哈希值对应相同,启发式合并即可更新答案。
- 需要对哈希表进行整体加的操作,因此需要为每个哈希表维护一个全局加减标记。
- 不难发现上述算法可以直接拓展到后缀树上,只需要额外实现一个字符串区间哈希值查询即可。
- 时间复杂度 $O(N\log N\times\alpha)$ ,其中 $\alpha$ 为使用 $unordered\_map$ 的常数因子。
【代码】
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <unordered_map>
#include <utility>
#include <vector>
using namespace std;

// Capacity of the per-position arrays (s, ind, base, pre); positions are 1-based.
const int MAXN = 2e5 + 5;
// Base of the polynomial hash. All hash arithmetic is done in unsigned 64-bit
// integers, so it wraps modulo 2^64.
const int P = 1e5 + 3;
typedef long long ll;
typedef long double ld;
typedef unsigned long long ull;

// Sets x to max(x, y).
template <typename T> void chkmax(T &x, T y) {x = max(x, y); }
// Sets x to min(x, y).
template <typename T> void chkmin(T &x, T y) {x = min(x, y); }

// Reads one (optionally negative) decimal integer from stdin into x.
// Every '-' seen while skipping leading non-digit characters flips the sign.
// The character is held in an int so that EOF is distinguishable from data and
// isdigit() never receives a negative char; at EOF the loops stop and x keeps
// whatever was parsed so far (0 if nothing).
template <typename T> void read(T &x) {
    x = 0; int f = 1;
    int c = getchar();
    for (; c != EOF && !isdigit(c); c = getchar()) if (c == '-') f = -f;
    for (; c != EOF && isdigit(c); c = getchar()) x = x * 10 + c - '0';
    x *= f;
}

// Writes the decimal representation of x to stdout, one digit per putchar.
template <typename T> void write(T x) {
    if (x < 0) x = -x, putchar('-');
    if (x > 9) write(x / 10);
    putchar(x % 10 + '0');
}

// write(x) followed by a newline.
template <typename T> void writeln(T x) {
    write(x);
    puts("");
}

// ans[h] accumulates, for the length-m window whose hash is h, the number of
// windows counted as matching it (exact copies are added in dfs, copies that
// differ at exactly one branching position are added in work).
unordered_map <ull, int> ans;
char s[MAXN];              // input string, stored from index 1
int n, m, ind[MAXN];       // n, m from input; ind[i] = symbol code of s[i] (A/C/G/T -> 1..4, else 0)
ull base[MAXN], pre[MAXN]; // base[i] = P^i, pre[i] = hash of s[1..i]

// Hash of s[l..r] (1-based, inclusive). For r == l - 1 the result is 0.
ull queryHash(int l, int r) {
    return pre[r] - pre[l - 1] * base[r - l + 1];
}

namespace SuffixAutomaton {
    const int MAXN = 4e5; // node capacity; main only accepts n <= MAXN / 2
    const int MAXC = 5;   // symbol codes are 0..4
    int root, size, last;
    vector <int> a[MAXN];               // a[v] = children of v in the fail-link tree
    int child[MAXN][MAXC], home[MAXN];  // transitions (0 = absent); home[v] = a start position in s of v's string
    int fail[MAXN], depth[MAXN], cnt[MAXN];
    // scnt[v] maps (stored key) -> multiplicity; the real hash value of an
    // entry is key + delta[v]. delta[v] is a lazy "add to every key" tag so a
    // whole table can be shifted in O(1).
    unordered_map <ull, int> scnt[MAXN];
    ull delta[MAXN];

    // Small-to-large merge of table y into table x, honouring both lazy tags.
    // If x is the smaller table, the tables and tags are swapped first so that
    // the loop always iterates over the smaller one. scnt[y] is not cleared.
    void merge(int x, int y) {
        if (scnt[x].size() < scnt[y].size()) {
            swap(delta[x], delta[y]);
            swap(scnt[x], scnt[y]);
        }
        for (auto v : scnt[y])
            scnt[x][v.first + delta[y] - delta[x]] += v.second;
    }

    // Debug helper: dumps node id, its lazy tag and its raw table to stderr.
    void debug(int pos) {
        cerr << pos << ' ' << delta[pos] << endl;
        for (auto x : scnt[pos])
            cerr << x.first << ' ' << x.second << endl;
        cerr << endl;
    }

    // Processes the subtree of pos (callers only pass nodes with depth < m).
    // NOTE(review): recursion depth follows the height of the fail-link tree;
    // confirm the stack limit is adequate for the largest inputs.
    void work(int pos) {
        // Step 1: make every child's table hold the hashes of window offsets
        // depth[pos]+1 .. m-1 (0-based offsets inside the length-m window).
        for (auto x : a[pos]) {
            if (depth[x] >= m)
                // Child already spans the whole window: one entry, weighted by cnt[x].
                scnt[x][queryHash(home[x] + depth[pos] + 1, home[x] + m - 1)] += cnt[x];
            else {
                work(x);
                // Prepend the edge characters after the first one (offsets
                // depth[pos]+1 .. depth[x]-1) to every hash in x's table.
                delta[x] += queryHash(home[x] + depth[pos] + 1, home[x] + depth[x] - 1) * base[m - depth[x]];
            }
            //debug(x);
        }
        // Step 2: two different children differ at offset depth[pos]. Entries
        // with equal real hash value in both tables therefore share a prefix
        // and a suffix and differ in exactly that one character.
        for (unsigned i = 0; i < a[pos].size(); i++)
        for (unsigned j = i + 1; j < a[pos].size(); j++) {
            int x = a[pos][i], y = a[pos][j];
            if (scnt[x].size() < scnt[y].size()) swap(x, y); // iterate the smaller table
            //debug(x), debug(y);
            for (auto v : scnt[y]) {
                if (scnt[x].count(v.first + delta[y] - delta[x])) {
                    int inc = scnt[x][v.first + delta[y] - delta[x]], jnc = v.second;
                    // Rebuild the full window hash on each side (prefix of
                    // depth[pos]+1 characters plus the shared suffix) and
                    // credit it with the multiplicity found on the other side.
                    ans[v.first + delta[y] + queryHash(home[y], home[y] + depth[pos]) * base[m - depth[pos] - 1]] += inc;
                    ans[v.first + delta[y] + queryHash(home[x], home[x] + depth[pos]) * base[m - depth[pos] - 1]] += jnc;
                }
            }
        }
        // Step 3: prepend the character at offset depth[pos] to each child's
        // hashes and fold the child's table into pos.
        for (auto x : a[pos]) {
            delta[x] += queryHash(home[x] + depth[pos], home[x] + depth[pos]) * base[m - depth[pos] - 1];
            merge(pos, x);
        }
    }

    // Allocates a node with the given depth, fail link 0 and no transitions;
    // returns its index.
    int newnode(int dep) {
        fail[size] = 0;
        depth[size] = dep;
        memset(child[size], 0, sizeof(child[size]));
        return size++;
    }

    // Appends symbol ch to the automaton; `from` is recorded as home of the
    // new node. Index 0 doubles as "no transition", which works because the
    // root is node 0 and fail[0] == 0: the first loop stops at the root right
    // after installing its transition to np.
    void extend(int ch, int from) {
        int p = last, np = newnode(depth[last] + 1);
        while (child[p][ch] == 0) {
            child[p][ch] = np;
            p = fail[p];
        }
        if (child[p][ch] == np) fail[np] = root;
        else {
            int q = child[p][ch];
            if (depth[q] == depth[p] + 1) fail[np] = q;
            else {
                // Split q: nq is a copy of q with the shorter depth.
                int nq = newnode(depth[p] + 1);
                fail[nq] = fail[q];
                fail[q] = fail[np] = nq;
                memcpy(child[nq], child[q], sizeof(child[q]));
                while (child[p][ch] == q) {
                    child[p][ch] = nq;
                    p = fail[p];
                }
            }
        }
        cnt[last = np]++;
        home[np] = from;
    }

    // Post-order pass over the fail-link tree: sums cnt into parents, copies a
    // home position up from a child, and for every edge that crosses depth m
    // adds cnt[x] to the answer of that child's length-m window.
    // NOTE(review): recursive; same stack-depth caveat as work().
    void dfs(int pos) {
        for (auto x : a[pos]) {
            dfs(x);
            cnt[pos] += cnt[x];
            home[pos] = home[x];
            if (depth[pos] < m && depth[x] >= m)
                ans[queryHash(home[x], home[x] + m - 1)] += cnt[x];
        }
    }

    // Builds the automaton by inserting s[n], s[n-1], ..., s[1] (so home[] is
    // the start position of the corresponding suffix), builds the fail-link
    // tree and runs dfs from the root.
    void init(int *s) {
        size = 0, root = last = newnode(0);
        for (int i = n; i >= 1; i--)
            extend(s[i], i);
        for (int i = 1; i < size; i++)
            a[fail[i]].push_back(i);
        dfs(0);
    }
}

// Reads n, m and the string, then prints for every window start i in
// 1..n-m+1 the value ans[hash of s[i..i+m-1]] - 1 (the -1 removes the window
// itself), each followed by a space.
int main() {
    read(n), read(m);
    // The automaton arrays hold SuffixAutomaton::MAXN nodes and the
    // construction may create up to about 2n of them.
    if (n > SuffixAutomaton :: MAXN / 2) return 1;
    // No window of length m exists; m < 1 would also index base[] negatively.
    if (n < 1 || m < 1 || m > n) return 0;
    // Bound the read to the buffer: s + 1 has room for MAXN - 2 characters
    // plus the terminating NUL.
    char fmt[16];
    snprintf(fmt, sizeof(fmt), "%%%ds", MAXN - 2);
    if (scanf(fmt, s + 1) != 1) return 1;
    base[0] = 1;
    for (int i = 1; i <= n; i++) {
        if (s[i] == 'A') ind[i] = 1;
        if (s[i] == 'C') ind[i] = 2;
        if (s[i] == 'G') ind[i] = 3;
        if (s[i] == 'T') ind[i] = 4;
        pre[i] = pre[i - 1] * P + ind[i];
        base[i] = base[i - 1] * P;
    }
    SuffixAutomaton :: init(ind);
    SuffixAutomaton :: work(0);
    for (int i = 1; i <= n - m + 1; i++)
        printf("%d ", ans[queryHash(i, i + m - 1)] - 1);
    return 0;
}