推荐:
AcWing 1282. 搜索关键词(Trie+KMP+bfs)
AcWing 1053. 修复DNA【线性DP+AC自动机模型+优化思路】
讲的很清楚,基本上把算法提高课这一节的内容,从KMP到AC自动机到优化版都讲明白了(虽然我还不是很懂但讲的比我好比我详细,强推!)
(而且我修复DNA的代码还是跟着打的…嗯可以直接看这俩链接不用往下看了
KMP求next数组:
next[0] = next[1] = 0;
for (int i = 2; i <= m; i++)
{
int j = next[i - 1];
while (j && p[i] != p[j + 1]) j = next[j];
if (p[i] == p[j + 1]) j++;
next[i] = j;
}
改成AC自动机时,将链式结构对应到树形结构,这里的一个字母对应同深度的所有节点,算第i层用到前i-1层信息,应该按深度遍历
while (队列不为空)
{
t = q[hh + 1];//对应第i - 1层
for (c = 0; c < 26; c++)//枚举儿子
{
ch = tr[t, c];//对应第i层
j = next[t];
while (j && !tr[j, c])//j的儿子不存在c
j = next[j];
if (tr[j, c]) j = tr[j, c];
next[ch] = j;
把ch加入队列
}
}
题目:AcWing1282. 搜索关键词
(没买算法提高课的别看了做不了)
代码:(手写queue时把循环条件写成了h < t——t指向队尾元素,所以应为h <= t,否则队尾节点不会被处理;遍历时条件用了i < strlen(str)——每次判断都要重新扫描整个字符串,总复杂度变成O(n^2),这就是一直超时的原因,改成判断str[i]即可)
#include <iostream>
#include <cstdio>
#include <cstring>
#include <string>
using namespace std;
const int N = 10005, M = 1000005, S = 55;
char str[M];
int ne[N*S], cnt[N*S], q[N*S];
int tr[N*S][26];
int idx;
// Adds the word currently stored in the global buffer `str` to the trie,
// allocating new nodes on demand, and increments the word counter of the
// node where the word ends.
void insert()
{
    int node = 0;
    for (const char *s = str; *s; ++s)
    {
        int &slot = tr[node][*s - 'a'];
        if (slot == 0) slot = ++idx; // first time this edge is used
        node = slot;
    }
    ++cnt[node];
}
void build()
{
int h = 0, t = -1;
for (int i = 0; i < 26; i++)
if (tr[0][i]) q[++t] = tr[0][i];
while (h <= t)
{
int x = q[h++];
for (int i = 0; i < 26; i++)
{
int ch = tr[x][i];
if (!ch) continue;
int j = ne[x];
while (j && !tr[j][i]) j = ne[j];
if (tr[j][i]) j = tr[j][i];
ne[ch] = j;
q[++t] = ch;
}
}
}
// Reads T test cases. Each case consists of n keywords followed by one text;
// prints the sum of the counters of all keyword nodes that occur in the text
// (each node is counted at most once).
int main()
{
    int T, n;
    if (scanf("%d", &T) != 1) return 0;
    while (T--)
    {
        // Only nodes 0..idx were written by the previous case, so clearing
        // those rows is sufficient; wiping the whole arrays on every case
        // is wasted work (tr alone is N*S*26 ints).
        const size_t used = static_cast<size_t>(idx) + 1;
        memset(tr, 0, used * sizeof tr[0]);
        memset(ne, 0, used * sizeof ne[0]);
        memset(cnt, 0, used * sizeof cnt[0]);
        idx = 0;
        if (scanf("%d", &n) != 1) break;
        for (int i = 0; i < n; i++)
        {
            scanf("%s", str);
            insert();
        }
        scanf("%s", str);
        build();
        int res = 0;
        for (int i = 0, j = 0; str[i]; i++)
        {
            int c = str[i] - 'a';
            // Fall back along failure links until a node with a c-edge
            // exists (or the root is reached), then follow that edge.
            while (j && !tr[j][c]) j = ne[j];
            if (tr[j][c]) j = tr[j][c];
            // Every node on the failure chain of j is a suffix of the text
            // read so far; collect the keywords ending there.
            int p = j;
            while (p)
            {
                res += cnt[p];
                cnt[p] = 0; // make sure each keyword node is counted once
                p = ne[p];
            }
        }
        printf("%d\n", res);
    }
    return 0;
}
优化:即使在没有匹配时,也要将trie树构建完整,这样求next数组以及匹配时就不用跳多次
#include <iostream>
#include <cstdio>
#include <cstring>
#include <string>
using namespace std;
const int N = 10005, M = 1000005, S = 55;
char str[M];
int ne[N*S], cnt[N*S], q[N*S];
int tr[N*S][26];
int idx;
// Inserts the word held in the global buffer `str` into the trie and
// increments the end-of-word counter of its last node.
void insert()
{
    int cur = 0;
    int pos = 0;
    while (str[pos] != '\0')
    {
        const int letter = str[pos++] - 'a';
        if (tr[cur][letter] == 0)
            tr[cur][letter] = ++idx; // allocate a fresh node for this edge
        cur = tr[cur][letter];
    }
    cnt[cur] += 1;
}
void build()
{
int h = 0, t = -1;
for (int i = 0; i < 26; i++)
if (tr[0][i]) q[++t] = tr[0][i];
while (h <= t)
{
int x = q[h++];
for (int i = 0; i < 26; i++)
{
int ch = tr[x][i];
if (!ch) tr[x][i] = tr[ne[x]][i];
else
{
ne[ch] = tr[ne[x]][i];
q[++t] = ch;
}
}
}
}
// Reads T test cases. Each case consists of n keywords followed by one text;
// prints the sum of the counters of all keyword nodes that occur in the text
// (each node is counted at most once).
int main()
{
    int T, n;
    if (scanf("%d", &T) != 1) return 0;
    while (T--)
    {
        // insert() and build() only ever write rows 0..idx, so it is enough
        // to clear that prefix instead of the full arrays on every case.
        const size_t used = static_cast<size_t>(idx) + 1;
        memset(tr, 0, used * sizeof tr[0]);
        memset(ne, 0, used * sizeof ne[0]);
        memset(cnt, 0, used * sizeof cnt[0]);
        idx = 0;
        if (scanf("%d", &n) != 1) break;
        for (int i = 0; i < n; i++)
        {
            scanf("%s", str);
            insert();
        }
        build();
        scanf("%s", str);
        int res = 0;
        for (int i = 0, j = 0; str[i]; i++)
        {
            int c = str[i] - 'a';
            j = tr[j][c]; // edges are complete after build(): one step per character
            // Collect every keyword that ends at the current position by
            // walking the failure chain of j.
            int p = j;
            while (p)
            {
                res += cnt[p];
                cnt[p] = 0; // make sure each keyword node is counted once
                p = ne[p];
            }
        }
        printf("%d\n", res);
    }
    return 0;
}
题目:AcWing 1053. 修复DNA
#include <iostream>
#include <cstdio>
#include <string>
#include <cstring>
using namespace std;
const int N = 1005, INF = 0x3f3f3f3f;
int tr[N][4], ne[N], cnt[N], idx;
int q[N];
char str[N];
int f[N][N];
// Maps a nucleotide letter to a trie edge index: 'A' -> 0, 'T' -> 1,
// 'C' -> 2, and anything else -> 3.
int get(char c)
{
    switch (c)
    {
    case 'A': return 0;
    case 'T': return 1;
    case 'C': return 2;
    default:  return 3;
    }
}
// Inserts the pattern held in the global buffer `str` into the trie and
// increments the counter of the node where the pattern ends.
void insert()
{
    int x = 0;
    // Stop at the terminating NUL instead of calling strlen() in the loop
    // condition, which rescans the whole string on every iteration and
    // compares a signed index with an unsigned length.
    for (int i = 0; str[i]; i++)
    {
        int ch = get(str[i]);
        if (!tr[x][ch]) tr[x][ch] = ++idx;
        x = tr[x][ch];
    }
    cnt[x]++;
}
void build()
{
int h = 0, t = -1;
for (int i = 0; i < 4; i++)
if (tr[0][i]) q[++t] = tr[0][i];
while (h <= t)
{
int x = q[h++];
for (int i = 0; i < 4; i++)
{
int ch = tr[x][i];
if (!ch) tr[x][i] = tr[ne[x]][i];
else
{
ne[ch] = tr[ne[x]][i];
q[++t] = ch;
}
}
}
}
// Processes test cases until a pattern count of 0 (or end of input).
// For each case: reads n forbidden patterns and one text, then prints the
// minimum number of characters to change so that the automaton never
// enters a state containing a pattern, or -1 if that is impossible.
int main()
{
    int T = 1;
    int n;
    // scanf returns EOF (-1, which is truthy) when input runs out, so the
    // result has to be compared with 1 explicitly.
    while (scanf("%d", &n) == 1 && n)
    {
        idx = 0;
        memset(ne, 0, sizeof ne);
        memset(tr, 0, sizeof tr);
        memset(cnt, 0, sizeof cnt);
        for (int i = 0; i < n; i++)
        {
            scanf("%s", str);
            insert();
        }
        build();
        scanf("%s", str);
        const int len = static_cast<int>(strlen(str));
        // f[i][j]: minimum changes so that the first i characters lead to
        // automaton state j; 0x3f3f3f3f (INF) marks an unreachable state.
        memset(f, 0x3f, sizeof f);
        f[0][0] = 0;
        for (int i = 0; i < len; i++)
        {
            const int want = get(str[i]);
            for (int j = 0; j <= idx; j++)
            {
                // An unreachable state cannot improve any successor.
                if (f[i][j] == INF) continue;
                for (int k = 0; k < 4; k++)
                {
                    int cost = (want != k);
                    int p = tr[j][k];
                    // The move is allowed only if no node on the failure
                    // chain of p ends a pattern.
                    int fl = 1;
                    int t = p;
                    while (t)
                    {
                        if (cnt[t])
                        {
                            fl = 0;
                            break;
                        }
                        t = ne[t];
                    }
                    if (fl) f[i + 1][p] = min(f[i + 1][p], f[i][j] + cost);
                }
            }
        }
        int res = INF;
        for (int j = 0; j <= idx; j++)
            res = min(res, f[len][j]);
        if (res == INF) res = -1;
        printf("Case %d: %d\n", T, res);
        T++;
    }
    return 0;
}
优化版,其实就是把求解时沿next一直跳的过程在build里预处理了,妙
#include <iostream>
#include <cstdio>
#include <string>
#include <cstring>
using namespace std;
const int N = 1005, INF = 0x3f3f3f3f;
int tr[N][4], ne[N], cnt[N], idx;
int q[N];
char str[N];
int f[N][N];
// Converts a nucleotide letter into its edge index:
// 'A' -> 0, 'T' -> 1, 'C' -> 2, every other character -> 3.
int get(char c)
{
    return c == 'A' ? 0
         : c == 'T' ? 1
         : c == 'C' ? 2
                    : 3;
}
// Inserts the pattern held in the global buffer `str` into the trie and
// flags the node where the pattern ends (cnt = 1).
void insert()
{
    int x = 0;
    // Stop at the terminating NUL instead of calling strlen() in the loop
    // condition, which rescans the whole string on every iteration and
    // compares a signed index with an unsigned length.
    for (int i = 0; str[i]; i++)
    {
        int ch = get(str[i]);
        if (!tr[x][ch]) tr[x][ch] = ++idx;
        x = tr[x][ch];
    }
    cnt[x] = 1;
}
void build()
{
int h = 0, t = -1;
for (int i = 0; i < 4; i++)
if (tr[0][i]) q[++t] = tr[0][i];
while (h <= t)
{
int x = q[h++];
for (int i = 0; i < 4; i++)
{
int ch = tr[x][i];
if (!ch) tr[x][i] = tr[ne[x]][i];
else
{
ne[ch] = tr[ne[x]][i];
cnt[ch] |= cnt[ne[ch]];
q[++t] = ch;
}
}
}
}
// Processes test cases until a pattern count of 0 (or end of input).
// For each case: reads n forbidden patterns and one text, then prints the
// minimum number of characters to change so that the automaton never
// enters a flagged state, or -1 if that is impossible.
int main()
{
    int T = 1;
    int n;
    // scanf returns EOF (-1, which is truthy) when input runs out, so the
    // result has to be compared with 1 explicitly.
    while (scanf("%d", &n) == 1 && n)
    {
        idx = 0;
        memset(ne, 0, sizeof ne);
        memset(tr, 0, sizeof tr);
        memset(cnt, 0, sizeof cnt);
        for (int i = 0; i < n; i++)
        {
            scanf("%s", str);
            insert();
        }
        build();
        scanf("%s", str);
        const int len = static_cast<int>(strlen(str));
        // f[i][j]: minimum changes so that the first i characters lead to
        // automaton state j; 0x3f3f3f3f (INF) marks an unreachable state.
        memset(f, 0x3f, sizeof f);
        f[0][0] = 0;
        for (int i = 0; i < len; i++)
        {
            const int want = get(str[i]);
            for (int j = 0; j <= idx; j++)
            {
                // An unreachable state cannot improve any successor.
                if (f[i][j] == INF) continue;
                for (int k = 0; k < 4; k++)
                {
                    int cost = (want != k);
                    int p = tr[j][k];
                    // cnt[p] already covers the whole failure chain of p
                    // (propagated in build()), so one lookup is enough.
                    if (!cnt[p]) f[i + 1][p] = min(f[i + 1][p], f[i][j] + cost);
                }
            }
        }
        int res = INF;
        for (int j = 0; j <= idx; j++)
            res = min(res, f[len][j]);
        if (res == INF) res = -1;
        printf("Case %d: %d\n", T, res);
        T++;
    }
    return 0;
}
题目:AcWing 1285. 单词
每个单词s出现多少次实际上就是树上有多少前缀以s为后缀,这样不好算所以可以每个“后缀”对“前缀”节点做贡献,也就是以x为结尾的后缀会对以next[x]结尾的前缀做贡献
每个节点x对next[x]的答案做贡献,所以可以沿着树的底层节点往上跳,顺序就是深度遍历反过来,所以可以直接用队列的逆序(妙啊,要是我已经用stl的queue再用vector倒一次了)
因为要计数的是前缀所以插入时每个字母都要加cnt
(好绕好绕好绕
(以及学到这儿了才发现大家都用fail吗…
#include <iostream>
#include <cstdio>
#include <cstring>
using namespace std;
const int N = 205, M = 1000005;
char str[M];
int tr[M][26], ne[M], cnt[M], idx, id[N];
int q[M], f[M];
// Inserts the word held in the global buffer `str` into the trie,
// incrementing cnt on every node along the path (so cnt counts prefixes),
// and records the word's final node in id[x].
void insert(int x)
{
    int node = 0;
    for (const char *s = str; *s != '\0'; ++s)
    {
        int &slot = tr[node][*s - 'a'];
        if (!slot) slot = ++idx;
        node = slot;
        ++cnt[node];
    }
    id[x] = node;
}
void build()
{
int h = 0, t = -1;
for (int i = 0; i < 26; i++)
if (tr[0][i]) q[++t] = tr[0][i];
while (h <= t)
{
int x = q[h++];
for (int i = 0; i < 26; i++)
{
int ch = tr[x][i];
if (!ch) tr[x][i] = tr[ne[x]][i];
else
{
ne[ch] = tr[ne[x]][i];
q[++t] = ch;
}
}
}
}
int main()
{
idx = 0;
memset(cnt, 0, sizeof cnt);
memset(ne, 0, sizeof ne);
memset(tr, 0, sizeof tr);
int n;
scanf("%d", &n);
for (int i = 0; i < n; i++)
{
scanf("%s", str);
insert(i);
}
build();
scanf("%s", str);
memset(f, 0, sizeof f);
for (int i = idx; i >= 0; i--)
{
int x = q[i];
f[x] += cnt[x];
f[ne[x]] += f[x];
}
for (int i = 0; i < n; i++)
printf("%d\n", f[id[i]]);
return 0;
}