1 介绍
本博客用来记录AC自动机相关题目。
AC自动机是以Trie的结构为基础,结合KMP的思想建立的自动机,用于解决多模式匹配等任务。
2 训练
题目1:1282搜索关键词
C++代码如下,
#include <cstdio>
#include <cstring>
#include <iostream>
#include <algorithm>
using namespace std;
const int N = 10010, S = 55, M = 1000010;
int n;
int tr[N*S][26], cnt[N*S], idx;
char str[M];
int q[N*S], ne[N*S];
void insert() {
int p = 0;
for (int i = 0; str[i]; ++i) {
int t = str[i] - 'a';
if (!tr[p][t]) tr[p][t] = ++idx;
p = tr[p][t];
}
cnt[p]++;
}
void build() {
int hh = 0, tt = -1;
for (int i = 0; i < 26; ++i) {
if (tr[0][i]) {
q[++tt] = tr[0][i];
}
}
while (hh <= tt) {
int t = q[hh++];
for (int i = 0; i < 26; ++i) {
int p = tr[t][i];
if (!p) tr[t][i] = tr[ne[t]][i];
else {
ne[p] = tr[ne[t]][i];
q[++tt] = p;
}
}
}
}
// Reads T test cases. Each case consists of n keywords followed by one text;
// prints how many of the inserted keywords occur in the text (every keyword
// node is counted once because its counter is zeroed after the first hit).
int main() {
    int T = 0;  // initialised so a failed read cannot leave it indeterminate
    if (scanf("%d", &T) != 1) return 0;
    while (T--) {
        // Only rows 0..idx were written by the previous case (insert() and
        // build() never touch a node id above idx), so clearing that prefix
        // is enough and avoids wiping the whole table for every case.
        const size_t used = static_cast<size_t>(idx) + 1;
        memset(tr, 0, used * sizeof(tr[0]));
        memset(cnt, 0, used * sizeof(cnt[0]));
        memset(ne, 0, used * sizeof(ne[0]));
        idx = 0;

        if (scanf("%d", &n) != 1) break;
        for (int i = 0; i < n; ++i) {
            if (scanf("%s", str) != 1) return 0;  // truncated input: stop cleanly
            insert();
        }
        build();

        if (scanf("%s", str) != 1) break;
        int res = 0;
        int state = 0;
        for (int i = 0; str[i]; ++i) {
            state = tr[state][str[i] - 'a'];
            // Walk the failure chain and collect every keyword ending here.
            for (int p = state; p; p = ne[p]) {
                res += cnt[p];
                cnt[p] = 0;  // a keyword must not be counted twice
            }
        }
        printf("%d\n", res);
    }
    return 0;
}
使用string类型的find()函数的暴力写法如下,只通过了 12/13 个数据:
#include <iostream>
#include <algorithm>
#include <cstring>
#include <vector>
using namespace std;
// Brute-force variant: for every test case, reads n words and one article and
// prints how many of the words occur in the article as a substring.
int main() {
    int T = 0;
    cin >> T;
    while (T--) {
        int n = 0;
        cin >> n;
        vector<string> words(n);
        for (auto &word : words) cin >> word;
        string article;
        cin >> article;
        int res = 0;
        // Iterate by const reference: copying every word per lookup is wasted work.
        for (const auto &word : words) {
            if (article.find(word) != string::npos) res++;
        }
        cout << res << '\n';  // '\n' instead of endl: no flush per test case
    }
    return 0;
}
题目2:1285单词
C++代码如下,
#include <cstdio>
#include <cstring>
#include <iostream>
#include <algorithm>
using namespace std;
const int N = 1000010;
int n;
int tr[N][26], f[N], idx;
int q[N], ne[N];
char str[N];
int id[210];
void insert(int x) {
int p = 0;
for (int i = 0; str[i]; i++) {
int t = str[i] - 'a';
if (!tr[p][t]) tr[p][t] = ++idx;
p = tr[p][t];
f[p]++;
}
id[x] = p;
}
void build() {
int hh = 0, tt = -1;
for (int i = 0; i < 26; ++i) {
if (tr[0][i]) {
q[++tt] = tr[0][i];
}
}
while (hh <= tt) {
int t = q[hh++];
for (int i = 0; i < 26; ++i) {
int &p = tr[t][i];
if (!p) p = tr[ne[t]][i];
else {
ne[p] = tr[ne[t]][i];
q[++tt] = p;
}
}
}
}
int main() {
scanf("%d", &n);
for (int i = 0; i < n; ++i) {
scanf("%s", str);
insert(i);
}
build();
for (int i = idx - 1; i >= 0; --i) f[ne[q[i]]] += f[q[i]];
for (int i = 0; i < n; ++i) printf("%d\n", f[id[i]]);
return 0;
}
使用KMP逐个单词匹配的写法如下,该写法会超时,只通过了 9/11 个数据:
#include <cstring>
#include <iostream>
#include <algorithm>
#include <vector>
#include <string>
using namespace std;
// Counts the (possibly overlapping) occurrences of pattern_str inside
// match_str with the KMP algorithm and prints the count, followed by a
// newline, on standard output.
//
// pattern_str: the pattern to look for; only read, never modified.
// match_str:   the text to scan; only read, never modified.
//
// The strings are indexed in place instead of being prefixed with a sentinel
// character and stripped again, so no copies of the (potentially very long)
// text are made, and matches are counted without storing their positions.
void kmp(string &pattern_str, string &match_str) {
    const size_t n = pattern_str.size();
    const size_t m = match_str.size();

    if (n == 0) {
        // Kept for backward compatibility: an empty pattern is reported once
        // per character of match_str.
        cout << m << '\n';
        return;
    }

    // fail[k] = length of the longest proper prefix of the first k pattern
    // characters that is also a suffix of them.
    vector<size_t> fail(n + 1, 0);
    for (size_t i = 1, j = 0; i < n; ++i) {
        while (j > 0 && pattern_str[i] != pattern_str[j]) j = fail[j];
        if (pattern_str[i] == pattern_str[j]) ++j;
        fail[i + 1] = j;
    }

    size_t matches = 0;
    for (size_t i = 0, j = 0; i < m; ++i) {
        while (j > 0 && match_str[i] != pattern_str[j]) j = fail[j];
        if (match_str[i] == pattern_str[j]) ++j;
        if (j == n) {
            ++matches;
            j = fail[j];  // fall back so overlapping matches are found too
        }
    }
    cout << matches << '\n';
}
// Reads n words, joins them into one text with '?' after every word, and
// calls kmp() once per word so that it prints how often the word occurs in
// that text.
int main() {
    int n = 0;
    cin >> n;
    vector<string> words(n);
    for (auto &word : words) cin >> word;

    string article;
    for (const auto &word : words) {
        // Append in place; `word + "?"` would build a temporary per word.
        article += word;
        article += '?';
    }

    // kmp() takes non-const references, so iterate by reference instead of
    // copying every word before the call.
    for (auto &word : words) {
        kmp(word, article);
    }
    return 0;
}
题目3:1053修复DNA
C++代码如下,
#include <cstdio>
#include <cstring>
#include <iostream>
#include <algorithm>
using namespace std;
const int N = 1010;
int n, m;
int tr[N][4], dar[N], idx;
int q[N], ne[N];
char str[N];
int f[N][N];
// Maps a nucleotide letter to its column index in the trie:
// 'A' -> 0, 'T' -> 1, 'G' -> 2, and every other character -> 3.
int get(char c) {
    switch (c) {
        case 'A': return 0;
        case 'T': return 1;
        case 'G': return 2;
        default:  return 3;
    }
}
void insert() {
int p = 0;
for (int i = 0; str[i]; ++i) {
int t = get(str[i]);
if (tr[p][t] == 0) tr[p][t] = ++idx;
p = tr[p][t];
}
dar[p] = 1;
}
void build() {
int hh = 0, tt = -1;
for (int i = 0; i < 4; ++i) {
if (tr[0][i]) {
q[++tt] = tr[0][i];
}
}
while (hh <= tt) {
int t = q[hh++];
for (int i = 0; i < 4; ++i) {
int p = tr[t][i];
if (!p) tr[t][i] = tr[ne[t]][i];
else {
ne[p] = tr[ne[t]][i];
q[++tt] = p;
dar[p] |= dar[ne[p]];
}
}
}
}
// Processes test cases until a case count of 0 (or the end of input) is read.
// Each case: n patterns are inserted into the automaton, then one string is
// read. f[i][j] is the minimum number of characters that have to be changed
// in the first i characters so that the automaton ends in state j without
// ever entering a state flagged in `dar`. Prints "Case k: answer", with -1
// when no such modification exists.
int main() {
    const int INF = 0x3f3f3f3f;  // value every int of f holds after memset(0x3f)
    int T = 1;
    // Check the scanf result: with the comma operator a missing terminating 0
    // would leave n unchanged at end of input and the loop would never stop.
    while (scanf("%d", &n) == 1 && n) {
        memset(tr, 0, sizeof tr);
        memset(dar, 0, sizeof dar);
        memset(ne, 0, sizeof ne);
        idx = 0;
        for (int i = 0; i < n; ++i) {
            if (scanf("%s", str) != 1) return 0;  // truncated input
            insert();
        }
        build();

        if (scanf("%s", str + 1) != 1) break;  // text is stored 1-based
        m = strlen(str + 1);

        memset(f, 0x3f, sizeof f);
        f[0][0] = 0;
        for (int i = 0; i < m; ++i) {
            const int have = get(str[i + 1]);
            for (int j = 0; j <= idx; ++j) {
                // Unreachable states can never improve a successor.
                if (f[i][j] >= INF) continue;
                for (int k = 0; k < 4; ++k) {
                    const int cost = (have != k);  // 1 if the character must be changed
                    const int p = tr[j][k];
                    if (!dar[p]) f[i + 1][p] = min(f[i + 1][p], f[i][j] + cost);
                }
            }
        }

        int res = INF;
        for (int i = 0; i <= idx; ++i) res = min(res, f[m][i]);
        if (res == INF) res = -1;
        printf("Case %d: %d\n", T++, res);
    }
    return 0;
}