题意:
题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=2457
给出n个只包含A、G、C、T四种字符的字符串(禁止出现的片段),以及一条同样只包含AGCT的基因序列,要求修改后的序列中不含有任何一个给定字符串作为子串,问最少需要改变多少个字符?如果无论怎样修改都做不到,则输出-1。
思路:
AC自动机+dp:把所有不能出现的子串建成一个AC自动机,然后在自动机上做dp。dp[x][y]表示已经处理完前x个字符、当前位于自动机结点y时,把剩余字符处理完所需的最少修改次数。对每个位置枚举放置A/C/G/T四种字符,跳过会走到"危险结点"(以某个禁止子串结尾的结点)的转移;如果放置的字符与原字符不同,则代价加1,然后转移即可。这里采用了记忆化搜索,直接用for循环递推也可以。
重点要注意:build中每个结点的end要累加其fail指针所指结点的end(即end[now] += end[fail[now]]),这样当某个禁止子串是当前结点所代表字符串的后缀时,该结点也会被标记为危险结点。
代码:
#include <bits/stdc++.h>
using namespace std;
// Sentinel meaning "no valid answer". dfs() starts its minimum from INF and may
// add 1 to it; 0x3f3f3f3f + 1 still fits in an int, so that addition cannot overflow.
const int INF = 0x3f3f3f3f;
// Aho-Corasick automaton over the four-letter alphabet {A, C, G, T}.
//
// Usage: init(), then insert() every forbidden pattern, then build().
// After build():
//   * next[v][c] is a total transition function (no -1 entries remain);
//   * fail[v] is the failure link of node v;
//   * end[v] > 0 iff the string spelled by v has some inserted pattern as a
//     suffix, i.e. reaching v means a forbidden pattern has just been matched.
//
// Capacity is fixed at 1005 nodes; no bounds checking is performed.
struct ACauto {
    int next[1005][5], fail[1005], end[1005];
    int root, sz;

    // Allocates a fresh node with no children and no pattern ending at it.
    // Returns the index of the new node.
    int newnode() {
        for (int i = 0; i < 4; i++)
            next[sz][i] = -1;
        end[sz++] = 0;
        return sz - 1;
    }

    // Resets the automaton to a single root node.
    void init() {
        sz = 0;
        root = newnode();
    }

    // Maps a nucleotide letter to its child slot: A->0, C->1, G->2,
    // anything else (expected to be 'T') -> 3.
    int idx(char c) {
        if (c == 'A') return 0;
        if (c == 'C') return 1;
        if (c == 'G') return 2;
        return 3;
    }

    // Inserts the NUL-terminated pattern `buf` into the trie.
    // `id` is accepted for caller compatibility but is not used.
    void insert(const char *buf, int id) {
        (void)id;  // intentionally unused; kept so existing call sites still compile
        int now = root;
        for (const char *p = buf; *p != '\0'; ++p) {
            const int c = idx(*p);
            if (next[now][c] == -1)
                next[now][c] = newnode();
            now = next[now][c];
        }
        end[now]++;
    }

    // Computes failure links by BFS and completes `next` into a full
    // transition table. Must be called after all insert() calls.
    void build() {
        std::queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < 4; i++) {
            if (next[root][i] == -1)
                next[root][i] = root;  // missing edge at the root loops back to the root
            else {
                fail[next[root][i]] = root;
                Q.push(next[root][i]);
            }
        }
        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();
            // Key step: inherit the match count of the failure target. BFS order
            // guarantees fail[now] (a shallower node) was finalized earlier, so
            // end[now] ends up covering every pattern that is a suffix of `now`.
            end[now] += end[fail[now]];
            for (int i = 0; i < 4; i++) {
                if (next[now][i] == -1)
                    next[now][i] = next[fail[now]][i];  // borrow the transition of the fail node
                else {
                    fail[next[now][i]] = next[fail[now]][i];
                    Q.push(next[now][i]);
                }
            }
        }
    }
} ac;
// Length of the sequence currently held in `str`; set in main() before dfs() runs.
int len;
// Memo table for dfs(): dp[x][y] caches dfs(x, y). main() fills it with -1,
// which dfs() treats as "not computed yet".
int dp[1005][1005];
// Shared input buffer: main() reads each pattern into it and then the sequence itself.
char str[1005];
// Letter for each child slot, in the same order as ACauto::idx (A=0, C=1, G=2, T=3).
const char d[] = {'A', 'C', 'G', 'T'};
int dfs(int x, int y) {
if (x == len) return 0;
if (dp[x][y] != -1) return dp[x][y];
int res = INF;
for (int i = 0; i < 4; i++) {
int nx = x + 1, ny = ac.next[y][i];
if (ac.end[ny] > 0) continue;
if (d[i] == str[x])
res = min(res, dfs(nx, ny));
else
res = min(res, dfs(nx, ny) + 1);
}
dp[x][y] = res;
return res;
}
// Reads test cases until a pattern count of 0 (or end of input) is seen.
// Each case: n forbidden patterns followed by the sequence to repair.
// Prints "Case k: <min changes>" or "Case k: -1" when no repair is possible.
int main() {
    //freopen("in.txt", "r", stdin);
    int n, cs = 0;
    // Check scanf's return value: on EOF or malformed input `n` would keep its
    // previous non-zero value and the loop would never terminate.
    while (scanf("%d", &n) == 1 && n != 0) {
        ac.init();
        for (int i = 1; i <= n; i++) {
            // Width-limited read: str holds 1005 bytes including the terminator.
            if (scanf("%1004s", str) != 1) return 0;
            ac.insert(str, i);
        }
        ac.build();
        if (scanf("%1004s", str) != 1) return 0;
        len = strlen(str);
        // dfs() only touches rows 0 .. len-1 of dp (it returns before indexing
        // when x == len), so only those rows need resetting for this case.
        memset(dp, -1, sizeof(dp[0]) * len);
        int ans = dfs(0, 0);
        if (ans >= INF) printf("Case %d: -1\n", ++cs);
        else printf("Case %d: %d\n", ++cs, ans);
    }
    return 0;
}