http://poj.org/problem?id=3691
题意:
给出n个DNA病毒串,然后给出一个需要修改的DNA片段,问最少需要修改多少个字符才能使该DNA片段不含任何DNA病毒串,修改后的DNA片段长度不变。
思路:
这题看了一天了,DP那地方好难懂。首先这里是多串匹配,我们用Trie树和fail指针构造确定性有限状态自动机(DFA),然后在DFA上进行DP;
这里的DP我理解了很长时间:dp[i][j]表示主串匹配到第i个位置、并且到达AC自动机上的j状态时最少需要修改的字符个数。我们保证j状态不是模式串(DNA病毒串)的结束节点,然后不断地往后走,选出一条能匹配完主串、并且修改字符数最少的路径。
dp[i + 1][H[j].next[k]->id] = min(dp[i + 1][H[j].next[k]->id],dp[i][j] + add);
//#pragma comment(linker,"/STACK:327680000,327680000")
#include <iostream>
#include <cstdio>
#include <cmath>
#include <vector>
#include <cstring>
#include <algorithm>
#include <string>
#include <set>
#include <functional>
#include <numeric>
#include <sstream>
#include <stack>
#include <map>
#include <queue>
// Utility macros. Every expression-like macro is fully parenthesized so that
// it evaluates as a single operand wherever it is expanded (for example
// Min(a, b) + 1 or lowbit(x) == 4). Arguments may still be evaluated more
// than once, so do not pass expressions with side effects.

// Fills a true array (not a pointer) with the byte value `val`.
#define CL(arr, val) memset(arr, val, sizeof(arr))
// Argument-list shorthands; they expand to three comma-separated arguments
// built from variables named l, m, r and rt at the point of use, so they
// cannot be parenthesized.
#define lc l,m,rt<<1
#define rc m + 1,r,rt<<1|1
#define pi acos(-1.0)
#define ll long long
// 2*x and 2*x + 1.
#define L(x) ((x) << 1)
#define R(x) (((x) << 1) | 1)
// Floor of (l + r) / 2 for non-negative operands.
#define MID(l, r) (((l) + (r)) >> 1)
#define Min(x, y) ((x) < (y) ? (x) : (y))
#define Max(x, y) ((x) < (y) ? (y) : (x))
// 2 to the power x.
#define E(x) (1 << (x))
#define iabs(x) ((x) < 0 ? -(x) : (x))
// Prints x followed by a newline using the "%I64d" conversion.
#define OUT(x) printf("%I64d\n", x)
// Lowest set bit of x.
#define lowbit(x) ((x) & (-(x)))
// Redirect stdin / stdout to local files for debugging.
#define Read() freopen("din.txt", "r", stdin)
// NOTE: the expansion of Write() already ends with ';'.
#define Write() freopen("dout.txt", "w", stdout);
#define M 23
// Capacity used for the automaton node pool and the pattern input buffer.
#define N 1000007
using namespace std;
// Sentinel stored in DP cells that no transition has reached; Ac_automat::solve
// initialises its table with it and skips cells that still hold it.
const int inf = 0x7f7f7f7f;
// One state of the pattern automaton: a trie node with four outgoing links
// (one per symbol code 0..3) and a failure link.
struct node
{
    node *next[4];  // outgoing link per symbol code; NULL means "no link yet"
    node *fail;     // failure link; NULL after newnode()
    int cnt;        // non-zero marks a state that must not be entered
    int id;         // state number; assigned by the owner, not by newnode()

    // Puts the node back into its blank state: no links, not marked.
    // The id field is deliberately left untouched.
    void newnode()
    {
        cnt = 0;
        fail = NULL;
        int slot = 4;
        while (slot-- > 0)
        {
            next[slot] = NULL;
        }
    }
};
class Ac_automat
{
public:
node *root,*q[N],H[N];
int fr,tl;
int t,dp[1007][1007];
void init()
{
fr = tl = 0;
t = 0;
H[t].newnode();
H[t].id = t;//对Trie树上的每一个节点给一个状态号
root = &H[t++];
}
int getVal(char ch)
{
if (ch == 'A') return 0;
else if (ch == 'G') return 1;
else if (ch == 'C') return 2;
else return 3;
}
void insert(char *s)
{
int i,k;
int len = strlen(s);
node *p = root;
for (i = 0; i < len; ++i)
{
k = getVal(s[i]);
if (p->next[k] == NULL)
{
H[t].newnode();
H[t].id = t;//给定状态的编号
p->next[k] = &H[t++];
}
p = p->next[k];
}
p->cnt = 1;
}
void build()
{
root->fail = NULL;
q[tl] = root;
while (fr <= tl)
{
node *tmp = q[fr++];
//这里解决出现aact aac这种情况,只要包含aac就是非法的的有病毒的
if (tmp != root)
tmp->cnt = tmp->cnt||tmp->fail->cnt;
for (int i = 0; i < 4; ++i)
{
//这里要更新每一个next为空的指针,好循环匹配
if (tmp->next[i] == NULL)
{
if (tmp == root) tmp->next[i] = root;
else tmp->next[i] = tmp->fail->next[i];
}
else//这里还是构造匹配失败指针
{
if (tmp == root) tmp->next[i]->fail = root;
else
{
node *p = tmp->fail;
while (p != NULL)
{
if (p->next[i])
{
tmp->next[i]->fail = p->next[i];
break;
}
p = p->fail;
}
if (p == NULL) tmp->next[i]->fail = root;
}
q[++tl] = tmp->next[i];
}
}
}
}
void solve(char *s,int cas)
{
int i,j,k;
int len = strlen(s);
for (i = 0; i <= len; ++i)
{
for (j = 0; j < t; ++j)
{
dp[i][j] = inf;
}
}
dp[0][0] = 0;//根节点开始
for (i = 0; i < len; ++i)//走主串
{
for (j = 0; j < t; ++j)//枚举每一个状态
{
if (dp[i][j] != inf && H[j].cnt== 0)//该状态合法
{
//printf(">>>>>\n");
for (k = 0; k < 4; ++k)//枚举可走的边
{
if (H[j].next[k]->cnt != 0) continue;
int add = (k != getVal(s[i]));
dp[i + 1][H[j].next[k]->id] = min(dp[i + 1][H[j].next[k]->id],dp[i][j] + add);
}
}
}
}
int ans = -1;
for (i = 0; i < t; ++i)
{
// printf("%d\n",dp[len][i]);
if (dp[len][i] != inf && (ans == -1 || dp[len][i] < ans)) ans = dp[len][i];
}
printf("Case %d: %d\n",cas++,ans);
}
}ac;
int n;                  // number of patterns in the current test case
char str[N], ts[1007];  // pattern buffer / text buffer

// Reads one whitespace-delimited token into buf, storing at most cap bytes
// including the terminator. Returns true when a token was read.
static bool readToken(char *buf, int cap)
{
    char fmt[16];  // holds "%<width>s"; 16 bytes fit any int width
    sprintf(fmt, "%%%ds", cap - 1);
    return scanf(fmt, buf) == 1;
}

// Reads test cases until a pattern count of 0 (or end of input). Each case is
// the pattern count, that many patterns, then the text to repair; one
// "Case k: answer" line is printed per case.
int main()
{
    int cas = 1;
    // Stop on EOF, on a malformed count, and on the terminating 0.
    while (scanf("%d", &n) == 1 && n != 0)
    {
        ac.init();
        for (int i = 0; i < n; ++i)
        {
            if (!readToken(str, static_cast<int>(sizeof(str)))) return 0;
            ac.insert(str);
        }
        ac.build();
        if (!readToken(ts, static_cast<int>(sizeof(ts)))) return 0;
        ac.solve(ts, cas);
        ++cas;
    }
    return 0;
}