DNA Sequence
Time Limit: 1000MS | Memory Limit: 65536K | |
Total Submissions: 11198 | Accepted: 4269 |
Description
It is well known that a DNA sequence consists only of the letters A, C, T and G, and that analyzing segments of a DNA sequence is very useful. For example, if an animal's DNA sequence contains the segment ATC, the animal may have a genetic disease. So far scientists have found several such segments; the problem is to count how many different DNA sequences of a species do not contain any of those segments.
Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G,and the length of sequences is a given integer n.
Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G,and the length of sequences is a given integer n.
Input
The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000), where m is the number of genetic disease segments and n is the length of the sequences.
Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10.
Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10.
Output
An integer, the number of DNA sequences, mod 100000.
Sample Input
4 3
AT
AC
AG
AA
Sample Output
36
题意:输入m和n求出不包含上面m个序列且长度为n的序列的种数。。
分析:有多组字符串要先处理,所以肯定与AC自动机有关了,按照一般的形式建立字典树,AC自动机(只是注意从根到他的fail是他的最长后缀,如果这个节点的fail指针的end大于0,说明这个节点也是危险的),然后我们扫描各个节点,看加上去一个字符是否是危险的,而且之前我们已经够成了一个图,在这个节点加上一个字符后可能会转移到另一个节点(son为-1代表这个节点之后没有接这个字符的病毒),我们就需要判断转移到的那个节点是不是危险的(如果son不为-1,可以从end查看是不是危险的,end==0说明不是危险的),如果son为-1的,但是end不为0的话,我们就只能去看fail了,一直沿fail找下去直到找到son不为-1的也就是表中存在这个转移(也要满足基本要求end==0),我们开一个二维数组记录节点的转移(跳转表),g[i][j]代表从i节点转移到j节点,如果可以转移到自身就相当于g[i][i]++;
#include<cstdio>
#include<cstring>
#include<algorithm>
#include<queue>
using namespace std;
// Capacity of the node pool and the dimension of every square matrix in this file.
const int size=120;
// Matrix entries and the printed answer are reduced modulo this value.
const int MOD=100000;
// Transition-count matrix filled in main: g[i][j] is the number of letters
// that take automaton node i to node j without reaching a flagged node.
__int64 g[size][size];
// One trie / automaton node, plus the statically sized pool that holds all of them.
struct node
{
    int fail;    // index of the failure-link target, -1 while unset
    int son[4];  // child index per letter (see Index), -1 when the child is absent
    int end;     // non-zero once the node is flagged as forbidden

    // Puts the node back into its pristine state: no failure link,
    // no children, and not flagged.
    void init()
    {
        for (int letter = 0; letter < 4; letter++)
            son[letter] = -1;
        fail = -1;
        end = 0;
    }
} node[size];
// Index of the most recently allocated node (node 0 is the root). main resets it
// to 0 for each test case and increments it once after the build to get the node count.
int num;
// Queue of node indices used by the breadth-first pass in build_ac_autuomation.
queue<int> q;
/*
 * Maps a nucleotide letter to its child-slot index.
 *
 *   'A' -> 0, 'G' -> 1, 'C' -> 2, 'T' -> 3
 *
 * Returns -1 for any other character. Previously control fell off the end of
 * the function in that case, which is undefined behaviour for a function
 * returning int. A negative result must not be used as an array index.
 */
int Index(char a)
{
    switch (a)
    {
    case 'A': return 0;
    case 'G': return 1;
    case 'C': return 2;
    case 'T': return 3;
    default:  return -1;
    }
}
void insert(char *str)
{
int i=0,index;
int p=0;
while(str[i])
{
index=Index(str[i]);
if(node[p].end)
break;
if(node[p].son[index]==-1)
{
num++;
node[p].son[index]=num;
node[num].init();
}
p=node[p].son[index];
i++;
}
node[p].end++;
}
/*
 * Computes the failure link of every trie node with a breadth-first pass
 * starting at the root (node 0).
 *
 * Children of the root fail to the root. For any other child reached by
 * letter c, the parent's failure chain is followed until a node with a
 * c-child is found; that c-child becomes the failure target, and if it is
 * flagged the new node is flagged as well. If the chain runs out (index -1)
 * the failure target is the root.
 */
void build_ac_autuomation()
{
    q.push(0);
    while (!q.empty())
    {
        const int cur = q.front();
        q.pop();

        for (int letter = 0; letter < 4; letter++)
        {
            const int child = node[cur].son[letter];
            if (child == -1)
                continue;

            if (cur == 0)
            {
                node[child].fail = 0;
            }
            else
            {
                // Climb the failure chain until some ancestor has this letter.
                int anc = node[cur].fail;
                while (anc != -1 && node[anc].son[letter] == -1)
                    anc = node[anc].fail;

                if (anc == -1)
                {
                    node[child].fail = 0;
                }
                else
                {
                    const int target = node[anc].son[letter];
                    node[child].fail = target;
                    // Inherit the flag: the failure target is a suffix of this node.
                    if (node[target].end)
                        node[child].end++;
                }
            }
            q.push(child);
        }
    }
}
/*
 * In-place matrix product modulo MOD: b = b * c, using the leading
 * sz x sz corner of each matrix.
 *
 * The product is built in a scratch matrix and copied back afterwards,
 * so passing the same matrix for b and c (squaring) is safe.
 */
void MatrixMul(__int64 b[][size], __int64 c[][size], int sz)
{
    __int64 product[size][size] = {0};

    for (int row = 0; row < sz; row++)
    {
        for (int col = 0; col < sz; col++)
        {
            __int64 acc = 0;
            for (int mid = 0; mid < sz; mid++)
            {
                acc += b[row][mid] * c[mid][col];
                if (acc >= MOD)
                    acc %= MOD;
            }
            product[row][col] = acc;
        }
    }

    for (int row = 0; row < sz; row++)
    {
        for (int col = 0; col < sz; col++)
        {
            b[row][col] = product[row][col];
        }
    }
}
/*
 * Binary exponentiation: multiplies tot by a^n (mod MOD) in place.
 *
 * tot is expected to hold the starting value (the caller passes the
 * identity). The matrix a is overwritten with its successive squares,
 * so its original contents are lost. Nothing happens when n <= 0.
 */
void MatrixPow(__int64 tot[][size], __int64 a[][size], int sz, int n)
{
    for (int exponent = n; exponent > 0; exponent >>= 1)
    {
        if (exponent & 1)
        {
            MatrixMul(tot, a, sz);
        }
        MatrixMul(a, a, sz);
    }
}
// Result matrix for the power computation; main sets it to the identity
// before calling MatrixPow and reads row 0 afterwards.
__int64 tot[size][size];
// Buffer for one segment read from input.
char s[15];
// Sequence length read from input; passed to MatrixPow as the exponent.
int n;
// Number of segments read from input.
int m;
/*
 * Returns the node reached from node `from` on letter index `ch`.
 *
 * If `from` has no child for the letter, the failure links are followed
 * until a node with such a child is found. If even the root (node 0) has
 * none, the automaton stays at the root. Must only be called after
 * build_ac_autuomation has set the failure links.
 */
static int next_state(int from, int ch)
{
    int cur = from;
    while (node[cur].son[ch] == -1 && cur != 0)
        cur = node[cur].fail;
    return node[cur].son[ch] == -1 ? 0 : node[cur].son[ch];
}

/*
 * Reads test cases until the input is exhausted or malformed. For each case:
 *   1. builds the trie of forbidden segments and its failure links,
 *   2. fills g with the number of single-letter moves between safe nodes,
 *   3. raises g to the n-th power and sums row 0 over all safe nodes,
 *   4. prints that sum modulo MOD.
 */
int main()
{
    int i, j;
    // Require both integers: a bare "!= EOF" test would loop forever on a
    // malformed token, because scanf returns 0 without consuming it.
    while (scanf("%d%d", &m, &n) == 2)
    {
        num = 0;
        node[0].init();
        memset(g, 0, sizeof(g));
        for (i = 0; i < m; i++)
        {
            // Field width keeps the read inside the 15-byte buffer s.
            if (scanf("%14s", s) != 1)
                return 0; // truncated input: no meaningful answer to print
            insert(s);
        }
        build_ac_autuomation();
        num++; // num was the last node index; it is now the node count

        for (i = 0; i < num; i++)
        {
            // A flagged node is never a valid place to be, so it has no outgoing moves.
            if (node[i].end)
                continue;
            for (j = 0; j < 4; j++)
            {
                // Count the move only when it lands on a safe node.
                int to = next_state(i, j);
                if (node[to].end == 0)
                    g[i][to]++;
            }
        }

        // Start from the identity so that tot ends up equal to g^n.
        memset(tot, 0, sizeof(tot));
        for (i = 0; i < num; i++)
            tot[i][i] = 1;
        MatrixPow(tot, g, num, n);

        // Sum the walks of length n that start at the root and end on a safe node.
        __int64 ans = 0;
        for (i = 0; i < num; i++)
            if (node[i].end == 0)
            {
                ans += tot[0][i];
                if (ans >= MOD) ans %= MOD;
            }
        printf("%I64d\n", ans);
    }
    return 0;
}