DNA Sequence
Time Limit: 1000MS | Memory Limit: 65536K | |
Total Submissions: 6232 | Accepted: 2213 |
Description
It is well known that a DNA sequence consists only of the characters A, C, T and G, and that analyzing segments of a DNA sequence is very useful. For example, if an animal's DNA sequence contains the segment ATC, it may mean that the animal has a genetic disease. Scientists have so far found several such segments; the problem is to count how many DNA sequences of a species do not contain any of those segments.
Suppose that the DNA sequences of a species consist of A, C, T and G, and that the length of each sequence is a given integer n.
Input
The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000), where m is the number of genetic disease segments and n is the length of the sequences.
Each of the next m lines contains one genetic disease segment; the length of each segment is at most 10.
Output
An integer, the number of DNA sequences, mod 100000.
Sample Input
4 3
AT
AC
AG
AA
Sample Output
36
题意:给定m个DNA片段(A,C,G,T),求出长度为n的所有DNA中不含这些片段的有多少
(0 <= m <= 10, 1 <= n <=2000000000)
源代码:
#include<cstdio>
#include<cstring>
#include<iostream>
using namespace std;
// Number of outgoing transitions per trie node: one per nucleotide code
// (main maps 'A','C','G','T' to 0..3 through c2i).
const int KIND=4;
// Capacity shared by the node pool, the BFS queue and both matrix dimensions.
const int MAX=105;
// One state of the trie / Aho-Corasick automaton built from the forbidden segments.
struct TrieNode
{
// True when this state must be avoided: set on the last node of an inserted
// word, and inherited from the fail link in Build_AC_Automation.
bool unsafe;
// Position of this node in the memory[] pool; main uses it as the matrix row/column.
int index;
// Failure link, assigned by Build_AC_Automation (NULL for the root).
TrieNode *fail;
// Transition for each character code; missing edges are filled in by
// Build_AC_Automation so every entry ends up non-NULL.
TrieNode *next[KIND];
};
// Static pool from which CreateTrieNode hands out nodes.
TrieNode memory[MAX];
// Number of pool nodes in use, i.e. the index of the next free node.
int allocp;
// Queue used by the breadth-first traversal in Build_AC_Automation.
TrieNode *q[MAX];
// m: number of forbidden segments; c2i: lookup table from character to code 0..3.
int m,c2i[100];
// Length of the sequences to count.
int n;
// g holds the one-step transition counts between safe states.
// After raising to the n-th power, dp[i][j] is the number of safe ways to go
// from state i to state j by appending n characters (kept modulo 100000).
__int64 g[MAX][MAX],dp[MAX][MAX];
// Takes the next free node from the memory[] pool, resets it to a blank
// state (safe, no fail link, no children), records its pool position in
// `index`, and returns it. Advances allocp by one.
TrieNode *CreateTrieNode()
{
    const int slot = allocp++;
    TrieNode *node = &memory[slot];

    node->index = slot;
    node->unsafe = false;
    node->fail = NULL;
    for (int c = 0; c < KIND; ++c)
        node->next[c] = NULL;

    return node;
}
// Adds the NUL-terminated word s to the trie rooted at pRoot, creating any
// missing nodes along the path, and marks the node reached at the end of the
// word as unsafe. Each character is translated to a child slot via c2i.
void InsertTrieNode(TrieNode *pRoot,char s[])
{
    TrieNode *node = pRoot;

    for (const char *ch = s; *ch; ++ch)
    {
        const int slot = c2i[*ch];
        if (node->next[slot] == NULL)
            node->next[slot] = CreateTrieNode();
        node = node->next[slot];
    }

    node->unsafe = true;
}
// Converts the trie rooted at pRoot into a complete automaton.
// Nodes are visited breadth-first using the global queue q. For every node
// and every character code:
//   - an existing child gets its fail link (the root for children of the
//     root, otherwise the parent's fail node's transition on the same code),
//     inherits `unsafe` from that fail node, and is queued;
//   - a missing child is replaced by the transition the fail node would take
//     (or by the root itself when the current node is the root).
void Build_AC_Automation(TrieNode *pRoot)
{
    int front = 0;
    int back = 0;

    pRoot->fail = NULL;
    q[back++] = pRoot;

    while (front != back)
    {
        TrieNode *cur = q[front++];
        const bool atRoot = (cur == pRoot);

        for (int c = 0; c < KIND; ++c)
        {
            TrieNode *child = cur->next[c];

            if (child == NULL)
            {
                // No real edge: borrow the target from the fail link.
                cur->next[c] = atRoot ? pRoot : cur->fail->next[c];
                continue;
            }

            if (atRoot)
            {
                child->fail = pRoot;
            }
            else
            {
                child->fail = cur->fail->next[c];
                // A state whose suffix state is forbidden is forbidden as well.
                if (child->fail->unsafe)
                    child->unsafe = true;
            }
            q[back++] = child;
        }
    }
}
// Computes a = a * b (modulo 100000) over the leading sz-by-sz sub-matrices.
// The product is built in a local buffer first, so a and b may be the same
// array. 64-bit cells are required because the product of two values below
// 100000 does not fit in a 32-bit int.
void MatrixMul(__int64 a[][MAX],__int64 b[][MAX],int sz)
{
    const __int64 MOD = 100000;
    __int64 prod[MAX][MAX] = {0};

    for (int r = 0; r < sz; ++r)
    {
        for (int c = 0; c < sz; ++c)
        {
            __int64 acc = 0;
            for (int k = 0; k < sz; ++k)
            {
                acc += a[r][k] * b[k][c];
                if (acc >= MOD)
                    acc %= MOD;
            }
            prod[r][c] = acc;
        }
    }

    for (int r = 0; r < sz; ++r)
        for (int c = 0; c < sz; ++c)
            a[r][c] = prod[r][c];
}
// Binary exponentiation: multiplies t by a raised to the n-th power
// (modulo 100000, via MatrixMul) over the leading sz-by-sz sub-matrices.
// a is overwritten with its successive squares, so it is not preserved.
void MatrixPow(__int64 t[][MAX],__int64 a[][MAX],int sz,int n)
{
    for (; n > 0; n >>= 1)
    {
        if (n & 1)
            MatrixMul(t, a, sz);   // current bit set: fold this power into t
        MatrixMul(a, a, sz);       // advance to the next power of two
    }
}
int main()
{
int i,j,k;
char word[15];
TrieNode *pRoot;
c2i['A']=0; c2i['C']=1;
c2i['G']=2; c2i['T']=3;
while(cin>>m>>n)
{
allocp=0;
pRoot=CreateTrieNode();
for(i=0;i<m;i++)
{
cin>>word;
InsertTrieNode(pRoot,word);
}
Build_AC_Automation(pRoot);
memset(g,0,sizeof(g));
for(i=0;i<allocp;i++) //构建矩阵
for(j=0;j<KIND;j++)
{
TrieNode *tmp=memory[i].next[j];
if(memory[i].unsafe==false && tmp->unsafe==false)//要安全的
g[i][tmp->index]++;
}
//初始化dp为单位矩阵
for(i=0;i<allocp;i++)
for(j=0;j<allocp;j++)
{
if(i==j) dp[i][j]=1;
else dp[i][j]=0;
}
MatrixPow(dp,g,allocp,n);
//cout<<n<<endl;
__int64 ans=0;
for(i=0;i<allocp;i++)
ans += dp[0][i];
printf("%I64d\n",ans%100000);
}
return 0;
}