DNA Sequence
Description
It's well known that a DNA sequence is a sequence that contains only A, C, T and G, and it's very useful to analyze segments of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is to count how many kinds of DNA sequences of a species don't contain any of those segments.
Suppose that the DNA sequences of a species consist of A, C, T and G, and that the length of the sequences is a given integer n.
Input
The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.
Each of the next m lines contains one DNA genetic disease segment; the length of each segment is not larger than 10.
Output
An integer, the number of DNA sequences, mod 100000.
Sample Input
4 3
AT
AC
AG
AA
Sample Output
36
Source
[Submit] [Go Back] [Status] [Discuss]
题意:给你m个非法序列,让你构造长为n的合法序列(不包含任何一个非法序列),问你能构造多少种。
题解:首先毫无疑问的是将所有非法序列搞入树中,然后跑一发AC自动机。
具体搞法和做的http://blog.csdn.net/haut_ykc/article/details/76999441类似。。唯一的难点便是如何转化的问题。
这里用了有向图的可达矩阵的性质,不需要回顾了吧,
详情看我这篇博客:http://blog.csdn.net/haut_ykc/article/details/76572083
这怎么又和可达矩阵联系在一起了呢?我们思考这样一个问题:可达矩阵是怎么构造出来的?不就是有向图中点与点之间的关系嘛。那这个字符串问题怎么用可达矩阵搞?其实是可以的(不可以也不行,毕竟n这么大 Orz)。我们可以这样搞:在弄矩阵之前,我们已经用AC自动机存了非法序列的状态,并标记了不能出现的结点,故可以把每一个合法的状态转移当做有向图中的一条有向边,注意是有向的!!!那么构造出的矩阵的含义就是从一个状态经过一步到达另一个状态的方案数;要求长为n的合法序列的个数,只需求出矩阵的n次方,再把从根状态(第0行)出发到达各个状态的方案数加起来即可。n很大,所以还要用快速幂。
一篇讲解的不错的博客:http://blog.csdn.net/morgan_xww/article/details/7834801
#include<map>
#include<stack>
#include<queue>
#include<vector>
#include<math.h>
#include<stdio.h>
#include<iostream>
#include<string.h>
#include<stdlib.h>
#include<algorithm>
using namespace std;
// 64-bit integer alias; used for the matrix entries in struct node.
typedef long long ll;
// Not referenced in the visible part of this file.
#define inf 1000000000
// Modulus applied to every count (the answer is printed mod 100000).
#define mod 100000
// Number of rows reserved for the automaton arrays pre / a / flag.
#define maxn 550
// Not referenced in the visible part of this file.
#define lowbit(x) (x&-x)
// Not referenced in the visible part of this file.
#define eps 1e-10
// pre[v]  : failure link of automaton node v (filled by build_fail).
// a[v][k] : child / transition of node v on symbol k (k = 0..3, see c()).
// flag[v] : 1 if node v must not be entered (ends a forbidden segment, or its
//           failure link is flagged), 0 otherwise.
// size    : number of automaton nodes allocated so far (node 0 is the root).
// n, m    : sequence length and number of forbidden segments read by main.
// NOTE(review): with `using namespace std;` the global name `size` may be
// ambiguous with std::size when built as C++17 or later -- confirm the
// target language standard.
int pre[maxn],a[maxn][5],flag[maxn],size,n,m;
// Forbidden segments as read from input; main fills rows 1..m.
char s1[25][25];
// Work queue for the breadth-first traversal in build_fail.
queue<int>q;
// Square matrix indexed by automaton state. The functions in this file only
// touch the top-left size x size corner.
struct node
{
// mat[i][j]: number of ways to go from state i to state j (kept mod `mod`
// by q2).
ll mat[105][105];
}b,d; // b: one-step transition matrix (build_juzhen); d: b^n as computed in main.
/**
 * Maps a nucleotide letter to its symbol index in the automaton.
 *
 * @param x character to translate
 * @return 0 for 'A', 1 for 'T', 2 for 'G', 3 for 'C', and -1 for any other
 *         character. Previously control fell off the end of this non-void
 *         function for other characters, which is undefined behaviour.
 */
int c(char x)
{
    switch(x)
    {
    case 'A': return 0;
    case 'T': return 1;
    case 'G': return 2;
    case 'C': return 3;
    default:  return -1;
    }
}
/**
 * Adds the forbidden segment stored in s1[num] to the trie.
 *
 * Walks the trie from the root (node 0), allocating a fresh node (index
 * `size`, then incremented) for every missing edge, and finally marks the
 * node reached as forbidden via flag[].
 *
 * A segment containing a character that c() does not map to 0..3 is ignored
 * entirely, so the trie is never indexed with an out-of-range symbol.
 *
 * @param num row of s1 holding the NUL-terminated segment to insert
 */
void insert(int num)
{
    const char *seg=s1[num];

    // Validate first so that a malformed segment leaves the trie untouched.
    for(const char *p=seg;*p!='\0';++p)
    {
        const int v=c(*p);
        if(v<0||v>3)
            return;
    }

    int now=0;
    for(const char *p=seg;*p!='\0';++p)
    {
        const int v=c(*p);
        if(!a[now][v])
        {
            // Reset the new node before linking it; the arrays are reused
            // across test cases.
            flag[size]=0;
            memset(a[size],0,sizeof(a[size]));
            a[now][v]=size++;
        }
        now=a[now][v];
    }
    flag[now]=1;
}
/**
 * Computes failure links (pre[]) breadth-first and completes the transition
 * table a[][] so that every node has a defined successor for each of the
 * four symbols.
 *
 * While visiting a node, flag[] is propagated from its failure target: a
 * node whose failure target is forbidden becomes forbidden as well.
 * Uses the global queue q, which is empty again when the function returns.
 */
void build_fail()
{
    // Children of the root fail back to the root.
    for(int ch=0;ch<4;++ch)
    {
        const int first=a[0][ch];
        if(first!=0)
        {
            pre[first]=0;
            q.push(first);
        }
    }

    while(!q.empty())
    {
        const int cur=q.front();
        q.pop();

        const int fail=pre[cur];
        if(flag[fail])
            flag[cur]=1;

        for(int ch=0;ch<4;++ch)
        {
            const int child=a[cur][ch];
            if(child!=0)
            {
                // Real trie edge: the child's failure link is wherever the
                // parent's failure target goes on the same symbol.
                pre[child]=a[fail][ch];
                q.push(child);
            }
            else
            {
                // Missing edge: borrow the failure target's transition.
                a[cur][ch]=a[fail][ch];
            }
        }
    }
}
/**
 * Fills the global matrix b with the one-step transition counts of the
 * automaton: b.mat[u][v] is the number of symbols that lead from state u to
 * state v, counting only moves where neither u nor v is flagged.
 */
void build_juzhen()
{
    memset(b.mat,0,sizeof(b.mat));
    for(int from=0;from<size;++from)
    {
        if(flag[from])
            continue;          // forbidden states have no outgoing moves
        for(int ch=0;ch<4;++ch)
        {
            const int to=a[from][ch];
            if(flag[to])
                continue;      // never step into a forbidden state
            b.mat[from][to]+=1;
        }
    }
}
/**
 * Multiplies two matrices modulo `mod`, using only their top-left
 * size x size corner.
 *
 * The operands are taken by const reference: each node is roughly 88 KB, and
 * the previous by-value signature copied both of them on every call. The
 * result is built in a separate local, so calls such as x = q2(x, x) remain
 * valid.
 *
 * @param a left operand
 * @param b right operand
 * @return the product a * b with every entry reduced modulo `mod`; cells
 *         outside the size x size corner are zero
 */
node q2(const node& a,const node& b)
{
    node res={};
    for(int i=0;i<size;i++)
        for(int j=0;j<size;j++)
        {
            // Assumes entries are already below `mod` (true for results of
            // this function and for build_juzhen's matrix), so the running
            // sum stays far below the range of ll before the final reduction.
            ll sum=0;
            for(int k=0;k<size;k++)
                sum+=a.mat[i][k]*b.mat[k][j];
            res.mat[i][j]=sum%mod;
        }
    return res;
}
/**
 * Raises matrix b to the power y by repeated squaring.
 *
 * @param b   base matrix (taken by value because it is squared in place)
 * @param y   exponent
 * @param num dimension of the identity matrix used as the starting value
 * @return b^y, with products computed by q2
 */
node q1(node b,int y,int num)
{
    node res;
    // Start from the num x num identity.
    for(int r=0;r<num;++r)
        for(int col=0;col<num;++col)
            res.mat[r][col]=(r==col)?1:0;

    // Consume the exponent one binary digit at a time.
    for(;y;y/=2)
    {
        if(y%2)
            res=q2(res,b);
        b=q2(b,b);
    }
    return res;
}
/**
 * Reads test cases until input is exhausted. For each case: builds the
 * automaton of forbidden segments, turns it into a transition matrix, raises
 * that matrix to the n-th power and prints the number of valid sequences of
 * length n modulo `mod`.
 *
 * Relies on the problem's limits (m <= 10, segment length <= 10) for the
 * sizes of s1, the automaton arrays and the matrix.
 */
int main(void)
{
    // Require both integers: comparing against EOF alone would spin forever
    // on a malformed header, because scanf returns 0 without consuming it.
    while(scanf("%d%d",&m,&n)==2)
    {
        // Reset the automaton to a lone root node.
        size=1;
        flag[0]=0;
        memset(a[0],0,sizeof(a[0]));
        memset(pre,0,sizeof(pre));

        for(int i=1;i<=m;i++)
        {
            // Width-limited read: each row of s1 holds at most 24 characters
            // plus the terminator.
            if(scanf("%24s",s1[i])!=1)
                return 0;   // truncated input: nothing sensible to compute
            insert(i);
        }

        build_fail();
        build_juzhen();
        d=q1(b,n,size);

        // Row 0 holds the counts of length-n walks starting at the root;
        // summing it over all end states gives the answer.
        int ans=0;
        for(int i=0;i<size;i++)
            ans=(ans+d.mat[0][i])%mod;
        printf("%d\n",ans);

        // Leave the shared queue empty for the next test case.
        while(!q.empty())
            q.pop();
    }
    return 0;
}