DNA Sequence
Time Limit: 1000MS | Memory Limit: 65536K
Total Submissions: 12628 | Accepted: 4818
Description
It is well known that a DNA sequence consists only of the characters A, C, T and G, and that analyzing segments of a DNA sequence is very useful. For example, if an animal's DNA sequence contains the segment ATC, it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is to determine how many kinds of DNA sequences of a species do not contain any of those segments.
Suppose that the DNA sequences of a species consist of A, C, T and G, and that the length of the sequences is a given integer n.
Input
The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.
Each of the next m lines contains one DNA genetic disease segment; the length of each segment is not larger than 10.
Output
An integer, the number of DNA sequences, mod 100000.
Sample Input
4 3
AT
AC
AG
AA
Sample Output
36
求长度为n的合法串,假设已经确定了前k个,第k+1个的选择就取决于 前k个是否存在某个后缀是不合法子串的前缀。假设不合法子串是AT,前k个确定了是*****A(最后一个是A),那么第k+1个只能选择A\C\G三种。所以这题的关键是序列的不同后缀之间的状态转移。而字符串间状态的转移就是一个AC自动机状态转移的过程。比如刚刚的情况,假如第k+1个选择了C,那么后缀就是****AC了,这时候考虑AC是不是某个不合法串的前缀,发现不是,则继续考虑后缀C是不是不合法串的前缀,发现也不是则前面的后缀都不需要考虑了,回到初始状态。整个过程和AC自动机匹配字符串的过程是一样的。
#include <iostream>
#include <cstring>
#include <string>
#include <cstdio>
#include <algorithm>
#include <vector>
#include <set>
#include <map>
#include <stack>
#include <queue>
#include <set>
using namespace std;
// Capacity of the per-node arrays declared inside Trie.
#define maxn 2010
// Modulus applied to every matrix product and to the final answer.
#define mod 100000
// Shorthand for long long; used for matrix entries and the answer.
typedef long long ll;
// One matrix row.
typedef vector<ll> vec;
// Dense matrix stored as a vector of rows.
typedef vector<vec> mat;
// Multiplies two matrices, reducing every entry modulo `mod`.
//
// Parameters:
//   a - left operand, a.size() rows by b.size() columns.
//   b - right operand, b.size() rows by b[0].size() columns.
// Returns:
//   The a.size() x b[0].size() product with each entry reduced modulo `mod`.
//   If b is empty the result has a.size() rows and no columns (b[0] is never
//   touched in that case).
//
// The callers in this file pass entries that are already smaller than `mod`,
// so each product a[i][j] * b[j][k] stays far below the long long limit.
mat mul(const mat &a, const mat &b)
{
    const int rows = static_cast<int>(a.size());
    const int inner = static_cast<int>(b.size());
    const int cols = b.empty() ? 0 : static_cast<int>(b[0].size());

    mat c(rows, vec(cols, 0));
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < inner; j++) {
            const ll coeff = a[i][j];
            // A zero coefficient adds nothing to row i; skipping it leaves the
            // result unchanged because c[i][k] is always kept reduced.
            if (coeff == 0) continue;
            for (int k = 0; k < cols; k++)
                c[i][k] = (c[i][k] + coeff * b[j][k] % mod) % mod;
        }
    }
    return c;
}
// Transition-count table shared between Trie::build(), which clears and fills
// it, and main(), which copies its top-left scnt x scnt corner into a `mat`.
int matrix[105][105];
// Raises a square matrix to the n-th power modulo `mod` using binary
// exponentiation on top of mul().
//
// Parameters:
//   a - square matrix (taken by value; it is squared in place while iterating).
//   n - exponent. Values <= 0 yield the identity matrix.
// Returns:
//   a^n with every entry reduced modulo `mod`; an empty matrix yields an empty
//   matrix.
mat pow(mat a, int n)
{
    // Use a.size() for the identity so an empty matrix never indexes ret[0].
    const int dim = static_cast<int>(a.size());
    mat ret(dim, vec(dim, 0));
    for (int i = 0; i < dim; i++) ret[i][i] = 1;

    // `n > 0` (rather than `n != 0`) guarantees termination for negative input,
    // where an arithmetic right shift would never reach zero.
    while (n > 0) {
        if (n & 1) ret = mul(ret, a);
        n >>= 1;
        // Square only when another bit remains; the final squaring is unused.
        if (n > 0) a = mul(a, a);
    }
    return ret;
}
// Aho-Corasick automaton over a 4-symbol alphabet, stored in fixed-size arrays.
// insert() adds forbidden patterns; build() computes failure links, completes
// the transition table and fills the global `matrix` with the number of
// symbols leading from one allowed state to another.
struct Trie
{
// next[s][c]: child / transition of node s on symbol c (-1 while absent).
// fail[s]:    failure link of node s (assigned in build()).
// endd[s]:    non-zero when node s is forbidden: a pattern ends there, or
//             build() found that its failure link points at a forbidden node.
// tag[s]:     row/column index of node s in the global `matrix`; only assigned
//             to nodes that build() treats as allowed.
int next[maxn][4], fail[maxn], endd[maxn],tag[maxn];
// root: index of the root node; L: number of nodes allocated so far.
int root, L;
// Allocates node L with no children and a cleared end mark; returns its index.
// NOTE(review): L is not checked against maxn.
int newnode()
{
for(int i=0; i<4; i++)
next[L][i]=-1;
endd[L++]=0;
return L-1;
}
// Resets the automaton so that it contains only a fresh root node.
void init()
{
L=0;
root=newnode();
}
// Inserts one pattern. Each character is mapped to a symbol index with
// buf[i]-'a', so buf is expected to hold only 'a'..'d' (main() runs turn()
// on it first).
// NOTE(review): there is no range check; any other character would index
// outside next[now][0..3].
void insert(char *buf)
{
int len=strlen(buf);
int now=root;
for(int i=0; i<len ; i++){
if(next[now][buf[i]-'a']==-1)
next[now][buf[i]-'a']=newnode();
now=next[now][buf[i]-'a'];
}
// Mark the node where the pattern ends as forbidden.
endd[now]++;
}
// Breadth-first construction of failure links and missing transitions.
// While doing so it numbers every allowed node through tag[] and, for each
// allowed node and each symbol whose target is also allowed, increments
// matrix[tag[from]][tag[to]].
// Returns the number of tagged (allowed) states, the root included.
int build()
{
// Start from an all-zero transition table.
memset(matrix, 0, sizeof(matrix));
queue<int> que;
fail[root]=root;
int scnt=0;
tag[root]=scnt++;
// Root level: a missing child becomes a self-loop on the root; an existing
// child that is not a pattern end gets the root as failure link and is queued.
// A child that is a pattern end is skipped (no tag, no matrix entry).
for(int i=0; i<4; i++)
if(next[root][i]==-1){
next[root][i]=root;
matrix[tag[root]][tag[root]]++;
}
else if(!endd[next[root][i]]){
fail[next[root][i]]=root;
tag[next[root][i]]=scnt++;
matrix[tag[root]][tag[next[root][i]]]++;
que.push(next[root][i]);
}
while(!que.empty()){
int now=que.front(); que.pop();
for(int i=0; i<4; i++)
if(next[now][i]==-1){
// Missing child: reuse the transition of the failure-link node.
next[now][i]=next[fail[now]][i];
if(!endd[next[fail[now]][i]]) // note: if the suffix node is forbidden, this transition is forbidden too
matrix[tag[now]][tag[next[fail[now]][i]]]++;
}
else if(!endd[next[now][i]]){
// Existing child that is not a pattern end: its failure link is the
// failure-link node's transition on the same symbol.
int tmp=fail[next[now][i]]=next[fail[now]][i];
if(endd[tmp]){ // note: the suffix node is forbidden, so this node is forbidden too; propagate the mark downwards
endd[next[now][i]]=1;
continue;
}
tag[next[now][i]]=scnt++;
matrix[tag[now]][tag[next[now][i]]]++;
que.push(next[now][i]);
}
}
return scnt;
}
};
// Input buffer for one pattern; main() reads into it, re-encodes it with
// turn() and passes it to Trie::insert().
char buf[100];
// Re-encodes a NUL-terminated DNA string in place so that every nucleotide
// becomes a letter in 'a'..'d': A -> a, G -> b, C -> c, T -> d.
// Any other character is left untouched.
void turn(char *buf)
{
    for (char *cur = buf; *cur != '\0'; ++cur) {
        if (*cur == 'A')
            *cur = 'a';
        else if (*cur == 'G')
            *cur = 'b';
        else if (*cur == 'C')
            *cur = 'c';
        else if (*cur == 'T')
            *cur = 'd';
    }
}
// Single automaton instance; main() calls init() on it at the start of every
// test case before inserting that case's patterns.
Trie trie;
int main()
{
ll n,m;
while(cin>>m>>n){
trie.init();
for(int i=0; i<m; i++){
scanf("%s", buf);
turn(buf);
trie.insert(buf);
}
int scnt=trie.build();
mat m(scnt, vec(scnt));
for(int i=0; i<scnt; i++){
for(int j=0; j<scnt; j++){
m[i][j]=matrix[i][j];
// cout<<matrix[i][j]<<' ';
}
// cout<<endl;
}
m=pow(m, n);
mat a(1, vec(scnt,0));
a[0][0]=1;
a=mul(a, m);
ll ans=0;
for(int i=0; i<a[0].size(); i++)
ans =(ans+a[0][i])%mod;
cout<<ans<<endl;
}
return 0;
}