It’s well known that a DNA sequence is a sequence that contains only A, C, T and G, and it is very useful to analyze segments of a DNA sequence. For example, if an animal’s DNA sequence contains the segment ATC, it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is to count how many kinds of DNA sequences of a species don’t contain any of those segments.
Suppose that a DNA sequence of a species is a sequence that consists of A, C, T and G, and the length of the sequence is a given integer n.
Input:
The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.
Each of the next m lines contains one DNA genetic disease segment; the length of each segment is not larger than 10.
Output
An integer, the number of DNA sequences, mod 100000.
题意:
输入m和n(m是病毒串的个数,n是要统计的字符串的长度)。
下面m行每行给出一个字符串。(其中字符只有四种‘A’,‘T’,‘C’,‘G’)。
问长度为n的字符串中,有多少个不包含这m个字符串中的任何一个(答案对100000取模)。
思路:
这是一个由[‘ACG’]和[‘C’]组成的AC自动机fail树。根据题目的意思,字符是只有四种的。除了失配边外,我们还要把这些没有后继节点的也按失配边处理了。比如说4号节点,对于[‘A’,‘T’,‘C’,‘G’]都是不存在的。那么直接按失配处理了。(刘汝佳书中有详细介绍)。
这样,问题转化成在自动机上走n步,并且每一步都是安全的(不经过危险节点)。我们首先求出一步之内能到达的路径数,得到转移矩阵,然后使用矩阵快速幂,这样就可以求出走n步的方案数。
注意:对于u节点,即使val[u]=0,只要val[f[u]]!=0,val[u]也要处理为1。(可以回想一下fail树是干嘛的:f[u]对应的串是u对应的串的后缀,也就是说走到u就意味着同时匹配到了f[u]。可以详细看一下刘汝佳书上对这段的描写。)
AC代码:
#include<bits/stdc++.h>
#include<iostream>
#include<stdio.h>
#include<algorithm>
// MAXN, ssize and SIGMA_SIZE are not referenced by any code shown in this file.
#define MAXN 101000
#define ssize 200010
#define SIGMA_SIZE 200
// Modulus applied to matrix entries and to the printed answer.
#define mod 100000
using namespace std;
typedef long long LL;
// trie[node][letter]: automaton transition table; letter is the code returned
// by getnum() (0..3). Insert() creates the real trie edges and getfail() fills
// the missing ones of every non-root node with fallback targets.
// val[node]: non-zero when reaching the node means a forbidden segment has
// been matched (set by Insert(), propagated along failure links by getfail()).
int trie[105][5],val[105];
// f[node]: failure link computed by getfail().
// last[node]: nearest marked node along the failure links, as computed by
// getfail(); it is not read by any other code shown here.
// n: sequence length, m: number of forbidden segments (both read in main()).
// tot: number of automaton nodes allocated so far (node 0 is the root); it is
// also the dimension used by the matrix routines.
int f[105],last[105],n,m,tot;
// Input buffer that main() reads one forbidden segment into.
char str[15];
// Fixed-capacity square matrix of 64-bit counters; only the top-left
// tot x tot corner is used by the multiplication routine.
struct matrix
{
    LL a[105][105];

    // Resets every entry of the matrix to zero.
    void Init()
    {
        for (int row = 0; row < 105; ++row)
        {
            for (int col = 0; col < 105; ++col)
            {
                a[row][col] = 0;
            }
        }
    }
};
// Returns the product a * b restricted to the top-left tot x tot corner,
// with every entry reduced into the range [0, mod).
//
// Precondition: the entries of a and b are non-negative and at most mod, so
// a full row-by-column sum is at most 105 * mod * mod and fits in a long long.
//
// The previous version reduced a cell only when it was strictly greater than
// mod, so an entry equal to exactly mod was returned unreduced instead of 0.
matrix mutimatrix(matrix& a,matrix& b)
{
    matrix c;
    c.Init();
    for(int i=0;i<tot;i++)
    {
        for(int j=0;j<tot;j++)
        {
            LL acc=0;
            for(int k=0;k<tot;k++)
            {
                acc+=a.a[i][k]*b.a[k][j];
            }
            // Reduce once per cell; always, so that mod itself maps to 0.
            c.a[i][j]=acc%mod;
        }
    }
    return c;
}
// Maps a nucleotide letter to its column in the transition table:
// 'A' -> 0, 'T' -> 1, 'C' -> 2, and every other character (including 'G') -> 3.
int getnum(char c)
{
    switch (c)
    {
        case 'A':
            return 0;
        case 'T':
            return 1;
        case 'C':
            return 2;
        default:
            return 3;
    }
}
void init()
{
tot=1;
memset(f,0,sizeof(f));
memset(val,0,sizeof(val));
memset(last,0,sizeof(last));
}
// Adds the NUL-terminated string str to the trie, allocating nodes as needed,
// and stores v in val[] of the node where the string ends.
void Insert(char *str,int v)
{
    int node = 0;  // start at the root
    for (char *p = str; *p != '\0'; ++p)
    {
        const int id = getnum(*p);
        if (trie[node][id] == 0)
        {
            // Allocate node number tot: clear its row and its mark, then link it.
            memset(trie[tot], 0, sizeof(trie[tot]));
            val[tot] = 0;
            trie[node][id] = tot;
            ++tot;
        }
        node = trie[node][id];
    }
    val[node] = v;
}
void getfail()
{
queue<int> q;
f[0]=0;
for(int c=0;c<4;c++)
{
int u=trie[0][c];
if(u)
{
q.push(u);f[u]=0;last[u]=0;
}
}
while(!q.empty())
{
int r=q.front();q.pop();
for(int c=0;c<4;c++)
{
int u=trie[r][c];
if(!u)
{
trie[r][c]=trie[f[r]][c];
continue;
}
q.push(u);
int v=f[r];f[u]=trie[v][c];
last[u]=val[f[u]]?f[u]:last[f[u]];
val[r]|=val[f[r]];
}
}
}
// Counts the walks of length n that start at the root of the automaton and
// never enter a marked node, and prints that count modulo mod.
//
// A.a[i][v] is the number of letters leading from unmarked node i to unmarked
// node v in one step; A is raised to the n-th power by repeated squaring and
// row 0 of the result is summed.
//
// Changes from the previous version:
//   * the final sum is always reduced, so a total of exactly mod prints 0
//     rather than 100000;
//   * the exponent is consumed from a local copy, so the global n is left
//     untouched and a non-positive n cannot make the loop run forever.
void solve()
{
    matrix A;
    A.Init();
    for(int i=0;i<tot;i++)
    {
        if(val[i])
            continue;
        for(int j=0;j<4;j++)
        {
            int v=trie[i][j];
            if(!val[v])
            {
                A.a[i][v]++;
            }
        }
    }

    // Start from the identity matrix.
    matrix ans;
    ans.Init();
    for(int i=0;i<tot;i++)
        ans.a[i][i]=1;

    int power=n;
    while(power>0)
    {
        if(power&1)
            ans=mutimatrix(ans,A);
        A=mutimatrix(A,A);
        power>>=1;
    }

    LL sum=0;
    for(int i=0;i<tot;i++)
    {
        sum=(sum+ans.a[0][i])%mod;
    }
    printf("%lld\n",sum);
}
// Reads test cases until the input no longer provides two integers. Each case
// is "m n" followed by m forbidden segments; the answer is printed by solve().
int main()
{
    // Require both integers: comparing against EOF alone would loop forever
    // on malformed input, where scanf returns 0 without consuming anything.
    while(scanf("%d %d",&m,&n)==2)
    {
        init();
        for(int i=0;i<m;i++)
        {
            // Width 14 keeps the read inside str[15] (segments are at most 10 long).
            if(scanf("%14s",str)!=1)
                break;  // truncated input: continue with the segments read so far
            Insert(str,1);
        }
        getfail();
        solve();
    }
    return 0;
}