# POJ2778 AC自动机经典题

## DNA Sequence

Time Limit: 1000MS Memory Limit: 65536K

###### Description

It’s well known that a DNA sequence is a sequence that contains only A, C, T and G, and it’s very useful to analyze a segment of a DNA sequence. For example, if an animal’s DNA sequence contains the segment ATC, then it may mean that the animal may have a genetic disease. Until now scientists have found several such segments; the problem is how many kinds of DNA sequences of a species don’t contain those segments.
Suppose that the DNA sequences of a species are sequences that consist of A, C, T and G, and the length of the sequences is a given integer n.

###### Input

First line contains two integer m (0 <= m <= 10), n (1 <= n <=2000000000). Here, m is the number of genetic disease segment, and n is the length of sequences.
Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10.

###### Output

An integer, the number of DNA sequences, mod 100000.

###### Sample Input

4 3
AT
AC
AG
AA

###### Sample Output

36

AC自动机经典题

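把 AC 自动机的失配转移补全之后，它就是一张有向图：每个结点对 A、C、G、T 各有一条出边，每走一条边就相当于在串尾添加一个字符。把所有“危险”结点（自身是某个病毒串的结尾，或者沿 fail 链能走到病毒串结尾的结点）去掉后，答案就是从根结点出发、恰好走 n 步且不经过危险结点的路径条数。结点数最多 101 个，所以对这张图的邻接矩阵做矩阵快速幂，再把根结点所在那一行求和即可。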

#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <vector>
#include <queue>
using namespace std ;
// Shorthand for the 64-bit integer type used throughout this file.
#define ll long long

// maxn: capacity of the pattern input buffer.
// modd: modulus applied to every count.
// maxN: capacity of the node table and of each matrix dimension.
const ll maxn = 20 ;
const ll modd = 100000 ;
const ll maxN = 110 ;

// Input buffer for one pattern; the text is stored starting at pat[1].
char pat[maxn] ;

// Maps a nucleotide letter to its child-slot index:
// 'A' -> 0, 'G' -> 1, 'T' -> 2; any other character (normally 'C') -> 3.
ll to ( char c ) {
    switch ( c ) {
        case 'A' : return 0 ;
        case 'G' : return 1 ;
        case 'T' : return 2 ;
        default  : return 3 ;
    }
}
struct node {
node* nxt[4] ;
node* fail ;
ll tim, id ;
ll ch ;
node() {
for ( ll i = 0 ; i < 4 ; ++ i )
nxt[i] = NULL ;
fail = NULL ;
id = tim = 0 ;
ch = -1 ;
}
} *h, *p, *q, *dfn[maxN] ;
struct Matrix {
ll a[maxN][maxN], N ;
Matrix() {
for ( int i = 0 ; i < maxN ; ++ i )
for ( int j = 0 ; j < maxN ; j ++ )
a[i][j] = 0 ;
N = 0 ;
}
friend Matrix operator * ( Matrix A, Matrix B ) {
Matrix C ;
C.N = A.N ;
int i, j, k ;
for ( i = 1 ; i <= C.N ; ++ i )
for ( j = 1 ; j <= C.N ; j ++ )
for ( k = 1 ; k <= C.N ; k ++ )
C.a[i][j] = ( C.a[i][j] + A.a[i][k]*B.a[k][j] ) %modd ;
return C ;
}
friend Matrix operator ^ ( Matrix A, ll b ) {
Matrix C ;
int i, j, k ;
C.N = A.N ;
for ( i = 1 ; i <= C.N ; ++ i )
C.a[i][i] = 1 ;
for ( ; b ; b >>= 1, A = A*A )
if ( b&1 ) C = A*C ;
return C ;
}
void out() {
int i, j, k ;
for ( i = 1 ; i <= N ; ++ i )
for ( j = 1 ; j <= N ; j ++ )
printf ( "%lld%c", a[i][j], j==N?'\n':' ' ) ;
}
} ;

// n: number of patterns to read; m: length of the sequences being counted.
//    main reads them in this order, so the names are swapped relative to the
//    problem statement quoted at the top of the file.
// len: length of the pattern currently stored in pat (starting at pat[1]).
// tot: number of automaton nodes created so far, i.e. the last id handed out.
ll n, m, len, tot ;

void insert() {
p = h ;
int i, j, k, index ;
for ( i = 1 ; i <= len ; ++ i ) {
index = to(pat[i]) ;
if ( p->nxt[index] ) p = p->nxt[index] ;
else {
p->nxt[index] = new node ;
p = p->nxt[index] ;
p->id = ++tot ;
dfn[tot] = p ;
p->ch = index ;
}
}
p->tim = 1 ;
}

queue <node*> Q ;
void get_fail() {
while ( !Q.empty() ) Q.pop() ;
int i, j, index ;
node* x ;
Q.push(h) ;
while ( !Q.empty() ) {
x = Q.front() ;
Q.pop() ;
for ( i = 0 ; i < 4 ; ++ i ) {
if ( x->nxt[i] ) {
for ( p = x->fail ; p && !p->nxt[i] ; p = p->fail ) ;
x->nxt[i]->fail = p? p->nxt[i]:h ;
if ( x->nxt[i]->fail->tim )
x->nxt[i]->tim = 1 ;
Q.push(x->nxt[i]) ;
} else {
if ( x->fail ) x->nxt[i] = x->fail->nxt[i] ;
else x->nxt[i] = h ;
}
}
}
}

// Builds the tot x tot transition-count matrix of the automaton:
// entry [i][j] is the number of symbols that lead from node i to node j,
// counting only transitions whose source and target are both unflagged
// (tim == 0). Must run after get_fail so every nxt entry is non-NULL.
Matrix get_Matrix() {
    Matrix trans ;
    trans.N = tot ;
    for ( int from = 1 ; from <= tot ; ++ from ) {
        node* src = dfn[from] ;
        if ( src->tim ) continue ;          // flagged states contribute no edges
        for ( int c = 0 ; c < 4 ; ++ c ) {
            node* dst = src->nxt[c] ;
            if ( dst->tim ) continue ;      // never step into a flagged state
            ll& cell = trans.a[from][dst->id] ;
            cell = ( cell + 1 ) % modd ;
        }
    }
    return trans ;
}

// Modular fast exponentiation: for b > 0 returns (rec * a^b) % modd.
// For b <= 0 the accumulator rec is returned unchanged.
//   a   - base (any value; reduced modulo modd before use)
//   b   - exponent
//   rec - initial accumulator, 1 by default
ll qpow ( ll a, ll b, ll rec = 1 ) {
    // Reduce the base first so that a*a cannot overflow for large inputs.
    a %= modd ;
    for ( ; b > 0 ; b >>= 1 ) {
        if ( b&1 ) rec *= a ;
        rec %= modd ;
        a = a*a %modd ;
    }
    return rec ;
}

int main() {
int i, j, k ;
scanf ( "%lld%lld", &n, &m ) ;
if ( !n ) {
printf ( "%lld\n", qpow(4,m) ) ;
return 0 ;
}
h = new node ;
h->id = ++tot ;
dfn[tot] = h ;
for ( i = 1 ; i <= n ; ++ i ) {
scanf ( "%s", pat+1 ) ;
len = strlen(pat+1) ;
insert() ;
}
get_fail() ;
Matrix A = get_Matrix() ;
//A.out() ;
A = A^m ;
//A.out() ;
ll ans = 0 ;
for ( i = 1 ; i <= A.N ; ++ i )
ans = ( ans + A.a[1][i] ) % modd ;
printf ( "%lld\n", ans ) ;
return 0 ;
}

• 广告
• 抄袭
• 版权
• 政治
• 色情
• 无意义
• 其他

120