poj2778--DNA Sequence(AC自动机+矩阵优化)

最新推荐文章于 2021-10-29 20:11:53 发布

刀刀狗0102

最新推荐文章于 2021-10-29 20:11:53 发布

阅读量2.9k

点赞数 1

分类专栏：数据结构

本文链接：https://blog.csdn.net/winddreams/article/details/43452145

版权

数据结构专栏收录该内容

85 篇文章 0 订阅

订阅专栏

DNA Sequence

Time Limit: 1000MS		Memory Limit: 65536K
Total Submissions: 12252		Accepted: 4661

Description

It's well known that a DNA sequence is a sequence that contains only A, C, T and G, and it's very useful to analyze a segment of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, then it may mean that the animal may have a genetic disease. Until now scientists have found several such segments; the problem is how many kinds of DNA sequences of a species don't contain those segments.

Suppose that a DNA sequence of a species is a sequence that consists of A, C, T and G, and the length of the sequences is a given integer n.

Input

The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.

Each of the next m lines contains one DNA genetic disease segment; the length of each segment is not larger than 10.

Output

An integer, the number of DNA sequences, mod 100000.

Sample Input

4 3
AT
AC
AG
AA

Sample Output

36

给出患病的DNA序列，问序列长度为n的，且不包含患病的DNA序列有多少种。

首先处理患病的DNA串，连接为字典树后，添加fail指针，完成AC自动机，给每个节点一个编号，然后用矩阵统计每个节点走一步可以走到的节点的种数，其中，不能包含会患病的序列，最后矩阵相乘。

注意1.矩阵元素相乘会超出 int 的范围，需要用 __int64 保存，并在每次累加后取模。

注意2.处理自动机时，注意，如果发现某个节点的fail会返回到一个代表序列结束的节点上，那么这个节点也是不可达的。不能被统计到矩阵中。

注意3.矩阵快速幂要写成非递归的形式。

给出测试案例：

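2 1

ACG

C

其中矩阵应该为（节点编号：0 为根，1 为 A，2 为 AC，3 为 ACG，4 为 C；AC 的 fail 指向结束节点 C，所以 AC 也不可达）

2 1 0 0 0

2 1 0 0 0

0 0 0 0 0

0 0 0 0 0

0 0 0 0 0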

#include <cstdio>
#include <cstring>
#include <queue>
#include <algorithm>
using namespace std ;
#define MOD 100000
#define LL __int64
// One state of the automaton built from the forbidden segments.
struct node{
    int flag ;              // non-zero once the state is marked as forbidden
    int id ;                // creation-order index, used as a matrix row/column
    struct node *next[4] ;  // outgoing links, indexed by letter position in "ACGT"
    struct node *fail ;     // fail pointer filled in by setfail()
};
// Square matrix of counts; only the top-left n x n corner is meaningful.
struct nnode{
    LL Map[110][110] ;  // Map[i][j]: number of ways to go from state i to state j
    LL n ;              // side length in use
};
// Work queue shared by the breadth-first passes in setfail() and setmat().
queue <node*> que ;
// Alphabet; a letter's position here is its index into node::next.
char c[5] = "ACGT" ;
// Input buffer for one forbidden segment.
char str[20] ;
// Number of nodes created so far; also the id handed to the next node.
int num ;
// Visited marks indexed by node id, used by setmat().
int vis[110] ;
// Allocates a node, assigns it the next free id (advancing num), and
// clears its flag and all of its links. Returns the new node.
node *newnode()
{
    node *fresh = new node ;
    fresh->id = num++ ;
    fresh->flag = 0 ;
    fill(fresh->next, fresh->next + 4, (node *)NULL) ;
    fresh->fail = NULL ;
    return fresh ;
}
// Inserts the forbidden segment s into the trie rooted at rt and marks the
// node reached by its last letter (flag = 1).
//   s    - NUL-terminated segment
//   rt   - root of the trie
//   temp - index of the segment as passed by the caller; not used
// A segment containing any character outside "ACGT" is ignored.
void settree(char *s,node *rt,int temp)
{
    (void)temp ;
    size_t len = strlen(s) ;
    // Such a segment can never occur in a sequence over "ACGT", so dropping
    // it does not change the count. Without this check the letter lookup
    // would yield index 4 and access next[] out of bounds.
    if( strspn(s,c) != len )
        return ;
    node *p = rt ;
    for(size_t i = 0 ; i < len ; i++)
    {
        // Position of the letter in the alphabet; guaranteed 0..3 here.
        int k = (int)( strchr(c,s[i]) - c ) ;
        if( p->next[k] == NULL )
            p->next[k] = newnode() ;
        p = p->next[k] ;
    }
    p->flag = 1 ;
    return ;
}
// Breadth-first pass over the trie rooted at rt that
//   * sets the fail pointer of every node,
//   * marks a node as forbidden when its fail target is forbidden,
//   * replaces every missing child link with the link that would be taken
//     after following fail pointers (missing links at the root lead back to
//     the root), so afterwards every next[] entry is non-NULL.
void setfail(node *rt)
{
    rt->fail = NULL ;
    while( !que.empty() )
        que.pop() ;
    que.push(rt) ;
    while( !que.empty() )
    {
        node *cur = que.front() ;
        que.pop() ;
        for(int ch = 0 ; ch < 4 ; ch++)
        {
            node *child = cur->next[ch] ;
            if( child == NULL )
            {
                // No real child: borrow the transition from the fail target.
                if( cur == rt )
                    cur->next[ch] = rt ;
                else
                    cur->next[ch] = cur->fail->next[ch] ;
                continue ;
            }
            // Walk up the fail chain until some state has a link on ch.
            node *anc = cur->fail ;
            while( anc != NULL && anc->next[ch] == NULL )
                anc = anc->fail ;
            if( anc == NULL )
                child->fail = rt ;
            else
            {
                child->fail = anc->next[ch] ;
                // Inherit the forbidden mark from the fail target.
                if( child->fail->flag )
                    child->flag = 1 ;
            }
            que.push(child) ;
        }
    }
}
// Builds the one-step transition matrix of the automaton rooted at rt.
// Entry [u][v] counts the letters that lead from node id u to node id v,
// counting only moves where neither endpoint is flagged as forbidden.
// The returned matrix has n set to the current node count (num).
// Expects setfail() to have run so that every next[] link is non-NULL.
nnode setmat(node *rt)
{
    nnode trans ;
    trans.n = num ;
    memset(trans.Map,0,sizeof(trans.Map)) ;
    memset(vis,0,sizeof(vis)) ;
    while( !que.empty() )
        que.pop() ;
    vis[ rt->id ] = 1 ;
    que.push(rt) ;
    while( !que.empty() )
    {
        node *from = que.front() ;
        que.pop() ;
        for(int ch = 0 ; ch < 4 ; ch++)
        {
            node *to = from->next[ch] ;
            if( from->flag == 0 && to->flag == 0 )
                trans.Map[ from->id ][ to->id ] += 1 ;
            if( vis[ to->id ] )
                continue ;
            // First time this node is seen: schedule it for expansion.
            vis[ to->id ] = 1 ;
            que.push(to) ;
        }
    }
    return trans ;
}
// Returns the matrix product a * b with every entry reduced modulo MOD.
// Only the top-left a.n x a.n corner is computed; the result's n is a.n.
nnode mul(nnode a,nnode b)
{
    nnode prod ;
    prod.n = a.n ;
    for(int row = 0 ; row < a.n ; row++)
        for(int col = 0 ; col < a.n ; col++)
        {
            // Reduce after every term so the running sum stays small.
            LL acc = 0 ;
            for(int mid = 0 ; mid < a.n ; mid++)
                acc = ( acc + a.Map[row][mid]*b.Map[mid][col] ) % MOD ;
            prod.Map[row][col] = acc ;
        }
    return prod ;
}
// Raises matrix p to the k-th power (entries modulo MOD) by iterative
// binary exponentiation. Returns the identity matrix of size p.n when k is 0.
nnode pow(nnode p,int k)
{
    nnode result ;
    result.n = p.n ;
    memset(result.Map,0,sizeof(result.Map)) ;
    for(int d = 0 ; d < p.n ; d++)
        result.Map[d][d] = 1 ;
    for( ; k ; k >>= 1 )
    {
        // Multiply in the current power of p when this bit of k is set.
        if( (k & 1) != 0 )
            result = mul(result,p) ;
        p = mul(p,p) ;
    }
    return result ;
}
// Reads test cases until input ends. Each case is "m n" followed by m
// forbidden segments. For each case it builds the automaton, raises its
// transition matrix to the n-th power, and prints the sum of row 0
// (all walks of length n starting at the root) modulo MOD.
int main()
{
    int n , m ;
    // Require both numbers: comparing against EOF alone would spin forever
    // on input that fails to parse.
    while( scanf("%d %d", &m, &n) == 2 )
    {
        num = 0 ;
        node *rt = newnode() ;
        for(int i = 1 ; i <= m ; i++)
        {
            // Width-limited read: str holds 20 bytes including the NUL.
            if( scanf("%19s", str) != 1 )
                break ;
            settree(str,rt,i) ;
        }
        setfail(rt) ;
        nnode p = setmat(rt) ;
        p = pow(p,n) ;
        LL ans = 0 ;
        for(int i = 0 ; i < p.n ; i++)
            ans = ( ans + p.Map[0][i] ) % MOD ;
        // ans is below MOD, so it fits in int; the cast makes the argument
        // match the %d conversion (passing a 64-bit value to %d is undefined).
        printf("%d\n", (int)ans) ;
    }
    return 0 ;
}

刀刀狗0102

关注

1
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
poj2778--DNA Sequence(AC自动机+矩阵优化)

DNA SequenceTime Limit: 1000MS Memory Limit: 65536KTotal Submissions: 12252 Accepted: 4661DescriptionIt's well known that DNA Sequence is a sequence only contains A
复制链接

扫一扫