POJ 2778 DNA Sequence (AC自动机 + 矩阵快速幂)

15 篇文章 0 订阅
5 篇文章 0 订阅

题意:

给你 m 个病毒串, 要求组成一个长度为 n 的字符串 (只含 A、C、G、T), 使得字符串中不包含任何病毒串, 求字符串的方案数 (对 100000 取模)。

思路:

先给 m 个病毒串建立 AC 自动机, 标记出病毒节点。

然后构造一个矩阵。

a[i][j] 表示从i 节点 到 j 节点 走一步的方案数。

那么根据离散知识(听别人说的= =)

矩阵的n 次方就是 i 到 j 走n 步的方案数。

那么答案就是矩阵 n 次方之后第 0 行 (根节点所在行) 的和: a[0][0] + a[0][1] + ... + a[0][k-1], 其中 k 是非病毒结点的个数。


注意 :

1. 标记病毒节点时, 如果一个节点沿 fail 指针能走到某个病毒结点 (即它的某个后缀是病毒串), 那么这个节点也是病毒结点。

2. 矩阵中不要包含病毒结点。 所以给非病毒结点离散化一下就好了。

#include <cstdio>
#include <cstring>
#include <algorithm>
#include <queue>
using namespace std;

/// Modulus applied to every matrix entry and to the final answer.
const int mod = 100000;

/// Capacity of the per-node arrays inside Trie (upper bound on automaton nodes).
const int maxn = 1000;

/// Maps a nucleotide letter to its column index in the automaton's
/// transition table.
///
/// @param ch  Character to translate.
/// @return 0, 1, 2, 3 for 'A', 'C', 'G', 'T' respectively; -1 for any other
///         character. Every path returns a value: flowing off the end of a
///         non-void function is undefined behaviour, so unknown input is
///         reported explicitly and callers must check before indexing.
int get(char ch){
    switch (ch){
        case 'A': return 0;
        case 'C': return 1;
        case 'G': return 2;
        case 'T': return 3;
        default:  return -1;
    }
}

/// Square matrix whose products are reduced modulo the file-level `mod`.
///
/// Storage is a fixed 107x107 array; only the top-left n x n corner is used.
/// A default-constructed Mar is uninitialised — call init() or init2() first.
struct Mar{
    int n;            ///< Active dimension (rows == columns).
    int a[107][107];  ///< Entries; indices >= n are ignored by mul()/print().

    /// Sets the active dimension to n_ and zeroes the whole backing array.
    void init(int n_){
        std::memset(a, 0, sizeof a);
        n = n_;
    }

    /// Sets the active dimension to n_ and makes this the identity matrix.
    void init2(int n_){
        init(n_);
        for (int i = 0; i < n_; ++i){
            a[i][i] = 1;
        }
    }

    /// Returns (*this) * b, each entry reduced modulo `mod`.
    ///
    /// b is taken by const reference so the ~45 KB array is not copied on
    /// every call. The result is built in a separate matrix, so b may alias
    /// *this (as in a.mul(a)).
    Mar mul(const Mar& b) const {
        Mar ans;
        ans.init(n);
        for (int i = 0; i < n; ++i){
            for (int k = 0; k < n; ++k){
                const long long lhs = a[i][k];
                // A zero factor contributes nothing; skipping it leaves the
                // accumulated residue unchanged.
                if (lhs == 0) continue;
                for (int j = 0; j < n; ++j){
                    // 64-bit product: two residues below `mod` can exceed int.
                    ans.a[i][j] = static_cast<int>(
                        (ans.a[i][j] + (lhs * b.a[k][j]) % mod) % mod);
                }
            }
        }
        return ans;
    }

    /// Debug helper: prints the active n x n corner, one row per line,
    /// each entry followed by a space.
    void print() const {
        for (int i = 0; i < n; ++i){
            for (int j = 0; j < n; ++j){
                std::printf("%d ", a[i][j]);
            }
            std::putchar('\n');
        }
    }
};

/// Raises matrix `a` to the n-th power by binary exponentiation.
///
/// @param a  Base matrix (taken by value because it is squared in place).
/// @param n  Exponent. Values <= 0 yield the identity matrix of size a.n;
///           the loop condition is `n > 0` so a negative exponent cannot
///           spin forever under an arithmetic right shift.
/// @return a^n with entries reduced as Mar::mul does.
Mar pow(Mar a, int n){
    Mar ans;
    ans.init2(a.n);
    while (n > 0){
        if (n & 1)
            ans = ans.mul(a);
        n >>= 1;
        // Square only while bits remain; the square after the last bit
        // would never be used.
        if (n > 0)
            a = a.mul(a);
    }
    return ans;
}




/// Aho-Corasick automaton over the four-letter alphabet handled by get(),
/// plus the counting step that turns it into a transition matrix.
///
/// Expected call order per test case: init(), insert() for every forbidden
/// string, bfs(), deal(), solve(n).
struct Trie{
    int L, root;        ///< L: number of allocated nodes; root: root node index.
    int flag[maxn];     ///< Non-zero when the node is a forbidden ("virus") state.
    int next[maxn][4];  ///< Transitions; -1 means "no child" until bfs() fills it.
    int fail[maxn];     ///< Failure links, written by bfs().
    int mp[maxn];       ///< Compact index -> node id, for non-virus nodes only.
    int fmp[maxn];      ///< Node id -> compact index; set only for non-virus nodes.
    int cur;            ///< Count of non-virus nodes numbered by deal().

    /// Resets the automaton to a single root node.
    void init(){
        cur = 0;
        L = 0;
        root = newnode();
    }

    /// Allocates a fresh node with no children and no virus mark.
    /// @return index of the new node.
    int newnode(){
        for (int c = 0; c < 4; ++c){
            next[L][c] = -1;
        }
        flag[L] = 0;
        return L++;
    }

    /// Inserts one forbidden string and marks its end node as a virus node.
    ///
    /// A string containing a character whose get() code is outside 0..3 is
    /// ignored entirely, so next[][] is never indexed out of range and no
    /// partial path is left behind.
    void insert(char* s){
        const int len = static_cast<int>(std::strlen(s));

        // Validate before touching the trie.
        for (int i = 0; i < len; ++i){
            const int id = get(s[i]);
            if (id < 0 || id >= 4) return;
        }

        int nod = root;
        for (int i = 0; i < len; ++i){
            const int id = get(s[i]);
            if (next[nod][id] == -1){
                next[nod][id] = newnode();
            }
            nod = next[nod][id];
        }
        flag[nod] = 1;
    }

    /// Builds failure links breadth-first and replaces every missing
    /// transition with the one reached through the failure link, so that
    /// next[u][c] is defined for every node u and letter c afterwards.
    void bfs(){
        fail[root] = root;

        std::queue<int> q;
        for (int c = 0; c < 4; ++c){
            const int child = next[root][c];
            if (child == -1){
                next[root][c] = root;
            } else {
                fail[child] = root;
                q.push(child);
            }
        }

        while (!q.empty()){
            const int u = q.front();
            q.pop();

            for (int c = 0; c < 4; ++c){
                const int child = next[u][c];
                // fail[u] was dequeued earlier, so its row is already complete.
                const int fallback = next[fail[u]][c];
                if (child == -1){
                    next[u][c] = fallback;
                } else {
                    fail[child] = fallback;
                    q.push(child);
                }
            }
        }
    }

    /// Re-marks virus nodes and numbers the remaining ones.
    ///
    /// A node whose failure chain (followed up to, but not including, the
    /// root) passes through a virus node is itself marked as a virus node.
    /// Every node left unmarked receives the next compact index in mp/fmp.
    void deal(){
        cur = 0;  // start numbering afresh so a repeated call does not double-count
        for (int i = 0; i < L; ++i){
            if (flag[i]) continue;
            for (int v = i; v != root; v = fail[v]){
                if (flag[v]){
                    flag[i] = 1;
                    break;
                }
            }
            if (!flag[i]){
                fmp[i] = cur;
                mp[cur++] = i;
            }
        }
    }

    /// Builds the one-step transition-count matrix over non-virus nodes,
    /// raises it to the n-th power and prints the sum of row 0 modulo `mod`.
    ///
    /// Row 0 is used as the start state: deal() numbers nodes in index
    /// order, so compact index 0 is the root whenever the root is unmarked.
    /// NOTE(review): Mar holds at most 107x107 entries, so this assumes
    /// cur <= 107 — confirm against the input limits.
    void solve(int n){
        Mar mar;
        mar.init(cur);
        for (int i = 0; i < cur; ++i){
            for (int c = 0; c < 4; ++c){
                const int nx = next[mp[i]][c];
                if (flag[nx]) continue;  // transitions into virus nodes are forbidden
                mar.a[i][fmp[nx]]++;
            }
        }
//        mar.print();
        mar = pow(mar, n);
        int ans = 0;
        for (int i = 0; i < cur; ++i){
            ans = (ans + mar.a[0][i]) % mod;
        }
        std::printf("%d\n", ans);
    }
}ac;




/// Input buffer for one forbidden string (at most 106 characters + NUL).
char s[107];

/// Reads test cases until input ends or is malformed. Each case is
/// "m n" followed by m forbidden strings; the answer for each case is
/// printed by ac.solve(n).
int main(){
    int m, n;
    // Require both integers: scanf returns 0 (not EOF) on malformed input,
    // and `~0` is non-zero, so testing `~scanf(...)` would loop forever.
    while (std::scanf("%d %d", &m, &n) == 2){
        ac.init();

        for (int i = 0; i < m; ++i){
            // Width-limited read so an over-long token cannot overflow s.
            if (std::scanf("%106s", s) != 1){
                return 0;  // truncated input: nothing sensible left to compute
            }
            ac.insert(s);
        }
        ac.bfs();
        ac.deal();
        ac.solve(n);
    }
    return 0;
}


DNA Sequence
Time Limit: 1000MS Memory Limit: 65536K
Total Submissions: 17316 Accepted: 6676

Description

It's well known that DNA Sequence is a sequence only contains A, C, T and G, and it's very useful to analyze a segment of DNA Sequence,For example, if a animal's DNA sequence contains segment ATC then it may mean that the animal may have a genetic disease. Until now scientists have found several those segments, the problem is how many kinds of DNA sequences of a species don't contain those segments. 

Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G,and the length of sequences is a given integer n. 

Input

First line contains two integer m (0 <= m <= 10), n (1 <= n <=2000000000). Here, m is the number of genetic disease segment, and n is the length of sequences. 

Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10. 

Output

An integer, the number of DNA sequences, mod 100000.

Sample Input

4 3
AT
AC
AG
AA

Sample Output

36

Source

[Submit]   [Go Back]   [Status]   [Discuss]



  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值