【POJ 2778】DNA Sequence 中文题意&题解&代码（C++）

最新推荐文章于 2022-03-15 16:02:48 发布

deritt

最新推荐文章于 2022-03-15 16:02:48 发布

阅读量963

点赞数

分类专栏： oi之路 DERIT的博客专栏文章标签： c语言 poj ac自动机 dp 矩阵快速幂

本文链接：https://blog.csdn.net/DERITt/article/details/50804932

版权

oi之路同时被 2 个专栏收录

101 篇文章 0 订阅

订阅专栏

DERIT的博客专栏

56 篇文章 2 订阅

订阅专栏

DNA Sequence

Time Limit: 1000MS Memory Limit: 65536K

Description
It's well known that a DNA sequence is a sequence that only contains A, C, T and G, and it's very useful to analyze segments of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, then it may mean that the animal has a genetic disease. Until now scientists have found several such segments; the problem is how many kinds of DNA sequences of a species don't contain those segments.

Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G，and the length of sequences is a given integer n.

Input
The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.

Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10.

Output
An integer, the number of DNA sequences, mod 100000.

Sample Input

4 3
AT
AC
AG
AA

Sample Output

36

中文题意：
给出字符串长度n(<=2000000000)，给出不可以包含的序列，最多10个，每个长度最大是10。问长度为n的合法序列有多少个？序列中只可能包含ACTG四个字符。

题解：
做过AC自动机+dp的题后不难发现这是一道经典dp题。然而看到它的数据范围就会发现，普通的dp无法解决问题：n高达2000000000，只有O(log n)级别的算法才能通过。二分似乎没有办法，那么再结合dp的转移方程来看：每一个dp[i][j]都只能由dp[i-1][x]转移而来，其中x是trie图中所有能够一步转移到节点j的前驱节点；而这些前驱关系在自动机的trie图构造好之后就已经是确定的，与i无关。也就是说，假如dp[i][j]由dp[i-1][x]更新而来，那么dp[i+1][j]就一定由dp[i][x]转移而来，这样就可以用矩阵快速幂来优化。
网上有人说递归快速幂会超时，然而博主亲身测试递归的快速幂并不会超时，然而注意不要一边加一边mod，否则会超时！！！

代码：

#include<iostream>
#include<algorithm>
#include<stdio.h>
#include<string.h>
#include<queue>
#define mmod (100000)
using namespace std;

// Work queue for the breadth-first pass that computes failure links.
queue<int> q;

// Automaton over the 4-letter DNA alphabet; node 0 is the root.
int tot;         // index of the most recently created node
int tr[101][4];  // tr[node][letter]: target node, 0 while no edge has been set
int flag[405];   // non-zero when the node (or its failure chain) ends a forbidden segment
int fail[405];   // failure link of each node
int m;           // number of forbidden segments read from input
int n;           // required sequence length read from input

// Square matrices indexed by automaton node, entries kept modulo mmod.
long long c[105][105];     // scratch buffer holding the product inside jzc()
long long unit[105][105];  // one-step transition matrix
long long tmp[105][105];   // running power of the transition matrix
long long f[2][105];       // f[0][node]: ways to reach node after the first letter

// Input buffer for a single forbidden segment.
char s[12];
// Maps a nucleotide letter to its column index in the transition table:
// 'A' -> 0, 'T' -> 1, 'C' -> 2, 'G' -> 3.
// Any other character yields -1. Previously control fell off the end of this
// non-void function for such input, which is undefined behaviour.
// The result is used as an array index by the caller, so -1 must not be
// passed on unchecked.
int getc(char x)
{
    switch (x)
    {
        case 'A': return 0;
        case 'T': return 1;
        case 'C': return 2;
        case 'G': return 3;
        default:  return -1;  // not one of the four DNA letters
    }
}
// Resets automaton node `x` to a blank state: no outgoing edges, not marked
// as forbidden, failure link pointing at the root.
void init(int x)
{
    for (int letter = 0; letter < 4; ++letter)
    {
        tr[x][letter] = 0;
    }
    fail[x] = 0;
    flag[x] = 0;
}
// Inserts the forbidden segment held in the global buffer `s` into the trie,
// creating nodes on demand, and marks the node where the segment ends.
void add()
{
    int node = 0;  // start at the root
    for (const char *p = s; *p != '\0'; ++p)
    {
        const int letter = getc(*p);
        int &next = tr[node][letter];
        if (next == 0)
        {
            // No edge yet: allocate the next free node and clear it.
            next = ++tot;
            init(tot);
        }
        node = next;
    }
    flag[node] = 1;
}
// Matrix product modulo mmod over indices 0..tot: computes a * b into the
// global scratch matrix `c`, then copies the result back into `a`.
// `b` is left untouched, so calling jzc(x, x) squares x in place.
void jzc(long long a[105][105],long long b[105][105])
{
    for (int row = 0; row <= tot; ++row)
    {
        for (int col = 0; col <= tot; ++col)
        {
            long long &cell = c[row][col];
            cell = 0;
            for (int mid = 0; mid <= tot; ++mid)
            {
                cell += a[row][mid] * b[mid][col];
            }
            // Reduce once per cell rather than after every addition.
            cell = cell % mmod;
        }
    }

    for (int row = 0; row <= tot; ++row)
    {
        for (int col = 0; col <= tot; ++col)
        {
            a[row][col] = c[row][col];
        }
    }
}
// Breadth-first pass that fills in the failure links and turns the trie into
// a complete transition table: every missing edge tr[node][letter] is
// redirected to the target reached from the node's failure link.
// A node also inherits the forbidden mark of its failure target.
inline void getfail()
{
    // Children of the root keep fail == 0 and seed the queue.
    for (int letter = 0; letter < 4; ++letter)
    {
        const int child = tr[0][letter];
        if (child != 0) q.push(child);
    }

    while (!q.empty())
    {
        const int node = q.front();
        q.pop();

        for (int letter = 0; letter < 4; ++letter)
        {
            const int fallback = tr[fail[node]][letter];
            const int child = tr[node][letter];
            if (child == 0)
            {
                // Missing edge: borrow the transition of the failure node.
                tr[node][letter] = fallback;
                continue;
            }
            fail[child] = fallback;
            flag[child] += flag[fallback];
            q.push(child);
        }
    }
}
// Recursive fast exponentiation: assuming `tmp` and `unit` both hold the same
// matrix M on entry, leaves tmp == M^x (mod mmod) for x >= 1.
// For x <= 1 nothing is done, so tmp stays M.
void ksm(int x)
{
    // Bit-wise variant of fast exponentiation, kept for reference only:
    //  while(x)
    //  {
    //      if (x & 1) jzc(tmp,unit);
    //      jzc(unit,unit);
    //      x>>=1;
    //  }
    if (x < 2)
    {
        return;
    }
    ksm(x >> 1);       // tmp = M^(x/2)
    jzc(tmp, tmp);     // tmp = M^(2*(x/2))
    if (x & 1)
    {
        jzc(tmp, unit);  // odd exponent: one extra factor of M
    }
}
int main()
{
    scanf("%d%d",&m,&n);
    for (int i=1;i<=m;i++)
    {
        scanf("%s",s);
        add();
    }
    getfail();

    for (int i=0;i<=tot;i++)
    for (int k=0;k<4;k++)
    if (flag[tr[i][k]]==0&&flag[i]==0)
    {
        tmp[i][tr[i][k]]++;
        tmp[i][tr[i][k]]%=mmod;
        unit[i][tr[i][k]]=tmp[i][tr[i][k]];
    }
    //这个初始化数组不加也可以。。
    for (int i=0;i<4;i++)
    if (flag[tr[0][i]]==0) f[0][tr[0][i]]=(f[0][tr[0][i]]+1)%mmod;

        int  ans=0;
    if (n<=1)
    {   
        for (int i=0;i<tot;i++)
        ans+=f[0][tot];
        ans%=mmod;  
    }
    else 
    {
        ksm(n-1);   
        for (int i=0;i<=tot;i++)
        {
            unit[0][i]=0;
            for (int k=0;k<=tot;k++)
            unit[0][i]+=f[0][k]*tmp[k][i];
            unit[0][i]%=mmod;
        }
        for (int i=0;i<=tot;i++)
        ans+=unit[0][i];
        ans%=mmod;
    }
    printf("%d\n",ans); 
}

不加初始化数组的做法，自己想想为什么可以不初始化：

#include<iostream>
#include<algorithm>
#include<stdio.h>
#include<string.h>
#include<queue>
#define mmod (100000)
using namespace std;

// Work queue for the breadth-first pass that computes failure links.
queue<int> q;

// Automaton over the 4-letter DNA alphabet; node 0 is the root.
int tot;         // index of the most recently created node
int tr[101][4];  // tr[node][letter]: target node, 0 while no edge has been set
int flag[405];   // non-zero when the node (or its failure chain) ends a forbidden segment
int fail[405];   // failure link of each node
int m;           // number of forbidden segments read from input
int n;           // required sequence length read from input

// Square matrices indexed by automaton node, entries kept modulo 100000.
long long c[105][105];     // scratch buffer holding the product inside mul()
long long unit[105][105];  // transition matrix, repeatedly squared by ksm()
long long tmp[105][105];   // accumulated result of the exponentiation

// Input buffer for a single forbidden segment.
char s[12];
// Maps a nucleotide letter to its column index in the transition table:
// 'A' -> 0, 'T' -> 1, 'C' -> 2, 'G' -> 3.
// Any other character yields -1. Previously control fell off the end of this
// non-void function for such input, which is undefined behaviour.
// The result is used as an array index by the caller, so -1 must not be
// passed on unchecked.
int getc(char x)
{
    switch (x)
    {
        case 'A': return 0;
        case 'T': return 1;
        case 'C': return 2;
        case 'G': return 3;
        default:  return -1;  // not one of the four DNA letters
    }
}
// Inserts the forbidden segment held in the global buffer `s` into the trie
// and marks the node where the segment ends. New nodes need no explicit
// reset here because the global tables are zero-initialised.
void add()
{
    int node = 0;  // start at the root
    for (const char *p = s; *p != '\0'; ++p)
    {
        const int letter = getc(*p);
        int &next = tr[node][letter];
        if (next == 0)
        {
            next = ++tot;  // allocate the next free node
        }
        node = next;
    }
    flag[node] = 1;
}
// Matrix product modulo 100000 over indices 0..tot: computes a * b into the
// global scratch matrix `c`, then copies the result back into `a`.
// `b` is left untouched, so calling mul(x, x) squares x in place.
void mul(long long a[105][105], long long b[105][105])
{
    for (int row = 0; row <= tot; ++row)
    {
        for (int col = 0; col <= tot; ++col)
        {
            long long &cell = c[row][col];
            cell = 0;
            for (int mid = 0; mid <= tot; ++mid)
            {
                cell += a[row][mid] * b[mid][col];
            }
            // Reduce once per cell rather than after every addition.
            cell = cell % 100000;
        }
    }

    for (int row = 0; row <= tot; ++row)
    {
        for (int col = 0; col <= tot; ++col)
        {
            a[row][col] = c[row][col];
        }
    }
}
// Breadth-first pass that fills in the failure links and turns the trie into
// a complete transition table: every missing edge tr[node][letter] is
// redirected to the target reached from the node's failure link.
// A node also inherits the forbidden mark of its failure target.
inline void getfail()
{
    // Children of the root keep fail == 0 and seed the queue.
    for (int letter = 0; letter < 4; ++letter)
    {
        const int child = tr[0][letter];
        if (child != 0) q.push(child);
    }

    while (!q.empty())
    {
        const int node = q.front();
        q.pop();

        for (int letter = 0; letter < 4; ++letter)
        {
            const int fallback = tr[fail[node]][letter];
            const int child = tr[node][letter];
            if (child == 0)
            {
                // Missing edge: borrow the transition of the failure node.
                tr[node][letter] = fallback;
                continue;
            }
            fail[child] = fallback;
            flag[child] += flag[fallback];
            q.push(child);
        }
    }
}
// Iterative square-and-multiply: multiplies `tmp` by unit^x (mod 100000).
// `unit` is squared once per processed bit of x, so it is modified as well.
// Does nothing when x <= 0.
void ksm(int x)
{
    for (; x > 0; x >>= 1)
    {
        if (x % 2 != 0)
        {
            mul(tmp, unit);  // this bit is set: fold the current power in
        }
        mul(unit, unit);
    }
}
int main()
{
    scanf("%d%d",&m,&n);
    for (int i=1;i<=m;i++)
    {
        scanf("%s",s);
        add();
    }
    getfail();

    for (int i=0;i<=tot;i++)
    for (int k=0;k<4;k++)
    if (flag[tr[i][k]]==0&&flag[i]==0)
    {
        tmp[i][tr[i][k]]=(tmp[i][tr[i][k]]+1)%mmod;
        unit[i][tr[i][k]]=tmp[i][tr[i][k]];
    }
    int  ans=0;
    ksm(n-1);   
    for (int i=0;i<=tot;i++)
    ans=ans+tmp[0][i];
    ans=ans%mmod;
    printf("%d\n",ans); 
}