DNA Sequence
Description
It is well known that a DNA sequence is a sequence that contains only A, C, T and G, and it is very useful to analyze segments of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is to determine how many kinds of DNA sequences of a species do not contain any of those segments.
Suppose that a DNA sequence of a species is a sequence that consists of A, C, T and G, and that the length of the sequences is a given integer n.
Input
The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.
Each of the next m lines contains one DNA genetic disease segment; the length of each segment is not larger than 10.
Output
An integer, the number of DNA sequences, mod 100000.
Sample Input
4 3
AT
AC
AG
AA
Sample Output
36
Source |
[Submit] [Go Back] [Status] [Discuss]
题目大意就是让你统计长度为n的不包含病毒串的字符串有多少个.
对于这类多字符串失配问题要想到AC自动机. 先把所有病毒串建成一个AC自动机. 我们知道AC自动机如果算上转移边和fail边就是一个图, 那么问题就转化成了在一个图上从根节点出发走n步不经过危险节点的方案数. 危险节点指的是结尾节点, 或者fail链上有结尾节点的节点(因为会是当前的后缀). 那么这就是一个经典问题了, 直接上邻接矩阵加矩阵快速幂即可. 对于危险节点特判一下即可.
#include<stdio.h>
#include<queue>
#include<cstring>
using namespace std;
// 64-bit integer type used for matrix entries and their intermediate products.
typedef long long lnt;

// Modulus applied to every count; also reused below as an array length.
const int mod = 100000;

// Input buffer holding one pattern string (filled in main(), consumed by insert()).
char ss[mod];

// Per-node flag: set by insert() on the node where a pattern ends and
// propagated along fail links in bfs().
bool vis[mod];

// n and m are read from the first input line in main().
int n;
int m;
// Final answer accumulated and printed by main().
int ans;
// Index of the most recently allocated trie node; the root is node 0.
int tot;

// c[p][k]: node reached from node p on symbol k (0 means "no child" until
// bfs() fills the gaps). The code in this file only uses columns 0..3.
int c[111][5];
// Maps a character code to its symbol index; populated in main().
int mean[111];
// fail[p]: fail link of node p, computed in bfs().
int fail[111];
inline void insert() {
int p = 0;
for (int i = 0; ss[i]; ++ i) {
int idx = mean[(int)ss[i]];
if (!c[p][idx]) c[p][idx] = ++ tot;
p = c[p][idx];
}
vis[p] = true;
}
// Work queue for the breadth-first pass in bfs().
queue<int> q;

// Breadth-first pass over the trie built by insert(). For every existing child
// it sets the fail link and ORs in the fail target's `vis` flag; every missing
// child slot is overwritten with the corresponding transition of the node's
// fail target, so afterwards each c[p][0..3] holds a usable transition.
inline void bfs() {
    // Seed the queue with the root's existing children; this loop does not
    // write their fail[] entries.
    for (int k = 0; k < 4; ++k) {
        const int child = c[0][k];
        if (child != 0) {
            q.push(child);
        }
    }

    while (!q.empty()) {
        const int cur = q.front();
        q.pop();
        for (int k = 0; k < 4; ++k) {
            // Transition the fail target of `cur` takes on symbol k.
            const int fallback = c[fail[cur]][k];
            if (c[cur][k] == 0) {
                // No real child: borrow the fail target's transition.
                c[cur][k] = fallback;
            } else {
                const int child = c[cur][k];
                fail[child] = fallback;
                // A node is flagged if its fail target is flagged.
                vis[child] |= vis[fallback];
                q.push(child);
            }
        }
    }
}
struct Matrix {
lnt mat[111][111];
Matrix() {
memset(mat, 0, sizeof(mat));
}
inline friend Matrix operator * (const Matrix &a, const Matrix &b) {
Matrix c;
for (int i = 0; i <= tot; ++ i)
for (int k = 0; k <= tot; ++ k) if (a.mat[i][k])
for (int j = 0; j <= tot; ++ j) if (b.mat[k][j])
c.mat[i][j] = (c.mat[i][j] + a.mat[i][k] * b.mat[k][j]) % mod;
return c;
}
}a, ret;
// Entry point. Reads "m n" and then m pattern strings from stdin, builds the
// pattern automaton (insert() + bfs()), and prints how many length-n walks
// starting at the root never step onto a flagged node, reduced modulo `mod`.
// Returns 1 without output if the "m n" header cannot be read.
int main() {
    // Symbol index assigned to each of the four letters.
    mean['A'] = 0;
    mean['G'] = 1;
    mean['C'] = 2;
    mean['T'] = 3;

    // Refuse to run on a missing or malformed header rather than computing on
    // the zero-initialized defaults.
    if (scanf("%d%d", &m, &n) != 2) return 1;

    for (int i = 0; i < m; ++i) {
        // The field width keeps the read inside ss (100000 bytes). If the input
        // ends early, stop instead of re-inserting whatever is left in the buffer.
        if (scanf("%99999s", ss) != 1) break;
        insert();
    }
    bfs();

    // a.mat[u][v] = number of symbols leading from unflagged node u to
    // unflagged node v; flagged nodes get no outgoing or incoming edges.
    for (int u = 0; u <= tot; ++u) {
        if (vis[u]) continue;
        for (int k = 0; k < 4; ++k) {
            const int v = c[u][k];
            if (!vis[v]) ++a.mat[u][v];
        }
    }

    // ret starts as the identity and becomes a^n by binary exponentiation.
    for (int i = 0; i <= tot; ++i) ret.mat[i][i] = 1;
    // Test n > 0 rather than n != 0: with an arithmetic right shift a negative
    // n would never reach zero and the loop would not terminate.
    while (n > 0) {
        if (n & 1) ret = ret * a;
        n >>= 1;
        if (n > 0) a = a * a;  // the square is only needed while bits remain
    }

    // Row 0 holds the walk counts that start at the root; sum them modulo mod.
    for (int i = 0; i <= tot; ++i) {
        ans += static_cast<int>(ret.mat[0][i]);
        if (ans >= mod) ans -= mod;
    }
    printf("%d\n", ans);
    return 0;
}