题意:
给你m 个病毒串, 要求组成一个长度为n 的字符串, 使得字符串中不包含任何病毒串, 求字符串的方案数(对100000 取模)。
思路:
先给m 个病毒串 建立AC自动机, 标记出病毒节点来。
然后构造一个矩阵。
a[i][j] 表示从i 节点 到 j 节点 走一步的方案数。
那么根据离散知识(听别人说的= =)
矩阵的n 次方就是 i 到 j 走n 步的方案数。
那么答案就是 矩阵n 次方的第0 行之和: a[0][0] + a[0][1] + ... + a[0][k-1] (k 为非病毒结点的个数)
注意 :
1.标记病毒节点时, 如果一个节点的fail 指针包含 病毒串, 那么这个节点也是病毒结点。
2. 矩阵中不要包含病毒结点。 所以给非病毒结点离散化一下就好了。
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <queue>
using namespace std;
/// Capacity of the per-node arrays of the automaton.
const int maxn = 1000;
/// Modulus applied to every count that is computed.
const int mod = 100000;
/// Map a nucleotide letter to its child-slot index.
///
/// @param ch one of 'A', 'C', 'G', 'T'
/// @return 0, 1, 2 or 3 for A, C, G, T respectively; -1 for any other
///         character. (Previously control fell off the end of the function
///         for such input, which is undefined behaviour.) Callers must not
///         use a -1 result as an array index.
int get(char ch){
    switch (ch){
        case 'A': return 0;
        case 'C': return 1;
        case 'G': return 2;
        case 'T': return 3;
        default:  return -1;
    }
}
/// Square integer matrix whose products are reduced modulo `mod`.
///
/// Storage is a fixed 107 x 107 array; only the top-left n x n corner is
/// read or written by mul() and print().
struct Mar{
    int n;            ///< working dimension (rows == columns)
    int a[107][107];  ///< matrix entries

    /// Zero the whole storage and set the working dimension to n_.
    void init(int n_){
        memset(a, 0, sizeof a);
        n = n_;
    }

    /// Reset to the n x n identity matrix.
    void init2(int n){
        init(n);
        for (int i = 0; i < n; ++i){
            a[i][i] = 1;
        }
    }

    /// Return (*this) * b with every entry reduced modulo `mod`.
    ///
    /// The dimension of *this is used for the result and for all loops.
    /// b is taken by const reference so the (large) matrix is not copied on
    /// every call; the result is built in a separate object, so passing the
    /// same matrix as both operands (x.mul(x)) is safe.
    Mar mul(const Mar& b) const{
        Mar ans;
        ans.init(n);
        for (int i = 0; i < n; ++i){
            for (int j = 0; j < n; ++j){
                for (int k = 0; k < n; ++k){
                    // Widen before multiplying: two values below mod can
                    // exceed the range of int when multiplied.
                    const long long term = static_cast<long long>(a[i][k]) * b.a[k][j] % mod;
                    ans.a[i][j] = (ans.a[i][j] + term) % mod;
                }
            }
        }
        return ans;
    }

    /// Debug helper: print the n x n matrix to stdout, one row per line.
    void print() const{
        for (int i = 0; i < n; ++i){
            for (int j = 0; j < n; ++j){
                printf("%d ", a[i][j]);
            }
            putchar('\n');
        }
    }
};
/// Raise matrix a to the n-th power by repeated squaring.
///
/// Starts from the identity matrix of a's dimension and, for each bit of n
/// from least to most significant, multiplies the running base into the
/// result when that bit is set.
Mar pow(Mar a, int n){
    Mar result;
    result.init2(a.n);
    for (; n; n >>= 1){
        if (n & 1){
            result = result.mul(a);
        }
        a = a.mul(a);
    }
    return result;
}
struct Trie{
int L, root;
int flag[maxn];
int next[maxn][4];
int fail[maxn];
int mp[maxn];/// 给非病毒结点 离散化一下。
int fmp[maxn]; ///
int cur; /// 离散化编号
void init(){
cur = 0;
L = 0;
root = newnode();
}
int newnode(){
for (int i = 0; i < 4; ++i){
next[L][i] = -1;
}
flag[L] = 0;
return L++;
}
void insert(char* s){
int len = strlen(s);
int nod = root;
for (int i = 0; i < len; ++i){
int id = get(s[i]);
if (next[nod][id] == -1){
next[nod][id] = newnode();
}
nod = next[nod][id];
}
flag[nod] = 1;
}
void bfs(){
fail[root] = root;
queue<int>q;
for (int i = 0; i < 4; ++i){
if (next[root][i] == -1){
next[root][i] = root;
}
else{
fail[next[root][i] ] = root;
q.push(next[root][i]);
}
}
while(!q.empty()){
int u = q.front(); q.pop();
for (int i = 0; i < 4; ++i){
if (next[u][i] == -1){
next[u][i] = next[fail[u] ][i];
}
else {
fail[next[u][i] ] = next[fail[u] ][i];
q.push(next[u][i]);
}
}
}
}
void deal(){ /// 重新安排病毒结点, 如果一个结点的fail指针包含病毒, 那么这个也是病毒结点。
for (int i = 0; i < L; ++i){
int tmp = i;
if (flag[tmp]) continue;
while(tmp != root){
if (flag[tmp]) {
flag[i] = 1;
break;
}
tmp = fail[tmp];
}
if (!flag[i]){
mp[cur++] = i;
fmp[i] = cur - 1;
}
}
}
void solve(int n){ /// 构造 方案矩阵。
Mar mar;
mar.init(cur);
for (int i = 0; i < cur; ++i){
for (int j = 0; j < 4; ++j){
int nx = next[mp[i] ][j];
if (flag[nx]) continue;
mar.a[ i ][ fmp[nx] ]++;
}
}
// mar.print();
mar = pow(mar, n);
int ans = 0;
for (int i = 0; i < cur; ++i){
ans = (ans + mar.a[0][i]) % mod;
}
printf("%d\n", ans);
}
}ac;
/// Input buffer for one pattern; the scanf width below is its size minus one.
char s[107];

/// Read test cases until input is exhausted. Each case is "m n" followed by
/// m patterns; for each case the automaton is rebuilt and the answer printed.
int main(){
    int m, n;
    // Require both integers to be read: comparing against 2 stops on EOF and
    // also on malformed input, where scanf returns 0 and would otherwise be
    // retried forever.
    while (scanf("%d %d", &m, &n) == 2){
        ac.init();
        for (int i = 0; i < m; ++i){
            // The width keeps an over-long token from overrunning s.
            if (scanf("%106s", s) != 1){
                return 0;  // truncated input: no complete test case left
            }
            ac.insert(s);
        }
        ac.bfs();
        ac.deal();
        ac.solve(n);
    }
    return 0;
}
DNA Sequence
Description
It's well known that a DNA sequence is a sequence that contains only A, C, T and G, and it's very useful to analyze a segment of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, then it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is how many kinds of DNA sequences of a species don't contain those segments.
Suppose that the DNA sequences of a species are sequences that consist of A, C, T and G, and the length of the sequences is a given integer n.
Input
The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.
Each of the next m lines contains a genetic disease segment; the length of each segment is not larger than 10.
Output
An integer, the number of DNA sequences, mod 100000.
Sample Input
4 3
AT
AC
AG
AA
Sample Output
36
Source
[Submit] [Go Back] [Status] [Discuss]