/*
这篇会陆陆续续补充完整的,计划目录如下:
1. Trie树的概念、构造与应用(附1~2题?)
2. 从Trie树构造AC自动机(Trie树上的多模式串KMP):构造fail指针
3. 从AC自动机构造Trie图(增强确定性成为DFA):补边
4. Problems & Solutions
*/
Problems
题目大意:给定字母集合N以及P个模式串,求长度为M的字符串中,不含任一模式串的有多少个(|N|<=50, M<=50, P<=10, 模式串长度<=10)。
先根据给定的P个模式串构造好Trie图,然后根据题意,主串在图上状态转移的时候不能经过危险节点,我们考虑在图上进行动态规划。用dp[i][j]表示长度为i且最后走到节点j的合法字符串个数(j为安全节点),那么容易发现,对每个j的安全子节点child(j),都有dp[i+1][child(j)] += dp[i][j],初始条件为dp[0][1] = 1(1号节点是Trie图的根,对应空串),最后所求的答案即为Σdp[M][j](j取遍所有安全节点)。由于答案数字会很大,所以还需要写高精度加法。
注意:有可能会存在ASCII码大于127的字母,如果直接转换为int会变成负数,所以需要先转换为unsigned char或者先加上128,然后再作为映射数组的下标。
// Problem: poj1625 - Censored!
// Category: AC Automaton + DP + High precision
// Author: Niwatori
// Date: 2016/07/26
//
// Reads an alphabet, a target length and a set of forbidden patterns, then
// prints how many strings of that length over the alphabet contain none of
// the patterns. The patterns are compiled into a Trie graph (an AC automaton
// with all missing edges filled in) and the strings are counted by a DP over
// (length, automaton state) using arbitrary-precision addition.
//
// The fixed capacities below rely on the problem limits
// (alphabet <= 50, length <= 50, <= 10 patterns of <= 10 characters).

#include <stdio.h>
#include <string.h>
#include <queue>
#define MAXDIGIT 50
#define MAXNLETTERS 60
#define MAXLEN 60
#define MAXNNODES 110
#define BASE 10000          // One HugeInt limb stores four decimal digits

int nLetters, hash[300];    // From char to No.
char letter[MAXNLETTERS];   // From No. to char

// Non-negative big integer in base 10000.
// a[0] is the number of limbs in use, a[1..a[0]] are the limbs, least
// significant first. Limbs above a[0] are always kept at zero.
// MAXDIGIT - 1 = 49 limbs hold 196 decimal digits; the largest possible
// answer, 50^50, has 85.
struct HugeInt
{
    int a[MAXDIGIT];

    // Builds the value n (expected to be non-negative).
    HugeInt(int n = 0)
    {
        memset(a, 0, sizeof(a));
        a[0] = 1;
        a[1] = n % BASE;
        // Spill anything above one limb so the representation stays normalised.
        for (n /= BASE; n > 0; n /= BASE)
            a[++a[0]] = n % BASE;
    }

    int & operator[](int k) {return a[k];}

    // Adds b to this number in place.
    void operator+=(const HugeInt & b)
    {
        int used = a[0] > b.a[0] ? a[0] : b.a[0];
        int carry = 0;
        for (int i = 1; i <= used; ++i)
        {
            const int sum = a[i] + b.a[i] + carry;
            a[i] = sum % BASE;
            carry = sum / BASE;
        }
        if (carry > 0)
            a[++used] = carry;
        a[0] = used;
    }

    // Prints the number in decimal without a trailing newline.
    void print() const
    {
        // The top limb is printed as-is; every lower limb is zero-padded to 4 digits.
        printf("%d", a[a[0]]);
        for (int i = a[0] - 1; i > 0; --i)
            printf("%04d", a[i]);
    }

} dp[MAXLEN][MAXNNODES];    // dp[i][j]: safe strings of length i ending in state j

// One automaton state.
struct Node
{
    int id;                         // Index of this node in AC_Automaton::Trie
    Node * letter[MAXNLETTERS];     // Transition for each letter number
    Node * fail;                    // Failure link
    bool isDangerous;               // True if reaching this state means a pattern matched

    Node(int id_)
    {
        memset(letter, 0, sizeof(letter));
        fail = NULL; id = id_;
        isDangerous = 0;
    }
};

struct AC_Automaton
{
    // Trie[0] is an auxiliary super-root, Trie[1] is the real root;
    // nodes 2 .. nNodes-1 are handed out by Insert().
    Node * Trie[MAXNNODES];
    int nNodes;

    // Pre-allocates the whole node pool; the nodes live until the process exits.
    AC_Automaton()
    {
        for (int i = 0; i < MAXNNODES; ++i)
            Trie[i] = new Node(i);
        nNodes = 2;
    }

    // Adds the NUL-terminated pattern p to the trie and marks its end state
    // as dangerous. Characters are mapped to letter numbers through hash[].
    void Insert(const char p[])
    {
        Node * cur = Trie[1];
        for (int i = 0; p[i]; ++i)
        {
            // Go through unsigned char: letters may have codes above 127,
            // which would be negative (and index out of bounds) as plain char.
            const int c = hash[static_cast<unsigned char>(p[i])];
            if (cur->letter[c] == NULL)
                cur->letter[c] = Trie[nNodes++];
            cur = cur->letter[c];
        }
        cur->isDangerous = 1;
    }

    // Computes failure links breadth-first and fills in every missing
    // transition, turning the trie into a complete DFA (Trie graph).
    void ConstructDFA()
    {
        // Every letter leads from the super-root to the root, so the root can
        // be processed by the same rule as any other node.
        for (int i = 0; i < nLetters; ++i)
            Trie[0]->letter[i] = Trie[1];
        Trie[0]->fail = NULL;
        Trie[1]->fail = Trie[0];

        std::queue<Node*> q;
        q.push(Trie[1]);
        while (!q.empty())
        {
            Node * now = q.front(); q.pop();
            for (int i = 0; i < nLetters; ++i)
            {
                Node * viaFail = now->fail->letter[i];
                if (now->letter[i] == NULL)
                {
                    // Missing edge: borrow the transition of the failure state.
                    now->letter[i] = viaFail;
                }
                else
                {
                    Node * child = now->letter[i];
                    child->fail = viaFail;
                    // A state is also dangerous if one of its suffixes is a pattern.
                    child->isDangerous |= viaFail->isDangerous;
                    q.push(child);
                }
            }
        }
    }

    // Counts the strings of length len that never enter a dangerous state and
    // prints the total. Requires 0 <= len < MAXLEN and ConstructDFA() done.
    void Solve(int len)
    {
        // Reset the rows that are about to be used. (The original
        // memset(dp, 0, sizeof(0)) only cleared sizeof(int) bytes.)
        for (int i = 0; i <= len; ++i)
            for (int j = 0; j < nNodes; ++j)
                dp[i][j] = HugeInt(0);

        dp[0][1] = 1;   // The empty string sits at the root.
        for (int i = 0; i < len; ++i)
            for (int j = 1; j < nNodes; ++j)
            {
                if (Trie[j]->isDangerous) continue;
                for (int k = 0; k < nLetters; ++k)
                {
                    const Node * next = Trie[j]->letter[k];
                    if (!next->isDangerous)
                        dp[i + 1][next->id] += dp[i][j];
                }
            }

        HugeInt ans(0);
        for (int i = 1; i < nNodes; ++i)
            if (!Trie[i]->isDangerous)
                ans += dp[len][i];
        ans.print();
    }

} ac;

int main()
{
    int len, nPatterns;
    if (scanf("%d%d%d", &nLetters, &len, &nPatterns) != 3)
        return 0;
    // Reject sizes the fixed-size tables cannot hold.
    if (nLetters < 1 || nLetters >= MAXNLETTERS || len < 0 || len >= MAXLEN)
        return 0;

    // Field width 59 == MAXNLETTERS - 1 keeps the read inside letter[].
    if (scanf("%59s", letter) != 1)
        return 0;
    for (int i = 0; i < nLetters; ++i)
        hash[static_cast<unsigned char>(letter[i])] = i;   // Unsigned

    while (nPatterns-- > 0)
    {
        char p[MAXLEN];
        // Field width 59 == MAXLEN - 1 keeps the read inside p[].
        if (scanf("%59s", p) != 1)
            break;
        ac.Insert(p);
    }

    ac.ConstructDFA();

    ac.Solve(len);

    return 0;
}
题目大意:给定字母集合N以及P个模式串,求长度为M的字符串中,不含任一模式串的有多少个,答案模100,000(|N|=4, M<=2,000,000,000, P<=10, 模式串长度<=10)。
题意其实和上一题一模一样-_-#,不同的是递推规模变大了,而且不要求高精度。我们知道,这题要求的实际上就是从1号节点(根)开始,在Trie图上走M步且不经过危险节点的方案数,因而我们将Trie图转化为邻接矩阵的形式,危险节点所在的行列都置为0,再对该矩阵求M次幂(利用快速幂优化),得到的矩阵中第(i, j)元就是从节点i经过M步走到节点j(不经过危险节点)的方案数,最后求出Σm[1][i](i取遍所有节点)即为答案。
// Problem: poj2778 - DNA Sequence
// Category: AC Automaton + Matrix multiplication
// Author: Niwatori
// Date: 2016/07/27
//
// Reads a set of forbidden DNA patterns and a length, then prints, modulo
// 100000, how many strings of that length over {A, C, G, T} contain none of
// the patterns. The patterns are compiled into a Trie graph, its safe
// transitions are written into an adjacency matrix, and the matrix is raised
// to the requested power by repeated squaring.
//
// The fixed capacities below rely on the problem limits
// (<= 10 patterns of <= 10 characters).

#include <stdio.h>
#include <string.h>
#include <queue>
#define MAXNLETTERS 4
#define MAXNNODES 110
#define MOD 100000

char letter[MAXNLETTERS] = {'A', 'C', 'G', 'T'};
int hash[300];      // From char to letter number

// Square n x n matrix whose products are reduced modulo MOD.
// Each instance is about 95 KB (110 * 110 long longs).
struct Matrix
{
    long long a[MAXNNODES][MAXNNODES];
    int n;

    // Builds x times the n_ x n_ identity matrix.
    Matrix(long long x, int n_):n(n_)
    {
        memset(a, 0, sizeof(a));
        for (int i = 0; i < n; ++i)
            a[i][i] = x;
    }

    long long * operator[](int i){return a[i];}
    const long long * operator[](int i) const {return a[i];}

    // Returns (*this) * b with every entry reduced modulo MOD.
    Matrix operator*(const Matrix & b) const
    {
        Matrix c(0, n);
        for (int i = 0; i < n; ++i)
        {
            for (int k = 0; k < n; ++k)
            {
                const long long aik = a[i][k];
                if (aik == 0) continue;     // The matrix is sparse: skip empty terms.
                for (int j = 0; j < n; ++j)
                    c.a[i][j] += aik * b.a[k][j];
            }
            // With operands below MOD a row sum is at most
            // 110 * MOD * MOD ~ 1.1e12, so reducing once per row cannot overflow.
            for (int j = 0; j < n; ++j)
                c.a[i][j] %= MOD;
        }
        return c;
    }

    // Returns this matrix raised to the power exp (exp >= 0) by binary exponentiation.
    Matrix power(int exp) const
    {
        Matrix ans(1, n), tmp(*this);
        while (exp)
        {
            if (exp & 1) ans = ans * tmp;
            exp = exp >> 1;
            if (exp) tmp = tmp * tmp;   // No squaring after the last bit is consumed.
        }
        return ans;
    }
};

// One automaton state.
struct Node
{
    int id;                         // Index of this node in AC_Automaton::Trie
    Node * letter[MAXNLETTERS];     // Transition for each letter number
    Node * fail;                    // Failure link
    bool isDangerous;               // True if reaching this state means a pattern matched

    Node(int id_)
    {
        memset(letter, 0, sizeof(letter));
        fail = NULL; id = id_;
        isDangerous = 0;
    }
};

struct AC_Automaton
{
    // Trie[0] is an auxiliary super-root, Trie[1] is the real root;
    // nodes 2 .. nNodes-1 are handed out by Insert().
    Node * Trie[MAXNNODES];
    int nNodes;

    // Pre-allocates the whole node pool; the nodes live until the process exits.
    AC_Automaton()
    {
        for (int i = 0; i < MAXNNODES; ++i)
            Trie[i] = new Node(i);
        nNodes = 2;
    }

    // Adds the NUL-terminated pattern p to the trie and marks its end state
    // as dangerous. Characters are mapped to letter numbers through hash[].
    void Insert(const char p[])
    {
        Node * cur = Trie[1];
        for (int i = 0; p[i]; ++i)
        {
            // Index through unsigned char so a negative char value can never
            // be used as an array subscript.
            const int c = hash[static_cast<unsigned char>(p[i])];
            if (cur->letter[c] == NULL)
                cur->letter[c] = Trie[nNodes++];
            cur = cur->letter[c];
        }
        cur->isDangerous = 1;
    }

    // Computes failure links breadth-first and fills in every missing
    // transition, turning the trie into a complete DFA (Trie graph).
    void ConstructDFA()
    {
        // Every letter leads from the super-root to the root, so the root can
        // be processed by the same rule as any other node.
        for (int i = 0; i < MAXNLETTERS; ++i)
            Trie[0]->letter[i] = Trie[1];
        Trie[0]->fail = NULL;
        Trie[1]->fail = Trie[0];

        std::queue<Node*> q;
        q.push(Trie[1]);
        while (!q.empty())
        {
            Node * now = q.front(); q.pop();
            for (int i = 0; i < MAXNLETTERS; ++i)
            {
                Node * viaFail = now->fail->letter[i];
                if (now->letter[i] == NULL)
                {
                    // Missing edge: borrow the transition of the failure state.
                    now->letter[i] = viaFail;
                }
                else
                {
                    Node * child = now->letter[i];
                    child->fail = viaFail;
                    // A state is also dangerous if one of its suffixes is a pattern.
                    child->isDangerous |= viaFail->isDangerous;
                    q.push(child);
                }
            }
        }
    }

    // Prints, modulo MOD, the number of len-step walks from the root that
    // never enter a dangerous state. Requires len >= 0 and ConstructDFA() done.
    void Solve(int len)
    {
        // m[u][v] = number of letters taking safe state u to safe state v.
        Matrix m(0, nNodes);
        for (int i = 1; i < nNodes; ++i)
        {
            if (Trie[i]->isDangerous) continue;
            for (int j = 0; j < MAXNLETTERS; ++j)
            {
                const Node * next = Trie[i]->letter[j];
                if (!next->isDangerous)
                    ++m[Trie[i]->id][next->id];
            }
        }

        m = m.power(len);
        long long ans = 0;
        for (int i = 1; i < nNodes; ++i)
            ans = (ans + m[1][i]) % MOD;
        // ans < MOD, so it fits in an int; this avoids depending on
        // "%lld" support in the C runtime.
        printf("%d", static_cast<int>(ans));
    }

} ac;

int main()
{
    int nPatterns, len;
    if (scanf("%d%d", &nPatterns, &len) != 2 || len < 0)
        return 0;
    for (int i = 0; i < MAXNLETTERS; ++i)
        hash[static_cast<unsigned char>(letter[i])] = i;

    while (nPatterns-- > 0)
    {
        char p[20];
        // Field width 19 == sizeof(p) - 1 keeps the read inside p[].
        if (scanf("%19s", p) != 1)
            break;
        ac.Insert(p);
    }

    ac.ConstructDFA();

    ac.Solve(len);

    return 0;
}