字符串专题二:Trie树、AC自动机与Trie图

/*

这篇会陆陆续续补充完整的,计划目录如下:

1. Trie树的概念、构造与应用(附1~2题?)

2. 从Trie树构造AC自动机(Trie树上的多模式串KMP):构造fail指针

3. 从AC自动机构造Trie图(增强确定性成为DFA):补边

4. Problems & Solutions

*/

 

Problems

 

POJ1625 - Censored!

题目大意:给定字母集合N以及P个模式串,求长度为M的字符串中,不含任一模式串的有多少个(|N|<=50, M<=50, P<=10, 模式串长度<=10)。

先根据给定的P个模式串构造好Trie图,然后根据题意,主串在图上状态转移的时候不能经过危险节点,我们考虑在图上进行动态规划。用dp[i][j]表示长度为i且最后走到节点j的合法字符串个数(j为安全节点),那么容易发现,对每个字母c,若节点j沿c转移到的节点child(j, c)是安全节点,则有dp[i+1][child(j, c)] += dp[i][j](不同字母转移到同一节点时要分别累加一次),初始条件为dp[0][1] = 1(1号节点为根节点),最后所求的答案即为Σdp[M][j](j取遍所有安全节点)。由于答案数字会很大,所以还需要写高精度加法。

注意:有可能会存在ASCII码大于127的字母,在char为有符号类型的平台上,如果直接转换为int会变成负数,所以需要先转换为unsigned char或者先加上128,然后再作为映射数组的下标。

  1 //  Problem: poj1625 - Censored!
  2 //  Category: AC Automaton + DP + High precision
  3 //  Author: Niwatori
  4 //  Date: 2016/07/26
  5 
  6 #include <stdio.h>
  7 #include <string.h>
  8 #include <queue>
  9 #define MAXDIGIT 50
 10 #define MAXNLETTERS 60
 11 #define MAXLEN 60
 12 #define MAXNNODES 110
 13 
 14 int nLetters, hash[300];    // From char to No.
 15 char letter[MAXNLETTERS];   // From No. to char
 16 
 17 struct HugeInt
 18 {
 19     int a[MAXDIGIT];
 20     HugeInt(int n = 0)
 21     {
 22         memset(a, 0, sizeof(a));
 23         a[0] = 1; a[1] = n;
 24     }
 25     int & operator[](int k) {return a[k];}
 26     
 27     void operator+=(HugeInt & b)
 28     {
 29         HugeInt c(0);
 30         c[0] = a[0] > b[0] ? a[0] : b[0];
 31         for (int i = 1; i <= c[0]; ++i)
 32         {
 33             c[i] += a[i] + b[i];
 34             c[i + 1] += c[i] / 10000;
 35             c[i] %= 10000;
 36         }
 37         if (c[c[0] + 1] > 0) ++c[0];
 38         memcpy(a, c.a, sizeof(a));
 39     }
 40     
 41     void print()
 42     {
 43         printf("%d", a[a[0]]);
 44         for (int i = a[0] - 1; i > 0; --i)
 45         {
 46             if (a[i] < 1000) printf("0");
 47             if (a[i] < 100 ) printf("0");
 48             if (a[i] < 10  ) printf("0");
 49             printf("%d", a[i]);
 50         }
 51     }
 52     
 53 } dp[MAXLEN][MAXNNODES];
 54 
 55 struct Node
 56 {
 57     int id;
 58     Node * letter[MAXNLETTERS];
 59     Node * fail;
 60     bool isDangerous;
 61     Node(int id_)
 62     {
 63         memset(letter, 0, sizeof(letter));
 64         fail = NULL; id = id_;
 65         isDangerous = 0;
 66     }
 67 };
 68 
 69 struct AC_Automaton
 70 {
 71     Node * Trie[MAXNNODES];
 72     int nNodes;
 73     
 74     AC_Automaton()
 75     {
 76         for (int i = 0; i < MAXNNODES; ++i)
 77             Trie[i] = new Node(i);
 78         nNodes = 2;
 79     }
 80     
 81     void Insert(char p[])
 82     {
 83         Node * root = Trie[1];
 84         for (int i = 0; p[i]; ++i)
 85         {
 86             if (root->letter[hash[(unsigned char)p[i]]] == NULL)
 87                 root->letter[hash[(unsigned char)p[i]]] = Trie[nNodes++];
 88             root = root->letter[hash[(unsigned char)p[i]]];
 89         }
 90         root->isDangerous = 1;
 91     }
 92     
 93     void ConstructDFA()
 94     {
 95         for (int i = 0; i < nLetters; ++i)
 96             Trie[0]->letter[i] = Trie[1];
 97         Trie[0]->fail = NULL;
 98         Trie[1]->fail = Trie[0];
 99         
100         std::queue<Node*> q;
101         q.push(Trie[1]);
102         while (!q.empty())
103         {
104             Node * now = q.front(); q.pop();
105             for (int i = 0; i < nLetters; ++i)
106                 if (now->letter[i] == NULL)
107                     now->letter[i] = now->fail->letter[i];
108                 else
109                 {
110                     now->letter[i]->fail = now->fail->letter[i];
111                     now->letter[i]->isDangerous |= now->fail->letter[i]->isDangerous;
112                     q.push(now->letter[i]);
113                 }
114         }
115     }
116 
117     void Solve(int len)
118     {
119         memset(dp, 0, sizeof(0));
120         dp[0][1] = 1;
121         for (int i = 0; i < len; ++i)
122             for (int j = 1; j < nNodes; ++j)
123                 if (!Trie[j]->isDangerous)
124                     for (int k = 0; k < nLetters; ++k)
125                         if (!Trie[j]->letter[k]->isDangerous)
126                             dp[i + 1][Trie[j]->letter[k]->id] += dp[i][j];
127         
128         HugeInt ans(0);
129         for (int i = 1; i < nNodes; ++i)
130             if (!Trie[i]->isDangerous)
131                 ans += dp[len][i];
132         ans.print();
133     }
134     
135 } ac;
136 
137 int main()
138 {
139     int len, nPatterns;
140     scanf("%d%d%d", &nLetters, &len, &nPatterns);
141     scanf("%s", letter);
142     for (int i = 0; i < nLetters; ++i)
143         hash[(unsigned char)letter[i]] = i; // Unsigned
144 
145     while (nPatterns--)
146     {
147         char p[MAXLEN];
148         scanf("%s", p);
149         ac.Insert(p);
150     }
151     
152     ac.ConstructDFA();
153 
154     ac.Solve(len);
155     
156     return 0;
157 }
View Code

 

POJ2778 - DNA Sequence

题目大意:给定字母集合N以及P个模式串,求长度为M的字符串中,不含任一模式串的有多少个,答案模100,000(|N|=4, M<=2,000,000,000, P<=10, 模式串长度<=10)。

题意其实和上一题一模一样-_-#,不同的是递推规模变大了,而且不要求高精度。我们知道,这题要求的实际上就是从1号节点(根节点)开始,在Trie图上走M步且不经过危险节点的方案数,因而我们将Trie图转化为邻接矩阵的形式(第(i, j)元为能使节点i一步转移到节点j的字母个数),危险节点所在的行列都置为0,再对该矩阵求M次幂(利用快速幂优化),得到的矩阵中第(i, j)元就是从节点i经过M步走到节点j(不经过危险节点)的方案数,最后求Σm[1][i](i取遍所有节点)即可。

  1 //  Problem: poj2778 - DNA Sequence
  2 //  Category: AC Automaton + Matrix multiplication
  3 //  Author: Niwatori
  4 //  Date: 2016/07/27
  5 
  6 #include <stdio.h>
  7 #include <string.h>
  8 #include <queue>
  9 #define MAXNLETTERS 4
 10 #define MAXNNODES 110
 11 #define MOD 100000
 12 
 13 char letter[MAXNLETTERS] = {'A', 'C', 'G', 'T'};
 14 int hash[300];
 15 
 16 struct Matrix
 17 {
 18     long long a[MAXNNODES][MAXNNODES];
 19     int n;
 20     Matrix(long long x, int n_):n(n_)
 21     {
 22         memset(a, 0, sizeof(a));
 23         for (int i = 0; i < n; ++i)
 24             a[i][i] = x;
 25     }
 26     long long * operator[](int i){return a[i];}
 27     
 28     Matrix operator*(Matrix & b)
 29     {
 30         Matrix c(0, n);
 31         for (int i = 0; i < n; ++i)
 32             for (int j = 0; j < n; ++j)
 33                 for (int k = 0; k < n; ++k)
 34                     c[i][j] = (c[i][j] + a[i][k] * b[k][j]) % MOD;
 35         return c;
 36     }
 37     
 38     Matrix power(int exp)
 39     {
 40         Matrix ans(1, n), tmp(*this);
 41         while (exp)
 42         {
 43             if (exp & 1) ans = ans * tmp;
 44             tmp = tmp * tmp;
 45             exp = exp >> 1;
 46         }
 47         return ans;
 48     }
 49 };
 50 
 51 struct Node
 52 {
 53     int id;
 54     Node * letter[MAXNLETTERS];
 55     Node * fail;
 56     bool isDangerous;
 57     Node(int id_)
 58     {
 59         memset(letter, 0, sizeof(letter));
 60         fail = NULL; id = id_;
 61         isDangerous = 0;
 62     }
 63 };
 64 
 65 struct AC_Automaton
 66 {
 67     Node * Trie[MAXNNODES];
 68     int nNodes;
 69     
 70     AC_Automaton()
 71     {
 72         for (int i = 0; i < MAXNNODES; ++i)
 73             Trie[i] = new Node(i);
 74         nNodes = 2;
 75     }
 76     
 77     void Insert(char p[])
 78     {
 79         Node * root = Trie[1];
 80         for (int i = 0; p[i]; ++i)
 81         {
 82             if (root->letter[hash[p[i]]] == NULL)
 83                 root->letter[hash[p[i]]] = Trie[nNodes++];
 84             root = root->letter[hash[p[i]]];
 85         }
 86         root->isDangerous = 1;
 87     }
 88     
 89     void ConstructDFA()
 90     {
 91         for (int i = 0; i < MAXNLETTERS; ++i)
 92             Trie[0]->letter[i] = Trie[1];
 93         Trie[0]->fail = NULL;
 94         Trie[1]->fail = Trie[0];
 95         
 96         std::queue<Node*> q;
 97         q.push(Trie[1]);
 98         while (!q.empty())
 99         {
100             Node * now = q.front(); q.pop();
101             for (int i = 0; i < MAXNLETTERS; ++i)
102                 if (now->letter[i] == NULL)
103                     now->letter[i] = now->fail->letter[i];
104                 else
105                 {
106                     now->letter[i]->fail = now->fail->letter[i];
107                     now->letter[i]->isDangerous |= now->fail->letter[i]->isDangerous;
108                     q.push(now->letter[i]);
109                 }
110         }
111     }
112     
113     void Solve(int len)
114     {
115         Matrix m(0, nNodes);
116         for (int i = 1; i < nNodes; ++i)
117             for (int j = 0; j < MAXNLETTERS; ++j)
118                 if (!Trie[i]->isDangerous && !Trie[i]->letter[j]->isDangerous)
119                     ++m[Trie[i]->id][Trie[i]->letter[j]->id];
120         
121         m = m.power(len);
122         long long ans = 0;
123         for (int i = 1; i < nNodes; ++i)
124             ans = (ans + m[1][i]) % MOD;
125         printf("%lld", ans);
126     }
127     
128 } ac;
129 
130 int main()
131 {
132     int nPatterns, len;
133     scanf("%d%d", &nPatterns, &len);
134     for (int i = 0; i < MAXNLETTERS; ++i)
135         hash[letter[i]] = i;
136     
137     while (nPatterns--)
138     {
139         char p[20];
140         scanf("%s", p);
141         ac.Insert(p);
142     }
143     
144     ac.ConstructDFA();
145     
146     ac.Solve(len);
147     
148     return 0;
149 }
View Code

 

转载于:https://www.cnblogs.com/niwatori1217/p/5709391.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值