【题意】有A,C,G,T四种碱基,N个基因串。给定一个碱基序列,重新排列,使包含最多的基因串(重复计数)。N<=50,基因串长度<=10,碱基序列最多40个。
1
AA
AAA
答案为2
【分析】
考虑到碱基序列最多40个,记A C G T的个数分别为A B C D,则A+B+C+D<=40,由均值不等式有(A+1)*(B+1)*(C+1)*(D+1)<=((A+B+C+D+4)/4)^4<=11^4,因此"各碱基已使用个数"的组合状态最多只有11^4种。
考虑到基因串长度<=10,对所有长度<=10的串做状态压缩有4^10种。但是实际上我们关心的只有N种,冗余较多。考虑使用AC自动机,对单词建树。
dp[i][s]表示,当前自动机状态为i,已使用的碱基个数状态为s = hash(a,b,c,d)时,最多能包含的基因串个数(不可达的状态记为-1)。
hash(a,b,c,d) = a*bit[0] + b*bit[1] + c*bit[2] + d*bit[3]
bit[0] = (B+1)*(C+1)*(D+1)
bit[1] = (C+1)*(D+1)
bit[2] = (D+1)
bit[3] = 1
第一次写AC自动机+DP,BFS找失败指针真是太神奇了!
![](https://images.cnblogs.com/OutliningIndicators/ContractedBlock.gif)
![](https://images.cnblogs.com/OutliningIndicators/ExpandedBlockStart.gif)
#include<stdio.h>
#include<math.h>
#include<string.h>
#include<algorithm>

// Rearrange a sequence of bases (A/C/G/T) so that it contains as many of the
// given gene strings as possible (overlapping and repeated occurrences all
// count).
//
// Approach: build an Aho-Corasick automaton over the gene strings, then run a
// DP over (automaton state, number of A/C/G/T already placed).
//
// Sizing relies on the problem limits: at most 50 gene strings of length <= 10
// (hence at most 501 trie nodes including the root) and a base sequence of at
// most 40 characters.  Inputs that violate those limits are not supported.

const int maxn = 550;
// (A+1)*(B+1)*(C+1)*(D+1) <= 11^4 whenever A+B+C+D <= 40; "+11" is slack.
const int max_states = 11*11*11*11 + 11;

char str[maxn*2], key[55];

// One trie / automaton node.
struct node{
    int fail;       // failure link; -1 only for the root
    int next[4];    // trie children indexed by get_idx(); -1 when absent
    int count;      // number of gene strings ending here (after build_ac():
                    // ending at this node or at any node on its failure chain)
    void init()
    {
        fail = -1;
        count = 0;
        memset(next, -1, sizeof(next));
    }
}tree[maxn*2];

// Full transition table of the automaton: nxt[state][base] -> state.
int nxt[maxn*2][4];

int que[maxn*2];    // BFS queue used by build_ac(); slots 1..cnt are used
int root;           // index of the root node (set to 1 by main)
int cnt;            // number of nodes currently allocated; nodes are 1..cnt

// Maps a base character to 0..3.  Anything that is not A, C or G is treated
// as T.
int get_idx(char ch)
{
    if (ch == 'A')
        return 0;
    if (ch == 'C')
        return 1;
    if (ch == 'G')
        return 2;
    return 3;
}

// Inserts one NUL-terminated gene string into the trie, allocating nodes as
// needed, and bumps the terminal node's count (duplicates are counted again).
void insert(char *str)
{
    int p = root;
    for (const char *c = str; *c != '\0'; ++c)
    {
        const int idx = get_idx(*c);
        if (tree[p].next[idx] == -1)
        {
            tree[p].next[idx] = ++cnt;
            tree[cnt].init();
        }
        p = tree[p].next[idx];
    }
    tree[p].count++;
}

// Computes failure links by BFS from the root and folds the match counts
// along the failure chain into every node.
void build_ac()
{
    int head = 0, tail = 0;
    que[++tail] = root;
    while (head < tail)
    {
        const int p = que[++head];

        // Every gene string ending at fail(p) is a suffix of the string ending
        // at p, so it is matched at p as well.  fail(p) is shallower than p and
        // was therefore finalized earlier in the BFS.  The root has no failure
        // link (fail == -1) and must be skipped: indexing tree[-1] is an
        // out-of-bounds read.
        if (tree[p].fail != -1)
            tree[p].count += tree[tree[p].fail].count;

        for (int i = 0; i < 4; i++)
        {
            const int child = tree[p].next[i];
            if (child == -1)
                continue;

            // Walk up p's failure chain until a node with an i-child is found.
            // For p == root the chain is empty and the child falls back to root.
            int tmp = tree[p].fail;
            while (tmp != -1 && tree[tmp].next[i] == -1)
                tmp = tree[tmp].fail;
            tree[child].fail = (tmp == -1) ? root : tree[tmp].next[i];

            que[++tail] = child;
        }
    }
}

// dp[i][s]: maximum number of matched gene strings when the automaton is in
// state i and the multiset of bases already placed is encoded by s
// (see get_hash); -1 marks an unreachable combination.
int dp[maxn*2][max_states];

int num[4];     // how many A/C/G/T the input sequence contains
int bit[4];     // mixed-radix weights used by get_hash()

// Encodes "a A's, b C's, c G's and d T's have been placed" as one integer.
int get_hash(int a, int b, int c, int d)
{
    return a * bit[0] + b * bit[1] + c * bit[2] + d * bit[3];
}

// Runs the DP for the automaton currently in tree[] and the sequence in str[];
// returns the maximum number of gene-string occurrences over all
// rearrangements of str.
int solve()
{
    // Complete the transition table: on a missing trie edge follow failure
    // links until some ancestor has that edge; fall back to the root otherwise.
    for (int i = 1; i <= cnt; i++)
    {
        for (int j = 0; j < 4; j++)
        {
            int tmp = i;
            while (tree[tmp].next[j] == -1 && tmp != root)
                tmp = tree[tmp].fail;
            nxt[i][j] = tree[tmp].next[j];
            if (nxt[i][j] == -1)
                nxt[i][j] = root;
        }
    }

    // Count the available bases.
    memset(num, 0, sizeof(num));
    for (const char *c = str; *c != '\0'; ++c)
        num[get_idx(*c)]++;

    // Mixed-radix weights: digit k ranges over 0..num[k].
    bit[3] = 1;
    bit[2] = num[3] + 1;
    bit[1] = (num[2] + 1) * bit[2];
    bit[0] = (num[1] + 1) * bit[1];
    const int total = (num[0] + 1) * bit[0];    // number of distinct encodings

    // Only rows 1..cnt and columns 0..total-1 are ever touched, so reset just
    // that region instead of the whole table.
    for (int i = 1; i <= cnt; i++)
        std::fill_n(dp[i], total, -1);
    dp[root][0] = 0;

    // Every transition increases exactly one of a/b/c/d, so visiting the
    // tuples in lexicographic order processes each state before its successors.
    for (int a = 0; a <= num[0]; a++)
    for (int b = 0; b <= num[1]; b++)
    for (int c = 0; c <= num[2]; c++)
    for (int d = 0; d <= num[3]; d++)
    {
        const int used[4] = { a, b, c, d };
        const int s = get_hash(a, b, c, d);
        for (int i = 1; i <= cnt; i++)
        {
            if (dp[i][s] < 0)
                continue;
            for (int k = 0; k < 4; k++)
            {
                if (used[k] == num[k])
                    continue;           // no base of kind k left to place

                const int nt = nxt[i][k];
                const int ss = s + bit[k];
                const int tmp = dp[i][s] + tree[nt].count;
                dp[nt][ss] = std::max(dp[nt][ss], tmp);
            }
        }
    }

    // All bases placed: take the best value over every automaton state.
    const int st = total - 1;
    int res = 0;
    for (int i = 1; i <= cnt; i++)
        res = std::max(res, dp[i][st]);
    return res;
}

// Reads test cases until a gene count of 0 (or end of input): the gene count,
// that many gene strings, then the base sequence.  Prints one
// "Case k: answer" line per test case.
int main()
{
    int n;
    int cast = 0;
    while (scanf("%d", &n) == 1 && n)
    {
        root = 1;
        cnt = 0;
        tree[++cnt].init();
        while (n-- > 0)
        {
            // Field widths keep the reads inside key[55] / str[maxn*2].
            if (scanf("%54s", key) != 1)
                return 0;
            insert(key);
        }
        if (scanf("%1099s", str) != 1)
            return 0;
        build_ac();
        printf("Case %d: %d\n", ++cast, solve());
    }
    return 0;
}