问题
hdu 2222 Keywords Search - https://acm.hdu.edu.cn/showproblem.php?pid=2222
分析
- 多模式匹配,AC自动机
- trie树
- fail树
- last树
- 关键词
- abcdef
- bcdef
- cdef
- def
- ef
- f
- 当abcdef完全匹配时,bcdef、cdef、def、ef、f肯定也完全匹配
- 但用一对一的关系来描述更简单,即:abcdef → bcdef、bcdef → cdef、cdef → def、def → ef、ef → f
- 因此失配指针采用一对一的关系来描述,且形成一棵树;也就是说,任何一个节点都可以顺着失配指针回到根
- 例如:当关键词abcdef的前缀abcde完全匹配时,则关键词bcdef、cdef、def、ef的前缀也与之匹配,则失配指针用abcde → bcde、bcde → cde、cde → de、de → e的方式来描述
- 注意:上述的描述中,长度是单调递减的
- 已匹配部分是一个串,这个串的某些后缀同时也是关键词的前缀;失配指针指向的就是这些后缀中最长的那一个,例如:
- 当scde匹配时,后缀cde、de、e是关键词的前缀,最长的是:cde
代码
- 【202MS】
/* hdu 2222 Keywords Search */
#include<bits/stdc++.h>
using namespace std;
// Capacity limits: number of trie nodes, text buffer size, alphabet size.
const int MXN = 500010;
const int MXL = 1000010;
const int SIZE = 26;

int n;                // number of keywords in the current test case
int tot;              // index of the most recently allocated trie node (node 0 is the root)
int trie[MXN][SIZE];  // trie child table; getFail() also fills empty slots with automaton transitions
int f[MXN];           // fail pointer of each node
int vis[MXN];         // marks nodes reached (directly or via fail pointers) while scanning the text
int tail[MXN];        // tail[i]: node at which the i-th keyword ends
char txt[MXL];        // text to be searched
char key[55];         // buffer holding one keyword
// Adds keyword s to the trie, allocating nodes on demand.
// Each character is mapped to a child slot via (ch - 'a'), so s is assumed
// to contain only lowercase letters.
// Returns the index of the node at which the keyword ends.
int insert(char s[]){
    int node = 0;
    for(int pos = 0; s[pos] != '\0'; ++pos){
        const int c = s[pos] - 'a';
        if(!trie[node][c]){
            // Fresh node: clear its visit mark and child row, since the
            // arrays are reused across test cases.
            ++tot;
            trie[node][c] = tot;
            vis[tot] = 0;
            memset(trie[tot], 0, sizeof trie[tot]);
        }
        node = trie[node][c];
    }
    return node;
}
// Computes the fail pointer f[] of every trie node with a breadth-first
// traversal, and at the same time completes the trie into an automaton:
// every missing child slot trie[u][c] is overwritten with the node that the
// fail chain of u would lead to on character c (0, the root, if none).
void getFail(){
    std::queue<int> q;
    f[0] = 0;
    // Children of the root always fail back to the root.
    for(int c = 0; c < SIZE; ++c){
        const int child = trie[0][c];
        if(child) f[child] = 0, q.push(child);
    }
    while(!q.empty()){
        const int u = q.front();
        q.pop();
        for(int c = 0; c < SIZE; ++c){
            const int v = trie[u][c];
            if(!v){
                // No real child: borrow the transition of the fail node.
                trie[u][c] = trie[f[u]][c];
                continue;
            }
            // f[u] is strictly shallower than u, so BFS has already completed
            // all of its transitions; trie[f[u]][c] is therefore the target of
            // the fail-chain walk and no explicit loop over f[] is needed.
            f[v] = trie[f[u]][c];
            q.push(v);
        }
    }
}
// Runs the text in txt through the automaton and sets vis[] on every node
// reached, together with all nodes on its fail chain.
void find(){
    int u = 0;
    for(int i = 0; txt[i] != '\0'; ++i){
        u = trie[u][txt[i] - 'a'];
        // Climb the fail chain until an already-marked node is met; anything
        // beyond it was marked by an earlier climb. main() marks the root
        // beforehand, so the climb always terminates.
        for(int v = u; !vis[v]; v = f[v]) vis[v] = 1;
    }
}
int main(){
int t, ans;
scanf("%d", &t);
while(t--){
scanf("%d", &n);
tot = 0, vis[0] = 1, ans = 0;
memset(trie[0], 0, sizeof trie[0]);
for(int i = 1; i <= n; ++i) scanf("%s", key), tail[i] = insert(key);
getFail();
scanf("%s", txt), find();
for(int i = 1; i <= n; ++i) if(vis[tail[i]]) ++ans;
printf("%d\n", ans);
}
return 0;
}
- 【249MS】
/* hdu 2222 Keywords Search */
#include<bits/stdc++.h>
using namespace std;
// Capacity limits: number of trie nodes, text buffer size, alphabet size.
const int MXN = 500010;
const int MXL = 1000010;
const int SIZE = 26;

int n;                // number of keywords in the current test case
int tot;              // index of the most recently allocated trie node (node 0 is the root)
int trie[MXN][SIZE];  // trie child table; getFail() also fills empty slots with automaton transitions
int f[MXN];           // fail pointer of each node
int last[MXN];        // last[u]: nearest keyword-ending node on u's fail chain (0 if none); these links form chains
int val[MXN];         // val[u]: how many keywords end at node u (0 means none); find() zeroes entries once counted
char txt[MXL];        // text to be searched
char key[55];         // buffer holding one keyword
// Adds keyword s to the trie and increments the keyword counter of its end
// node. Characters are mapped to child slots via (ch - 'a'), so s is assumed
// to contain only lowercase letters.
void insert(char s[]){
    int node = 0;
    for(const char* p = s; *p != '\0'; ++p){
        int& slot = trie[node][*p - 'a'];
        if(slot == 0){
            // Fresh node: clear its counter and child row, since the arrays
            // are reused across test cases.
            slot = ++tot;
            val[tot] = 0;
            memset(trie[tot], 0, sizeof trie[tot]);
        }
        node = slot;
    }
    // A keyword listed k times ends up with val == k, so a single match
    // accounts for all k copies.
    ++val[node];
}
// Computes, by breadth-first traversal, the fail pointer f[] and the
// "last keyword" link last[] of every trie node, and completes the trie into
// an automaton: every missing child slot trie[u][c] is overwritten with the
// node that the fail chain of u would lead to on character c (0 if none).
void getFail(){
    std::queue<int> q;
    f[0] = 0, last[0] = 0;
    // Children of the root fail back to the root and have no shorter keyword.
    for(int c = 0; c < SIZE; ++c){
        const int child = trie[0][c];
        if(child) f[child] = 0, last[child] = 0, q.push(child);
    }
    while(!q.empty()){
        const int u = q.front();
        q.pop();
        for(int c = 0; c < SIZE; ++c){
            const int v = trie[u][c];
            if(!v){
                // No real child: borrow the transition of the fail node.
                trie[u][c] = trie[f[u]][c];
                continue;
            }
            // f[u] is strictly shallower than u, so BFS has already completed
            // all of its transitions; trie[f[u]][c] is therefore the target of
            // the fail-chain walk and no explicit loop over f[] is needed.
            f[v] = trie[f[u]][c];
            // Link to the fail node itself if it ends a keyword, otherwise
            // reuse the link already computed for the fail node.
            last[v] = val[f[v]] ? f[v] : last[f[v]];
            q.push(v);
        }
    }
}
// Runs the text in txt through the automaton and returns how many keywords
// (counting duplicates in the keyword list) occur in it. Each keyword node's
// counter is zeroed once it has been added, so it is counted at most once.
int find(){
    int total = 0;
    int u = 0;
    for(const char* p = txt; *p != '\0'; ++p){
        u = trie[u][*p - 'a'];
        // Collect keywords that are proper suffixes of the current match,
        // stopping at the first one that was already collected.
        for(int v = last[u]; v != 0 && val[v] != 0; v = last[v]){
            total += val[v];
            val[v] = 0;
        }
        // Collect the keyword ending exactly at the current node, if any.
        if(val[u] != 0){
            total += val[u];
            val[u] = 0;
        }
    }
    return total;
}
// Reads the number of test cases; for each case reads n keywords and one
// text, then prints how many of the keywords occur in the text.
// Stops quietly on malformed or truncated input instead of running on
// uninitialised values.
int main(){
    int t;
    if(scanf("%d", &t) != 1) return 0;
    while(t--){
        if(scanf("%d", &n) != 1) return 0;
        // Reset the automaton: only the root exists and it ends no keyword.
        tot = 0, val[0] = 0;
        memset(trie[0], 0, sizeof trie[0]);
        for(int i = 1; i <= n; ++i){
            // Field width 54 = sizeof(key) - 1 keeps the read inside key[].
            if(scanf("%54s", key) != 1) return 0;
            insert(key);
        }
        getFail();
        // Field width 1000009 = MXL - 1 keeps the read inside txt[].
        if(scanf("%1000009s", txt) != 1) return 0;
        printf("%d\n", find());
    }
    return 0;
}