Step1 Problem:
给你 n 个单词和 m 个文本串。对于每个文本串,需要找出哪些单词在其中出现过(按单词编号从小到大输出);最后输出至少包含一个单词的文本串的总个数。
Step2 Ideas:
本题卡内存:字典树不能用指针(链表式)动态分配结点,需要用静态数组模拟结点池。
Step3 Code:
#include<bits/stdc++.h>
using namespace std;
// Capacity of the trie node pool (number of `node` slots in the array `a`).
const int N = 100000 + 5;
// One trie / Aho-Corasick node, addressed by its index in the node pool.
struct node
{
    int data;       // not referenced by any code visible in this file
    int id;         // word number stored at this node; 0 means "no word ends here"
    int next[128];  // child index per 7-bit character code; -1 means "no child"
    int fail;       // index of the failure-link target; -1 means "none"
};
// Statically allocated node pool; `top` counts the slots already handed out.
node a[N];
int top = 0;

// Buffer that receives one text string at a time.
char s[10055];

// vis[k] is set once word number k has been found in the current text.
bool vis[505];
int creat_kong()
{
node *root = &a[top++];
root->id = 0;
for(int i = 0; i < 128; i++)
root->next[i] = -1;
root->fail = -1;
return top-1;
}
// Adds the NUL-terminated word `s` to the trie rooted at node index `root`
// and stores `id` in the node where the word ends.
//
// Each byte is converted through `unsigned char` before it is used as an
// index: where plain `char` is signed, a byte >= 0x80 would otherwise become
// a negative index into next[128]. A word containing such a byte cannot be
// represented in this 128-way trie, so it is rejected as a whole; validation
// happens before any node is created so a rejected word leaves no partial path.
void Insert(int root, char *s, int id)
{
    for (const char *c = s; *c != '\0'; ++c)
    {
        if (static_cast<unsigned char>(*c) >= 128) return;
    }

    int cur = root;
    for (const char *c = s; *c != '\0'; ++c)
    {
        const int ch = static_cast<unsigned char>(*c);
        // Create the missing child on demand, then descend into it.
        if (a[cur].next[ch] == -1) a[cur].next[ch] = creat_kong();
        cur = a[cur].next[ch];
    }
    a[cur].id = id;
}
// Computes the failure link of every node reachable from `root` by a
// breadth-first walk over the trie.
//
// Children of the root fail to the root. For any other child reached via
// character `ch`, the parent's failure chain is followed until a node with a
// `ch` child is found; that child becomes the link target. If the chain runs
// out (-1), the link falls back to the root.
void get_fail(int root)
{
    std::queue<int> pending;
    pending.push(root);

    while (!pending.empty())
    {
        const int cur = pending.front();
        pending.pop();

        for (int ch = 0; ch < 128; ++ch)
        {
            const int child = a[cur].next[ch];
            if (child == -1) continue;

            int link = root;
            if (cur != root)
            {
                int anc = a[cur].fail;
                while (anc != -1 && a[anc].next[ch] == -1) anc = a[anc].fail;
                if (anc != -1) link = a[anc].next[ch];
            }

            a[child].fail = link;
            pending.push(child);
        }
    }
}
// Set to 1 when a scan finds at least one word; starts out as 0.
int flag = 0;
// Scans the NUL-terminated text `s` with the automaton rooted at `root`.
// For every word found, vis[word id] is set to 1, and `flag` is set to 1 as
// soon as any word is found. Neither `vis` nor `flag` is cleared here.
//
// Each byte is converted through `unsigned char` before it is used as an
// index: where plain `char` is signed, a byte >= 0x80 would otherwise become
// a negative index into next[128]. Such a byte has no transition from any
// node, so it is handled as a mismatch that returns the scan to the root.
void mat(int root, char *s)
{
    int cur = root;
    for (const char *c = s; *c != '\0'; ++c)
    {
        const int ch = static_cast<unsigned char>(*c);
        if (ch >= 128)
        {
            cur = root;
            continue;
        }

        // Follow failure links until some node has a transition on `ch`.
        while (cur != root && a[cur].next[ch] == -1) cur = a[cur].fail;
        // After the loop a missing transition can only mean we are at the root.
        if (a[cur].next[ch] == -1) continue;
        cur = a[cur].next[ch];

        // Report every word that ends at the current position by walking the
        // failure chain back towards the root.
        for (int t = cur; t != root; t = a[t].fail)
        {
            const int word = a[t].id;
            if (!word) continue;
            // Already-marked word: stop early. This relies on vis[] being set
            // only by this loop, so the rest of the chain was walked when the
            // word was first marked.
            if (vis[word]) break;
            flag = 1;
            vis[word] = 1; // remember that this word occurred
        }
    }
}
// Reads n words and builds the automaton, then reads m texts. For every text
// that contains at least one word it prints "web <text number>:" followed by
// the numbers of the words found in increasing order, and finally prints the
// count of such texts as "total: <count>".
//
// Every scanf result is checked, and the %s conversions carry a field width
// matching their buffers (s1[250], s[10055]), so short or oversized input
// cannot leave variables uninitialised or overrun a buffer.
int main()
{
    int n = 0, m = 0;
    char s1[250];

    if (scanf("%d", &n) != 1) return 0;

    top = 0;
    int root = creat_kong();
    for (int i = 1; i <= n; i++)
    {
        if (scanf("%249s", s1) != 1) break;
        Insert(root, s1, i);
    }
    get_fail(root);

    if (scanf("%d", &m) != 1) m = 0;

    int ans = 0;
    for (int i = 1; i <= m; i++)
    {
        // Match state is per text: clear it before each scan.
        memset(vis, 0, sizeof(vis));
        flag = 0;
        if (scanf("%10054s", s) != 1) break;
        mat(root, s);
        if (flag) { // this text contains at least one word
            printf("web %d:", i);
            for (int w = 1; w <= n; w++)
                if (vis[w]) printf(" %d", w);
            printf("\n");
            ans++;
        }
    }
    printf("total: %d\n", ans);
    return 0;
}