题目: 中文题,给你一些病毒特征码,一个大的病毒串,问每个特征码出现的次数。
分析:多字符串匹配,AC自动机。数据规模较大,使用 AC自动机。
说明:注意不是大写字母的时候,直接返回 root。
/*
题目:多字符串匹配
分析:数据规模较大,使用 AC自动机
说明:注意不是答谢字母的时候,直接返回 root,否则会 RE
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char Word[1001][51];
char Line[2000005];
#define nodesize 30006
#define dictsize 26
typedef struct node1
{
int flag;
node1* fail;
node1* next[dictsize];
}tnode;
tnode dict[nodesize];
tnode* Q[nodesize];
int ID[128];
class AC_DFA
{
private:
int size;
tnode* root;
int numb[1001];
public:
AC_DFA() {makeID();init();}
void makeID() {
for (int i = 0 ; i < dictsize ; ++ i)
ID[i+'A'] = i;
}
void init() {
memset(dict, 0, sizeof(dict));
root=NULL;size=0;root=newnode();
}
tnode* newnode() {
dict[size].fail = root;
return &dict[size ++];
}
void insert(char* word, int id) {
tnode* now = root;
for (int i = 0 ; word[i] ; ++ i) {
if (!now->next[ID[word[i]]])
now->next[ID[word[i]]] = newnode();
now = now->next[ID[word[i]]];
}now->flag = id;
}
void setfail() {
Q[0] = root; root->fail = NULL;
for (int move = 0,save = 1 ; move < save ; ++ move) {
tnode* now = Q[move];
for (int i = 0 ; i < dictsize ; ++ i)
if (now->next[i]) {
tnode* p = now->fail;
while (p && !p->next[i]) p = p->fail;
now->next[i]->fail = p?p->next[i]:root;
Q[save ++] = now->next[i];
}else now->next[i] = now==root?root:now->fail->next[i];//构建 Trie图
}
}
int query(char* line, int N) {
memset(numb, 0, sizeof(numb));
tnode* now = root;
for (int i = 0 ; line[i] ; ++ i)
if (line[i] >= 'A' && line[i] <= 'Z') {
now = now->next[ID[line[i]]];
for (tnode* p = now ; p ; p = p->fail)
numb[p->flag] ++;
}else now = root;
for (int i = 1 ; i <= N ; ++ i)
if (numb[i])
printf("%s: %d\n",Word[i],numb[i]);
return 0;
}
};
/* AC_DFA end */
int main()
{
int N;
while (scanf("%d",&N) != EOF) {
AC_DFA ac;
for (int i = 1 ; i <= N ; ++ i) {
scanf("%s",Word[i]);
ac.insert(Word[i], i);
}
ac.setfail();
scanf("%s",&Line);
ac.query(Line, N);
}
return 0;
}