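/*
 * Problem: given a dictionary of words, count how many of them occur in a text.
 * Analysis: strings, Aho-Corasick (AC) automaton. This is a fairly plain application
 *           of the AC automaton; for details see the "AC automaton summary" post on
 *           this blog.
 * Note: each word is counted only once, no matter how often it occurs.
 */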
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* AC_DFA define */
#define nodesize 250010 // node pool capacity: dict and Q each hold nodesize+1 entries
#define dictsize 26 // alphabet size: the lowercase letters 'a'..'z'

/*
 * One state of the automaton (a trie node).
 */
typedef struct node1
{
    int flag;               // number of inserted words ending here; -1 once query() has counted the node
    node1* fail;            // failure link
    node1* next[dictsize];  // one edge per letter; setfail() fills the missing edges (trie graph)
}tnode;

tnode dict[nodesize+1];     // node pool; shared by every AC_DFA instance
tnode* Q[nodesize+1];       // BFS queue used by AC_DFA::setfail()
int ID[256];                // character code -> alphabet index, -1 for characters outside the alphabet

/*
 * Aho-Corasick automaton over 'a'..'z' that counts how many dictionary words
 * occur in a text.
 *
 * Usage: insert() every word, call setfail() exactly once, then query().
 * setfail() turns the trie into a full transition table, so insert() and a
 * second setfail() must not be called afterwards; call init() to start over.
 *
 * All instances allocate from the global dict pool starting at index 0, so
 * only one automaton can be in use at a time.
 */
class AC_DFA
{
private:
    int size;       // number of pool nodes handed out so far
    tnode* root;
public:
    AC_DFA() {
        makeID();
        init();
    }

    /* Builds the character -> index table; anything that is not a letter maps to -1. */
    void makeID() {
        memset( ID, -1, sizeof( ID ) );
        for ( int i = 0 ; i < dictsize ; ++ i )
            ID['a'+i] = i;
    }

    /*
     * Resets the automaton to a single empty root node.
     * Nodes are cleared one by one in newnode(), so the pool is not wiped here.
     */
    void init() {
        root = NULL; size = 0;
        root = newnode();
    }

    /*
     * Takes the next node from the pool, zeroes it and points its failure
     * link at the root. Terminates the program if the pool is exhausted,
     * because continuing would write past the end of dict.
     */
    tnode* newnode() {
        if ( size > nodesize ) {
            fprintf( stderr, "AC_DFA: node pool exhausted (%d nodes)\n", nodesize+1 );
            exit( EXIT_FAILURE );
        }
        tnode* fresh = &dict[size ++];
        memset( fresh, 0, sizeof( tnode ) );
        fresh->fail = root;
        return fresh;
    }

    /*
     * Adds word[0..l) to the dictionary. Inserting the same word again
     * raises its count. A word containing a character outside 'a'..'z'
     * cannot be represented and is ignored.
     */
    void insert( char* word, int l ) {
        // Validate first so a rejected word leaves no partial path behind.
        for ( int i = 0 ; i < l ; ++ i )
            if ( ID[static_cast<unsigned char>( word[i] )] < 0 ) return;

        tnode* now = root;
        for ( int i = 0 ; i < l ; ++ i ) {
            // unsigned char: a negative plain char must not become a negative index
            int c = ID[static_cast<unsigned char>( word[i] )];
            if ( !now->next[c] )
                now->next[c] = newnode();
            now = now->next[c];
        }
        now->flag ++;
    }

    /*
     * Computes the failure links breadth-first and fills every missing edge
     * with the edge of the failure state, so that query() can follow
     * next[] without ever falling back explicitly.
     */
    void setfail() {
        Q[0] = root; root->fail = NULL;
        for ( int head = 0, tail = 1 ; head < tail ; ++ head ) {
            tnode* now = Q[head];
            for ( int i = 0 ; i < dictsize ; ++ i ) {
                tnode* child = now->next[i];
                if ( child ) {
                    tnode* p = now->fail;
                    while ( p && !p->next[i] ) p = p->fail;
                    child->fail = p ? p->next[i] : root;
                    Q[tail ++] = child;
                } else {
                    // shallower states were processed earlier, so their edges are complete
                    now->next[i] = ( now == root ) ? root : now->fail->next[i];
                }
            }
        }
    }

    /*
     * Returns how many inserted words (duplicates counted separately) occur
     * at least once in line[0..L). Requires setfail() to have been called.
     *
     * Each node is counted once and then marked with flag = -1. The marker
     * must differ from 0: a state that ends no word can still have a
     * failure ancestor that does, so the walk may only stop at nodes that
     * were already visited. The marking makes the call destructive; a later
     * query on the same automaton reports only words not counted before.
     *
     * Characters outside 'a'..'z' cannot be part of a match and restart the
     * scan at the root.
     */
    int query( char* line, int L ) {
        int sum = 0;
        tnode* now = root;
        for ( int i = 0 ; i < L ; ++ i ) {
            int c = ID[static_cast<unsigned char>( line[i] )];
            if ( c < 0 ) {
                now = root;
                continue;
            }
            now = now->next[c];
            for ( tnode* temp = now ; temp && temp->flag != -1 ; temp = temp->fail ) {
                sum += temp->flag;
                temp->flag = -1;
            }
        }
        return sum;
    }
};
/* AC_DFA end */
char Word[51];          // one dictionary word: up to 50 characters plus the terminator
char Line[1000001];     // the text to search: up to 1000000 characters plus the terminator

/*
 * Reads test cases from standard input until it ends. Each input group
 * starts with the number of cases T; every case consists of the word count
 * N, N words and one text line. Prints, per case, the number of dictionary
 * words found in the text.
 *
 * Processing stops at end of input or at the first item that cannot be read.
 */
int main()
{
    int T,N;
    // Compare against 1: scanf returns 0 on malformed input, and ~0 would keep looping.
    while ( scanf("%d",&T) == 1 )
        for ( int t = 1 ; t <= T ; ++ t ) {
            AC_DFA ac;
            if ( scanf("%d",&N) != 1 ) return 0;
            for ( int i = 1 ; i <= N ; ++ i ) {
                // field width keeps the read inside Word
                if ( scanf("%50s",Word) != 1 ) return 0;
                ac.insert( Word, static_cast<int>( strlen( Word ) ) );
            }
            ac.setfail();
            // field width keeps the read inside Line
            if ( scanf("%1000000s",Line) != 1 ) return 0;
            printf("%d\n",ac.query( Line, static_cast<int>( strlen( Line ) ) ));
        }
    return 0;
}