AC自动机

最新推荐文章于 2021-03-28 21:28:00 发布

zlppassion

最新推荐文章于 2021-03-28 21:28:00 发布

阅读量101

点赞数 1

分类专栏： string

本文链接：https://blog.csdn.net/zlppassion/article/details/84887640

版权

string 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

模板题 HDU 2222：http://acm.hdu.edu.cn/showproblem.php?pid=2222

#include <iostream>
#include<cstring>
#include<cstdio>
#include<cstdlib>
#include<queue>
#include<algorithm>
using namespace std;
// One node of the trie that backs the Aho-Corasick automaton.
struct node
{
    int cnt;          // number of keywords ending exactly at this node; query() later sets it to -1 as a "visited" mark
    node *fail;       // failure link; stays NULL until build_ac_fail() assigns it
    node *nextt[130]; // children, indexed directly by the character code of the edge

    // Creates an empty node: zero count, no failure link, no children.
    // nextt() value-initializes the array, so every child pointer starts out null.
    // (The previous memset(nextt,NULL,...) passed a null-pointer constant as the
    // int fill value, which is only a warning on some compilers and an error on others.)
    node() : cnt(0), fail(NULL), nextt()
    {
    }
};
char s[1000005];// text to search in; filled by main and scanned by query
char keyword[55];// buffer holding one keyword (pattern) while main reads it
int head,tail;// not referenced by the live code shown here; only the commented-out array-queue build_ac_fail mentions them
node *root;// root node of the trie; allocated in main and used by build_trie
/*void init(node *root)
{
    root->cnt=0;
    root->fail=NULL;
    for(int i=0;i<130;i++)
        root->nextt[i]=NULL;
}*/
void build_trie(char *keyword)
{
   // node *p,*q;
   node *p=root;
    int i,v;
    int len=strlen(keyword);
    for(i=0;i<len;i++)
    {
       // v=keyword[i]-'a';
       v=keyword[i];//注意这里，想要这样用的话，把nextt数组开大一点即可，例如130
        if(p->nextt[v]==NULL)
        {
            //q=(struct ndoe*)malloc(sizeof(node));
 
           // q=(struct node*)malloc(sizeof(node));
           // q=new node;
            //init(q);
            p->nextt[v]=new node();//节点链接
        }
        p=p->nextt[v];
    }
    p->cnt++;//单词最后一个节点cnt++,代表一个单词
}
// Computes the failure link of every trie node with a breadth-first traversal.
// root->fail is NULL; children of root fail to root; any other node fails to the
// first node on its parent's failure chain that has a child for the same
// character, or to root when no such node exists.
void build_ac_fail(node *root)
{
    std::queue<node *> pending;
    root->fail = NULL;
    pending.push(root);
    while (!pending.empty())
    {
        node *parent = pending.front();
        pending.pop();
        for (int c = 0; c < 130; ++c)
        {
            node *child = parent->nextt[c];
            if (child == NULL)
                continue;

            // Default target is root; it is overridden only when the parent's
            // failure chain offers a node with an outgoing edge for c.
            node *target = root;
            if (parent != root)
            {
                for (node *f = parent->fail; f != NULL; f = f->fail)
                {
                    if (f->nextt[c] != NULL)
                    {
                        target = f->nextt[c];
                        break;
                    }
                }
            }
            child->fail = target;
            pending.push(child);
        }
    }
}
/*void build_ac_fail(node *root)
{
    head=0,tail=0;//队列头、尾指针
    queuet[head++]=root;//先将root入队
    while(head!=tail)
    {
        node *p=NULL;
        node *temp=queuet[tail++];//弹出队头结点
        for(int i=0;i<130;i++)
        {
            if(temp->nextt[i]!=NULL)//找到实际存在的字符节点
            {
                //temp->nextt[i] 为该节点，temp为其父节点
                if(temp==root)//若是第一层中的字符节点，则把该节点指针指向root
                    temp->nextt[i]->fail=root;
                else
                    {
                        //依次回溯该节点的父节点的失败指针直到某节点的nextt[i]与该节点相同
                        //则把该节点的失败指针指向该nextt[i]节点
                        //若回溯到root都没有找到，则该节点的失败指针指向root
                        p=temp->fail;//把该节点的父节点的失败指针给p
                        while(p!=NULL)
                        {
                            if(p->nextt[i]!=NULL)
                            {
                                temp->nextt[i]->fail=p->nextt[i];
                                break;
                            }
                            p=p->fail;
                        }
                        //让该节点的失败指针也指向root
                        if(p==NULL)
                            temp->nextt[i]->fail=root;
                    }
                    queuet[head++]=temp->nextt[i];//每处理一个节点，都让该节点依次入队
            }
        }
    }
*/
// Scans the global text `s` through the automaton and returns how many
// inserted keywords occur in it (the sum of cnt over all matched end nodes).
// A node's cnt is added once and then overwritten with -1, so each node is
// counted at most once and the automaton cannot be reused for a second query.
int query(node *root)
{
    int matched = 0;
    node *state = root;
    for (const char *ch = s; *ch != '\0'; ++ch)
    {
        const int c = *ch;
        // Follow failure links until some state has an edge for c (or we are at root).
        while (state != root && state->nextt[c] == NULL)
            state = state->fail;
        state = state->nextt[c];
        if (state == NULL) // root itself has no edge for c: restart from root
            state = root;

        // Collect every keyword that ends here by walking the failure chain;
        // stop at root or at the first node that was already collected.
        for (node *hit = state; hit != root && hit->cnt >= 0; hit = hit->fail)
        {
            matched += hit->cnt;
            hit->cnt = -1;
        }
    }
    return matched;
}
int main()
{
   int T,n;
   cin>>T;
   while(T--)
   {
       /*root=(struct node*)malloc(sizeof(node));
       init(root);*/
       root=new node();
       cin>>n;
       for(int i=0;i<n;i++)
       {
           scanf("\n%s",keyword);
           build_trie(keyword);
       }
       build_ac_fail(root);
       scanf("\n%s",s);
       printf("%d\n",query(root));
   }
    return 0;
}

HDU 2896：http://acm.hdu.edu.cn/showproblem.php?pid=2896

/*按网站编号从小到大，依次按如下格式输出带病毒的网站编号及其包含的病毒编号，每行一个含毒网站的信息。
web 网站编号: 病毒编号 病毒编号 …
冒号后有一个空格，病毒编号按从小到大排列，两个病毒编号之间用一个空格隔开，如果一个网站包含病毒，病毒数不会超过3个。
最后一行输出统计信息，如下格式
total: 带病毒网站数
冒号后有一个空格。
Sample Input
3
aaa
bbb
ccc
2
aaabbbccc
bbaacc
Sample Output
web 1: 1 2 3
total: 1*/
#include <iostream>
#include<algorithm>
#include<cstring>
#include<cstdio>
#include<queue>
using namespace std;
const int maxn=10010;// size of the text buffer s declared in main. Author's note: in HDU 2896 my initialization went wrong
// One node of the trie that backs the Aho-Corasick automaton.
struct node
{
    int cnt;          // number of keywords ending exactly at this node
    int code;         // id passed to build_trie for the keyword ending here; 0 means none
    node *fail;       // failure link; stays NULL until build_ac_fail() assigns it
    node *nextt[130]; // children, indexed directly by the character code of the edge

    // Creates an empty node: zero count, no code, no failure link, no children.
    // nextt() value-initializes the array, so every child pointer starts out null.
    // (The previous memset(nextt,NULL,...) passed a null-pointer constant as the
    // int fill value, which is only a warning on some compilers and an error on others.)
    node() : cnt(0), code(0), fail(NULL), nextt()
    {
    }
};
 
//需要查找的单词
 
int ans[5];// virus ids found in the current text; written by query, then sorted and printed by main
 
/*void init(node *root)//我用这个函数，提交超内存
{
    root->cnt=0;
    root->code=0;
    root->fail=NULL;
    for(int i=0; i<130; i++)//注意这里是大小写字母均存在，用130 这个范围,以后如果要避免这种情况，直接用构造函数，sizeof(nextt)
        root->nextt[i]=NULL;
}*/
// Inserts `keyword` into the trie under `root` and tags its end node with id `x`.
// Characters are used directly as child indices (no bounds check is done here),
// so they must fit inside the nextt array. The end node's code is overwritten
// with x and its cnt is incremented.
void build_trie(node*root,char *keyword,int x)
{
    node *cur = root;
    for (const char *ch = keyword; *ch != '\0'; ++ch)
    {
        const int slot = *ch;
        node *&child = cur->nextt[slot];
        if (!child)
            child = new node(); // create the missing edge on demand
        cur = child;
    }
    cur->code = x;
    cur->cnt += 1;
}
// Assigns failure links to all trie nodes, level by level (BFS from root).
// root->fail is NULL and depth-1 nodes fail to root. A deeper node reached over
// character c fails to the c-child of the nearest node on its parent's failure
// chain that has such a child; if the chain runs out, it fails to root.
void build_ac_fail(node *root)
{
    std::queue<node *> work;
    root->fail = NULL;
    work.push(root);
    while (!work.empty())
    {
        node *const parent = work.front();
        work.pop();
        for (int c = 0; c < 130; ++c)
        {
            node *const child = parent->nextt[c];
            if (!child)
                continue;

            node *link = root; // used for depth-1 nodes and when the chain is exhausted
            if (parent != root)
            {
                node *f = parent->fail;
                while (f != NULL && f->nextt[c] == NULL)
                    f = f->fail;
                if (f != NULL)
                    link = f->nextt[c];
            }
            child->fail = link;
            work.push(child);
        }
    }
}
// Runs `str` through the automaton and stores the ids of all distinct keywords
// found into the global `ans`, returning how many ids were stored.
//
// Fixes over the previous version:
//  * the failure chain is walked all the way to root, so a shorter keyword that
//    is only reachable past a node without a code is no longer missed;
//  * each id is stored once per call, so a keyword that occurs repeatedly no
//    longer produces duplicate output or writes past the end of `ans`;
//  * the counter advances by exactly one per stored id instead of by p->cnt.
// Bytes that cannot index nextt are treated as "no edge" and reset the state to root.
int query(node *root,char *str)
{
    const int capacity = sizeof(ans) / sizeof(ans[0]);
    const int width = sizeof(root->nextt) / sizeof(root->nextt[0]);
    int found = 0;
    node *p = root;
    for (int i = 0; str[i]; i++)
    {
        const int index = static_cast<unsigned char>(str[i]);
        if (index >= width)
        {
            p = root;
            continue;
        }
        // Follow failure links until some state has an edge for this character.
        while (p != root && p->nextt[index] == NULL)
            p = p->fail;
        p = p->nextt[index];
        if (p == NULL)
            p = root;

        // Every node on the failure chain is a suffix of what has been read so
        // far; report each one that ends a keyword.
        for (node *temp = p; temp != root; temp = temp->fail)
        {
            if (temp->code == 0)
                continue;
            bool seen = false;
            for (int k = 0; k < found; k++)
            {
                if (ans[k] == temp->code)
                {
                    seen = true;
                    break;
                }
            }
            if (!seen && found < capacity)
                ans[found++] = temp->code;
        }
    }
    return found;
}
 
int main()
{
    int T;
    char s[maxn];//主字符串
    char keyword[205];
    while(scanf("%d",&T)!=EOF)
    {
       node * root=new node();
       /* node *root=new node;
        init(root);*/
        for(int i=1; i<=T; i++)
        {
            scanf("%s",keyword);
 
            build_trie(root,keyword,i);
        }
        build_ac_fail(root);
        int sum=0;
        int M;
        scanf("%d",&M);
        for(int i=1; i<=M; i++)
        {
            scanf("%s",s);
            int flag=0;
            int num=query(root,s);
           if(num)
            {
                flag=true;
                printf("web %d:",i);
                sort(ans,ans+num);
                for(int j=0; j<num; j++)
                    printf(" %d",ans[j]);
                printf("\n");
            }
            if(flag)
                sum++;
        }
        printf("total: %d\n",sum);
    }
 
    return 0;

}

HDU 3065：http://acm.hdu.edu.cn/showproblem.php?pid=3065

/*Input
第一行，一个整数N（1<=N<=1000），表示病毒特征码的个数。
接下来N行，每行表示一个病毒特征码，特征码字符串长度在1—50之间，并且只包含“英文大写字符”。任意两个病毒特征码，不会完全相同。
在这之后一行，表示“万恶之源”网站源码，源码字符串长度在2000000之内。字符串中字符都是ASCII码可见字符（不包括回车）。
Output
按以下格式每行一个，输出每个病毒出现次数。未出现的病毒不需要输出。
病毒特征码: 出现次数
冒号后有一个空格，按病毒特征码的输入顺序进行输出。
Sample Input
3
AA
BB
CC
ooxxCC%dAAAoen....END
Sample Output
AA: 2
CC: 1



Hint
提示：
题目描述中没有被提及的所有情况都应该进行考虑。比如两个病毒特征码可能有相互包含或者有重叠的特征码段。
计数策略也可一定程度上从Sample中推测。

       */
#include <iostream>
#include<queue>
#include<cstring>
#include<cstdio>
#include<cstdlib>
#include<map>
#include<string>
using namespace std;
const int maxn=2000005;// size of the text buffer s declared in main
//map<string,int> mp;
// One node of the trie that backs the Aho-Corasick automaton.
struct node
{
    int cnt;         // incremented by build_trie at a keyword's end node, and again by query on every match
    int index;       // number passed to build_trie for the keyword ending here; 0 means none
    node *fail;      // failure link; stays NULL until build_ac_fail() assigns it
    node *nextt[27]; // children indexed by (character - 'A'); slot 26 is what query maps every other character to

    // Creates an empty node: zero count, no index, no failure link, no children.
    // nextt() value-initializes the array, so every child pointer starts out null.
    // (The previous memset(nextt,NULL,...) passed a null-pointer constant as the
    // int fill value, which is only a warning on some compilers and an error on others.)
    node() : cnt(0), index(0), fail(NULL), nextt()
    {
    }
};
//char code[1005][55];
//char s[maxn];
//char keyword[55];
int ans[1005];// ans[i] is the occurrence count recorded by query for pattern number i; cleared and printed by main
//node *root;
// Inserts `keyword` into the trie under `root` and tags its end node with `inde`.
// Each character is mapped to a child slot by subtracting 'A' (no bounds check
// is done here). The end node's index is overwritten with inde and its cnt is
// incremented.
void build_trie(node *root,char keyword[],int inde)
{
    node *cur = root;
    for (const char *ch = keyword; *ch != '\0'; ++ch)
    {
        const int slot = *ch - 'A';
        node *&child = cur->nextt[slot];
        if (!child)
            child = new node(); // create the missing edge on demand
        cur = child;
    }
    cur->index = inde;
    cur->cnt += 1;
}
// Builds the failure links of the automaton with a BFS over the trie.
// root->fail is NULL; children of root fail to root; any other node fails to
// the matching child of the closest node on its parent's failure chain that
// has one, and to root otherwise.
void build_ac_fail(node *root)
{
    std::queue<node *> frontier;
    root->fail = NULL;
    frontier.push(root);
    while (!frontier.empty())
    {
        node *const parent = frontier.front();
        frontier.pop();
        for (int c = 0; c < 27; ++c)
        {
            node *const child = parent->nextt[c];
            if (!child)
                continue;

            node *link = root; // used for depth-1 nodes and when the chain is exhausted
            if (parent != root)
            {
                node *f = parent->fail;
                while (f != NULL && f->nextt[c] == NULL)
                    f = f->fail;
                if (f != NULL)
                    link = f->nextt[c];
            }
            child->fail = link;
            frontier.push(child);
        }
    }
}
// Runs the text `s` through the automaton and records per-pattern match counts
// in the global `ans`.
// Side effects: every character of `s` outside 'A'..'Z' is overwritten in place
// with 'Z'+1 so that it maps to the spare child slot 26; the cnt of each matched
// end node is incremented on every hit, and ans[index] receives the value cnt
// had just before that increment.
void query(node *root,char * s)
{
    node *state = root;
    for (char *ch = s; *ch != '\0'; ++ch)
    {
        // Patterns are upper-case only, so any other character can share one slot.
        if (!(*ch >= 'A' && *ch <= 'Z'))
            *ch = 'Z' + 1;
        const int c = *ch - 'A';

        // Follow failure links until some state has an edge for c (or we are at root).
        while (state != root && state->nextt[c] == NULL)
            state = state->fail;
        state = state->nextt[c];
        if (state == NULL)
            state = root;

        // Visit every suffix state; each one that ends a pattern counts as a match.
        for (node *hit = state; hit != root; hit = hit->fail)
        {
            if (hit->index != 0)
            {
                ans[hit->index] = hit->cnt;
                hit->cnt += 1;
            }
        }
    }
}
int main()
{
    int T;
    char s[maxn];
    char keyword[1005][55];
    while(scanf("%d",&T)!=EOF)
    {
        node *root=new node();
        memset(ans,0,sizeof(ans));
        for(int i=1;i<=T;i++)
        {
            scanf("%s",keyword[i]);
            build_trie(root,keyword[i],i);
        }
        build_ac_fail(root);
        scanf("%s",s);
        query(root,s);
        for(int i=1;i<=T;i++)
        {
            if(ans[i]!=0)
                cout<<keyword[i]<<": "<<ans[i]<<endl;

        }

    }
    return 0;
}