ac——自动机（1.5）

最新推荐文章于 2024-04-27 00:31:58 发布

Probie Tao

最新推荐文章于 2024-04-27 00:31:58 发布

阅读量185

点赞数

分类专栏： # 字符串 (学习) 文章标签： ac——自动机

本文链接：https://blog.csdn.net/qq_43824791/article/details/90273412

版权

字符串 (学习) 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

前：

1、预备知识KMP算法，字典树。（完）

2、什么是ac自动机（完）

3、 ac自动机算法的实际作用

4、算法实现（完）

5、代码讲解（完）

一、预备知识：

要想学习ac自动机 必须先掌握kmp算法，因为ac自动机的思想 与kmp算法的思想相似。
此外， 还要掌握字典树， 因为ac自动机要用到字典树， 用字典树来储存单词（字符串）。

二、什么是ac自动机、 ac自动机的作用

ac自动机是一种多模匹配算法。什么是多模匹配呢？对于KMP算法，一般是给你两个字符串 str1、str2，让你去判断 str2 是否在 str1 中出现过；这种判断一个字符串是否在另一个字符串中出现过的问题，叫做单模匹配。而ac自动机处理的是：给你N个单词（字符串），再给你一个字符串str，让你去判断这N个单词中有几个在str中出现过，这就是多模匹配。
时间复杂度：
　假设有N个模式串，平均长度为L；文章长度为M。
　建立Trie树：O(NL)
　 建立fail指针：O(NL)
　 模式匹配：O(M*L) 所以，总时间复杂度为:O( (N+M)*L )。来源—>

{开始我认为这种题目用字典树就行了，然而并不是这么简单，因为给你str 不是被空格分割开的多个单词，是没有分割开的。当然你可以通过暴力将其分割开再用字典树做，但是ac自动机是一个很好的算法}。

三、算法实现

https://blog.csdn.net/creatorx/article/details/71100840 代码借鉴

https://blog.csdn.net/mobius_strip/article/details/22549517 ac自动机讲解

http://acm.hdu.edu.cn/showproblem.php?pid=2222 模板题链接

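/***********************************
    Core idea: reuse suffixes. When the current letter fails to match,
    treat the longest suffix of what has been matched so far as the
    prefix of another word, and continue matching from there.
************************************/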
#include<bits/stdc++.h>
using namespace std;
const int maxn = 10000005;  /* size of the text buffer `pattern`, in chars */
const int MAX = 10000000;   /* size of the BFS queue `q`, in node pointers */
int cnt;                    /* match total: reset by main(), increased by ac_automation() */

/***********树*****************/
/** One trie node; the same nodes carry the fail links of the automaton. */
struct node
{
    struct node *next[26];  /* child for each letter 'a'..'z'; null when there is none */
    struct node *fail;      /* fail link, assigned by build_fail_pointer() */
    int sum;                /* words ending here; ac_automation() sets it to -1 once counted */
};

node *root;             /** root of the trie; main() allocates a new one for every test case **/
/**********************/

/* Input buffer for one dictionary word; main() reads into it and hands it to Insert(). */
char key[70];
/*********** queue: array-backed FIFO used by build_fail_pointer() ************/
node *q[MAX];
/* head indexes the next entry to pop, tail the next free slot. */
int head,tail;

/********* new node ***************/

node *newnode;// scratch pointer: Insert() stores each freshly allocated node here
/********* text to be searched; main() reads it and passes it to ac_automation() **************/
char pattern[maxn];
/***********************/

/* Number of dictionary words in the current test case (read by main()). */
int N;
/************树的插入函数***********/
void Insert(char *s)
{
    node *p = root;
    for(int i = 0; s[i]; i++)
    {
        int x = s[i] - 'a';
        if(p->next[x] == NULL)
        {
            newnode=(struct node *)malloc(sizeof(struct node));
            for(int j=0;j<26;j++) newnode->next[j] = 0;
            newnode->sum = 0;newnode->fail = 0;
            p->next[x]=newnode;
        }
        p = p->next[x];
    }
    p->sum++;
}

/**************建立错误指针**************/
void build_fail_pointer()
{
    /******初始化队列， 并将树根压入队列***/
    head = 0;
    tail = 0;
    q[tail++] = root;
    /*******************************/
    node *p;
    node *temp;
    while(head < tail)/**** 直到队列为空****/
    {
        temp = q[head++];/**头部压出队列***/
        
        for(int i = 0; i <= 25; i++)// 26 个字母
        {
            if(temp->next[i])/*** 如果该字符不为空进行该操作**/
            {
                if(temp == root) /***如果 该节点为 根节点进行该操作———— 如果该单词的第一个字母ch1匹配， 但是第二个字母ch2不匹配， 就重新回到根，从ch2开始作为头在匹配**/
                {
                    temp->next[i]->fail = root;/****与根节点相连的节点 其错误节点 其错误指针指向根**/
                }/***********不是根节点进行别的操作****/
                else
                {
                    p = temp->fail;/****p 指向 temp 的 错误指针 ***/
                    while(p)/****如果 p 不为空 即temp的错误指针不为空****/
                    {
                        if(p->next[i])/****p的 下一个字符不为空******/
                        {
                            temp->next[i]->fail = p->next[i];/****如果temp 处失配 就要到最大的后缀**/
                        
                            break;/*******如果找到了最大的后缀 就跳出 结束***/
                        }
                        p = p->fail;/***********如果最 下一个字母为空的条件进行该操作 ， 该操作说明 没有已改后缀为前缀的单词， 所以要缩小后缀，*********/
                    }
                    if(p == NULL) temp->next[i]->fail = root;/***如果开始p为空后者没找到该单词的后缀 为单词的单词 就指向root,如果上一个单词都为空 fail 就指向 树根了， 没有以该后缀为前缀的单词， 所以只能 缩小后缀****/
                }
                q[tail++] = temp->next[i];/***压入该节点 ，  目的时 为了 给他的 儿子节点的fail指针 找的位置 **/
            }
        }
    }
}

/*******这是 使用ac自动机函数****/
/**
 * Scans the text and adds to the global `cnt` the number of dictionary
 * words that occur in it; each word is counted at most once per build.
 *
 * Precondition: build_fail_pointer() has been run on the current trie,
 * so every non-root node has a non-null fail link.
 *
 * Side effect: the `sum` of every node reached through a match is set to
 * -1, so the trie cannot be reused for a second text without rebuilding.
 *
 * @param ch  NUL-terminated text. Characters outside 'a'..'z' cannot be
 *            part of any word; they reset the automaton to the root
 *            instead of indexing outside `next[26]`.
 */
void ac_automation(char *ch)
{
    if(ch == NULL || root == NULL) return;

    node *p = root;
    for(int i = 0; ch[i] != '\0'; i++)
    {
        if(ch[i] < 'a' || ch[i] > 'z')
        {
            p = root;   /* no word spans a non-letter: start over */
            continue;
        }
        int x = ch[i] - 'a';

        /* On a mismatch fall back along fail links until some state has an
           x-transition or we are back at the root. */
        while(p->next[x] == NULL && p != root) p = p->fail;

        p = p->next[x];
        if(p == NULL) p = root;   /* even the root has no x-child */

        /* Every node on the fail chain of p is a suffix of the text read so
           far; collect the words ending there. A negative sum means this
           node, and hence the rest of its chain, was already collected. */
        for(node *temp = p; temp != root && temp->sum >= 0; temp = temp->fail)
        {
            cnt += temp->sum;
            temp->sum = -1;
        }
    }
}
int main()
{
    int T;
    scanf("%d",&T);
    while(T--)
    {
        root=(struct node *)malloc(sizeof(struct node));
        for(int j=0;j<26;j++) root->next[j] = 0;
        root->fail = 0;
        root->sum = 0;
        scanf("%d",&N);
        getchar();
        for(int i = 1; i <= N; i++)
        {
            gets(key);
            Insert(key);
        }
        gets(pattern);// 串
        cnt = 0;
        build_fail_pointer();
        ac_automation(pattern);
        printf("%d\n",cnt);
    }
    return 0;
}

如果上面那份代码看不懂，可以看下面这份：这是重新解读并修改后的版本：

#include <iostream>
#include<bits/stdc++.h>
using namespace std;

/** Trie node used by this listing's Init/Insert/build_fail_pointer/ac_automation. */
struct node
{
    node *next[26];   /* child links; ac_automation() indexes them with (letter - 'a') */
    int sum;          /* incremented by Insert() where a word ends; set to -1 once counted */
    node *fail;       /* fail link, assigned by build_fail_pointer() */
};
/** Handle type used by every function of this listing: a pointer to a node. */
using Node = node *;
/******初始化函数和插入函数*********/
/**
 * Resets a node to the empty state: no children, no words ending here,
 * no fail link.
 *
 * The parameter is a `Node` (pointer to node). The previous `Node *` was a
 * pointer to a pointer, so `p->next` did not compile, and the caller in
 * Insert() already passes a `Node`.
 *
 * @param p  node to reset; a null pointer is ignored.
 */
void Init(Node p)
{
    if(p == NULL) return;
    for(int i = 0; i < 26; i++)
    {
        p->next[i] = NULL;
    }
    p->sum = 0;
    p->fail = NULL;
}
void Insert(char *str, Node Root)
{
    Node p = Root;
    int len = strlen(str);
    for(int i = 0;i < len;i++)
    {
        if(p->next[i] == NULL)
        {
            p->next[i] = (Node)malloc(sizeof(node));
            Init(p->next[i]);
            
        }
        p = p->next[i];
    }
    p->sum++;
}
/**************fail 指针建立******************/
/**
 * Assigns the fail link of every node in the trie rooted at `Root`,
 * visiting nodes breadth-first so that a parent's fail link is set before
 * its children are handled. `Root->fail` itself is left untouched.
 *
 * Only existing children are processed. The previous version dereferenced
 * and enqueued all 26 slots of every node, including the null ones.
 *
 * @param Root  root of the trie; a null pointer is ignored.
 */
void build_fail_pointer(Node Root)
{
    if(Root == NULL) return;

    std::queue<Node> que;
    que.push(Root);
    while(!que.empty())
    {
        Node temp = que.front();
        que.pop();
        for(int i = 0; i < 26; i++)
        {
            Node child = temp->next[i];
            if(child == NULL) continue;   /* no such letter below temp */

            /* Default target: the root. It stays that way for the root's own
               children (a single letter has no proper suffix) and whenever
               no node on the fail chain has an i-child. */
            Node target = Root;
            if(temp != Root)
            {
                /* Climb the parent's fail chain, i.e. try ever shorter
                   suffixes, until one of them can be extended by letter i. */
                for(Node p = temp->fail; p != NULL; p = p->fail)
                {
                    if(p->next[i] != NULL)
                    {
                        target = p->next[i];
                        break;
                    }
                }
            }
            child->fail = target;
            que.push(child);
        }
    }
}
/******************ac_自动机使用函数***************/
void ac_automation(char *str, Node Root)
{
    node *p = Root;
    int len = strlen(str);
    for(int i = 0;i <len;i++)// 以 str[i]结尾的 单词.
    {
        int ch = str[i] - 'a';
            while(p->next[ch] == NULL && p != NULL) p = p->fail;/**假设上次 i = j, 这次 i = j+1 如果 接着上次结尾 有后缀（只要有就行 不管是不是单词终点）， 就继续, 没有就缩减后缀（此处可以当成后缀）**/
        p = p->next[ch];// p要跳到该字母 所在的节点
        if(!p) p = Root;//未解读
        Node temp = p;
        while(temp!= Root)
        {
            if(temp->sum >= 0)
            {
                cnt+=temp->sum;
                temp->sum = -1;// 表示 把该单词删除
            }
            else break; //小于 就表示被访问过， 已经删除了。
            
            temp = temp->fail;// 缩减后缀
        
        }
    }
}
/*********************************/

/* Placeholder entry point: this listing defines the automaton routines
   above but does not call any of them from here. */
int main()
{
    return 0; // no input is read and nothing is built or matched
}