AC自动机
一. 原理
AC自动机是在Trie树上构建的,比Trie树要多一个失配时的失败指针。
构建失败指针需要树的广度遍历。
每个节点(curr)的fail指针,是根据其父节点的fail指针构建的。
- 如果父节点(father)失败指针指向的节点,它(father.fail)的孩子里有当前节点(curr)的字符,则curr的失败指针指向father.fail的与curr字符匹配的孩子
- 如果父节点(father)失败指针指向的节点,它(father.fail)的孩子里没有当前节点(curr)的字符,则继续沿着father.fail.fail指针指向的方向移动,对新的节点重复检查其孩子里是否有curr对应的字符,直到找到匹配的孩子,或者要检查的节点变为根。(到达根时仍要先检查根的孩子:根有匹配的孩子时,curr的失败指针指向该孩子;否则curr的失败指针指向根)
- 如果curr是root的子节点(不是孙节点/重重孙节点等),则curr的失败节点直接指向根。
构建好后,AC自动机就有两种状态,两种转移:
状态:
- 根节点(起始状态)
- 非根节点
转移:
- 当前字符匹配(沿child指针转移)
- 当前字符不匹配(沿fail指针转移)
二. 代码
//trie tree
#include <iostream>
#include <queue>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
// One node of the trie that backs the Aho-Corasick automaton.
struct TrieT_node{
    bool mark;               // true when an inserted word ends exactly at this node
    TrieT_node* final;       // failure link; NULL until TrieT::build() has linked the node
    TrieT_node* father;      // parent node; a freshly constructed node is its own parent
    TrieT_node* childs[26];  // outgoing edges, one slot per letter 'a'..'z'
    int p;                   // slot of the edge father -> this node; -1 while there is none
    bool match;              // set by TrieT::build(): this node, or a node on its
                             // failure chain, ends an inserted word

    // Every member gets a defined value so that no code path can read garbage.
    TrieT_node() : mark(false), final(NULL), father(this), p(-1), match(false) {
        for (int i = 0; i < 26; i++) {
            childs[i] = NULL;
        }
    }
};

// Trie over the lowercase letters 'a'..'z' that can be turned into an
// Aho-Corasick automaton for multi-pattern substring search.
//
// The trie owns every node reachable from `root` and frees them in its
// destructor. Copying is disabled: a member-wise copy would share the heap
// nodes (double free) and its children would still point at the old root.
struct TrieT{
    TrieT_node root;

    TrieT() : linked(false) {
        root.final = &root;
    }

    // Frees all heap nodes bottom-up by walking the father links, so the
    // destructor needs neither recursion nor an allocation of its own.
    ~TrieT(){
        TrieT_node* node = &root;
        for (;;) {
            int i = 0;
            while (i < 26 && node->childs[i] == NULL) {
                i++;
            }
            if (i < 26) {
                node = node->childs[i];  // descend until a leaf is reached
                continue;
            }
            if (node == &root) {
                break;                   // root is a member, not a heap node
            }
            TrieT_node* parent = node->father;
            parent->childs[node->p] = NULL;
            delete node;
            node = parent;
        }
    }

    // Adds the word `s` to the trie.
    // Throws std::invalid_argument when `s` contains a character outside
    // 'a'..'z'; in that case the trie is left unchanged.
    // The failure links become stale; solve() rebuilds them on demand.
    void insert(const std::string& s){
        // Validate up front so a rejected word never leaves partial nodes behind.
        for (std::string::size_type i = 0; i < s.size(); i++) {
            if (slot(s[i]) < 0) {
                throw std::invalid_argument("TrieT::insert: only characters 'a'-'z' are supported");
            }
        }
        linked = false;
        TrieT_node* curr = &root;
        for (std::string::size_type i = 0; i < s.size(); i++) {
            const int p = slot(s[i]);
            if (curr->childs[p] == NULL) {
                TrieT_node* fresh = new TrieT_node();
                fresh->father = curr;
                fresh->p = p;
                curr->childs[p] = fresh;
            }
            curr = curr->childs[p];
        }
        curr->mark = true;
    }

    // Returns true when exactly the word `s` was inserted before.
    // A word with a character outside 'a'..'z' can never have been inserted.
    bool search(const std::string &s) const {
        const TrieT_node* curr = &root;
        for (std::string::size_type i = 0; i < s.size(); i++) {
            const int p = slot(s[i]);
            if (p < 0 || curr->childs[p] == NULL) {
                return false;
            }
            curr = curr->childs[p];
        }
        return curr->mark;
    }

    // Computes the failure link (`final`) and the `match` flag of every node,
    // breadth first. All links are recomputed from scratch, so calling this
    // again after further insert() calls yields a consistent automaton.
    void build(){
        root.final = &root;
        root.match = false;  // the empty word is never reported by solve()
        std::queue<TrieT_node*> pending;
        // Direct children of the root always fail back to the root.
        for (int i = 0; i < 26; i++) {
            TrieT_node* kid = root.childs[i];
            if (kid != NULL) {
                kid->final = &root;
                kid->match = kid->mark;
                pending.push(kid);
            }
        }
        while (!pending.empty()) {
            TrieT_node* curr = pending.front();
            pending.pop();
            for (int c = 0; c < 26; c++) {
                TrieT_node* kid = curr->childs[c];
                if (kid == NULL) {
                    continue;
                }
                // Follow the parent's failure chain until a node with an
                // outgoing edge `c` is found, or the root is reached.
                TrieT_node* probe = curr->final;
                while (probe != &root && probe->childs[c] == NULL) {
                    probe = probe->final;
                }
                kid->final = (probe->childs[c] != NULL) ? probe->childs[c] : &root;
                // The failure target is shallower than `kid`, so the breadth-first
                // order guarantees its flag is already final.
                kid->match = kid->mark || kid->final->match;
                pending.push(kid);
            }
        }
        linked = true;
    }

    // Returns true when at least one inserted, non-empty word occurs as a
    // substring of `s`. Rebuilds the failure links first if insert() was
    // called since the last build().
    bool solve(const std::string& s){
        if (!linked) {
            build();
        }
        TrieT_node* curr = &root;
        for (std::string::size_type i = 0; i < s.size(); i++) {
            const int p = slot(s[i]);
            if (p < 0) {
                // No word contains this character, so no match can span it.
                curr = &root;
                continue;
            }
            while (curr != &root && curr->childs[p] == NULL) {
                curr = curr->final;
            }
            if (curr->childs[p] != NULL) {
                curr = curr->childs[p];
                // `match` also covers words that end on the failure chain,
                // i.e. words that are a proper suffix of the current path.
                if (curr->match) {
                    return true;
                }
            }
        }
        return false;
    }

private:
    bool linked;  // true while final/match agree with the current trie contents

    // Declared but not defined: copying is disabled (see the struct comment).
    TrieT(const TrieT&);
    TrieT& operator=(const TrieT&);

    // Maps 'a'..'z' to 0..25 and every other character to -1.
    static int slot(char c){
        return (c >= 'a' && c <= 'z') ? (c - 'a') : -1;
    }
};
int main(){
TrieT t;
string s = "abc";
t.insert(s);
t.insert("bcf");
t.insert("abcfg");
t.build();
cout<<t.solve("abc")<<endl;
cout<<t.solve("saweq")<<endl;
cout<<t.solve("bcf")<<endl;
cout<<t.solve("ababc")<<endl;
cout<<t.solve("ab")<<endl;
// cout<<t.solve("ab")<<endl;
return 0;
}