个人微信公众号:程序员宅急送
一、算法介绍
上一周我们讲了Trie树,这次的AC自动机是Trie树的一个改进版,也是一个多模式串匹配算法。
区别在于:Trie树——在一堆模式串中找寻符合条件的前缀(例如搜索引擎的关键词弹出选项);
AC自动机——在主串中查找所有出现的模式串:每读入一个字符,就找出以该字符结尾(即当前已读文本的后缀)的所有模式串。
二、算法讲解
1、首先我们构建一个如图的Trie树
如下图
2、在学习KMP算法的时候,我们说到了好前缀的前缀子串和后缀子串匹配。
在Trie树中,我们借鉴这个思路,如果出现了不匹配字符,我们就去Trie树中找“可匹配的最长后缀”。
在KMP算法中,我们有next数组;在Trie树中,我们用“失败指针”。
如下图
三、代码
AC自动机的关键在于两个部分。1、构建Trie树。2、构建失败指针
#include <cstdlib>
#include <iostream>
#include <map>
#include <queue>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
/**
 * One node of the Aho-Corasick trie.
 *
 * Nodes are allocated and released by acAutoMachine; every pointer stored
 * here is either nullptr or refers to a node of the same trie.
 */
typedef struct _acNode
{
    char data = '\0';                    // character on the edge leading into this node
    struct _acNode* children[26] = {};   // one slot per lowercase letter 'a'..'z'; nullptr when absent
    struct _acNode* fail = nullptr;      // failure link, filled in by acAutoMachine::buildFailPoint()
    bool isEnding = false;               // true when some pattern ends at this node
    int index = 0;                       // zero-based depth below the root; a pattern ending here has length index + 1
} acNode;

/**
 * Aho-Corasick automaton over the lowercase alphabet 'a'..'z'.
 *
 * Usage: add every pattern with createTrie(), then call match() on a text.
 * buildFailPoint() may still be called explicitly, but match() rebuilds the
 * failure links on its own whenever patterns were added since the last build.
 *
 * The automaton owns its nodes and releases them in the destructor, so it is
 * deliberately non-copyable (a shallow copy would free the same nodes twice).
 */
class acAutoMachine
{
public:
    /// Creates an automaton that contains only the root node.
    acAutoMachine()
    {
        root = new acNode;
        root->data = '/';
        // The root fails to itself, so walking failure links always ends at the root.
        root->fail = root;
    }

    /// Releases every node of the trie.
    ~acAutoMachine()
    {
        // The failure links are no longer needed here, so they are reused as the
        // "next" pointer of an intrusive stack. This keeps the destructor free of
        // allocations and of recursion (patterns may be arbitrarily long).
        acNode* pending = root;
        root->fail = nullptr;
        while (pending != nullptr)
        {
            acNode* node = pending;
            pending = node->fail;
            for (acNode* child : node->children)
            {
                if (child != nullptr)
                {
                    child->fail = pending;
                    pending = child;
                }
            }
            delete node;
        }
    }

    acAutoMachine(const acAutoMachine&) = delete;
    acAutoMachine& operator=(const acAutoMachine&) = delete;

    /**
     * Inserts one pattern into the trie.
     *
     * @param src pattern made of the characters 'a'..'z'; an empty pattern is ignored.
     * @throws std::invalid_argument if src contains any other character. The
     *         trie is left untouched in that case.
     */
    void createTrie(const std::string& src)
    {
        if (src.empty())
            return;
        // Validate before touching the trie so a rejected pattern leaves no partial path behind.
        for (char ch : src)
        {
            if (!isSupported(ch))
                throw std::invalid_argument("acAutoMachine::createTrie: pattern may only contain 'a'..'z'");
        }

        acNode* cur = root;
        for (std::string::size_type i = 0; i < src.size(); ++i)
        {
            acNode*& next = cur->children[src[i] - 'a'];
            if (next == nullptr)
            {
                next = new acNode;
                next->data = src[i];
                next->index = static_cast<int>(i);
            }
            cur = next;
        }
        cur->isEnding = true;
        // New nodes carry no failure link yet; match() must rebuild before using them.
        failLinksStale = true;
    }

    /**
     * Computes the failure link of every node with a breadth-first traversal.
     *
     * The failure link of a node points to the node that spells the longest
     * proper suffix of its path which is also a path from the root. All links
     * are recomputed from scratch, so the call may be repeated at any time.
     */
    void buildFailPoint()
    {
        std::queue<acNode*> bfs;
        bfs.push(root);
        while (!bfs.empty())
        {
            acNode* parent = bfs.front();
            bfs.pop();
            for (int i = 0; i < kAlphabetSize; ++i)
            {
                acNode* child = parent->children[i];
                if (child == nullptr)
                    continue;
                if (parent == root)
                {
                    // Depth-1 nodes must fail to the root: looking the letter up
                    // below root->fail (== root) would find the child itself.
                    child->fail = root;
                }
                else
                {
                    // Follow the parent's failure chain until some node has an
                    // outgoing edge for the same letter, or the root is reached.
                    acNode* q = parent->fail;
                    while (q != root && q->children[i] == nullptr)
                        q = q->fail;
                    acNode* target = q->children[i];
                    child->fail = (target != nullptr) ? target : root;
                }
                bfs.push(child);
            }
        }
        failLinksStale = false;
    }

    /**
     * Scans src and prints one line to std::cout for every pattern occurrence,
     * in the form "index<start> length = <len>", where <start> is the
     * zero-based offset of the occurrence in src.
     *
     * A character outside 'a'..'z' cannot be part of any pattern, so it simply
     * resets the automaton to the root.
     *
     * @param src text to search.
     */
    void match(const std::string& src)
    {
        if (failLinksStale)
            buildFailPoint();

        acNode* state = root;
        for (std::string::size_type i = 0; i < src.size(); ++i)
        {
            const char ch = src[i];
            if (!isSupported(ch))
            {
                state = root;
                continue;
            }
            const int slot = ch - 'a';
            // On a mismatch, fall back along the failure links.
            while (state != root && state->children[slot] == nullptr)
                state = state->fail;
            // Nothing matches even at the root: stay there and start over.
            if (state->children[slot] != nullptr)
                state = state->children[slot];

            // Every ending node on the failure chain is a pattern that ends at position i.
            for (acNode* hit = state; hit != root; hit = hit->fail)
            {
                if (hit->isEnding)
                {
                    const int pos = static_cast<int>(i) - hit->index;
                    std::cout << "index" << pos << " length = " << hit->index + 1 << std::endl;
                }
            }
        }
    }

private:
    static constexpr int kAlphabetSize = 26;
    static_assert(sizeof(acNode::children) / sizeof(acNode*) == kAlphabetSize,
                  "acNode::children must have one slot per supported letter");

    /// True when ch maps to a valid slot of acNode::children.
    static bool isSupported(char ch)
    {
        return ch >= 'a' && ch <= 'z';
    }

    acNode* root;
    bool failLinksStale = false;   // true when patterns were added after the last buildFailPoint()
};
/**
 * Demo: builds an automaton for the patterns "msb" and "sb" and searches the
 * text "msbde". Both patterns end at offset 2, so match() reports the longer
 * one first ("index0 length = 3") and then the shorter one ("index1 length = 2").
 */
int main()
{
    acAutoMachine ac;
    ac.createTrie("msb");
    //ac.createTrie("cb");
    ac.createTrie("sb");
    ac.buildFailPoint();
    ac.match("msbde");
#ifdef _WIN32
    // "pause" is a Windows shell command; it keeps the console window open
    // when the program is started from an IDE. Other platforms have no such
    // command, so the call is skipped there.
    std::system("pause");
#endif
    return 0;
}
打印结果
index0 length = 3
index1 length = 2
请按任意键继续. . .