AC自动机(多模式串匹配)

示范

虚线部分代表fail指针

AC自动机模板题链接
静态数组版本(推荐:代码量少、易写,运行也更快):

#include <bits/stdc++.h>
using namespace std;

/*
 * Aho-Corasick automaton (multi-pattern matching) over the alphabet
 * 'a'..'z', stored in fixed-size arrays.
 *
 * Node 0 is both the root and the "no node" marker. Usage:
 *   init(); insert(p1); insert(p2); ...; build_fail(); query(text);
 *
 * The object is roughly 56 MB (N * 26 ints), so it must live in static
 * storage (global or function-local static), never on the stack.
 * init() has to be called before each new pattern set; nodes are zeroed
 * lazily when they are created, so init() costs O(1) instead of a full
 * memset of every array.
 */
class Aho_Corasick
{
public:
	/* Empties the automaton: only the root is reset here, every other node
	   is reset at the moment insert() allocates it. */
	void init()
	{
		idx = 0;
		built = false;
		reset_node(0);
	}

	/*
	 * Adds one pattern to the trie. Inserting the same pattern twice makes
	 * it count twice in query().
	 *
	 * Throws std::logic_error when called after build_fail() (the child
	 * table then also holds goto links, so walking it would hit unrelated
	 * nodes), std::invalid_argument when the pattern contains a character
	 * outside 'a'..'z', and std::length_error when the node pool (N nodes)
	 * is exhausted.
	 */
	void insert(const char* str)
	{
		if (built)
			throw std::logic_error("Aho_Corasick::insert: call init() before inserting after build_fail()");

		const int len = static_cast<int>(strlen(str));
		// Validate first so that a rejected pattern leaves the trie untouched.
		for (int i = 0; i < len; i++)
		{
			if (!in_alphabet(str[i]))
				throw std::invalid_argument("Aho_Corasick::insert: pattern character outside 'a'..'z'");
		}

		int p = 0;
		for (int i = 0; i < len; i++)
		{
			const int u = str[i] - 'a';
			if (!son[p][u])
			{
				if (idx + 1 >= N)
					throw std::length_error("Aho_Corasick::insert: node pool exhausted");
				reset_node(++idx);
				son[p][u] = idx;
			}
			p = son[p][u];
		}
		cnt[p]++;
	}

	/*
	 * Computes the fail links breadth-first and fills every missing edge
	 * with the target its fail chain would reach, so that query() makes
	 * exactly one table lookup per text character.
	 *
	 * Calling it again without an init() in between is a no-op: after the
	 * first run the table no longer distinguishes trie edges from goto
	 * links, and a second pass would corrupt the fail links.
	 */
	void build_fail()
	{
		if (built) return;
		built = true;

		std::queue<int> q;
		// Depth-1 nodes keep fail == 0 (root), set when they were created.
		for (int i = 0; i < childCnt; i++) {
			if (son[0][i]) q.push(son[0][i]);
		}
		while (!q.empty())
		{
			const int t = q.front(); q.pop();
			for (int i = 0; i < childCnt; i++)
			{
				if (!son[t][i])
				{
					// Missing edge: borrow the transition of the fail node.
					son[t][i] = son[fail[t]][i];
				}
				else
				{
					fail[son[t][i]] = son[fail[t]][i];
					q.push(son[t][i]);
				}
			}
		}
	}

	/*
	 * Returns how many inserted patterns occur in str (each inserted
	 * pattern is counted at most once, duplicates inserted k times count
	 * k times). Requires build_fail() to have been called.
	 *
	 * The call is destructive: counted nodes are marked with -1, so a
	 * second query only reports patterns not seen before. A text character
	 * outside 'a'..'z' cannot be part of any pattern and simply sends the
	 * automaton back to the root.
	 */
	int query(const char* str)
	{
		int p = 0, ans = 0;
		for (; *str; ++str)
		{
			if (!in_alphabet(*str))
			{
				p = 0;
				continue;
			}
			p = son[p][*str - 'a'];
			// Walk the fail chain; a node marked -1 was already visited, and
			// so was everything behind it, so the walk can stop there.
			for (int tmp = p; tmp && cnt[tmp] >= 0; tmp = fail[tmp])
			{
				ans += cnt[tmp];
				cnt[tmp] = -1;
			}
		}
		return ans;
	}

private:
	static const int childCnt = 26;
	static const int N = 500005;

	/* True when c is one of the childCnt letters starting at 'a'. */
	static bool in_alphabet(char c)
	{
		return c >= 'a' && c - 'a' < childCnt;
	}

	/* Clears the edges, counter and fail link of one node. */
	void reset_node(int node)
	{
		memset(son[node], 0, sizeof son[node]);
		cnt[node] = 0;
		fail[node] = 0;
	}

	int son[N][childCnt], cnt[N], idx;  // 0 is both the root and "null"
	int fail[N];
	bool built;                         // set by build_fail(), cleared by init()
};

const int maxn = 1e7 + 5;
char key[70];
char pattern[maxn];
int N;
Aho_Corasick ac;

/*
 * Driver: reads T test cases. Each case is a count N, N keywords and one
 * text; the number reported by ac.query() for that text is printed on its
 * own line.
 *
 * Every scanf result is checked, and the %s conversions carry a field
 * width one less than the buffer size (key[70] -> 69, pattern[maxn] ->
 * 10000004) so oversized tokens cannot overflow the buffers. Keep the
 * widths in sync with the array sizes above. No getchar() is needed after
 * the integers because %s skips leading whitespace itself.
 */
int main()
{
	int T;
	if (scanf("%d", &T) != 1) return 0;
	while (T-- > 0)  // "> 0" keeps a negative T from looping
	{
		ac.init();
		bool ok = scanf("%d", &N) == 1;
		for (int i = 1; ok && i <= N; i++)
		{
			ok = scanf("%69s", key) == 1;
			if (ok) ac.insert(key);
		}
		// Stop on truncated input instead of matching against stale data.
		if (!ok || scanf("%10000004s", pattern) != 1) break;
		ac.build_fail();
		printf("%d\n", ac.query(pattern));
	}
	return 0;
}

指针优化版本:

#include <bits/stdc++.h>
using namespace std;

/*
 * Aho-Corasick automaton (multi-pattern matching) over the alphabet
 * 'a'..'z', built from linked nodes. build_fail() fills every missing
 * edge with a goto link, so query() does one pointer hop per character.
 *
 * Usage: insert() all patterns, then build_fail(), then query().
 *
 * All nodes live in a std::deque owned by the object and are released
 * with it. After build_fail() the child pointers form a graph with
 * shared targets, so freeing by walking the children would double-free;
 * the deque sidesteps that. The object is not copyable because a copy's
 * pointers would refer to the source object's nodes.
 */
class Aho_Corasick
{
public:
	Aho_Corasick() : root(NULL), built(false)
	{
		root = new_node();
	}

	/*
	 * Adds one pattern to the trie. Inserting the same pattern twice makes
	 * it count twice in query().
	 *
	 * Throws std::logic_error when called after the automaton was built
	 * (child pointers then include goto links, so the walk would end on
	 * unrelated nodes) and std::invalid_argument when the pattern contains
	 * a character outside 'a'..'z'.
	 */
	void insert(const char* word)
	{
		if (built)
			throw std::logic_error("Aho_Corasick::insert: automaton already built");

		const size_t len = strlen(word);  // hoisted: strlen in the loop condition is quadratic
		// Validate first so that a rejected pattern leaves the trie untouched.
		for (size_t i = 0; i < len; i++)
		{
			if (!in_alphabet(word[i]))
				throw std::invalid_argument("Aho_Corasick::insert: pattern character outside 'a'..'z'");
		}

		Node* cur = root;
		for (size_t i = 0; i < len; i++)
		{
			const int c = word[i] - 'a';
			if (!cur->child[c])  // create the node on first use
				cur->child[c] = new_node();
			cur = cur->child[c];
		}
		cur->sum++;
	}

	/*
	 * Computes the fail links breadth-first and replaces every missing
	 * child pointer with the node its fail chain would lead to.
	 *
	 * A second call is a no-op: once the goto links exist they cannot be
	 * told apart from trie edges, and rebuilding would never terminate.
	 */
	void build_fail()
	{
		if (built) return;
		built = true;

		std::queue<Node*> q;
		// Children of the root fail to the root; missing root edges loop back to it.
		for (int i = 0; i < childCnt; i++)
		{
			if (!root->child[i])
			{
				root->child[i] = root;
				continue;
			}
			root->child[i]->fail = root;
			q.push(root->child[i]);
		}

		while (!q.empty())
		{
			Node* cur = q.front();
			q.pop();

			for (int i = 0; i < childCnt; i++)
			{
				if (cur->child[i])
				{
					cur->child[i]->fail = cur->fail->child[i];
					q.push(cur->child[i]);
				}
				else
				{
					// Missing edge: borrow the transition of the fail node.
					cur->child[i] = cur->fail->child[i];
				}
			}
		}
	}

	/*
	 * Returns how many inserted patterns occur in ch (each inserted
	 * pattern is counted at most once, duplicates inserted k times count
	 * k times). Builds the automaton first if build_fail() has not run.
	 *
	 * The call is destructive: counted nodes are marked with -1, so a
	 * second query only reports patterns not seen before. A text character
	 * outside 'a'..'z' cannot be part of any pattern and sends the
	 * automaton back to the root.
	 */
	int query(const char* ch)
	{
		build_fail();  // no-op when already built

		int ans = 0;
		Node* p = root;
		for (; *ch; ++ch)
		{
			if (!in_alphabet(*ch))
			{
				p = root;
				continue;
			}
			p = p->child[*ch - 'a'];
			// Walk the fail chain; a node marked -1 was already visited, and
			// so was everything behind it, so the walk can stop there.
			for (Node* tmp = p; tmp != root && tmp->sum >= 0; tmp = tmp->fail)
			{
				ans += tmp->sum;
				tmp->sum = -1;
			}
		}
		return ans;
	}

private:
	static const int childCnt = 26;
	struct Node
	{
		Node* fail;        // node of the longest proper suffix that is also a trie prefix
		int sum;           // number of patterns ending here; -1 once counted by query()
		Node* child[childCnt];
		Node() : fail(NULL), sum(0), child() {}  // child() value-initializes every slot to null
	};

	// Not copyable: declared but intentionally left undefined.
	Aho_Corasick(const Aho_Corasick&);
	Aho_Corasick& operator=(const Aho_Corasick&);

	/* True when c is one of the childCnt letters starting at 'a'. */
	static bool in_alphabet(char c)
	{
		return c >= 'a' && c - 'a' < childCnt;
	}

	/* Allocates a blank node; deque::push_back keeps earlier node addresses valid. */
	Node* new_node()
	{
		nodes.push_back(Node());
		return &nodes.back();
	}

	std::deque<Node> nodes;  // owns every node, including the root
	Node* root;
	bool built;              // true once build_fail() has run
};

const int maxn = 1e7 + 5;
char key[70];
char pattern[maxn];
int N;

/*
 * Driver: reads T test cases. Each case is a count N, N keywords and one
 * text; a fresh automaton is created per case and the number reported by
 * its query() for that text is printed on its own line.
 *
 * Every scanf result is checked, and the %s conversions carry a field
 * width one less than the buffer size (key[70] -> 69, pattern[maxn] ->
 * 10000004) so oversized tokens cannot overflow the buffers. Keep the
 * widths in sync with the array sizes above. No getchar() is needed after
 * the integers because %s skips leading whitespace itself.
 */
int main()
{
	int T;
	if (scanf("%d", &T) != 1) return 0;
	while (T-- > 0)  // "> 0" keeps a negative T from looping
	{
		Aho_Corasick ac;
		bool ok = scanf("%d", &N) == 1;
		for (int i = 1; ok && i <= N; i++)
		{
			ok = scanf("%69s", key) == 1;
			if (ok) ac.insert(key);
		}
		// Stop on truncated input instead of matching against stale data.
		if (!ok || scanf("%10000004s", pattern) != 1) break;
		ac.build_fail();
		printf("%d\n", ac.query(pattern));
	}
	return 0;
}

指针无优化版本:

#include <bits/stdc++.h>
using namespace std;

/*
 * Aho-Corasick automaton (multi-pattern matching) over the alphabet
 * 'a'..'z', built from linked nodes. This variant keeps the trie as a
 * plain tree: missing edges stay null and query() follows fail links
 * explicitly whenever the current node has no matching child.
 *
 * Usage: insert() all patterns, then build_fail(), then query().
 *
 * All nodes live in a std::deque owned by the object and are released
 * with it. The object is not copyable because a copy's pointers would
 * refer to the source object's nodes.
 */
class Aho_Corasick
{
public:
	Aho_Corasick() : root(NULL), built(false)
	{
		root = new_node();
	}

	/*
	 * Adds one pattern to the trie. Inserting the same pattern twice makes
	 * it count twice in query(). Fail links become stale, so the next
	 * query() rebuilds them.
	 *
	 * Throws std::invalid_argument when the pattern contains a character
	 * outside 'a'..'z'.
	 */
	void insert(const char* word)
	{
		const size_t len = strlen(word);  // hoisted: strlen in the loop condition is quadratic
		// Validate first so that a rejected pattern leaves the trie untouched.
		for (size_t i = 0; i < len; i++)
		{
			if (!in_alphabet(word[i]))
				throw std::invalid_argument("Aho_Corasick::insert: pattern character outside 'a'..'z'");
		}

		Node* cur = root;
		for (size_t i = 0; i < len; i++)
		{
			const int c = word[i] - 'a';
			if (!cur->child[c])  // create the node on first use
				cur->child[c] = new_node();
			cur = cur->child[c];
		}
		cur->sum++;
		built = false;
	}

	/*
	 * Computes the fail link of every node breadth-first.
	 *
	 * Rule: for a child reached over letter i, start at the parent's fail
	 * node and keep following fail links until a node with a child over i
	 * is found; that child is the fail target. If the chain runs out (the
	 * root's fail link is null), the fail target is the root.
	 */
	void build_fail()
	{
		std::queue<Node*> q;
		// Children of the root always fail to the root.
		for (int i = 0; i < childCnt; i++)
		{
			if (!root->child[i]) continue;
			root->child[i]->fail = root;
			q.push(root->child[i]);
		}

		while (!q.empty())
		{
			Node* cur = q.front();
			q.pop();

			for (int i = 0; i < childCnt; i++)
			{
				Node* next = cur->child[i];
				if (!next) continue;

				Node* p = cur->fail;
				while (p && !p->child[i]) p = p->fail;
				next->fail = p ? p->child[i] : root;
				q.push(next);
			}
		}
		built = true;
	}

	/*
	 * Returns how many inserted patterns occur in ch (each inserted
	 * pattern is counted at most once, duplicates inserted k times count
	 * k times). Rebuilds the fail links first when they are missing or
	 * stale.
	 *
	 * The call is destructive: counted nodes are marked with -1, so a
	 * second query only reports patterns not seen before. A text character
	 * outside 'a'..'z' cannot be part of any pattern and sends the
	 * automaton back to the root.
	 */
	int query(const char* ch)
	{
		if (!built) build_fail();

		int ans = 0;
		Node* p = root;
		for (; *ch; ++ch)
		{
			if (!in_alphabet(*ch))
			{
				p = root;
				continue;
			}
			const int v = *ch - 'a';
			// No edge for this letter: fall back along fail links until one
			// exists or the root is reached.
			while (!p->child[v] && p != root) p = p->fail;
			p = p->child[v];
			if (!p) p = root;

			// Walk the fail chain; a node marked -1 was already visited, and
			// so was everything behind it, so the walk can stop there.
			for (Node* tmp = p; tmp != root && tmp->sum >= 0; tmp = tmp->fail)
			{
				ans += tmp->sum;
				tmp->sum = -1;  // prevents counting the same pattern twice
			}
		}
		return ans;
	}

private:
	static const int childCnt = 26;
	struct Node
	{
		Node* fail;        // node of the longest proper suffix that is also a trie prefix
		int sum;           // number of patterns ending here; -1 once counted by query()
		Node* child[childCnt];
		Node() : fail(NULL), sum(0), child() {}  // child() value-initializes every slot to null
	};

	// Not copyable: declared but intentionally left undefined.
	Aho_Corasick(const Aho_Corasick&);
	Aho_Corasick& operator=(const Aho_Corasick&);

	/* True when c is one of the childCnt letters starting at 'a'. */
	static bool in_alphabet(char c)
	{
		return c >= 'a' && c - 'a' < childCnt;
	}

	/* Allocates a blank node; deque::push_back keeps earlier node addresses valid. */
	Node* new_node()
	{
		nodes.push_back(Node());
		return &nodes.back();
	}

	std::deque<Node> nodes;  // owns every node, including the root
	Node* root;
	bool built;              // false whenever fail links are missing or stale
};

const int maxn = 1e7 + 5;
char key[70];
char pattern[maxn];
int N;

/*
 * Driver: reads T test cases. Each case is a count N, N keywords and one
 * text; a fresh automaton is created per case and the number reported by
 * its query() for that text is printed on its own line.
 *
 * Every scanf result is checked, and the %s conversions carry a field
 * width one less than the buffer size (key[70] -> 69, pattern[maxn] ->
 * 10000004) so oversized tokens cannot overflow the buffers. Keep the
 * widths in sync with the array sizes above. No getchar() is needed after
 * the integers because %s skips leading whitespace itself.
 */
int main()
{
	int T;
	if (scanf("%d", &T) != 1) return 0;
	while (T-- > 0)  // "> 0" keeps a negative T from looping
	{
		Aho_Corasick ac;
		bool ok = scanf("%d", &N) == 1;
		for (int i = 1; ok && i <= N; i++)
		{
			ok = scanf("%69s", key) == 1;
			if (ok) ac.insert(key);
		}
		// Stop on truncated input instead of matching against stale data.
		if (!ok || scanf("%10000004s", pattern) != 1) break;
		ac.build_fail();
		printf("%d\n", ac.query(pattern));
	}
	return 0;
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值