后缀自动机总结(SAM)

本文档介绍了如何使用广义后缀自动机(SAM)解决字符串问题,包括在JSOI2012的'玄武密码'中寻找最长匹配子串,以及在CF802I 'FakeNews'中的子串计数问题。通过实例展示了如何构建SAM并利用其高效地处理字符串操作。
摘要由CSDN通过智能技术生成

前言

队伍在字符串方面比较薄弱,刚好字符串的内容相对来说不算多。所以拿出时间来学习一下字符串方面的算法。

P5231 [JSOI2012]玄武密码

题目链接:[JSOI2012]玄武密码
题目大意:给定一个母串 $S$,给出 $m$ 段文字,对每一段文字求其最长的前缀,使得该前缀是 $S$ 的子串。
数据范围:$1\le n\le 10^7,\ 1\le m\le 10^5,\ 1\le |s_i|\le 100$
题解:$SAM$ 裸题。对 $S$ 建 $SAM$,然后把每一个 $s_i$ 放在 $SAM$ 上面跑,一直到不能匹配就行了。
AC代码:

#include<bits/stdc++.h>

#define ld long double
#define ll long long
using namespace std;
// Fast integer reader built on getchar().
// Skips every character that is not a digit (remembering whether a '-' was
// seen), then accumulates the following run of decimal digits into x.
// The character is held in an int so that EOF can be told apart from data:
// the previous version looped forever in the skip phase once the input was
// exhausted. If the input ends before any digit is found, x is set to 0.
template<class T>
void read(T& x)
{
	T res = 0, f = 1;
	int c = getchar();
	while (c != EOF && !isdigit(c)) {
		if (c == '-') f = -1;
		c = getchar();
	}
	while (c != EOF && isdigit(c)) {
		res = res * 10 + (c - '0');
		c = getchar();
	}
	x = res * f;
}
const ll N = 20000000 + 10;
const int mod = 1e9 + 7;

// Encodes a direction letter as a transition index:
// 'E' -> 0, 'S' -> 1, 'W' -> 2, every other character -> 3.
int num(char c)
{
	switch (c)
	{
	case 'E': return 0;
	case 'S': return 1;
	case 'W': return 2;
	default:  return 3;
	}
}
// One automaton state.
struct node
{
	int nxt[4];	// outgoing transition per letter index (0 = no transition)
	int len;	// length value stored for the state (set in add)
	int fa;		// link to another state (0 = none)
}nod[N];
// State 1 is the initial state; index 0 is used as "no state".
int last = 1;	// state reached by the whole text inserted so far
int tot = 1;	// number of states allocated so far
void add(int w)
{
	int p = last; int now = last = ++tot;
	nod[now].len = nod[p].len + 1;
	for (; p && !nod[p].nxt[w]; p = nod[p].fa)nod[p].nxt[w] = now;
	if (!p)nod[now].fa = 1;
	else
	{
		int q = nod[p].nxt[w];
		if (nod[q].len == nod[p].len + 1)nod[now].fa = q;
		else
		{
			int nq = ++tot; nod[nq] = nod[q]; nod[nq].len = nod[p].len + 1;
			nod[q].fa = nod[now].fa = nq;
			for (; p && nod[p].nxt[w] == q; p = nod[p].fa)nod[p].nxt[w] = nq;
		}
	}
}
int n, m;
char s[N];
int main()
{
	//ios::sync_with_stdio(false);
#ifndef ONLINE_JUDGE
	freopen("test.in", "r", stdin);
#endif // ONLINE_JUDGE
	read(n); read(m);
	scanf("%s", s + 1);
	for (int i = 1; i <= n; i++)add(num(s[i]));
	for (int i = 1; i <= m; i++)
	{
		scanf("%s", s + 1); int len = strlen(s + 1);
		int now = 1,ans=len;
		for (int j = 1; j <= len; j++)
		{
			if (nod[now].nxt[num(s[j])])
			{
				now = nod[now].nxt[num(s[j])];
			}
			else
			{
				ans = j-1; break;
			}
		}
		printf("%d\n", ans);
	}

	return 0;
}

CF802I Fake News (hard)

题目链接:CF802I Fake News (hard)
题目大意:$T$ 组数据,对于字符串 $s$ 求 $\sum_p cnt(s,p)^2$,其中 $cnt(s,p)$ 表示子串 $p$ 在 $s$ 中的出现次数。
数据范围:$|s|\le 10^5,\ T\le 10$
题解:这道题也是比较容易的。在 $SAM$ 中每一个子串出现的次数为其所在节点 $endpos$ 集合的大小,也即该节点在 $parent$ 树上的子树中终结节点的个数。建好 $SAM$ 后再 $dfs$ 统计一下即可。
AC代码:

#include<bits/stdc++.h>

#define ld long double
#define ll long long
using namespace std;
// Fast integer reader built on getchar().
// Skips every character that is not a digit (remembering whether a '-' was
// seen), then accumulates the following run of decimal digits into x.
// The character is held in an int so that EOF can be told apart from data:
// the previous version looped forever in the skip phase once the input was
// exhausted. If the input ends before any digit is found, x is set to 0.
template<class T>
void read(T& x)
{
	T res = 0, f = 1;
	int c = getchar();
	while (c != EOF && !isdigit(c)) {
		if (c == '-') f = -1;
		c = getchar();
	}
	while (c != EOF && isdigit(c)) {
		res = res * 10 + (c - '0');
		c = getchar();
	}
	x = res * f;
}
const ll N = 200000 + 10;
const int mod = 1e9 + 7;

// One automaton state.
struct node
{
	int nxt[26];	// outgoing transition per letter (0 = no transition)
	int fa;		// link to another state (0 = none)
	int len;	// length value stored for the state (set in add)

	// Resets the state to "no transitions, no link, length 0".
	void init()
	{
		for (int c = 0; c < 26; ++c) nxt[c] = 0;
		fa = 0;
		len = 0;
	}
}nod[N];
// State 1 is the initial state; index 0 is used as "no state".
int last = 1;	// state reached by the whole text inserted so far
int tot = 1;	// number of states allocated so far
int siz[N];	// per-state counter: 1 for states created directly by add, summed by dfs
void add(int w)
{
	int p = last; int now = last = ++tot; nod[tot].init(); siz[now] = 1;
	nod[now].len = nod[p].len + 1;
	for (; p && !nod[p].nxt[w]; p = nod[p].fa)nod[p].nxt[w] = now;
	if (!p)nod[now].fa = 1;
	else
	{
		int q = nod[p].nxt[w];
		if (nod[q].len == nod[p].len + 1)nod[now].fa = q;
		else
		{
			int nq = ++tot;  nod[nq] = nod[q];
			nod[nq].len = nod[p].len + 1;
			nod[q].fa = nod[now].fa = nq;
			for (; p && nod[p].nxt[w] == q; p = nod[p].fa)nod[p].nxt[w] = nq;
		}
	}
}
int t;
char s[N];
vector<int>p[N];
void dfs(int u)
{
	for (auto to : p[u])dfs(to), siz[u] += siz[to];
}
int main()
{
	//ios::sync_with_stdio(false);
#ifndef ONLINE_JUDGE
	freopen("test.in", "r", stdin);
#endif // ONLINE_JUDGE

	read(t);
	while (t--)
	{
		scanf("%s", s + 1);
		for (int i = 1; i <= tot; i++)siz[i] = 0,p[i].clear();
		tot = last = 1; nod[tot].init();
		int n = strlen(s + 1);
		for (int i = 1; i <= n; i++)add(s[i] - 'a');
		for (int i = 2; i <= tot; i++)p[nod[i].fa].push_back(i);
		dfs(1);
		ll ans = 0;
		for (int i = 1; i <= tot; i++)
		{
			int len = nod[i].len - nod[nod[i].fa].len;
			ans += 1ll * len * siz[i] * siz[i];
		}
		printf("%lld\n", ans);
	}
	return 0;
}

P6139 【模板】广义后缀自动机(广义 SAM)

题目链接:【模板】广义后缀自动机(广义 SAM)
题目大意:给定n个串,问不同子串个数。
题解:广义 SAM,只需在插入每一个新串之前将 $last$ 置为 1。
数据范围:$1\le n\le 4\times 10^5,\ 1\le \sum |s_i|\le 10^6$
AC代码:

#include<bits/stdc++.h>

#define ld long double
#define ll long long
using namespace std;
// Fast integer reader built on getchar().
// Skips every character that is not a digit (remembering whether a '-' was
// seen), then accumulates the following run of decimal digits into x.
// The character is held in an int so that EOF can be told apart from data:
// the previous version looped forever in the skip phase once the input was
// exhausted. If the input ends before any digit is found, x is set to 0.
template<class T>
void read(T& x)
{
	T res = 0, f = 1;
	int c = getchar();
	while (c != EOF && !isdigit(c)) {
		if (c == '-') f = -1;
		c = getchar();
	}
	while (c != EOF && isdigit(c)) {
		res = res * 10 + (c - '0');
		c = getchar();
	}
	x = res * f;
}
const ll N = 2000000 + 10;
const int mod = 1e9 + 7;

// One automaton state.
struct node
{
	int nxt[26];	// outgoing transition per letter (0 = no transition)
	int fa;		// link to another state (0 = none)
	int len;	// length value stored for the state (set in add)
}nod[N];
// State 1 is the initial state; index 0 is used as "no state".
int last = 1;	// state the next add() extends from
int tot = 1;	// number of states allocated so far
void add(int w)
{
	int p = last; int now = last = ++tot;
	nod[now].len = nod[p].len + 1;
	for (; p && !nod[p].nxt[w]; p = nod[p].fa)nod[p].nxt[w] = now;
	if (!p)nod[now].fa = 1;
	else
	{
		int q = nod[p].nxt[w];
		if (nod[q].len == nod[p].len + 1)nod[now].fa = q;
		else
		{
			int nq = ++tot; nod[nq] = nod[q];
			nod[nq].len = nod[p].len + 1;
			nod[q].fa = nod[now].fa = nq;
			for (; p && nod[p].nxt[w] == q; p = nod[p].fa)nod[p].nxt[w] = nq;
		}
	}
}
int n;
char s[N];
int main()
{
	//ios::sync_with_stdio(false);
#ifndef ONLINE_JUDGE
	freopen("test.in", "r", stdin);
#endif // ONLINE_JUDGE
	read(n);
	for (int i = 1; i <= n; i++, last = 1)
	{
		scanf("%s", s + 1);
		int len = strlen(s + 1);
		for (int j = 1; j <= len; j++)
		{
			add(s[j] - 'a');
		}
	}
	ll ans = 0;
	for (int i = 1; i <= tot; i++)ans += nod[i].len - nod[nod[i].fa].len;
	printf("%lld\n", ans);

	return 0;
}

SP8093 JZPGYZ - Sevenk Love Oimaster

题目链接:JZPGYZ - Sevenk Love Oimaster
题目大意:给定 $n$ 个模板串,以及 $m$ 个查询串,依次查询每一个查询串是多少个模板串的子串。
数据范围:$n\le 10000,\ q\le 60000,\ \sum|s_i|\le 100000,\ \sum|q_i|\le 360000$
题解:广义后缀自动机+树状数组。对模板串建广义 $SAM$,然后将每个模板串放到 $SAM$ 上跑,给经过的节点染上该串的颜色,题目就转换为求 $parent$ 树上某个子树内的颜色个数。$dfs$ 序+树状数组可以很好实现。码量较大,是个好题。
AC代码:

#include<bits/stdc++.h>

#define ld long double
#define ll long long
using namespace std;
// Fast integer reader built on getchar().
// Skips every character that is not a digit (remembering whether a '-' was
// seen), then accumulates the following run of decimal digits into x.
// The character is held in an int so that EOF can be told apart from data:
// the previous version looped forever in the skip phase once the input was
// exhausted. If the input ends before any digit is found, x is set to 0.
template<class T>
void read(T& x)
{
	T res = 0, f = 1;
	int c = getchar();
	while (c != EOF && !isdigit(c)) {
		if (c == '-') f = -1;
		c = getchar();
	}
	while (c != EOF && isdigit(c)) {
		res = res * 10 + (c - '0');
		c = getchar();
	}
	x = res * f;
}
const ll N = 1000000 + 10;
const int mod = 1e9 + 7;
// One automaton state.
struct node
{
	int nxt[26];	// outgoing transition per letter (0 = no transition)
	int fa;		// link to another state (0 = none)
	int len;	// length value stored for the state (set in add)
}nod[N];
// State 1 is the initial state; index 0 is used as "no state".
int tot = 1;	// number of states allocated so far
// Extends the automaton with letter index w after state last and returns the
// state that represents the extended string. Unlike the single-string
// version, an existing w-transition out of last is reused (cloning its
// target when the len values do not line up) instead of allocating a new
// state.
int add(int w, int last)
{
	int cur = last;

	const int existing = nod[cur].nxt[w];
	if (existing)
	{
		if (nod[existing].len == nod[cur].len + 1)
			return existing;

		// Transition exists but leads to a state with a larger len: split it.
		const int split = ++tot;
		nod[split] = nod[existing];
		nod[split].len = nod[cur].len + 1;
		nod[existing].fa = split;
		while (cur && nod[cur].nxt[w] == existing)
		{
			nod[cur].nxt[w] = split;
			cur = nod[cur].fa;
		}
		return split;
	}

	// No transition yet: ordinary extension with a brand-new state.
	const int fresh = ++tot;
	nod[fresh].len = nod[cur].len + 1;
	while (cur && !nod[cur].nxt[w])
	{
		nod[cur].nxt[w] = fresh;
		cur = nod[cur].fa;
	}
	if (!cur)
	{
		nod[fresh].fa = 1;
		return fresh;
	}

	const int target = nod[cur].nxt[w];
	if (nod[target].len == nod[cur].len + 1)
	{
		nod[fresh].fa = target;
		return fresh;
	}

	const int clone = ++tot;
	nod[clone] = nod[target];
	nod[clone].len = nod[cur].len + 1;
	nod[target].fa = clone;
	nod[fresh].fa = clone;
	while (cur && nod[cur].nxt[w] == target)
	{
		nod[cur].nxt[w] = clone;
		cur = nod[cur].fa;
	}
	return fresh;
}
char s[N];
vector<int>col[N];
vector<int>p[N];
int idt[N],siz[N],dfn[N];
// Pre-order numbering of the tree stored in p, starting at u.
// dfn[0] serves as the running counter; dfn[u] receives the visit number,
// idt maps a visit number back to its node, and siz[u] ends up as the number
// of nodes in the subtree of u (u included).
void dfs(int u)
{
	const int stamp = ++dfn[0];
	dfn[u] = stamp;
	idt[stamp] = u;
	siz[u] = 1;
	for (size_t k = 0; k < p[u].size(); ++k)
	{
		const int child = p[u][k];
		dfs(child);
		siz[u] += siz[child];
	}
}
// One offline range query: the interval [l, r] of visit numbers and the
// index id of the query string it belongs to.
struct que
{
	int l, r, id;
	// Orders queries by right endpoint. Declared const and taking its argument
	// by const reference so it can be applied to const objects, as the
	// comparison used by std::sort is expected to allow.
	bool operator<(const que& a) const
	{
		return r < a.r;
	}
}qe[N];
int ans[N],cnt,sum[N],pre[N];
// Fenwick-tree point update: adds w at position pos (pos >= 1) in sum,
// with tot as the upper bound of the index range.
void addw(int pos, int w)
{
	int i = pos;
	while (i <= tot)
	{
		sum[i] += w;
		i += i & -i;	// jump to the next index covering this position
	}
}
// Fenwick-tree prefix query: returns the sum of positions 1..pos.
int query(int pos)
{
	int total = 0;
	int i = pos;
	while (i)
	{
		total += sum[i];
		i -= i & -i;	// strip the lowest set bit
	}
	return total;
}
// Reads n template strings and m query strings. Every state reached by a
// prefix of template i is tagged with colour i; each query string that can be
// followed through the automaton becomes a range over visit numbers, and the
// number of distinct colours in that range is printed (0 for queries that
// cannot be followed).
int main()
{
#ifndef ONLINE_JUDGE
	// Local runs take their input from test.in.
	freopen("test.in", "r", stdin);
#endif
	int n, m;
	read(n);
	read(m);

	// Insert the templates and tag the states of their prefixes.
	for (int id = 1; id <= n; ++id)
	{
		scanf("%s", s + 1);
		const int len = strlen(s + 1);
		int last = 1;
		for (int pos = 1; pos <= len; ++pos)
		{
			last = add(s[pos] - 'a', last);
			col[last].push_back(id);
		}
	}

	for (int v = 1; v <= tot; ++v)
		p[nod[v].fa].push_back(v);
	dfs(1);

	// Turn every query string that exists in the automaton into a range.
	for (int id = 1; id <= m; ++id)
	{
		scanf("%s", s + 1);
		const int len = strlen(s + 1);
		int state = 1;
		for (int pos = 1; state && pos <= len; ++pos)
			state = nod[state].nxt[s[pos] - 'a'];
		if (state)
			qe[++cnt] = que{ dfn[state], dfn[state] + siz[state] - 1, id };
	}

	// Sweep by right endpoint, keeping only the latest position of each
	// colour marked in the Fenwick tree.
	sort(qe + 1, qe + 1 + cnt);
	int swept = 0;
	for (int i = 1; i <= cnt; ++i)
	{
		while (swept < qe[i].r)
		{
			++swept;
			const int v = idt[swept];
			for (size_t k = 0; k < col[v].size(); ++k)
			{
				const int colour = col[v][k];
				if (pre[colour]) addw(pre[colour], -1);
				addw(swept, 1);
				pre[colour] = swept;
			}
		}
		ans[qe[i].id] = query(qe[i].r) - query(qe[i].l - 1);
	}

	for (int id = 1; id <= m; ++id)
		printf("%d\n", ans[id]);
	return 0;
}

P3975 [TJOI2015]弦论

题目链接:[TJOI2015]弦论
题目大意:给定一个字符串,求解字符串中字典序第 $k$ 小的子串。$t$ 为 $0$ 则表示不同位置的相同子串算作一个,$t$ 为 $1$ 则表示不同位置的相同子串算作多个。
数据范围:$1\le n\le 5\times 10^5,\ 0\le t\le 1,\ 1\le k\le 10^9$
题解:$SAM$ 好题。对于 $t=1$ 的情况,$SAM$ 中节点的 $siz$ 为其 $endpos$ 集合大小;$t=0$ 的情况所有节点的 $siz$ 为 1。然后再求出从每个节点出发(含该节点本身)能够得到的子串个数,之后从根开始按字典序逐位确定答案即可。
AC代码:

#include<bits/stdc++.h>

#define ld long double
#define ll long long
using namespace std;
// Fast integer reader built on getchar().
// Skips every character that is not a digit (remembering whether a '-' was
// seen), then accumulates the following run of decimal digits into x.
// The character is held in an int so that EOF can be told apart from data:
// the previous version looped forever in the skip phase once the input was
// exhausted. If the input ends before any digit is found, x is set to 0.
template<class T>
void read(T& x)
{
	T res = 0, f = 1;
	int c = getchar();
	while (c != EOF && !isdigit(c)) {
		if (c == '-') f = -1;
		c = getchar();
	}
	while (c != EOF && isdigit(c)) {
		res = res * 10 + (c - '0');
		c = getchar();
	}
	x = res * f;
}
const ll N = 2000000 + 10;
const int mod = 1e9 + 7;
// Per-state weight: incremented in main for every state add() returns, then
// either summed over the fa tree or overwritten with 1, depending on t.
int siz[N];
// One automaton state.
struct node
{
	int nxt[26];	// outgoing transition per letter (0 = no transition)
	int len;	// length value stored for the state (set in add)
	int fa;		// link to another state (0 = none)
}nod[N];
// State 1 is the initial state; index 0 is used as "no state".
int tot = 1;	// number of states allocated so far
int add(int w, int last)
{
	int p = last;
	int now = ++tot; nod[now].len = nod[p].len + 1;
	for (; p && !nod[p].nxt[w]; p = nod[p].fa)nod[p].nxt[w] = now;
	if (!p)nod[now].fa = 1;
	else
	{
		int q = nod[p].nxt[w];
		if (nod[q].len == nod[p].len + 1)nod[now].fa = q;
		else
		{
			int nq = ++tot; nod[nq] = nod[q];
			nod[nq].len = nod[p].len + 1;
			nod[q].fa = nod[now].fa = nq;
			for (; p && nod[p].nxt[w] == q; p = nod[p].fa)nod[p].nxt[w] = nq;
		}
	}
	return now;
}
char s[N];
int t, k, dp[N];
vector<int>p[N];
void dfs(int u)
{
	for (auto to : p[u])dfs(to), siz[u] += siz[to];
}
int dfs2(int u)
{
	if (!u)return 0;
	if (~dp[u])return dp[u];
	dp[u] = siz[u];
	for (int i = 0; i < 26; i++)
	{
		int to = nod[u].nxt[i];
		dp[u] += dfs2(to);
	}
	return dp[u];
}
int main()
{
	//ios::sync_with_stdio(false);
#ifndef ONLINE_JUDGE
	freopen("test.in", "r", stdin);
#endif // ONLINE_JUDGE
	scanf("%s", s + 1);
	read(t), read(k);
	int n = strlen(s + 1);
	for (int i = 1, last = 1; i <= n; i++)last = add(s[i] - 'a', last), siz[last]++;
	if (!t)
		for (int i = 1; i <= tot; i++)siz[i] = 1;
	else
	{
		for (int i = 2; i <= tot; i++)p[nod[i].fa].push_back(i);
		dfs(1);
	}
	memset(dp, -1, sizeof(dp));
	dfs2(1);
	vector<int>ans;
	int al = 0, now = 1;
	while (1)
	{
		bool isok = 0;
		for (int nx = 0; nx < 26; nx++)
		{
			if (!nod[now].nxt[nx])continue;
			if (al + dp[nod[now].nxt[nx]] < k)al += dp[nod[now].nxt[nx]];
			else
			{
				ans.push_back(nx);
				al += siz[nod[now].nxt[nx]];
				now = nod[now].nxt[nx];
				if (al < k)
					isok = 1;
				break;
			}
		}
		if (!isok)break;
	}
	if (al >= k)
		for (auto it : ans)printf("%c", it + 'a');
	else
		printf("-1\n");

	return 0;
}

CF235C Cyclical Quest

题目链接:CF235C Cyclical Quest
题目大意:给定一个主串 $S$ 和 $n$ 个询问串,求每个询问串的所有循环同构在主串中出现的次数总和。
数据范围:$1\le n\le 10^5,\ \sum|q_i|\le 10^6,\ |S|\le 10^6$
题解:$SAM$ 去除首字符操作。对于询问串的循环同构我们可以拆环为链,将字符串复制一份到后面即可。现在要考虑的就是如果匹配字符的数目已经是 $|q_i|$ 了,我们需要将首字符去除。在 $SAM$ 上,我们其实只需要比对一下 $|q_i|-1$ 这个长度是否还在该节点的包含范围内,如果不在就跳到 $fa$ 节点上面即可。
AC代码:

#include<bits/stdc++.h>

#define ld long double
#define ll long long
using namespace std;
// Fast integer reader built on getchar().
// Skips every character that is not a digit (remembering whether a '-' was
// seen), then accumulates the following run of decimal digits into x.
// The character is held in an int so that EOF can be told apart from data:
// the previous version looped forever in the skip phase once the input was
// exhausted. If the input ends before any digit is found, x is set to 0.
template<class T>
void read(T& x)
{
	T res = 0, f = 1;
	int c = getchar();
	while (c != EOF && !isdigit(c)) {
		if (c == '-') f = -1;
		c = getchar();
	}
	while (c != EOF && isdigit(c)) {
		res = res * 10 + (c - '0');
		c = getchar();
	}
	x = res * f;
}
const ll N = 2000000 + 10;
const int mod = 1e9 + 7;

// One automaton state.
struct node
{
	int nxt[26];	// outgoing transition per letter (0 = no transition)
	int fa;		// link to another state (0 = none)
	int len;	// length value stored for the state (set in ext)
}nod[N];
// State 1 is the initial state; index 0 is used as "no state".
int tot = 1;	// number of states allocated so far
int siz[N];	// per-state counter: incremented in main for each state ext() returns, summed by dfs
int ext(int w, int last)
{
	int p = last;
	int now = ++tot; nod[now].len = nod[p].len + 1;
	for (; p && !nod[p].nxt[w]; p = nod[p].fa)nod[p].nxt[w] = now;
	if (!p)nod[now].fa = 1;
	else
	{
		int q = nod[p].nxt[w];
		if (nod[q].len == nod[p].len + 1)nod[now].fa = q;
		else
		{
			int nq = ++tot; nod[nq] = nod[q];
			nod[nq].len = nod[p].len + 1;
			nod[q].fa = nod[now].fa = nq;
			for (; p && nod[p].nxt[w] == q; p = nod[p].fa)nod[p].nxt[w] = nq;
		}
	}
	return now;
}
vector<int>p[N];
void dfs(int u)
{
	for (auto to : p[u])dfs(to), siz[u] += siz[to];
}
char s[N];
int vis[N];
int query(char* s,int idt)
{
	int n = strlen(s + 1);
	for (int i = 1; i <= n; i++)s[i + n] = s[i];
	int now = 1,ans=0,cnt=0;
	for (int i = 1; i <= 2 * n; i++)
	{
		if (nod[now].nxt[s[i] - 'a'])now = nod[now].nxt[s[i] - 'a'], cnt++;
		else
		{
			while (now && !nod[now].nxt[s[i] - 'a'])now = nod[now].fa, cnt = nod[now].len;
			if (now)now = nod[now].nxt[s[i] - 'a'], cnt++;
			else
				now = 1, cnt = 0;
		}
		if (cnt == n)//匹配了n个了
		{
			if (vis[now] != idt)ans += siz[now], vis[now] = idt;
			if (nod[nod[now].fa].len + 1 == n)now = nod[now].fa;
			cnt--;
		}
	}
	return ans;
}
// Builds the automaton of the main string, sums siz over the tree of fa
// links, then answers each of the q query strings with query().
int main()
{
#ifndef ONLINE_JUDGE
	// Local runs take their input from test.in.
	freopen("test.in", "r", stdin);
#endif
	scanf("%s", s + 1);
	const int n = strlen(s + 1);
	int last = 1;
	for (int pos = 1; pos <= n; ++pos)
	{
		last = ext(s[pos] - 'a', last);
		siz[last]++;
	}
	for (int v = 2; v <= tot; ++v)
		p[nod[v].fa].push_back(v);
	dfs(1);

	int q;
	read(q);
	for (int id = 1; id <= q; ++id)
	{
		scanf("%s", s + 1);
		printf("%d\n", query(s, id));
	}
	return 0;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值