文章目录
前言
队伍在字符串方面比较薄弱,刚好字符串的内容相对来说不算多。所以拿出时间来学习一下字符串方面的算法。
P5231 [JSOI2012]玄武密码
题目链接:[JSOI2012]玄武密码
题目大意:给定一个母串 $S$,给出 $m$ 段文字,对每一段文字求其最长的前缀,使得该前缀是 $S$ 的子串。
数据范围:$1\le n\le 10^7,\ 1\le m\le 10^5,\ 1\le |s_i|\le 100$
题解:SAM 裸题了,对 $S$ 建 SAM,然后把每一个 $s_i$ 丢到 SAM 上面跑,一直到不能匹配就行了。
AC代码:
#include<bits/stdc++.h>
#define ld long double
#define ll long long
using namespace std;
/// Reads one decimal integer from stdin into x.
/// Every non-digit byte in front of the number is skipped; if any of them is '-',
/// the result is negated. At end of input the function returns instead of looping
/// forever, and x receives whatever was parsed so far (0 if nothing was).
template<class T>
void read(T& x)
{
    T res = 0, f = 1;
    int c = getchar();                 // keep the int so EOF stays distinguishable from a byte
    while (c != EOF && !isdigit(c)) {
        if (c == '-') f = -1;
        c = getchar();
    }
    while (c != EOF && isdigit(c)) {
        res = res * 10 + (c - '0');
        c = getchar();
    }
    x = res * f;
}
// Size of the statically allocated arrays of this program (the state pool `nod` and the text buffer `s`).
const ll N = 20000000 + 10;
// Not referenced anywhere else in this program.
const int mod = 1e9 + 7;
/// Maps a direction letter to its transition slot: 'E' -> 0, 'S' -> 1, 'W' -> 2,
/// and every other character -> 3.
int num(char c)
{
    switch (c) {
    case 'E': return 0;
    case 'S': return 1;
    case 'W': return 2;
    default:  return 3;
    }
}
// One automaton state; states are stored in the global pool `nod` and addressed by index.
struct node
{
// nxt[c]: index of the state reached on symbol code c (0 means "no transition").
// len:    length assigned by add() (previous state's len + 1, or split point's len + 1 for a clone).
// fa:     parent link assigned by add(); it stays 0 for state 1, which add() never links.
int nxt[4], len, fa;
}nod[N];
int last=1, tot = 1;
void add(int w)
{
int p = last; int now = last = ++tot;
nod[now].len = nod[p].len + 1;
for (; p && !nod[p].nxt[w]; p = nod[p].fa)nod[p].nxt[w] = now;
if (!p)nod[now].fa = 1;
else
{
int q = nod[p].nxt[w];
if (nod[q].len == nod[p].len + 1)nod[now].fa = q;
else
{
int nq = ++tot; nod[nq] = nod[q]; nod[nq].len = nod[p].len + 1;
nod[q].fa = nod[now].fa = nq;
for (; p && nod[p].nxt[w] == q; p = nod[p].fa)nod[p].nxt[w] = nq;
}
}
}
int n, m;
char s[N];
int main()
{
//ios::sync_with_stdio(false);
#ifndef ONLINE_JUDGE
freopen("test.in", "r", stdin);
#endif // ONLINE_JUDGE
read(n); read(m);
scanf("%s", s + 1);
for (int i = 1; i <= n; i++)add(num(s[i]));
for (int i = 1; i <= m; i++)
{
scanf("%s", s + 1); int len = strlen(s + 1);
int now = 1,ans=len;
for (int j = 1; j <= len; j++)
{
if (nod[now].nxt[num(s[j])])
{
now = nod[now].nxt[num(s[j])];
}
else
{
ans = j-1; break;
}
}
printf("%d\n", ans);
}
return 0;
}
CF802I Fake News (hard)
题目链接:CF802I Fake News (hard)
题目大意:$T$ 组数据,对于字符串 $s$ 求 $\sum_p cnt(s,p)^2$,其中 $cnt(s,p)$ 表示子串 $p$ 在 $s$ 中的出现次数。
数据范围:$|s|\le 10^5,\ T\le 10$
题解:这道题也是比较容易的,在 SAM 中每一个子串出现的次数为该子串所在节点 $endpos$ 集合的大小,也即 parent 树上该节点子树内终结节点(非复制节点)的个数。建好 SAM 后再 dfs 统计一下即可。
AC代码:
#include<bits/stdc++.h>
#define ld long double
#define ll long long
using namespace std;
/// Reads one decimal integer from stdin into x.
/// Every non-digit byte in front of the number is skipped; if any of them is '-',
/// the result is negated. At end of input the function returns instead of looping
/// forever, and x receives whatever was parsed so far (0 if nothing was).
template<class T>
void read(T& x)
{
    T res = 0, f = 1;
    int c = getchar();                 // keep the int so EOF stays distinguishable from a byte
    while (c != EOF && !isdigit(c)) {
        if (c == '-') f = -1;
        c = getchar();
    }
    while (c != EOF && isdigit(c)) {
        res = res * 10 + (c - '0');
        c = getchar();
    }
    x = res * f;
}
// Size of every statically allocated array in this program.
const ll N = 200000 + 10;
// Not referenced anywhere else in this program.
const int mod = 1e9 + 7;
/// One automaton state, stored in the global pool `nod` and addressed by index.
struct node
{
    int nxt[26], fa, len;
    /// Puts the state back to "no transitions, no parent, length 0" so the pool
    /// can be reused by the next test case.
    void init()
    {
        for (int& edge : nxt) edge = 0;
        len = 0;
        fa = 0;
    }
} nod[N];
int last = 1, tot = 1,siz[N];
void add(int w)
{
int p = last; int now = last = ++tot; nod[tot].init(); siz[now] = 1;
nod[now].len = nod[p].len + 1;
for (; p && !nod[p].nxt[w]; p = nod[p].fa)nod[p].nxt[w] = now;
if (!p)nod[now].fa = 1;
else
{
int q = nod[p].nxt[w];
if (nod[q].len == nod[p].len + 1)nod[now].fa = q;
else
{
int nq = ++tot; nod[nq] = nod[q];
nod[nq].len = nod[p].len + 1;
nod[q].fa = nod[now].fa = nq;
for (; p && nod[p].nxt[w] == q; p = nod[p].fa)nod[p].nxt[w] = nq;
}
}
}
int t;
char s[N];
vector<int>p[N];
void dfs(int u)
{
for (auto to : p[u])dfs(to), siz[u] += siz[to];
}
int main()
{
//ios::sync_with_stdio(false);
#ifndef ONLINE_JUDGE
freopen("test.in", "r", stdin);
#endif // ONLINE_JUDGE
read(t);
while (t--)
{
scanf("%s", s + 1);
for (int i = 1; i <= tot; i++)siz[i] = 0,p[i].clear();
tot = last = 1; nod[tot].init();
int n = strlen(s + 1);
for (int i = 1; i <= n; i++)add(s[i] - 'a');
for (int i = 2; i <= tot; i++)p[nod[i].fa].push_back(i);
dfs(1);
ll ans = 0;
for (int i = 1; i <= tot; i++)
{
int len = nod[i].len - nod[nod[i].fa].len;
ans += 1ll * len * siz[i] * siz[i];
}
printf("%lld\n", ans);
}
return 0;
}
P6139 【模板】广义后缀自动机(广义 SAM)
题目链接:【模板】广义后缀自动机(广义 SAM)
题目大意:给定n个串,问不同子串个数。
题解:广义 SAM,只需在每次开始插入一个新串时将 $last$ 重置为 1。
数据范围:$1\le n\le 4\times 10^5,\ 1\le \sum |s_i|\le 10^6$
AC代码:
#include<bits/stdc++.h>
#define ld long double
#define ll long long
using namespace std;
/// Reads one decimal integer from stdin into x.
/// Every non-digit byte in front of the number is skipped; if any of them is '-',
/// the result is negated. At end of input the function returns instead of looping
/// forever, and x receives whatever was parsed so far (0 if nothing was).
template<class T>
void read(T& x)
{
    T res = 0, f = 1;
    int c = getchar();                 // keep the int so EOF stays distinguishable from a byte
    while (c != EOF && !isdigit(c)) {
        if (c == '-') f = -1;
        c = getchar();
    }
    while (c != EOF && isdigit(c)) {
        res = res * 10 + (c - '0');
        c = getchar();
    }
    x = res * f;
}
// Size of the statically allocated arrays of this program (the state pool `nod` and the buffer `s`).
const ll N = 2000000 + 10;
// Not referenced anywhere else in this program.
const int mod = 1e9 + 7;
// One automaton state, stored in the global pool `nod` and addressed by index.
struct node
{
// nxt[c]: state reached on letter index c (callers pass ch - 'a'); 0 means "no transition".
// fa:     parent link assigned by add(); len: length assigned by add().
int nxt[26], fa, len;
}nod[N];
int last = 1, tot = 1;
void add(int w)
{
int p = last; int now = last = ++tot;
nod[now].len = nod[p].len + 1;
for (; p && !nod[p].nxt[w]; p = nod[p].fa)nod[p].nxt[w] = now;
if (!p)nod[now].fa = 1;
else
{
int q = nod[p].nxt[w];
if (nod[q].len == nod[p].len + 1)nod[now].fa = q;
else
{
int nq = ++tot; nod[nq] = nod[q];
nod[nq].len = nod[p].len + 1;
nod[q].fa = nod[now].fa = nq;
for (; p && nod[p].nxt[w] == q; p = nod[p].fa)nod[p].nxt[w] = nq;
}
}
}
int n;
char s[N];
int main()
{
//ios::sync_with_stdio(false);
#ifndef ONLINE_JUDGE
freopen("test.in", "r", stdin);
#endif // ONLINE_JUDGE
read(n);
for (int i = 1; i <= n; i++, last = 1)
{
scanf("%s", s + 1);
int len = strlen(s + 1);
for (int j = 1; j <= len; j++)
{
add(s[j] - 'a');
}
}
ll ans = 0;
for (int i = 1; i <= tot; i++)ans += nod[i].len - nod[nod[i].fa].len;
printf("%lld\n", ans);
return 0;
}
SP8093 JZPGYZ - Sevenk Love Oimaster
题目链接:JZPGYZ - Sevenk Love Oimaster
题目大意:给定 $n$ 个模板串,以及 $m$ 个查询串,依次查询每一个查询串是多少个模板串的子串。
数据范围:$n\le 10000,\ q\le 60000,\ \sum|s_i|\le 100000,\ \sum|q_i|\le 360000$(此处 $q$ 即查询串个数 $m$,$q_i$ 为第 $i$ 个查询串)
题解:广义后缀自动机+树状数组。对模板串建广义 SAM,然后将模板串放入自动机上跑,给每个前缀所在的节点染上该模板串的颜色,然后题目就转换为求 parent 树上某棵子树内的不同颜色个数。dfs 序+树状数组可以很好实现。码量较大,是个好题。
AC代码:
#include<bits/stdc++.h>
#define ld long double
#define ll long long
using namespace std;
/// Reads one decimal integer from stdin into x.
/// Every non-digit byte in front of the number is skipped; if any of them is '-',
/// the result is negated. At end of input the function returns instead of looping
/// forever, and x receives whatever was parsed so far (0 if nothing was).
template<class T>
void read(T& x)
{
    T res = 0, f = 1;
    int c = getchar();                 // keep the int so EOF stays distinguishable from a byte
    while (c != EOF && !isdigit(c)) {
        if (c == '-') f = -1;
        c = getchar();
    }
    while (c != EOF && isdigit(c)) {
        res = res * 10 + (c - '0');
        c = getchar();
    }
    x = res * f;
}
// Size of every statically allocated array in this program.
const ll N = 1000000 + 10;
// Not referenced anywhere else in this program.
const int mod = 1e9 + 7;
// One automaton state, stored in the global pool `nod` and addressed by index.
struct node
{
// nxt[c]: state reached on letter index c (callers pass ch - 'a'); 0 means "no transition".
// fa:     parent link assigned by add(); len: length assigned by add().
int nxt[26], fa, len;
}nod[N];
// Highest state index in use; state 1 is the root.
int tot = 1;
/// Appends letter index w after state `last` in the shared (multi-string) automaton
/// and returns the state that now represents the extended string. If the transition
/// already exists, the existing state (or a clone of the right length) is returned
/// and no fresh state is allocated.
int add(int w, int last)
{
    // Clones q at length len(from) + 1, re-parents q under the clone and redirects
    // the w-edges on from's parent chain that pointed at q.
    auto split = [w](int from, int q) {
        const int nq = ++tot;
        nod[nq] = nod[q];
        nod[nq].len = nod[from].len + 1;
        nod[q].fa = nq;
        for (int v = from; v && nod[v].nxt[w] == q; v = nod[v].fa) nod[v].nxt[w] = nq;
        return nq;
    };

    int p = last;
    const int existing = nod[p].nxt[w];
    if (existing)
    {
        if (nod[existing].len == nod[p].len + 1) return existing;
        return split(p, existing);
    }

    const int now = ++tot;
    nod[now].len = nod[p].len + 1;
    while (p && !nod[p].nxt[w])
    {
        nod[p].nxt[w] = now;
        p = nod[p].fa;
    }
    if (!p)
    {
        nod[now].fa = 1;
        return now;
    }
    const int q = nod[p].nxt[w];
    if (nod[q].len == nod[p].len + 1) nod[now].fa = q;
    else nod[now].fa = split(p, q);
    return now;
}
char s[N];
vector<int>col[N];
vector<int>p[N];
int idt[N],siz[N],dfn[N];
void dfs(int u)
{
dfn[u] = ++dfn[0];
idt[dfn[0]] = u;
siz[u] = 1;
for (auto to : p[u])dfs(to), siz[u] += siz[to];
}
/// One offline query: the position range [l, r] in visiting order and the index
/// `id` of the query string it belongs to.
struct que
{
    int l, r, id;
    /// Orders queries by right endpoint only. Const-qualified and taking its argument
    /// by reference so it can be used on const objects and copies nothing per comparison.
    bool operator<(const que& a) const
    {
        return r < a.r;
    }
}qe[N];
// ans: answer per query index; cnt: number of stored queries; sum: Fenwick tree; pre[c]: last position where colour c was counted.
int ans[N],cnt,sum[N],pre[N];
/// Adds w at position pos of the Fenwick tree `sum` (positions 1..tot).
void addw(int pos, int w)
{
    int i = pos;
    while (i <= tot)
    {
        sum[i] += w;
        i += i & -i;
    }
}
/// Returns the prefix sum over positions 1..pos of the Fenwick tree `sum`.
int query(int pos)
{
    int total = 0;
    while (pos)
    {
        total += sum[pos];
        pos -= pos & -pos;
    }
    return total;
}
int main()
{
//ios::sync_with_stdio(false);
#ifndef ONLINE_JUDGE
freopen("test.in", "r", stdin);
#endif // ONLINE_JUDGE
int n, m; read(n), read(m);
for (int i = 1; i <= n; i++)
{
scanf("%s", s + 1);
int len = strlen(s + 1);
for (int j = 1, last = 1; j <= len; j++)
{
last = add(s[j] - 'a', last);
col[last].push_back(i);
}
}
for (int i = 1; i <= tot; i++)p[nod[i].fa].push_back(i);
dfs(1);
for (int i = 1; i <= m; i++)
{
scanf("%s", s + 1);
int len = strlen(s + 1),u=1;
for (int j = 1; u && j <= len; j++)
u = nod[u].nxt[s[j] - 'a'];
if (u)qe[++cnt] = que{ dfn[u],dfn[u] + siz[u] - 1,i };
}
sort(qe + 1, qe + 1 + cnt);
for (int i = 1,al=0; i <= cnt; i++)
{
while (al < qe[i].r)
{
al++;
int u = idt[al];
for (auto co : col[u])
{
if (pre[co])addw(pre[co], -1);
addw(al, 1);
pre[co] = al;
}
}
ans[qe[i].id] = query(qe[i].r) - query(qe[i].l - 1);
}
for (int i = 1; i <= m; i++)printf("%d\n", ans[i]);
return 0;
}
P3975 [TJOI2015]弦论
题目链接:[TJOI2015]弦论
题目大意:给定一个字符串,求解字符串中字典序第 $k$ 小的子串。$t$ 为 $0$ 则表示不同位置的相同子串算作一个,$t$ 为 $1$ 则表示不同位置的相同子串算作多个。
数据范围:$1\le n\le 5\times 10^5,\ 0\le t\le 1,\ 1\le k\le 10^9$
题解:SAM 好题,对于 $t=1$ 的情况,SAM 中节点的 $siz$ 为其 $endpos$ 集合大小,$t=0$ 的情况所有节点的 $siz$ 为 1。然后再求出从每个节点出发沿转移边能走出的子串的个数(含该节点自身的 $siz$),之后从根开始按字典序逐层向下求解即可。
AC代码:
#include<bits/stdc++.h>
#define ld long double
#define ll long long
using namespace std;
/// Reads one decimal integer from stdin into x.
/// Every non-digit byte in front of the number is skipped; if any of them is '-',
/// the result is negated. At end of input the function returns instead of looping
/// forever, and x receives whatever was parsed so far (0 if nothing was).
template<class T>
void read(T& x)
{
    T res = 0, f = 1;
    int c = getchar();                 // keep the int so EOF stays distinguishable from a byte
    while (c != EOF && !isdigit(c)) {
        if (c == '-') f = -1;
        c = getchar();
    }
    while (c != EOF && isdigit(c)) {
        res = res * 10 + (c - '0');
        c = getchar();
    }
    x = res * f;
}
// Size of every statically allocated array in this program.
const ll N = 2000000 + 10;
// Not referenced anywhere else in this program.
const int mod = 1e9 + 7;
// siz[v]: weight of state v; main() sets it to 1 everywhere (t == 0) or to the accumulated counter (t == 1).
int siz[N];
// One automaton state, stored in the global pool `nod` and addressed by index.
struct node
{
// nxt[c]: state reached on letter index c (callers pass ch - 'a'); 0 means "no transition".
// len:    length assigned by add(); fa: parent link assigned by add().
int nxt[26], len, fa;
}nod[N];
// Highest state index in use; state 1 is the root.
int tot = 1;
/// Appends letter index w (0..25) after state `last` and returns the newly created
/// state for the extended text. May additionally allocate one clone.
int add(int w, int last)
{
    const int now = ++tot;
    nod[now].len = nod[last].len + 1;
    int p = last;
    while (p && !nod[p].nxt[w])
    {
        nod[p].nxt[w] = now;
        p = nod[p].fa;
    }
    if (!p)
    {
        nod[now].fa = 1;
        return now;
    }
    const int q = nod[p].nxt[w];
    if (nod[q].len == nod[p].len + 1)
    {
        nod[now].fa = q;
        return now;
    }
    // Split q so that the parent of `now` has exactly length len(p) + 1.
    const int nq = ++tot;
    nod[nq] = nod[q];
    nod[nq].len = nod[p].len + 1;
    nod[now].fa = nq;
    nod[q].fa = nq;
    while (p && nod[p].nxt[w] == q)
    {
        nod[p].nxt[w] = nq;
        p = nod[p].fa;
    }
    return now;
}
char s[N];
int t, k, dp[N];
vector<int>p[N];
void dfs(int u)
{
for (auto to : p[u])dfs(to), siz[u] += siz[to];
}
/// Returns, and memoises in dp[u], the weighted number of strings readable from
/// state u: siz[u] for stopping at u plus the totals of all of u's transitions.
/// Returns 0 for u == 0 (missing transition).
///
/// The exact totals do not fit in an int (they grow quadratically with the text
/// length), so every value is clamped to kCap. The clamp does not change any
/// decision made by the caller: the statement bounds k by 10^9 < kCap, so a clamped
/// value still compares as "at least k", and the sum of two clamped values stays
/// below INT_MAX.
int dfs2(int u)
{
    const int kCap = 0x3fffffff;   // 2^30 - 1
    if (!u)return 0;
    if (~dp[u])return dp[u];
    dp[u] = siz[u] > kCap ? kCap : siz[u];
    for (int i = 0; i < 26; i++)
    {
        int to = nod[u].nxt[i];
        dp[u] += dfs2(to);          // at most 2 * kCap, still representable
        if (dp[u] > kCap)dp[u] = kCap;
    }
    return dp[u];
}
int main()
{
//ios::sync_with_stdio(false);
#ifndef ONLINE_JUDGE
freopen("test.in", "r", stdin);
#endif // ONLINE_JUDGE
scanf("%s", s + 1);
read(t), read(k);
int n = strlen(s + 1);
for (int i = 1, last = 1; i <= n; i++)last = add(s[i] - 'a', last), siz[last]++;
if (!t)
for (int i = 1; i <= tot; i++)siz[i] = 1;
else
{
for (int i = 2; i <= tot; i++)p[nod[i].fa].push_back(i);
dfs(1);
}
memset(dp, -1, sizeof(dp));
dfs2(1);
vector<int>ans;
int al = 0, now = 1;
while (1)
{
bool isok = 0;
for (int nx = 0; nx < 26; nx++)
{
if (!nod[now].nxt[nx])continue;
if (al + dp[nod[now].nxt[nx]] < k)al += dp[nod[now].nxt[nx]];
else
{
ans.push_back(nx);
al += siz[nod[now].nxt[nx]];
now = nod[now].nxt[nx];
if (al < k)
isok = 1;
break;
}
}
if (!isok)break;
}
if (al >= k)
for (auto it : ans)printf("%c", it + 'a');
else
printf("-1\n");
return 0;
}
CF235C Cyclical Quest
题目链接:CF235C Cyclical Quest
题目大意:给定一个主串 $S$ 和 $n$ 个询问串,求每个询问串的所有循环同构在主串中出现的次数总和。
数据范围:$1\le n\le 10^5,\ \sum|q_i|\le 10^6,\ |S|\le 10^6$
题解:SAM 去除首字符操作。对于询问串的循环同构我们可以拆环为链,将字符串复制一份到后面即可。现在要考虑的就是如果匹配字符的数目已经是 $|q_i|$ 了,然后我们需要将首字符去除。在 SAM 上,我们其实只需要比对一下长度 $|q_i|-1$ 是否还在该节点的包含范围内,如果不在就跳到 $fa$ 节点上面即可。
AC代码:
#include<bits/stdc++.h>
#define ld long double
#define ll long long
using namespace std;
/// Reads one decimal integer from stdin into x.
/// Every non-digit byte in front of the number is skipped; if any of them is '-',
/// the result is negated. At end of input the function returns instead of looping
/// forever, and x receives whatever was parsed so far (0 if nothing was).
template<class T>
void read(T& x)
{
    T res = 0, f = 1;
    int c = getchar();                 // keep the int so EOF stays distinguishable from a byte
    while (c != EOF && !isdigit(c)) {
        if (c == '-') f = -1;
        c = getchar();
    }
    while (c != EOF && isdigit(c)) {
        res = res * 10 + (c - '0');
        c = getchar();
    }
    x = res * f;
}
// Size of every statically allocated array in this program.
const ll N = 2000000 + 10;
// Not referenced anywhere else in this program.
const int mod = 1e9 + 7;
// One automaton state, stored in the global pool `nod` and addressed by index.
struct node
{
// nxt[c]: state reached on letter index c (callers pass ch - 'a'); 0 means "no transition".
// fa:     parent link assigned by ext(); len: length assigned by ext().
int nxt[26], fa, len;
}nod[N];
int tot = 1,siz[N];
int ext(int w, int last)
{
int p = last;
int now = ++tot; nod[now].len = nod[p].len + 1;
for (; p && !nod[p].nxt[w]; p = nod[p].fa)nod[p].nxt[w] = now;
if (!p)nod[now].fa = 1;
else
{
int q = nod[p].nxt[w];
if (nod[q].len == nod[p].len + 1)nod[now].fa = q;
else
{
int nq = ++tot; nod[nq] = nod[q];
nod[nq].len = nod[p].len + 1;
nod[q].fa = nod[now].fa = nq;
for (; p && nod[p].nxt[w] == q; p = nod[p].fa)nod[p].nxt[w] = nq;
}
}
return now;
}
vector<int>p[N];
void dfs(int u)
{
for (auto to : p[u])dfs(to), siz[u] += siz[to];
}
char s[N];
int vis[N];
int query(char* s,int idt)
{
int n = strlen(s + 1);
for (int i = 1; i <= n; i++)s[i + n] = s[i];
int now = 1,ans=0,cnt=0;
for (int i = 1; i <= 2 * n; i++)
{
if (nod[now].nxt[s[i] - 'a'])now = nod[now].nxt[s[i] - 'a'], cnt++;
else
{
while (now && !nod[now].nxt[s[i] - 'a'])now = nod[now].fa, cnt = nod[now].len;
if (now)now = nod[now].nxt[s[i] - 'a'], cnt++;
else
now = 1, cnt = 0;
}
if (cnt == n)//匹配了n个了
{
if (vis[now] != idt)ans += siz[now], vis[now] = idt;
if (nod[nod[now].fa].len + 1 == n)now = nod[now].fa;
cnt--;
}
}
return ans;
}
/// Builds the automaton of the main string with occurrence counters accumulated over
/// the parent tree, then reads the number of queries and prints query()'s result for each.
int main()
{
#ifndef ONLINE_JUDGE
    freopen("test.in", "r", stdin);
#endif // ONLINE_JUDGE
    scanf("%s", s + 1);
    const int n = strlen(s + 1);
    int last = 1;
    for (int pos = 1; pos <= n; pos++)
    {
        last = ext(s[pos] - 'a', last);
        siz[last]++;
    }
    for (int v = 2; v <= tot; v++) p[nod[v].fa].push_back(v);
    dfs(1);
    int q;
    read(q);
    for (int id = 1; id <= q; id++)
    {
        scanf("%s", s + 1);
        const int result = query(s, id);
        printf("%d\n", result);
    }
    return 0;
}