A string is a finite sequence of characters over a non-empty finite set Σ.
In this problem, Σ is the set of lowercase letters.
A substring, also called a factor, is a consecutive sequence of characters that occurs at least once in a string.
Now your task is a bit harder, for some given strings, find the length of the longest common substring of them.
Here common substring means a substring of two or more strings.
Input
The input contains at most 10 lines, each line consists of no more than 100000 lowercase letters, representing a string.
Output
The length of the longest common substring. If such string doesn't exist, print "0" instead.
Example
Input:
alsdfkjfjkdsal
fdjskalajfkdsla
aaaajfaaaa
Output:
2
Notice: new testcases added
首先吐槽一下这题, 当初学sa的时候做这题, 想1e6带个log可以过, 倍增加换DC3T了一夜。看见了限时236ms,卒!
后来学了sam,看大佬们这题的题解:"对第一个串建自动机, 剩下的跑就行了。" 看的我是一脸懵比。
做法:
先考虑两个串的时候我们怎么求最长公共子串
我们先对S建立后缀自动机, 然后从S的起点u开始, 再记一个长度L, 添加T[1 ~ LenT]
若存在子节点意味着添加当前字符后, 我们可以得到下一个状态,此时令状态u = next[u][c], L++.
若不存在, 即不断跳lnk[u],直到有状态v满足next[v][c] != 0, 令u = next[v][c], L = len[v] + 1.
因为lnk[u]代表着当前u的后缀, 这里的匹配就与AC自动机的fail指针极其相似。
若一直到起点仍没有则说明S中不含有c这个字符, 我们初始化u = 1, L = 0即可。
// Two-string case: run T (stored 0-based in b) through the suffix automaton
// built from S and print the length of the longest common substring.
void solve()
{
    const int n = strlen(b);
    int u = 1; // current automaton state; 1 is the initial state
    int L = 0; // length of the match that ends at the current character
    for (int pos = 0; pos < n; ++pos)
    {
        const int c = b[pos] - 'a';
        // No c-transition here: fall back along suffix links. After each hop
        // the match shrinks to the longest string of the state we landed on.
        while (u != 1 && !sam.nxt[u][c])
        {
            u = sam.lnk[u];
            L = sam.len[u];
        }
        // Either a state with a c-transition was found, or we are at the
        // initial state and c may simply not occur in S.
        if (sam.nxt[u][c])
        {
            u = sam.nxt[u][c];
            ++L;
        }
        if (ans < L)
            ans = L;
    }
    cout << ans << endl;
}
对于多个:
设第一个串为S, 我们对S建立后缀自动机
第一步:按照求两个串的过程, 但是我们用数组d记录每一个状态, 我们能匹配的最大长度为多少。
int u = 1, L = 0;
for (int i = 1; i <= lenb; i++)
{
int id = b[i] - 'a';
while (!nxt[u][id] && u != 1)
u = lnk[u], L = len[u];
if (nxt[u][id])
u = nxt[u][id], ++L;
d[u] = max(d[u], L);
}
第二步:开一个f数组, 按照拓扑序的逆序(len从大到小)遍历自动机的每一个状态。因为每个串匹配到的状态不同, 需要把匹配结果沿lnk向上传递: 例如我们在S的自动机中到了某一个含有abb的状态, 那么沿其lnk跳到的含有bb、b的状态, 自然也被匹配到了。目的:如果前一个串匹配的时候有bb这个子串, 我们取答案时就能取到长度为2了。所以只要当前状态的d不为0, 就把其lnk的d更新为len[lnk](后缀被完整匹配); 然后令f[u] = min(f[u], d[u]), 并把d[u]清零, 供下一个串使用。
for (int i = idx; i >= 2; i--)
{
int wz = id[i]; // 存的是状态的先后序
if (d[wz])
d[lnk[wz]] = len[lnk[wz]];
f[wz] = min(f[wz], d[wz]);
d[wz] = 0;
}
对于拓扑序,我们可以使用下面这个函数得到id数组, 同时也能计算每个状态的串的出现次数(sub)。这里按照长度来做计数排序(桶排序),短的自然是长的串的后缀。
void get_sub2()
{
for (int i = 1; i <= idx; i++)
bucket[len[i]]++;
for (int i = 1; i <= ls; i++)
bucket[i] += bucket[i - 1];
for (int i = idx; i >= 1; i--)
id[bucket[len[i]]--] = i;
for (int i = idx; i >= 1; i--)
{
int wz = id[i];
sub[lnk[wz]] += sub[wz];
}
}
最后别忘了对S的每一个子串取一个匹配最长的答案。
AC:
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <queue>
#include <time.h>
#include <algorithm>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#ifdef LOCAL
#define debug(x) cout << "[" __FUNCTION__ ": " #x " = " << (x) << "]\n"
#define TIME cout << "RuningTime: " << clock() << "ms\n", 0
#else
#define TIME 0
#endif
// Capacity of the input buffers a and b.
const int N = 1e6 + 10;
const int mod = 1e9 + 7; // not referenced by the code visible in this file
// Suffix_Automata sizes its tables for N << 1 states, so every array that is
// indexed by a state number (d, f, id) must be able to hold N << 1 entries as
// well; with only N entries a long first string would index past the end.
int d[N << 1];  // d[s]: longest match ending in state s for the current string
char a[N];      // first input string, read 0-based
char b[N];      // each later input string, read 1-based (b + 1)
int f[N << 1];  // f[s]: minimum of d[s] over the strings processed so far
int bucket[N];  // counting-sort buckets, indexed by a state's len
int id[N << 1]; // state numbers ordered by increasing len
int ans;        // running maximum in the two-string solve() shown in the notes
int ls;         // length of the string the automaton was built from
// Suffix automaton over the letters 'a'..'z'.
//
// States are numbered from 1; state 1 is the initial state and 0 means
// "no state" (absent transition, or the suffix link of the initial state).
// Typical use: init(), extend() once per character, set the global ls to the
// string length, get_sub2(), then solve() once per additional string.
struct Suffix_Automata
{
    int len[N << 1];     // len[s]: length of the longest string of state s
    int lnk[N << 1];     // lnk[s]: suffix link of state s
    int cnt[N << 1];     // cnt[s]: number of states whose suffix link is s
    int sub[N << 1];     // sub[s]: 1 for states made directly by extend(); summed by get_sub2()
    int nxt[N << 1][26]; // nxt[s][c]: target of the c-transition, 0 if absent
    int idx;             // highest state number in use
    int last;            // state reached by the whole string inserted so far

    // Starts a new automaton consisting of the initial state only.
    // Does not wipe the tables; see clear().
    void init()
    {
        last = idx = 1;
        lnk[1] = len[1] = 0;
    }

    // Zeroes every per-state table so the object can be reused.
    // idx and last are left alone; call init() afterwards.
    void clear()
    {
        memset(len, 0, sizeof len);
        memset(lnk, 0, sizeof lnk);
        memset(cnt, 0, sizeof cnt);
        // Cloned states never assign sub and rely on it being zero, so it
        // has to be reset together with the other tables.
        memset(sub, 0, sizeof sub);
        memset(nxt, 0, sizeof nxt);
    }

    // Appends one character to the string. c is the letter index 0..25.
    void extend(int c)
    {
        int x = ++idx;
        len[x] = len[last] + 1;
        sub[x] = 1;
        int p;
        // Give every suffix state that lacks a c-transition one to x.
        for (p = last; p && !nxt[p][c]; p = lnk[p])
            nxt[p][c] = x;
        if (!p)
            // Ran off the initial state: c is new, so x links to state 1.
            lnk[x] = 1, cnt[1]++;
        else
        {
            int q = nxt[p][c];
            if (len[p] + 1 == len[q])
                // q already has exactly the required length.
                lnk[x] = q, cnt[q]++;
            else
            {
                // Split q: nq copies q's transitions and link but is capped
                // at len[p] + 1; it is not given a sub of its own.
                int nq = ++idx;
                len[nq] = len[p] + 1;
                lnk[nq] = lnk[q];
                memcpy(nxt[nq], nxt[q], sizeof nxt[q]);
                // Redirect the transitions that pointed at q to the clone.
                for (; p && nxt[p][c] == q; p = lnk[p])
                    nxt[p][c] = nq;
                lnk[q] = lnk[x] = nq;
                cnt[nq] += 2; // q and x now both link to nq
            }
        }
        last = x;
    }

    // Counting-sorts the states by len into the global id[] (id[1] is the
    // shortest state, id[idx] the longest) and then adds each state's sub to
    // the sub of its suffix-link target, longest states first.
    // Relies on the global ls being the largest len and on bucket[] being
    // zero on entry; bucket[] is not reset here.
    void get_sub2()
    {
        for (int i = 1; i <= idx; i++)
            bucket[len[i]]++;
        for (int i = 1; i <= ls; i++)
            bucket[i] += bucket[i - 1];
        for (int i = idx; i >= 1; i--)
            id[bucket[len[i]]--] = i;
        for (int i = idx; i >= 1; i--)
        {
            int wz = id[i];
            sub[lnk[wz]] += sub[wz];
        }
    }

    // Matches one more string against the automaton and folds the result
    // into the global f[]: afterwards f[s] is the minimum, over all strings
    // passed to solve() so far, of the longest match ending in state s.
    //
    // b holds the string 1-based (b[1] is the first character). id[] must
    // have been filled by get_sub2(), and the caller initialises f[].
    void solve(char* b)
    {
        int lenb = strlen(b + 1);
        int u = 1, L = 0; // current state and current match length
        for (int i = 1; i <= lenb; i++)
        {
            int c = b[i] - 'a';
            // No c-transition: fall back along suffix links; the match
            // shrinks to the longest string of the state we land on.
            while (!nxt[u][c] && u != 1)
                u = lnk[u], L = len[u];
            if (nxt[u][c])
                u = nxt[u][c], ++L;
            d[u] = std::max(d[u], L);
        }
        // Longest states first (the state at id[1] is skipped): a state that
        // was matched at all implies its suffix-link parent matched in full.
        for (int i = idx; i >= 2; i--)
        {
            int wz = id[i];
            if (d[wz])
                d[lnk[wz]] = len[lnk[wz]];
            f[wz] = std::min(f[wz], d[wz]);
            d[wz] = 0; // ready for the next string
        }
    }
} sam;
// Reads the strings from stdin, builds the automaton from the first one,
// matches every other string against it and prints the largest value that
// survived in f[].
int main()
{
#ifdef LOCAL
    freopen("D:/input.txt", "r", stdin);
#endif
    // Every byte becomes 0x3f, so each f[s] starts as 0x3f3f3f3f and the
    // first min() taken in solve() always wins.
    memset(f, 0x3f, sizeof f);

    // The first string is the one the automaton is built from.
    scanf("%s", a);
    sam.init();
    ls = strlen(a);
    for (const char* p = a; *p; ++p)
        sam.extend(*p - 'a');
    sam.get_sub2();

    // Every remaining string is stored 1-based in b and matched in turn.
    while (scanf("%s", b + 1) != EOF)
        sam.solve(b);

    // States still at the initial value were never updated (no second string).
    int best = 0;
    for (int s = sam.idx; s >= 2; --s)
    {
        if (f[s] == 0x3f3f3f3f)
            continue;
        best = max(best, f[s]);
    }
    cout << best << endl;
    return TIME;
}