题目链接:https://leetcode-cn.com/problems/longest-duplicate-substring/
题意
给定一个字符串,求其中出现次数不少于2次的最长子串(各次出现允许重叠),有多个时返回任意一个;若不存在这样的子串则返回空串
题解
链接
解法0:二分+哈希,细节很多,不推荐使用,但思路简单,即二分答案长度,然后哈希判断是否存在重复
解法1:后缀数组,需要对height求解过程有一定理解,一边求解一边得到答案,标准LCP板子题,最长重复子串的长度即height数组(排名相邻的两个后缀的最长公共前缀)的最大值
解法2:后缀自动机,每个状态的重复次数即其在后缀链接树中所在子树的size(只有非克隆状态计1),自底向上累加即可;建自动机复杂度O(n)(下面的代码用std::sort按len排序,整体为O(n log n),换成计数排序即可做到O(n)),直接抄板子即可,板子可以提前给出first_position/size的求解,这样不需要理解也能直接套用,但还是建议对后缀自动机(SAM)有一定理解再使用
下面是解法2的代码,因为后缀数组实现相对较难所以我还是用了后缀自动机(其实是我SAM用的多…)
code
// Zero-fills `a` over sizeof(a) bytes; meant for real arrays (tr[i], sz),
// because sizeof on a pointer would only cover the pointer itself.
#define ms(a) memset(a, 0, sizeof(a))
// Capacity of every per-state array below; state ids are used as indices.
const int N = 200001;
// Alphabet size: second dimension of the transition table.
const int base = 26;
// tr[state][c] is the target of the transition on symbol c.
// insert() treats the value 0 as "no transition".
int tr[N][base];
// Suffix link of each state; init() sets link[0] = -1 for state 0.
// NOTE(review): this name is shared with POSIX link() from <unistd.h> and
// could clash if that header is pulled in - confirm on the target toolchain.
int link[N];
// Length stored per state: len[last] + 1 for a new state, len[p] + 1 for a clone.
int len[N];
int sz[N]; // occurrence count, i.e. size of the endpos set (complete only after getsz() has run)
// 0-based index of the last character of the occurrence recorded when the
// state was created; clones copy the value of the state they were split from.
int firstpos[N];
// State ids 1..cntp ordered by ascending len[]; filled by getrk().
int rklen[N];
// Id of the most recently allocated state; ids 0..cntp are in use.
int cntp = 0;
// State created by the most recent insert() (0 right after init()).
int last = 0;
// Resets the automaton to a single state 0 with no transitions.
// State 0 gets len 0 and suffix link -1 (the "no parent" sentinel that
// insert() stops on), and every occurrence counter in sz is cleared.
void init()
{
    cntp = 0;
    last = 0;

    len[0] = 0;
    link[0] = -1;
    ms(tr[0]);

    // Clears the whole counter array, not just the slots used last time.
    ms(sz);
}
// Allocates the next state id, wipes its transition row and returns the id.
// The other per-state fields (len, link, sz, firstpos) are left to the caller.
int addp()
{
    cntp += 1;
    ms(tr[cntp]);
    return cntp;
}
// Creates a copy of state q that is reached from p by one symbol.
// The copy takes q's transitions, suffix link and firstpos, gets length
// len[p] + 1, and starts with an occurrence count of 0.
// Returns the id of the new state.
int clone(int p, int q)
{
    const int copy = addp();

    memcpy(tr[copy], tr[q], sizeof(tr[copy]));
    link[copy] = link[q];
    firstpos[copy] = firstpos[q];

    len[copy] = len[p] + 1;
    sz[copy] = 0;
    return copy;
}
// Extends the automaton by one symbol `ch` (used directly as a column of tr).
// A new state is created for the extended prefix and becomes `last`; its
// suffix link is set to state 0, to an existing state, or to a fresh clone.
void insert(int ch)
{
    const int prev = last;
    const int cur = addp();

    len[cur] = len[prev] + 1;
    sz[cur] = 1;               // counts as one real end position
    firstpos[cur] = len[prev]; // 0-based index of the symbol just added
    last = cur;

    // Walk the suffix links, adding the missing transition on ch.
    int p = prev;
    for (; p != -1 && tr[p][ch] == 0; p = link[p])
        tr[p][ch] = cur;

    // Ran past state 0: no shorter suffix had a ch-transition.
    if (p == -1)
    {
        link[cur] = 0;
        return;
    }

    const int q = tr[p][ch];
    if (len[q] == len[p] + 1)
    {
        // q can serve as the suffix link directly.
        link[cur] = q;
        return;
    }

    // Otherwise split q: the clone takes over the transitions that
    // pointed at q along the remaining suffix-link chain.
    const int clo = clone(p, q);
    for (; p != -1 && tr[p][ch] == q; p = link[p])
        tr[p][ch] = clo;

    link[cur] = clo;
    link[q] = clo;
}
// Ordering predicate on state ids: true when state x has a strictly
// smaller len than state y.
bool cmpl(int x, int y)
{
    return len[y] > len[x];
}
// Fills rklen[1..cntp] with the state ids 1..cntp sorted by ascending len
// (via cmpl); rklen[0] is set to 0.
void getrk()
{
    rklen[0] = 0;

    int *first = rklen + 1;
    int *past = first + cntp;

    int id = 1;
    for (int *slot = first; slot != past; ++slot)
        *slot = id++;

    sort(first, past, cmpl);
}
// Adds each state's count into the state its suffix link points to.
// States are visited from the end of rklen to the front, so getrk() must
// have filled rklen first.
void getsz()
{
    int idx = cntp;
    while (idx >= 1)
    {
        const int state = rklen[idx];
        const int parent = link[state];
        sz[parent] += sz[state];
        --idx;
    }
}
// Entry point for the "longest duplicate substring" problem, built on the
// file-level suffix automaton (init / insert / getrk / getsz).
class Solution
{
public:
    // Returns one longest substring of `s` that occurs at least twice, or ""
    // when no substring repeats.
    //
    // Each character is mapped to `s[i] - 'a'` and used as a transition index,
    // so the input is expected to contain only 'a'..'z'.
    // NOTE(review): nothing here checks the input against the capacity N of
    // the global arrays - confirm the caller's length bound.
    string longestDupSubstring(string s)
    {
        init();
        for (string::size_type i = 0; i < s.size(); ++i)
        {
            insert(s[i] - 'a');
        }
        getrk();
        getsz();

        // Among states seen more than once, keep the one with the largest len;
        // firstpos gives the index of the last character of an occurrence.
        int mlen = 0;
        int mp = -1;
        for (int state = 1; state <= cntp; ++state)
        {
            if (sz[state] > 1 && len[state] > mlen)
            {
                mlen = len[state];
                mp = firstpos[state];
            }
        }

        if (mlen == 0)
        {
            return "";
        }
        // The occurrence ends at mp, so it starts at mp - mlen + 1.
        return s.substr(mp - mlen + 1, mlen);
    }
};