HDU 2144 Evolution 后缀树/后缀数组
题意
- 给我们不到一百个字符串(长度不到一百)以及一个百分比q,然后如果某两个字符串的最长公共子串占比超过了q(在两个串中都超过)则两个串为一个集合,问最终能分出几个集合
思路
- 显然我们枚举每两个字符串是 $O(n^2)$ 复杂度,那么判断最长公共子串必须要 $O(n)$ 的复杂度才能过这道题。
- 所以使用后缀数组或者后缀树,每次都重建一遍,尽管常数可能不太优秀但是 $O(n^3)$ 复杂度还是可以过的。这次把超过理解成了大于等于,所以wa了两次,可惜,代码中后缀树用的是之前的模板。
AC代码
#include <cstdio>
#include <cstring>
#include <iostream>
#include <algorithm>
#include <set>
using namespace std;
// Sentinel right bound for leaf edges: add_new() uses it as the default `rr`,
// and dfs() treats any node whose r equals it as a leaf.
const int maxn = (1 << 30);
// Index of the root node inside tree[].
const int root = 1;
// Working buffer the tree is built over; suffixt() lays it out as
// first string + ('Z' + 1) + second string + ('Z' + 2).
char ss[210] = {0};
// Second input string; suffixt() appends it to ss.
char ss2[205] = {0};
// act: node from which the next insertion starts; co: index of the last allocated node.
int act = 1, co = 1;
// Only reset in suffixt(); not read anywhere else in the code shown here.
int acteg = -1;
// Index into ss of the character that selects the child edge of act.
int tep = 0;
// ind: number of characters already matched along the selected edge;
// rem: suffixes still waiting to be inserted;
// s_end: index in ss of the last character handed to add().
int ind = 0, rem = 0, s_end = -1;
// links, vv and mm are only cleared in suffixt(); nothing else shown here reads them
// (presumably leftovers from the template this code was adapted from).
int links[205] = {0};
int vv[205] = {0};
int mm = 0;
// Node that receives the next suffix link within one add() call (0 = none yet).
int linkk = 0;
// Lengths of the first and second string, set by suffixt().
int len1 = 0, len2 = 0;
// Largest depth dfs() has recorded for a node with leaves from both strings;
// suffixt() returns it.
int ans = 0;
// One suffix-tree node together with the edge that leads into it.
struct ab
{
// The incoming edge is labelled ss[l..r], both ends inclusive (see check_len()).
int l;
int r;
// Suffix link target written by add_link(); 0 means "no link".
int nex;
// Child node index per character, indexed by (character - 'A'); 0 means "no child".
// Presumably 26 slots for uppercase letters plus two for the markers 'Z' + 1 and
// 'Z' + 2, which act as separator and terminator; they are adjacent to 'Z' in
// ASCII so the index stays inside the array.
int alp[28];
} tree[1005]; // node pool; index 1 is the root, add() allocates further nodes via ++co
// Input strings of one test case; main() fills rows starting at index 1.
char sss[105][105];
int add_new(int o, int ll = s_end, int rr = maxn)
{
tree[o].l = ll;
tree[o].r = rr;
return o;
}
// Chain suffix links inside one add() call: the previously remembered node
// (if any) gets `o` as its suffix link, then `o` becomes the remembered node.
void add_link(int o)
{
    if (linkk != 0)
        tree[linkk].nex = o;
    linkk = o;
}
int check_len(int o)
{
return min(tree[o].r, s_end) - tree[o].l + 1;
}
// If the matched length `ind` covers the whole edge into `o`, walk down:
// consume the edge from `ind`, advance `tep` past it, make `o` the active node
// and report true. Otherwise leave all state untouched and report false.
bool check_contain(int o)
{
    const int span = check_len(o);
    if (ind < span)
        return false;

    ind -= span;
    tep += span;
    act = o;
    return true;
}
// Extend the suffix tree by one character. The caller (suffixt()) has already
// advanced s_end so that ss[s_end] == cc. The routine follows the usual
// active-point online construction: (act, tep, ind) is the active point and
// rem counts the suffixes that still have to be inserted explicitly.
void add(char cc)
{
++rem;
// No node is waiting for a suffix link at the start of this step.
linkk = 0;
while (rem > 0)
{
if (!ind)
{
// Nothing matched on an edge yet: the edge is selected by the new character itself.
tep = s_end;
}
// Reference into the child table, so assignments below rewire the parent directly.
int& actedge_node = tree[act].alp[ss[tep] - 'A'];
if (!actedge_node)
{
// No edge starts with this character: hang a new leaf under the active node.
actedge_node = add_new(++co, s_end);
add_link(act);
}
else
{
if (check_contain(actedge_node))
{
// The active point moved one node down; retry from the new active node.
continue;
}
else
{
// When splitting, the existing node (actedge_node) must become a child of the
// new internal node, otherwise it would lose the connection to its own subtree.
if (ss[tree[actedge_node].l + ind] != cc)
{
int leaf1 = add_new(++co, s_end);
int leaf2 = actedge_node;
// New internal node takes the first `ind` characters of the old edge.
int newtree = add_new(++co, tree[actedge_node].l, tree[actedge_node].l + ind - 1);
tree[newtree].alp[cc - 'A'] = leaf1;
tree[newtree].alp[ss[tree[actedge_node].l + ind] - 'A'] = leaf2;
// The old node keeps only the remainder of its edge.
tree[leaf2].l += ind;
actedge_node = newtree;
add_link(actedge_node);
}
else
{
// The character is already on the edge. This is the only place where the
// matched length grows; after that a link is recorded and the step ends.
++ind;
add_link(act);
break;
}
}
}
// One suffix has been inserted explicitly.
--rem;
if (act == root)
{
if (!ind)
{
break;
}
// At the root the next (shorter) suffix starts one character later.
tep = s_end - rem + 1;
--ind;
}
else
{
// Away from the root tep and ind stay as they are; only the active node moves,
// along its suffix link if it has one, otherwise back to the root.
if (tree[act].nex)
{
act = tree[act].nex;
}
else
{
act = root;
}
}
}
}
// Depth-first walk used to find the longest common substring.
//
// o  - node to examine.
// cc - total edge length on the path from the root to `o`
//      (the root is entered with 0, each child with cc + its edge length).
//
// A leaf child (r == maxn) belongs to the second string when its edge starts
// after index len1 (the position of the 'Z' + 1 separator), otherwise to the first.
//
// Returns 1 if only first-string leaves lie below `o`, 2 if only second-string
// leaves do, 3 if both do, and 0 if `o` has no children at all. Whenever `o` is
// the deepest node on its branch that sees both strings, `ans` is raised to `cc`.
int dfs(int o, int cc)
{
    bool has_first = false;   // some suffix of the first string passes through o
    bool has_second = false;  // some suffix of the second string passes through o
    bool deeper_hit = false;  // a descendant already saw both strings

    for (int i = 0; i <= 27; ++i)
    {
        const int child = tree[o].alp[i];
        if (!child)
        {
            continue;
        }

        if (tree[child].r != maxn)
        {
            // Internal node: classify by what its own subtree contains.
            const int seen = dfs(child, cc + check_len(child));
            if (seen == 1)
            {
                has_first = true;
            }
            else if (seen == 2)
            {
                has_second = true;
            }
            else if (seen == 3)
            {
                has_first = has_second = true;
                deeper_hit = true;
            }
        }
        else if (tree[child].l > len1)
        {
            has_second = true;
        }
        else
        {
            has_first = true;
        }
    }

    if (deeper_hit)
    {
        // A deeper node has already recorded a value at least as large as cc.
        return 3;
    }
    if (has_first && has_second)
    {
        ans = max(ans, cc);
        return 3;
    }
    if (has_first)
    {
        return 1;
    }
    if (has_second)
    {
        return 2;
    }
    // Childless node: return a defined value instead of falling off the end of
    // a non-void function. Callers ignore 0.
    return 0;
}
// Length of the longest common substring of the NUL-terminated strings
// currently held in ss and ss2.
//
// Resets all suffix-tree state, rewrites ss as
//     ss + ('Z' + 1) + ss2 + ('Z' + 2),
// builds the tree over that buffer one character at a time, and lets dfs()
// compute the answer. The caller must refill ss before the next call because
// its terminating NUL is overwritten here.
int suffixt()
{
    len1 = (int)strlen(ss);
    len2 = (int)strlen(ss2);

    // Start from an empty tree and a fresh active point.
    memset(links, 0, sizeof(links));
    memset(tree, 0, sizeof(tree));
    memset(vv, 0, sizeof(vv));
    act = 1, co = 1;
    acteg = -1;
    tep = 0;
    ind = 0, rem = 0, s_end = -1;
    mm = 0;
    linkk = 0;
    ans = 0;

    // Marker after the first string, so that no suffix runs across both strings.
    ss[len1] = 'Z' + 1;
    memcpy(ss + len1 + 1, ss2, (size_t)len2);
    // Marker after the second string; it also ends the combined string.
    ss[len1 + len2 + 1] = 'Z' + 2;

    for (int i = 0; i <= len1 + len2 + 1; ++i)
    {
        ++s_end;
        add(ss[i]);
    }

    dfs(root, 0);
    return ans;
}
/* Parent table of the disjoint-set forest; an element whose entry equals its
   own index is the representative of its set. */
int ff[105] = {0};

/* Return the representative of x's set and point every element visited on the
   way directly at that representative (path compression). */
int f(int x)
{
    int rep = x;
    int up;

    /* First pass: climb to the representative. */
    while (ff[rep] != rep)
    {
        rep = ff[rep];
    }

    /* Second pass: re-hang the elements on the path below the representative. */
    while (ff[x] != rep)
    {
        up = ff[x];
        ff[x] = rep;
        x = up;
    }
    return rep;
}
// Reads test cases until input ends. Each case gives a count n and a percentage
// q, followed by n strings. Two strings are merged into one set when their
// longest common substring, divided by the longer of the two lengths, is
// strictly greater than q percent. Prints the number of resulting sets.
int main()
{
    int n;
    double q;
    int kk = 0; // case counter for the output header
    while (scanf("%d%lf", &n, &q) == 2)
    {
        q /= 100;
        for (int i = 1; i <= n; ++i)
        {
            ff[i] = i;
        }
        for (int i = 1; i <= n; ++i)
        {
            // Width-limited read: a row of sss holds 104 characters plus the NUL.
            if (scanf("%104s", sss[i]) != 1)
            {
                // Input ended in the middle of a case; nothing sensible to report.
                return 0;
            }
            const int len_i = (int)strlen(sss[i]);
            for (int j = 1; j < i; ++j)
            {
                int a = f(i), b = f(j);
                if (a == b)
                {
                    // Already in the same set; skip the expensive tree build.
                    continue;
                }
                const int len_j = (int)strlen(sss[j]);
                // suffixt() overwrites ss, so both buffers are refilled every time.
                memcpy(ss2, sss[j], (size_t)len_j + 1);
                memcpy(ss, sss[i], (size_t)len_i + 1);
                const int longer = max(len_i, len_j);
                int cc = suffixt();
                // Strictly greater: exceeding the ratio in the longer string
                // implies exceeding it in the shorter one as well.
                if ((double)cc / (double)longer > q)
                {
                    ff[a] = b;
                }
            }
        }
        // Count distinct set representatives.
        bool seen[105] = {false};
        int anss = 0;
        for (int i = 1; i <= n; ++i)
        {
            int c = f(i);
            if (!seen[c])
            {
                ++anss;
                seen[c] = true;
            }
        }
        printf("Case %d:\n%d\n", ++kk, anss);
    }
    return 0;
}