POJ - 3294
题意:给你n个字符串, 让你找到在超过n/2个(即多于一半)字符串中出现的最长公共子串(有多个时按字典序全部输出, 不存在时输出"?")
思路:和前几题类似, 先用互不相同的分隔符把各个串连接起来(分隔符最好换成数字, 不然容易出bug), 然后求一遍后缀数组和height数组。接着二分公共子串的长度, 对二分的每一个值mid做一次check: 在check中按height是否连续大于等于mid来分组, 对于每一组只需要判断组内后缀所来自的不同串的数量是否大于n / 2即可
细节见于代码
#include <iostream>
#include <cstring>
#include <algorithm>
#include <cstdio>
#include <vector>
using namespace std;
// Shorthand for long long; not used by any code visible in this file.
#define ll long long
// Capacity of every work array below.
const int N = 300010;
// n: length of the concatenated text s[1..n].
// m: largest key value handled by the counting sort in get_sa (main sets it to 300,
//    get_sa shrinks it to the current number of distinct ranks).
// t: number of strings in the current test case.
int n, m, t;
// Input buffer for one string; main reads into a + 1, so characters are 1-based.
char a[N];
// Integer-encoded concatenation of all strings: letters become 1..26 and each string
// is followed by its own separator value 133 + (string index).
int s[N];
// sa[i]     : start position of the suffix with rank i.
// x, y      : key / scratch arrays of the prefix-doubling sort in get_sa.
// c         : counting-sort buckets.
// rk[p]     : rank of the suffix starting at position p (inverse of sa).
// height[i] : length of the common prefix of suffixes sa[i] and sa[i - 1].
// base[i]   : floor(log2(i)), built in init_rmq (base[0] = -1).
// f[i][j]   : minimum of height over the window of length 2^j starting at i.
// belong[p] : 1-based index of the string that owns text position p; main does not
//             assign it for separator positions.
int sa[N], x[N], y[N], c[N], rk[N], height[N], base[N], f[N][30], belong[N];
// Builds the suffix array sa[1..n] of s[1..n] by prefix doubling, using a
// counting sort on keys in 1..m at every round.
// Preconditions visible in this file: c[] must be all zero on entry (the first
// loop only increments it; init() clears it) and m must be at least the largest
// value stored in s. The function overwrites x, y, c and m.
void get_sa()
{
// Round 0: the key of suffix i is its first symbol; count the keys.
for (int i = 1; i <= n; i ++ ) c[x[i] = s[i]] ++ ;
// Prefix sums turn the counts into the last slot of each bucket.
for (int i = 2; i <= m; i ++ ) c[i] += c[i - 1];
// Fill the buckets from the back, walking positions from n down to 1.
for (int i = n; i; i -- ) sa[c[x[i]] -- ] = i;
// Each round extends the sorted prefix length from k to 2k.
for (int k = 1; k <= n; k <<= 1)
{
int num = 0;
// y lists the suffixes ordered by their second half. Suffixes whose second
// half lies past the end of the text (positions n-k+1..n) come first.
for (int i = n - k + 1; i <= n; i ++ ) y[ ++ num] = i;
// The remaining ones follow in the order of the previous round: suffix
// sa[i] is the second half of suffix sa[i] - k.
for (int i = 1; i <= n; i ++ )
if (sa[i] > k)
y[ ++ num] = sa[i] - k;
// Counting sort by first-half key x, visiting suffixes in y order from the
// back so that the second-half order is kept inside each bucket.
for (int i = 1; i <= m; i ++ ) c[i] = 0;
for (int i = 1; i <= n; i ++ ) c[x[i]] ++ ;
for (int i = 2; i <= m; i ++ ) c[i] += c[i - 1];
// y[i] is reset to 0 after use, so y[1..n] is clean before the swap below.
for (int i = n; i; i -- ) sa[c[x[y[i]]] -- ] = y[i], y[i] = 0;
// After the swap, y holds the previous ranks and x receives the new ones.
swap(x, y);
x[sa[1]] = 1, num = 1;
// Two neighbours in sa share a rank only if both halves had equal old ranks.
for (int i = 2; i <= n; i ++ )
x[sa[i]] = (y[sa[i]] == y[sa[i - 1]] && y[sa[i] + k] == y[sa[i - 1] + k]) ? num : ++ num;
// All ranks distinct: the order is final.
if (num == n) break;
// The next round only has to bucket keys 1..num.
m = num;
}
}
void get_height()
{
for (int i = 1; i <= n; i ++ ) rk[sa[i]] = i;
for (int i = 1, k = 0; i <= n; i ++ )
{
if (rk[i] == 1) continue;
if (k) k -- ;
int j = sa[rk[i] - 1];
while (i + k <= n && j + k <= n && s[i + k] == s[j + k]) k ++ ;
height[rk[i]] = k;
}
}
// Prepares the range-minimum structure over height[1..n]:
//   base[i] = floor(log2(i)) with base[0] = -1,
//   f[i][j] = min of the two level-(j-1) cells at i and i + 2^(j-1).
// Levels 1..18 are filled for every i with i + 2^(j-1) <= n.
void init_rmq()
{
    base[0] = -1;
    for (int i = 1; i <= n; ++i)
        base[i] = base[i / 2] + 1;
    for (int i = 1; i <= n; ++i)
        f[i][0] = height[i];

    for (int level = 1; level <= 18; ++level)
    {
        const int half = 1 << (level - 1);
        for (int i = 1; i + half <= n; ++i)
            f[i][level] = min(f[i][level - 1], f[i + half][level - 1]);
    }
}
// Longest common prefix of the suffixes that START at text positions x and y
// (positions, not ranks). Requires get_height() and init_rmq() to have run.
int LCP(int x, int y)
{
    // A suffix compared with itself matches over its whole length.
    if (x == y)
        return n - x + 1;

    int lo = rk[x];
    int hi = rk[y];
    if (lo > hi)
        swap(lo, hi);
    // The answer is the minimum of height over ranks lo + 1 .. hi.
    ++lo;
    const int level = base[hi - lo + 1];
    return min(f[lo][level], f[hi - (1 << level) + 1][level]);
}
// pos[len] collects one start position per group of suffixes found by check(len).
vector<int> pos[1010];

// Resets the per-test-case state: the answer lists and the two arrays that
// get_sa() expects to be zero when it starts.
void init()
{
    memset(x, 0, sizeof(x));
    memset(c, 0, sizeof(c));
    for (int len = 0; len < 1010; ++len)
        pos[len].clear();
}
// Tests whether some substring of length mid occurs in more than t / 2 different
// strings. Ranks are scanned in order and split into runs in which every
// height[] value is at least mid; inside a run the distinct owners (belong[]) are
// counted. The first time a run exceeds t / 2 owners, the start position of the
// current suffix is appended to pos[mid], giving one entry per qualifying run.
// Returns true if at least one run qualified.
bool check(int mid)
{
    bool seen[110];
    memset(seen, 0, sizeof seen);
    int distinct = 0;
    bool found = false;
    bool recorded = false; // whether the current run already contributed to pos[mid]

    for (int rank = 1; rank <= n; ++rank)
    {
        const int owner = belong[sa[rank]];

        if (height[rank] < mid)
        {
            // A new run starts here, with this suffix as its only member.
            memset(seen, 0, sizeof seen);
            seen[owner] = true;
            distinct = 1;
            recorded = false;
            continue;
        }

        if (!seen[owner])
        {
            seen[owner] = true;
            ++distinct;
        }
        if (distinct <= t / 2)
            continue;

        found = true;
        if (!recorded)
        {
            recorded = true;
            pos[mid].push_back(sa[rank]); // record each run once so equal substrings are not repeated
        }
    }
    return found;
}
// Driver. For every test case: reads t strings, concatenates them into s[] with a
// distinct separator after each one, builds the suffix array and height array,
// binary-searches the greatest length accepted by check(), and prints every
// substring recorded for that length (or "?" when the length is 0). A blank line
// is printed between consecutive test cases. Input ends at a count of 0 or at
// end of input.
int main()
{
    bool printed_case = false;
    // scanf returns EOF (non-zero) at end of input, so the result must be compared
    // with 1; testing it for truthiness would keep looping on a stale t.
    while (scanf("%d", &t) == 1 && t)
    {
        if (printed_case) printf("\n");
        else printed_case = true;

        init();
        n = 0;
        for (int i = 1; i <= t; i++)
        {
            // Truncated input: stop instead of re-processing the previous buffer.
            if (scanf("%s", a + 1) != 1) return 0;
            const int len1 = static_cast<int>(strlen(a + 1));
            for (int j = 1; j <= len1; j++)
            {
                s[++n] = a[j] - 'a' + 1; // letters map to 1..26
                belong[n] = i;
            }
            // A separator unique to this string keeps common prefixes from
            // running across string boundaries.
            s[++n] = 133 + i;
            // Separators belong to no string; without this reset the slot would
            // keep the owner written by an earlier, longer test case.
            belong[n] = 0;
        }

        if (t == 1)
        {
            // A single string already satisfies "more than t / 2 strings", so the
            // whole string is the answer. check() cannot find it because it only
            // looks at pairs of adjacent suffixes.
            printf("%s\n", a + 1);
            continue;
        }

        m = 300; // upper bound of the symbol values placed in s[] above
        get_sa();
        get_height();
        init_rmq(); // NOTE: main itself never queries this table

        // 1000 is presumably the maximum string length of the task; pos[] has
        // 1010 slots, so mid must stay within that range.
        int l = 0, r = 1000, ans = 0;
        while (l <= r)
        {
            const int mid = (l + r) >> 1;
            if (check(mid))
            {
                ans = mid;
                l = mid + 1;
            }
            else r = mid - 1;
        }

        if (ans == 0) printf("?\n");
        else
        {
            // pos[ans] was filled while walking the suffix array in rank order,
            // so the substrings already come out in lexicographic order.
            for (unsigned int i = 0; i < pos[ans].size(); i++)
            {
                for (int j = 0; j < ans; j++)
                {
                    printf("%c", s[pos[ans][i] + j] + 'a' - 1);
                }
                printf("\n");
            }
        }
    }
    return 0;
}