题目:http://poj.org/problem?id=3294
题意:
给定 n 个字符串,求在超过一半(即多于 n/2 个)的字符串中都出现过的最长子串;若有多个,按字典序全部输出;若不存在,输出 "?"。
分析:
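将 n 个字符串连接成一个长串,相邻字符串之间用互不相同且未在原串中出现过的字符隔开,对长串求后缀数组和 height 数组。然后二分答案长度 len:按排名顺序把 height 值不小于 len 的相邻后缀划入同一组,判断是否存在某一组,其后缀来自多于 n/2 个不同的原串。设长串总长度为 L,后缀数组的构造(倍增法)与二分判定的总时间复杂度为 O(L log L)(不含判断每个后缀属于哪个原串的开销)。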
#include<cstdio>
#include<cstring>
#include<algorithm>
#include<iostream>
using namespace std;
// 64-bit integer alias; not referenced in the portion of the file shown here.
typedef long long ll;
// Large "infinity" constant; not referenced in the portion of the file shown here.
const int INF = 1e9 + 9;
// Capacity of the global work arrays declared with it.
const int N = 2e5 + 9;
/******************** Prefix-doubling suffix array template *******************************/
// sa[r]     : start position of the suffix that has rank r (filled by build_sa).
// t1, t2    : scratch buffers that build_sa alternates between for ranks / second-key order.
// c         : bucket counters for the counting sorts inside build_sa.
// rk[p]     : rank of the suffix starting at position p (filled by getHeight).
// height[r] : length of the common prefix of the suffixes ranked r-1 and r (filled by getHeight).
int sa[N], t1[N], t2[N], c[N], rk[N], height[N];
void build_sa (int s[], int n, int m) {
int i, k, p, *x = t1, *y = t2;
for (i = 0; i < m; i++) c[i] = 0;
for (i = 0; i < n; i++) c[x[i] = s[i]]++;
for (i = 1; i < m; i++) c[i] += c[i - 1];
for (i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;
for (k = 1; k <= n; k <<= 1) {
p = 0;
for (i = n - k; i < n; i++) y[p++] = i;
for (i = 0; i < n; i++) if (sa[i] >= k) y[p++] = sa[i] - k;
for (i = 0; i < m; i++) c[i] = 0;
for (i = 0; i < n; i++) c[x[y[i]]]++;
for (i = 1; i < m; i++) c[i] += c[i - 1];
for (i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];
swap (x, y);
p = 1;
x[sa[0]] = 0;
for (i = 1; i < n; i++)
x[sa[i]] = y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + k] == y[sa[i] + k] ? p - 1 : p ++;
if (p >= n) break;
m = p;
}
}
void getHeight (int s[], int n) {
int i, j, k = 0;
for (i = 0; i <= n; i++) rk[sa[i]] = i;
for (i = 0; i < n; i++) {
if (k) k--;
j = sa[rk[i] - 1];
while (s[i + k] == s[j + k]) k++;
height[rk[i]] = k;
}
}
/********************************************************************************/
// Shared solver state (written by main, read by check and ok):
//   s       concatenated text; each character is stored as str[j] - 'a' + 1,
//           every input string is followed by its own separator code, and
//           main stores a 0 at s[n]
//   n       number of symbols in s, not counting the 0 stored at s[n]
//   m       number of input strings in the current test case
//   length  length[i] is the offset in s just past string i and its separator
//   tot     number of entries of ans filled by the most recent ok() call
//   ans     ranks (indices into sa) of one suffix per qualifying group
int s[N], n, m, length[N], tot, ans[N];
// flag[i] is set once string i has been counted in the group ok() is scanning.
bool flag[111];
// Buffer for one input string read by main.
char str[N];
/**
 * Maps a position in the concatenated text to the index of the input string
 * that contains it.
 *
 * length[i] is the offset just past string i (separator included), so the
 * owner of x is the first i with x < length[i].
 *
 * @param x  position in the concatenated text s.
 * @return   the owning string's index in [0, m), or m when x is not below
 *           length[m - 1] (for example the position of the 0 stored after the
 *           last separator). The previous version fell off the end of the
 *           function in that case, which is undefined behaviour.
 */
int check (int x) {
    // main appends at least the separator before recording each length[i],
    // so length[0..m) is strictly increasing and a binary search finds the
    // first end offset greater than x.
    return static_cast<int> (std::upper_bound (length, length + m, x) - length);
}
/**
 * Tests whether some substring of length k occurs in more than half of the
 * m input strings.
 *
 * Suffixes are scanned in rank order and split into groups of consecutive
 * suffixes whose height is at least k (they share a prefix of length >= k).
 * For every group that covers more than m / 2 distinct strings, the rank of
 * the group's last suffix is appended to ans, so ans lists the qualifying
 * groups in rank order.
 *
 * @param k  candidate substring length.
 * @return   true when at least one group qualifies. tot is set to the number
 *           of entries written to ans (0 when the result is false).
 */
bool ok (int k) {
    tot = 0;
    // A non-positive length never yields an answer. Returning here also keeps
    // the scan away from sa[0]: with k <= 0 no height is below k, so the
    // first iteration would pass the sentinel's position to check().
    if (k <= 0) return false;

    // Start from a clean slate instead of relying on the first boundary.
    memset (flag, 0, sizeof (flag) );
    int cnt = 0;  // distinct strings seen in the group currently open
    for (int i = 1; i <= n; i++) {
        if (height[i] < k) {
            // Boundary: close the open group, if there is one. flag only
            // needs clearing when something was marked.
            if (cnt > 0) {
                if (cnt > m / 2) ans[tot++] = i - 1;
                memset (flag, 0, sizeof (flag) );
                cnt = 0;
            }
            continue;
        }
        if (cnt == 0) {
            // A group opens with the suffix just before the first height >= k.
            flag[check (sa[i - 1])] = true;
            cnt = 1;
        }
        const int id = check (sa[i]);
        if (!flag[id]) {
            flag[id] = true;
            cnt++;
        }
    }
    return tot > 0;
}
/**
 * Reads test cases until a count of 0 (or end of input) and, for each case,
 * prints every longest substring that occurs in more than half of the m
 * strings, one per line in suffix-array order, or "?" when there is none.
 * A blank line separates consecutive cases.
 *
 * Assumes the input respects the sizes of the global buffers (flag has 111
 * slots and the alphabet passed to build_sa is 150, so m must stay small;
 * str and s are not bounds-checked).
 */
int main() {
    //freopen ("f.txt", "r", stdin);
    // Separator codes start above every letter code (letters map to 1..26).
    const int FIRST_SEPARATOR = 30;
    int cas = 0;
    // Require a successful conversion: scanf returns 0 on malformed input,
    // which the old `~scanf(...)` test treated as success.
    while (scanf ("%d", &m) == 1 && m > 0) {
        if (cas++) printf ("\n");
        int separator = FIRST_SEPARATOR;
        int longest = 0;  // length of the longest input string
        n = 0;
        for (int i = 0; i < m; i++) {
            scanf ("%s", str);
            const int len = static_cast<int> (strlen (str) );
            if (len > longest) longest = len;
            for (int j = 0; j < len; j++) s[n++] = str[j] - 'a' + 1;
            s[n++] = separator++;  // a different separator after every string
            length[i] = n;
        }
        if (m == 1) {
            // With a single string, the string itself occurs in more than
            // half of the inputs, so it is the answer. The grouping in ok()
            // needs at least two suffixes and cannot find it.
            puts (str);
            continue;
        }
        s[n] = 0;  // unique smallest sentinel required by the suffix array code
        build_sa (s, n + 1, 150);
        getHeight (s, n);

        // Binary search for the largest feasible length. No common substring
        // can be longer than the longest input string.
        int l = 0, r = longest;
        while (l < r) {
            const int mid = l + (r - l + 1) / 2;
            if (ok (mid) ) l = mid;
            else r = mid - 1;
        }

        // The last probe may have been a failing length, so rerun ok(l) to
        // refill ans/tot. Never probe length 0: it has no valid answer and
        // would make ok() look at the sentinel suffix.
        if (l == 0 || !ok (l) ) {
            puts ("?");
        } else {
            for (int i = 0; i < tot; i++) {
                const int start = sa[ans[i]];
                for (int j = start; j < start + l; j++)
                    putchar (s[j] + 'a' - 1);
                putchar ('\n');
            }
        }
    }
    return 0;
}
/*
Sample Input
3
abcdefg
bcdefgh
cdefghi
3
xxx
yyy
zzz
0
Sample Output
bcdefg
cdefgh
?
*/