对应POJ 题目:点击打开链接
思路:用后缀数组求"出现在超过一半字符串中的最长公共子串":把所有字符串用分隔符连接成一个串,按 height 数组分组,并二分答案(子串长度)求其最大值。细节上,求得一组公共前缀后,要判断其中是否含有分隔符。一开始我是直接 for 一遍该前缀检查是否有分隔符,后来发现只需要判断前缀的首尾字符是否来自同一个字符串即可,效率又高了一点。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Fills a whole array object with byte value y. x must be a real array
 * (not a pointer), because sizeof is applied to it. */
#define MS(x, y) memset(x, y, sizeof(x))
/* Enumerators rather than `const int`: they are integer constant expressions,
 * so they are valid file-scope array bounds in C as well as in C++. */
enum {
    MAXN = 100000 + 2000, /* capacity of the concatenated text and of every per-position array */
    INF = 1 << 30         /* initial "no upper bound yet" value for the binary search */
};
/* Scratch arrays used by da(): two key buffers, the reordered keys, and the counting-sort buckets. */
int wa[MAXN], wb[MAXN], wv[MAXN], ws[MAXN];
/* rank[p]: position of suffix p in sa; r: encoded text; sa: suffix array;
 * height[i]: common-prefix length of the suffixes sa[i-1] and sa[i]. */
int rank[MAXN], r[MAXN], sa[MAXN], height[MAXN];
/* Input buffer for one string. */
char str[1005];
/* vis[s] marks that string s was already counted in the current group. */
int vis[1005];
/* Start positions (into r) of the answers found for the current length.
 * One entry is stored per qualifying height-group, and the number of groups
 * is limited only by the text length, so this needs MAXN slots, not 1005. */
int ID[MAXN];
/* block[p]: index of the input string that text position p belongs to, -1 for separators/unused. */
int block[MAXN];
/*
 * Tells whether two positions carry the same two-part key.
 * The key of position p is the pair (r[p], r[p + l]).
 * Returns 1 when both parts match for a and b, 0 otherwise.
 * The second part is only read when the first parts are equal.
 */
int cmp(int *r, int a, int b, int l)
{
    if (r[a] != r[b])
        return 0;
    if (r[a + l] != r[b + l])
        return 0;
    return 1;
}
/*
 * Builds the suffix array of r[0..n-1] into sa[0..n-1] by prefix doubling,
 * with one counting sort per key half in every round.
 *
 *   r  - input symbols; each r[i] is used as a bucket index, so it must lie in [0, m)
 *   sa - output: start positions of the suffixes in sorted order
 *   n  - number of symbols
 *   m  - number of buckets for the first pass (afterwards it becomes the
 *        number of distinct keys found in the previous round)
 *
 * Works on the file-scope scratch arrays wa, wb, wv and ws.
 * NOTE(review): cmp() reads key entries at index + j, which can reach past
 * n-1; this appears to rely on the caller having zeroed the buffers and
 * on r ending with a 0 symbol -- confirm against the caller.
 */
void da(int *r, int *sa, int n, int m)
{
int i, j, p, *x = wa, *y = wb, *t;
/* Initial pass: counting sort of the suffixes by their first symbol; x[] keeps the keys. */
for(i=0; i<m; i++) ws[i] = 0;
for(i=0; i<n; i++) ws[x[i] = r[i]]++;
for(i=1; i<m; i++) ws[i] += ws[i-1];
for(i=n-1; i>=0; i--) sa[--ws[x[i]]] = i;
/* Each round doubles the compared length j; p counts distinct keys and the loop ends once p reaches n. */
for(j=1,p=1; p<n; j<<=1, m=p){
/* Order by the second half: positions n-j..n-1 have no second half and go first, */
for(p=0,i=n-j; i<n; i++) y[p++] = i;
/* then the remaining positions follow the order sa already gives to their second halves. */
for(i=0; i<n; i++) if(sa[i] >= j) y[p++] = sa[i] - j;
/* Counting sort on the first-half key, walking y backwards so the second-half order is kept among equal keys. */
for(i=0; i<n; i++) wv[i] = x[y[i]];
for(i=0; i<m; i++) ws[i] = 0;
for(i=0; i<n; i++) ws[wv[i]]++;
for(i=1; i<m; i++) ws[i] += ws[i-1];
for(i=n-1; i>=0; i--) sa[--ws[wv[i]]] = y[i];
/* Swap the buffers (y now holds the old keys) and assign new keys in sa order:
   neighbours whose old key pairs are equal share a key, otherwise p is advanced. */
for(t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1; i<n; i++)
x[sa[i]] = cmp(y, sa[i-1], sa[i], j) ? p-1 : p++;
}
return;
}
/*
 * Fills the file-scope rank[] and height[] arrays from a finished suffix array.
 *
 *   r  - the symbols the suffix array was built from
 *   sa - suffix array of r[0..n-1]
 *   n  - number of symbols
 *
 * rank[sa[i]] = i is written for i = 1..n-1 only; the entry for sa[0] is left
 * as it was. height[rank[p]] receives the number of leading symbols that
 * suffix p shares with the suffix ranked directly before it, for p = 0..n-2.
 * NOTE(review): position n-1 is skipped, which presumably relies on it being
 * sa[0] and on rank[] having been zeroed by the caller -- confirm.
 */
void calheight(int *r, int *sa, int n)
{
    int pos, idx, prev;
    int common = 0;

    for (idx = 1; idx < n; idx++) {
        rank[sa[idx]] = idx;
    }

    for (pos = 0; pos < n - 1; pos++) {
        /* The match length can drop by at most one when moving to the next start position. */
        if (common) {
            common--;
        }
        prev = sa[rank[pos] - 1];
        while (r[pos + common] == r[prev + common]) {
            common++;
        }
        height[rank[pos]] = common;
    }
    return;
}
int main()
{
//freopen("in.txt", "r", stdin);
int n;
scanf("%d", &n);
while(n)
{
if(1 == n){
scanf("%s", str);
printf("%s\n", str);
scanf("%d", &n);
if(n) printf("\n");
continue;
}
int i, j, k;
MS(rank, 0);
MS(sa, 0);
MS(wa, 0);
MS(wb, 0);
MS(ws, 0);
MS(wv, 0);
MS(r, 0);
MS(height, 0);
MS(block, -1);
MS(ID, 0);
int len = 1, tmp_l, maxn = 0;
int left = 1, right = INF;
for(i=0; i<n; i++){//把所有字符串连成一个用分隔符分隔的字符串
scanf("%s", str);
tmp_l = strlen(str);
if(tmp_l < right) right = tmp_l;//二分答案的右边界为最短字符串的长度
int k;
for(j=len, k=0; k<tmp_l; j++, k++){
block[j] = i;//下标为j的字符所在的是第i个字符串
r[j] = str[k] - 'a' + 1;
if(r[j] > maxn) maxn = r[j];
}
len += tmp_l;
r[len++] = 0;//末尾添加一个最小值
}
da(r, sa, len, maxn+1);
calheight(r, sa, len);
int beg = 0, end = 0, ok, u = 0, ul = 0, LEN = 0;
while(left <= right)
{
ok = u = 0;
int mid = left + (right - left)/2;//二分答案
for(i=n+1; i<len; i++){
if(height[i] >= mid){//确定某一组的起点终点
//for(k=sa[i]; k < sa[i] + mid; k++)
// if(0 == r[k]) break;//该公共前缀含有分隔符
if(block[sa[i]] == block[sa[i] + mid - 1]){//判断首尾字符是否来自同一个字符串
if(!beg) beg = i;
end = i;
}
}
if((beg && end) && (i == len - 1 || height[i] < mid)){
int count = 0;
MS(vis, 0);
for(j=beg-1; j<=end; j++){//一组里面有多少个后缀来自不同的字符串
int num = block[sa[j]];
if(!vis[num]) {
vis[num] = 1;
count++;
}
}
if(count > n/2){//符合题意的解
ID[u++] = sa[j-1];//保存下标
LEN = mid;
ok = 1;
}
beg = end = 0;
}
}
if(ok) ul = u;//u值在每次二分都会置为0,故在每次找到合理的解后要赋给其它变量
if(ok) left = mid + 1;//找到解,说明不是最长
else right = mid - 1;
}
if(ul){
for(i=0; i<ul; i++){
for(j=ID[i]; j<ID[i] + LEN; j++)
printf("%c", char(r[j] - 1 +'a'));
printf("\n");
}
}
else printf("?\n");
scanf("%d", &n);
if(n) printf("\n");
}
}