[二分][后缀数组]Life Forms POJ3294

23 篇文章 0 订阅
22 篇文章 0 订阅

You may have wondered why most extraterrestrial life forms resemble humans, differing by superficial traits such as height, colour, wrinkles, ears, eyebrows and the like. A few bear no human resemblance; these typically have geometric or amorphous shapes like cubes, oil slicks or clouds of dust.

The answer is given in the 146th episode of Star Trek - The Next Generation, titled The Chase. It turns out that the vast majority of the quadrant's life forms ended up with a large fragment of common DNA.

Given the DNA sequences of several life forms represented as strings of letters, you are to find the longest substring that is shared by more than half of them.

Input

Standard input contains several test cases. Each test case begins with 1 ≤ n ≤ 100, the number of life forms. n lines follow; each contains a string of lower case letters representing the DNA sequence of a life form. Each DNA sequence contains at least one and not more than 1000 letters. A line containing 0 follows the last test case.

Output

For each test case, output the longest string or strings shared by more than half of the life forms. If there are many, output all of them in alphabetical order. If there is no solution with at least one letter, output "?". Leave an empty line between test cases.

Sample Input

3
abcdefg
bcdefgh
cdefghi
3
xxx
yyy
zzz
0

Sample Output

bcdefg
cdefgh

?

题意: 给出n个字符串,求在超过一半(严格大于n/2个)的字符串中出现的最长公共子串,若有多个,按字典序升序输出,若不存在,输出'?'。

分析: 类似后缀数组求最长公共子串,同样是用一个不会出现的字符把多个字符串连接起来,不过之后需要二分长度,对于确定的长度利用height数组check是否成立。具体的check过程就是遍历一遍height数组,当遇到一段连续的height值大于等于mid的区间段时,记录这一段来自多少个不同的字符串,如果大于字符串总数的一半就返回true,如果遍历完也没有找到符合要求的一段区间就返回false。

得到答案子串的长度len后需要输出具体子串是什么,还是类似上面二分中check的过程,只需要遍历一遍height数组,找到一段连续的height值大于等于len的区间,如果这段区间来自超过一半的字符串就输出它。由于后缀数组sa中的后缀本来就是按字典序升序排列好的,而height[i]与sa[i]一一对应,我们按下标从小到大遍历并输出,得到的答案也一定是按字典序升序的,不需要特殊的处理。

不过这题有几个坑需要注意下。首先加入的分隔符不能相同,否则输出答案可能会把分隔符带出来。另外最好把字符数组转为int数组,不然不好处理上面提到的不同的分隔符。最后注意每个答案子串只能输出一次,小心同一个子串被重复输出的情况,这里给一个样例:2 pabcp ptyup(答案p只应输出一次),可以测试一下。

具体代码如下:

#include <iostream>
#include <cstdio>
#include <cstring>
#include <utility>
#include <cmath>
using namespace std;

// Capacity of every global buffer declared below.
const int maxn = 1e6+10;
// n: length of the concatenated text s[1..n] (separators included).
// m: largest sort key handed to get_sa; main resets it to 300 before every test case.
// num: number of input strings in the current test case.
int n, m, num;
// Concatenated text: letters are stored as 1..26, the separator after the i-th string as i+100.
int s[maxn];
// sa: suffix array (1-based); height[i]: common-prefix length of suffixes sa[i-1] and sa[i];
// x, y: rank / ordering scratch arrays of get_sa; rk: rank of each suffix (inverse of sa);
// tong: counting-sort buckets; str[j]: 1-based index of the input string owning position j.
int sa[maxn], height[maxn], x[maxn], y[maxn], rk[maxn], tong[maxn], str[maxn];
// Input buffer for a single string; it is filled starting at index 1.
char st[maxn];

/**
 * Builds the suffix array sa[1..n] of the integer text s[1..n] by prefix
 * doubling, sorting each round with a counting sort.
 *
 * Reads the globals n (text length) and m (upper bound of the sort keys) and
 * uses x / y as rank and ordering scratch arrays and tong as the buckets.
 * m is overwritten with the number of distinct ranks between rounds, so the
 * caller has to reset it before every call.
 */
void get_sa()
{
	// Empty the buckets for the initial single-symbol sort.
	for(int i = 0; i <= m; i++) tong[i] = 0;
	// Zero both rank arrays up to 2n: the re-ranking step reads index sa[i]+k,
	// which can lie past the end of the text and must then compare as 0.
	for(int i = 0; i <= 2*n; i++) y[i] = x[i] = 0;
    // The first key of every suffix is its first symbol; count each key.
    for(int i = 1; i <= n; i++) tong[x[i] = s[i]] ++;
    // Prefix sums turn the counts into the last slot of each key.
    for(int i = 2; i <= m; i++) tong[i] += tong[i-1];
    // Fill from the back so that equal keys keep their relative order.
    for(int i = n; i; i--) sa[tong[x[i]]--] = i;
    // Each round extends the sorted prefix length from k to 2k symbols.
    for(int k = 1; k <= n; k <<= 1) 
	{
        // This local num shadows the global string count; it first counts the
        // entries written to y and is later reused as the rank counter.
        int num = 0;
        // y lists the suffixes ordered by their second key. Suffixes starting
        // after n-k have an empty second half and therefore come first.
        for(int i = n-k+1; i <= n; i++) y[++num] = i;
        // The remaining suffixes follow in sa order, shifted back by k.
        for(int i = 1; i <= n; i++) 
		{
            if(sa[i] <= k) continue;
            y[++num] = sa[i] - k;
        }
        // Counting sort by the first key x, walking y backwards so that the
        // second-key order is preserved among equal first keys.
        for(int i = 0; i <= m; i++) tong[i] = 0;
        for(int i = 1; i <= n; i++) tong[x[i]]++;
        for(int i = 2; i <= m; i++) tong[i] += tong[i-1];
        // y[i] is cleared as soon as it has been consumed.
        for(int i = n; i; i--) sa[tong[x[y[i]]]--] = y[i], y[i] = 0;
    	// Swap x and y: y now holds the previous ranks and x the zeroes left
    	// in y. num equals n here because every suffix was appended to y once.
    	for(int i = 0; i <= 2*num; i++)
    	{
    		int temp = x[i];
    		x[i] = y[i];
    		y[i] = temp;
		}
        // Re-rank: neighbours in sa share a rank only when both halves of
        // their 2k-symbol prefix carried equal ranks in the previous round.
        x[sa[1]] = 1, num = 1;
        for(int i = 2; i <= n; i++) 
            x[sa[i]] = (y[sa[i]] == y[sa[i-1]] && y[sa[i] + k] == y[sa[i-1] + k]) == 1 ? num : ++ num;
        // All ranks are distinct: the order is final.
        if(n == num) return;
        // The keys of the next round range over 1..num.
        m = num;
    }
}

void get_height() 
{
    for(int i = 1; i <= n; i++) rk[sa[i]] = i;
    for(int i = 1, k = 0; i <= n; i++) 
	{
        if(rk[i] == 1) continue;
        if(k) k--;
        int j = sa[rk[i]-1];
        while(i + k <= n && j + k <= n && s[i+k] == s[j+k]) k++;
        height[rk[i]] = k;
    }
}

/**
 * Tests whether some substring of length x occurs in more than half of the
 * num input strings.
 *
 * Scans height[1..n] for maximal runs with height[i] >= x; all suffixes in
 * such a run share a prefix of at least x symbols. str[] maps each suffix to
 * the input string it starts in, and the run qualifies as soon as it touches
 * more than num/2 different strings.
 *
 * @param x candidate substring length (callers pass x >= 1)
 * @return true if a qualifying run exists, false otherwise
 */
bool check(int x)
{
    int cnt = 0;            // number of distinct strings seen in the current run
    bool mp[105] = {0};     // mp[id]: string id already counted in the current run
    int seen[105];          // ids marked in the current run, so only those are cleared

    for(int i = 1; i <= n; i++)
    {
        if(height[i] >= x)
        {
            const int prevOwner = str[sa[i-1]];
            const int curOwner = str[sa[i]];
            if(!mp[prevOwner])
            {
                mp[prevOwner] = 1;
                seen[cnt++] = prevOwner;
            }
            if(!mp[curOwner])
            {
                mp[curOwner] = 1;
                seen[cnt++] = curOwner;
            }
            if(cnt > num/2)
                return true;
        }
        else
        {
            // End of a run: undo only the marks it made instead of wiping all
            // num flags at every position, which cost O(n * num) per call.
            while(cnt > 0)
                mp[seen[--cnt]] = 0;
        }
    }
    return false;
}

/**
 * Solves one test case and prints its answer lines.
 *
 * Expects the concatenated text in s[1..n], the owner table in str[], the
 * string count in num and m reset by the caller. Prints every longest
 * substring shared by more than half of the strings, one per line in
 * lexicographic order, or "?" when no such substring exists.
 */
void solve()
{
    // A single life form is trivially "more than half" of the input, so its
    // whole sequence is the answer. The suffix-array scan below only detects
    // substrings that start at two different suffixes and would print "?" or
    // a shorter repeated substring here. s[n] is the trailing separator.
    if(num == 1 && n > 1)
    {
        for(int j = 1; j < n; j++)
            putchar(s[j]+'a'-1);
        putchar('\n');
        return;
    }

    get_sa();
    get_height();

    // Binary search for the largest feasible length. 1005 exceeds the longest
    // input string allowed by the problem statement (1000 letters).
    int l = 1, r = 1005, ans = -1;
    while(l <= r)
    {
        const int mid = (l+r)>>1;
        if(check(mid))
        {
            ans = mid;
            l = mid+1;
        }
        else
            r = mid-1;
    }

    if(ans == -1)
    {
        puts("?");
        return;
    }

    // Walk the runs with height >= ans once more. Suffixes are visited in
    // lexicographic order, so the answers come out sorted.
    const int need = num/2+1;   // smallest count that is more than half
    int cnt = 0;                // distinct strings seen in the current run
    bool mp[105] = {0};         // mp[id]: string id already counted in this run
    int seen[105];              // ids marked in this run, so only those are cleared
    bool printed = false;       // the current run has already produced its line

    for(int i = 1; i <= n; i++)
    {
        if(height[i] >= ans)
        {
            const int prevOwner = str[sa[i-1]];
            const int curOwner = str[sa[i]];
            if(!mp[prevOwner])
            {
                mp[prevOwner] = 1;
                seen[cnt++] = prevOwner;
            }
            if(!mp[curOwner])
            {
                mp[curOwner] = 1;
                seen[cnt++] = curOwner;
            }
            // Every run stands for one distinct substring: print it once.
            if(cnt >= need && !printed)
            {
                for(int j = sa[i]; j <= sa[i]+ans-1; j++)
                    putchar(s[j]+'a'-1);
                putchar('\n');
                printed = true;
            }
        }
        else
        {
            // End of a run: clear only the marks it made.
            printed = false;
            while(cnt > 0)
                mp[seen[--cnt]] = 0;
        }
    }
}

/**
 * Reads test cases until a count of 0, end of input or malformed input, and
 * prints the answer of each case followed by an empty line.
 *
 * Each case is packed into the global text s[1..n]: letters become 1..26 and
 * the i-th string is followed by the unique separator i+100, so that no common
 * prefix can run across a string boundary. str[j] records the owning string.
 */
int main()
{
    // Compare against 1 rather than testing ~scanf(): a match failure returns
    // 0, which the bitwise test treats as success and then loops forever.
    while(scanf("%d", &num) == 1 && num != 0)
    {
        int len = 0;
        for(int i = 1; i <= num; i++)
        {
            // Stop on truncated input instead of re-using the previous buffer.
            if(scanf("%s", st+1) != 1)
                return 0;
            const int lent = static_cast<int>(strlen(st+1));
            for(int j = 1; j <= lent; j++)
            {
                s[len+j] = st[j]-'a'+1;
                str[len+j] = i;
            }
            len += lent;
            s[++len] = i+100;   // every separator must be different
            str[len] = 0;       // a separator belongs to no string; drop stale ids
        }
        n = len;
        m = 300;                // get_sa overwrites m, so reset it for every case
        solve();
        putchar('\n');
    }
    return 0;
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值