POJ3450 Corporate Identity

最新推荐文章于 2020-03-31 11:05:06 发布

eeeaaaaa

最新推荐文章于 2020-03-31 11:05:06 发布

阅读量581

点赞数

CC 4.0 BY-SA版权

分类专栏：后缀数组

本文链接：https://blog.csdn.net/eeeaaaaa/article/details/37696013

后缀数组专栏收录该内容

7 篇文章

订阅专栏

本文介绍了一种使用后缀数组解决寻找多个字符串中最长公共子串的问题，通过连接字符串并利用特殊字符分隔，实现对多个商标标识中共同字母序列的有效识别。

Corporate Identity

Time Limit: 3000MS		Memory Limit: 65536K
Total Submissions: 4672		Accepted: 1765

Description

Beside other services, ACM helps companies to clearly state their “corporate identity”, which includes company logo but also other signs, like trademarks. One of such companies is Internet Building Masters (IBM), which has recently asked ACM for a help with their new identity. IBM do not want to change their existing logos and trademarks completely, because their customers are used to the old ones. Therefore, ACM will only change existing trademarks instead of creating new ones.

After several other proposals, it was decided to take all existing trademarks and find the longest common sequence of letters that is contained in all of them. This sequence will be graphically emphasized to form a new logo. Then, the old trademarks may still be used while showing the new identity.

Your task is to find such a sequence.

Input

The input contains several tasks. Each task begins with a line containing a positive integer N, the number of trademarks (2 ≤ N ≤ 4000). The number is followed by N lines, each containing one trademark. Trademarks will be composed only from lowercase letters, the length of each trademark will be at least 1 and at most 200 characters.

After the last trademark, the next task begins. The last task is followed by a line containing zero.

Output

For each task, output a single line containing the longest string contained as a substring in all trademarks. If there are several strings of the same length, print the one that is lexicographically smallest. If there is no such non-empty string, output the words “IDENTITY LOST” instead.

Sample Input

3
aabbaabb
abbababb
bbbbbabb
2
xyz
abc
0

Sample Output

abb
IDENTITY LOST

Source

CTU Open 2007

题意：找出所给全部字符串的最长公共子串；如果有多个长度相同的答案，输出字典序最小的那个；如果不存在非空的公共子串，输出“IDENTITY LOST”。

分析：这题可以用KMP做，但最近一直在学后缀数组，也就用后缀数组写了一下。把输入的串连接成一个长串，相邻两个串之间用互不相同的特殊字符隔开，然后二分答案的长度。输入的时候给每个字符记下它所属串的编号（id数组），以便judge使用：judge里按height把排好序的后缀分组（同一组内相邻后缀的height都不小于当前长度mid），只要某一组里的后缀覆盖了全部m个原串，这个长度就可行；因为后缀是按字典序排列的，第一个满足条件的组对应的就是字典序最小的答案。具体看代码。

#include<cstdio>
#include<cstring>
#include<algorithm>
using namespace std;
// Capacity shared by every text-sized work array below.
const int MAXN=1000010;

// Concatenated input as integers: main() stores the trademark letters here,
// one distinct separator value after each trademark, and a final 0.
int r[MAXN];
// Scratch buffers used by da(): two key arrays, bucket counters and sort keys.
int wa[MAXN];
int wb[MAXN];
int ws[MAXN];
int wv[MAXN];
// Suffix array filled in by da().
int sa[MAXN];

// rank[] and height[] are filled in by calheight().
// NOTE(review): with `using namespace std;` in effect, the unqualified name
// `rank` can collide with std::rank from <type_traits> on C++11 and later.
int rank[MAXN];
int height[MAXN];
// id[p] is the 1-based index of the trademark that position p of r[] came from
// (written by main(), read by judge()).
int id[MAXN];

// Input buffer for one trademark read by main().
char s[2100];
// Returns 1 when positions a and b carry the same pair of keys
// (r[a], r[a+l]) and (r[b], r[b+l]); returns 0 otherwise.
// The second key is only read when the first keys already match.
int cmp(int *r,int a,int b,int l)
{
    if(r[a]!=r[b])
        return 0;
    return (r[a+l]==r[b+l])?1:0;
}
// Builds the suffix array of r[0..n-1] by prefix doubling: suffixes are first
// ordered by one character, then repeatedly re-ordered by key pairs that cover
// twice as many characters, each pass being a counting sort.
//   r  : input sequence; values are used as bucket indices, so they must lie in [0, m)
//   sa : output; sa[k] is the start index of the k-th suffix in sorted order
//   n  : number of elements of r to process
//   m  : number of buckets for the first counting sort
// Uses the file-level scratch arrays wa, wb, ws and wv.
// NOTE(review): cmp() reads index a+l / b+l without a bounds check; the caller in
// this file stores 0 in the last element of r, and the code presumably relies on
// that element being a unique minimum — confirm before reusing elsewhere.
void da(int *r,int *sa,int n,int m)
{
    int i,j,p,*x=wa,*y=wb,*t;
    // Initial pass: counting sort of all suffixes by their first value; x[] keeps the keys.
    for(i=0;i<m;i++)
        ws[i]=0;
    for(i=0;i<n;i++)
        ws[x[i]=r[i]]++;
    for(i=1;i<m;i++)
        ws[i]+=ws[i-1];
    for(i=n-1;i>=0;i--)
        sa[--ws[x[i]]]=i;
    // Each round doubles j. p is the number of distinct keys produced by the round;
    // the loop ends once p reaches n, and p becomes the bucket count (m) of the next round.
    for(p=1,j=1;p<n;m=p,j*=2)
    {
        // y[] lists suffix starts ordered by the second key (the key at offset j):
        // the last j suffixes have no second half and are placed first ...
        for(p=0,i=n-j;i<n;i++)
            y[p++]=i;
        // ... followed by the remaining suffixes in the order sa[] gives to their second half.
        for(i=0;i<n;i++)
            if(sa[i]>=j)
            y[p++]=sa[i]-j;
        // wv[i] is the first key of the i-th suffix in that second-key order.
        for(i=0;i<n;i++)
            wv[i]=x[y[i]];
        // Counting sort by first key; filling from the back keeps the second-key order within a bucket.
        for(i=0;i<m;i++)
            ws[i]=0;
        for(i=0;i<n;i++)
            ws[wv[i]]++;
        for(i=1;i<m;i++)
            ws[i]+=ws[i-1];
        for(i=n-1;i>=0;i--)
            sa[--ws[wv[i]]]=y[i];
        // Swap x and y so y holds the old keys, then assign new keys in x:
        // neighbours in sa[] with equal old key pairs (per cmp) share a key, otherwise p advances.
        for(t=x,x=y,y=t,x[sa[0]]=0,p=i=1;i<n;i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
    }
    return;
}
void calheight(int *r,int *sa,int n)
{
    int i,j,k=0;
    for(i=1;i<=n;i++)
        rank[sa[i]]=i;
    for(i=0;i<n;height[rank[i++]]=k)
        for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);
    return;
}
// vis[t] holds the group stamp under which trademark t was last counted by judge().
int vis[4010];
// Number of trademarks in the current task; read by main(), used by judge().
int m;
// NUL-terminated candidate answer written by judge() whenever it succeeds.
char res[2100];
bool judge(int mid,int n)
{
    int flag=0,tot=1,bg,i;  //tot计数
    memset(vis,-1,sizeof(vis));
    for(i=1;i<=n;i++)
    {
        int t=id[sa[i]];    //找到当前sa属于的那个串
        if(t==0)
        {
            flag++;
            tot=1;
            continue;
        }
        if(height[i]<mid)   //分组了
        {
            vis[t]=++flag;  //避免和前面的重复
            tot=1;
            bg=sa[i];
        }
        else
        {
            if(vis[t]!=flag)
            {
                vis[t]=flag;
                tot++;
            }
        }
        if(tot>=m)  //tot达到m了，也就满足条件了
        {
            for(int j=0;j<mid;j++)
                res[j]=(char)r[bg+j];
            res[mid]='\0';
            return 1;
        }
    }
    return 0;
}
// Reads tasks until a trademark count of 0 (or end of input). For each task it
// concatenates the trademarks into r[], builds the suffix array and height
// table, binary-searches the longest length accepted by judge(), and prints the
// substring judge() left in res, or "IDENTITY LOST" when no length >= 1 works.
int main()
{
    // m is the file-level trademark count that judge() also reads.
    // Requiring m>0 keeps a malformed negative count from indexing r[-1] below.
    while(scanf("%d",&m)==1&&m>0)
    {
        int n=0;            // number of elements written to r[] so far
        int op=130;         // next separator value; starts above 'z' (122) so it never equals a letter
        int shortest=-1;    // length of the shortest trademark in this task
        bool complete=true;
        for(int i=1;i<=m;i++)
        {
            // Width-limited read: s holds 2100 bytes including the terminator.
            if(scanf("%2099s",s)!=1)
            {
                complete=false;
                break;
            }
            int len=0;
            for(;s[len];len++)
            {
                r[n]=s[len];
                id[n++]=i;      // tag every letter with its 1-based trademark index
            }
            if(shortest<0||len<shortest)
                shortest=len;
            // A distinct separator after each trademark stops matches from crossing
            // trademark boundaries. Its id is reset to 0 explicitly because the
            // slot may still hold a trademark index from an earlier task, and
            // judge() treats id 0 as "not part of any trademark".
            r[n]=op++;
            id[n++]=0;
        }
        if(!complete)
            break;          // truncated input: nothing sensible left to process

        // Turn the last separator into the terminating 0 of the text.
        n--;
        r[n]=0;
        da(r,sa,n+1,op+1);
        calheight(r,sa,n);

        // A common substring cannot be longer than the shortest trademark, so
        // that bounds the search; length 0 needs no probe because ans starts at 0.
        int lo=1,hi=shortest,ans=0;
        while(lo<=hi)
        {
            const int mid=(lo+hi)>>1;
            if(judge(mid,n))
            {
                // Successful probes happen at increasing lengths, so res always
                // holds the answer for the current value of ans.
                ans=mid;
                lo=mid+1;
            }
            else
                hi=mid-1;
        }
        if(ans)
            printf("%s\n",res);
        else
            printf("IDENTITY LOST\n");
    }
    return 0;
}