spoj Longest Common Substring(lcs)

weixin_30512785

于 2019-06-11 16:04:00 发布

阅读量66

点赞数

原文链接：http://www.cnblogs.com/ZJXXCN/p/11004139.html

版权

A string is a finite sequence of characters over a non-empty finite set Σ.

In this problem, Σ is the set of lowercase letters.

A substring, also called a factor, is a consecutive sequence of characters that occurs at least once in a string.

Now your task is simple, for two given strings, find the length of the longest common substring of them.

Here common substring means a substring of two or more strings.

Input

The input contains exactly two lines, each line consists of no more than 250000 lowercase letters, representing a string.

Output

The length of the longest common substring. If such string doesn't exist, print "0" instead.

Example

Input:
alsdfkjfjkdsal
fdjskalajfkdsla

Output:
3

Notice: new testcases added

思路： $O(n\log n)$ 的后缀数组直接做，也可以用后缀自动机。

 1 #include<bits/stdc++.h>
 2 using namespace std;  
 3 int const N=500000+100;  
 4 int wa[N<<1],wb[N<<1],wv[N],rk[N],num[N],sa[N],h[N],r[N],vis[11],id[N],m;  
 5 char s[N]; 
 6 int cmp(int *r,int x,int y,int z){
 7     return r[x]==r[y] &&r[x+z]==r[y+z];  
 8 }    
 9 void build_sa(int *r,int *sa,int n,int m){
10     int i,j,p,*x=wa,*y=wb;  
11     for(i=0;i<m;i++) num[i]=0;  
12     for(i=0;i<n;i++) num[x[i]=r[i]]++; 
13     for(i=1;i<m;i++) num[i]+=num[i-1];  
14     for(i=n-1;i>=0;i--) sa[--num[x[i]]]=i;  
15     for(j=1,p=1;p<n;j<<=1,m=p){
16         for(p=0,i=n-j;i<n;i++) y[p++]=i;  
17         for(i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;  
18         for(i=0;i<m;i++) num[i]=0;  
19         for(i=0;i<n;i++) num[wv[i]=x[y[i]]]++; 
20         for(i=1;i<m;i++) num[i]+=num[i-1]; 
21         for(i=n-1;i>=0;i--) sa[--num[wv[i]]]=y[i]; 
22         swap(x,y); 
23         for(i=1,p=1,x[sa[0]]=0; i<n;i++) 
24             x[sa[i]]=cmp(y,sa[i],sa[i-1],j)? p-1: p++;  
25     }
26     for(i=0;i<n;i++) rk[i]=x[i];  
27 } 
28 void build_h(int *r,int *sa,int n){
29     int k=0; 
30     for(int i=0;i<n;i++){
31         if(k) k--; 
32         int j=sa[rk[i]-1];  
33         while (r[i+k]==r[j+k]) k++; 
34         h[rk[i]]=k; 
35     }
36 }
37 int check(int mid,int n){
38     memset(vis,0,sizeof(vis));  
39     for(int i=1;i<=n;i++){
40         if(h[i]>=mid){
41             vis[id[sa[i-1]]]=1;  
42             vis[id[sa[i]]]=1;  
43             int check=0; 
44             for(int j=1;j<=m;j++)   
45                 if(!vis[j]){
46                     check=1; break;
47                 } 
48             if(!check) return 1; 
49         }else {
50             for(int i=1;i<=m;i++) vis[i]=0;  
51         }
52     }
53     return 0;  
54 }  
55         
56 int main(){
57     int len=0,sum=122;   
58     while (scanf("%s",s)!=EOF){
59         m++;  
60         for(int i=0;s[i];i++)  r[len++]=s[i],id[len-1]=m;  
61         r[len++]=++sum;  
62     }  
63     build_sa(r,sa,len+1,140);  
64     build_h(r,sa,len);  
65     int l=0,r=len,ans=0;  
66     while (l<=r){
67         int mid=(l+r)/2;  
68         if(check(mid,len)){
69             ans=mid; 
70             l=mid+1;  
71         }else r=mid-1;  
72     }
73     printf("%d\n",ans); 
74     return 0; 
75 }

View Code

后缀自动机的思路：

这应该算是后缀自动机的经典应用了吧。我们先构造A的SAM，然后让B的每一个字符依次在A的SAM上转移。初始时状态在root处，此时最大匹配数为tmp=0（这里的最大匹配数是指以当前读入的字符结尾，往前能匹配的最大长度）。设当前到达的状态为p，最大匹配数为tmp，读入的字符为x：若p->go[x]!=NULL，则说明可从当前状态读入一个字符x到达下一个状态，则tmp++，p=p->go[x]；否则，沿parent链找到p的第一个满足s->go[x]!=NULL的祖先s。若s不存在，则说明以x结尾的字符串无法和A串的任何位置匹配，则设tmp=0，p=root；否则，设tmp=s->len+1（因为我们不算x的话已经到达了状态p，这说明对于p的任意祖先已经匹配完毕，而祖先s所能表示的最长串长度为s->len），p=s->go[x]。我们求tmp所达到的最大值即为所求。

 1 #include<bits/stdc++.h>
 2 using namespace std; 
 3 int const N=250000+1000;  
 4 struct node{
 5     int len,fa,ch[26];  
 6     node() { memset(ch,0,sizeof(ch));  len=0; } 
 7 }a[N<<1];  
 8 char s[N];  
 9 int ls=1,tot=1;  
10 void add(int c){
11     int p=ls;  
12     int np=ls=++tot;   
13     a[np].len=a[p].len+1;  
14     for(;p&&!a[p].ch[c]; p=a[p].fa) a[p].ch[c]=np;  
15     if(!p)  a[np].fa=1;  
16     else {
17         int q=a[p].ch[c];  
18         if(a[q].len==a[p].len+1)  a[np].fa=q; 
19         else {
20             int nq=++tot;  a[nq]=a[q];  
21             a[nq].len=a[p].len+1;  
22             a[q].fa=a[np].fa=nq;  
23             for(;p && a[p].ch[c]==q; p=a[p].fa) a[p].ch[c]=nq;  
24         } 
25     }
26 }  
27 int main(){
28     scanf("%s",s);  
29     for(int i=0;s[i];i++)  
30         add(s[i]-'a');  
31     scanf("%s",s);  
32     int p=1,tmp=0,ans=0; 
33     for(int i=0;s[i];i++){
34         int k=s[i]-'a';  
35         if(a[p].ch[k]){
36             tmp++;  
37             p=a[p].ch[k];  
38         }else {
39             while (p && !a[p].ch[k]) p=a[p].fa;  
40             if(p) {   
41                 tmp=a[p].len+1;  
42                 p=a[p].ch[k];  
43             }else {
44                 tmp=0; p=1; 
45             }
46         }
47         ans=max(ans,tmp); 
48     } 
49     printf("%d\n",ans); 
50     return 0; 
51 }

View Code

转载于:https://www.cnblogs.com/ZJXXCN/p/11004139.html

weixin_30512785

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
spoj Longest Common Substring(lcs)

A string is finite sequence of characters over a non-empty finite set Σ.In this problem, Σ is the set of lowercase letters.Substring, also called factor, is a consecutive sequence of characters oc...
复制链接

扫一扫