A string is a finite sequence of characters over a non-empty finite set Σ.
In this problem, Σ is the set of lowercase letters.
A substring, also called a factor, is a consecutive sequence of characters that occurs at least once in a string.
Now your task is simple: for two given strings, find the length of their longest common substring.
Here common substring means a substring of two or more strings.
Input
The input contains exactly two lines, each line consists of no more than 250000 lowercase letters, representing a string.
Output
The length of the longest common substring. If such string doesn't exist, print "0" instead.
Example
Input:
alsdfkjfjkdsal
fdjskalajfkdsla
Output:
3
Notice: new testcases added
思路：$O(n \log n)$ 的后缀数组直接做，也可以用后缀自动机。
// Longest common substring of the whitespace-separated input strings,
// computed with a prefix-doubling suffix array, a Kasai-style LCP array and a
// binary search on the answer.
//
// All strings are concatenated into one symbol sequence; each string is
// followed by its own unique separator symbol and the whole sequence ends with
// a 0 sentinel, so no common prefix of two suffixes can run across a string
// boundary.

#include <algorithm>
#include <cstdio>
#include <cstring>
#include <vector>

// Capacity of every per-position array: all input characters, one separator
// per string and the final sentinel must fit into N slots.
int const N = 500000 + 100;

int wa[N << 1], wb[N << 1];  // rank buffers; doubled because cmp() reads index pos + step
int wv[N];                   // first-key rank of the i-th entry in second-key order
int rk[N];                   // rk[pos] = index of suffix pos inside sa
int num[N];                  // counting-sort buckets
int sa[N];                   // sa[i] = start position of the i-th smallest suffix
int h[N];                    // h[i] = common prefix length of suffixes sa[i-1] and sa[i]
int r[N];                    // concatenated symbol sequence
int id[N];                   // id[pos] = 1-based index of the string owning pos, 0 for separators/sentinel
int m;                       // number of strings read
char s[N];                   // buffer for one input token

// Returns non-zero when positions x and y carry the same pair of ranks
// (r[pos], r[pos + z]), i.e. the two suffixes agree on their first 2*z symbols.
int cmp(int *r, int x, int y, int z) {
    return r[x] == r[y] && r[x + z] == r[y + z];
}

// Builds the suffix array of r[0..n-1] into sa and the inverse permutation
// into the global rk.
//
// r  : symbols, each in [0, m); r[n-1] must be a unique smallest sentinel.
// sa : output, n entries.
// n  : number of symbols including the sentinel.
// m  : alphabet size (exclusive upper bound of the symbols).
//
// Uses the globals wa, wb, wv and num as scratch space. Entries of wa/wb at
// index >= n are read by cmp() and are expected to be 0, which holds for the
// zero-initialised globals on the single call made by main().
void build_sa(int *r, int *sa, int n, int m) {
    int i, j, p, *x = wa, *y = wb;

    // Initial pass: counting sort of the suffixes by their first symbol.
    for (i = 0; i < m; i++) num[i] = 0;
    for (i = 0; i < n; i++) num[x[i] = r[i]]++;
    for (i = 1; i < m; i++) num[i] += num[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--num[x[i]]] = i;

    // Doubling: given the order by the first j symbols, derive the order by
    // the first 2*j symbols. p counts the distinct ranks and becomes the next
    // alphabet size; the loop stops once every suffix has its own rank.
    for (j = 1, p = 1; p < n; j <<= 1, m = p) {
        // y = positions ordered by their second key (rank of pos + j).
        // Suffixes shorter than j have an empty second key and go first.
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) if (sa[i] >= j) y[p++] = sa[i] - j;

        // Stable counting sort of y by the first key gives the new sa.
        for (i = 0; i < m; i++) num[i] = 0;
        for (i = 0; i < n; i++) num[wv[i] = x[y[i]]]++;
        for (i = 1; i < m; i++) num[i] += num[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--num[wv[i]]] = y[i];

        // Re-rank: y now holds the old ranks, x receives the new ones.
        std::swap(x, y);
        for (i = 1, p = 1, x[sa[0]] = 0; i < n; i++)
            x[sa[i]] = cmp(y, sa[i], sa[i - 1], j) ? p - 1 : p++;
    }
    for (i = 0; i < n; i++) rk[i] = x[i];
}

// Fills h[1..n] with the common prefix length of lexicographically adjacent
// suffixes.
//
// r  : the symbol sequence used for build_sa.
// sa : suffix array produced by build_sa (global rk must match it).
// n  : number of symbols WITHOUT the sentinel. The sentinel suffix has rank 0,
//      so every position 0..n-1 has rank >= 1 and sa[rk[i] - 1] is valid.
//
// The extension loop stops on its own because separators and the sentinel are
// unique symbols.
void build_h(int *r, int *sa, int n) {
    int k = 0;
    for (int i = 0; i < n; i++) {
        if (k) k--;  // the LCP of the next position drops by at most one
        int j = sa[rk[i] - 1];
        while (r[i + k] == r[j + k]) k++;
        h[rk[i]] = k;
    }
}

// Returns 1 when some substring of length >= mid occurs in every one of the
// m input strings, 0 otherwise.
//
// mid : candidate length.
// n   : number of symbols without the sentinel (h[1..n] is inspected).
//
// A maximal run of consecutive suffix-array entries with h >= mid shares a
// common prefix of length >= mid; the answer is yes as soon as one run
// contains suffixes starting in all m strings.
int check(int mid, int n) {
    // seen[k] == group means string k already occurs in the current run.
    std::vector<int> seen(m + 1, 0);
    int group = 1;
    int covered = 0;  // number of distinct strings in the current run

    for (int i = 1; i <= n; i++) {
        if (h[i] < mid) {
            // Run is broken: start a fresh one without clearing `seen`.
            ++group;
            covered = 0;
            continue;
        }
        for (int t = i - 1; t <= i; t++) {
            int owner = id[sa[t]];
            if (owner != 0 && seen[owner] != group) {
                seen[owner] = group;
                ++covered;
            }
        }
        if (covered == m) return 1;
    }
    return 0;
}

// Reads whitespace-separated strings until end of input and prints the length
// of their longest common substring ("0" when there is none).
int main() {
    int len = 0;    // number of symbols written to r so far
    int sum = 255;  // largest symbol in use; separators are 256, 257, ...
                    // so they can never collide with an input byte

    // Bounded "%s": never write more than N - 1 characters into s.
    char fmt[32];
    std::snprintf(fmt, sizeof fmt, "%%%ds", N - 1);

    while (std::scanf(fmt, s) == 1) {
        int tokenLen = static_cast<int>(std::strlen(s));
        // Token + its separator + the final sentinel must still fit.
        if (len + tokenLen + 2 > N) {
            std::fprintf(stderr, "input too long (capacity %d symbols)\n", N);
            return 1;
        }
        m++;
        for (int i = 0; s[i]; i++) {
            r[len] = static_cast<unsigned char>(s[i]);
            id[len] = m;
            len++;
        }
        r[len++] = ++sum;  // unique separator, id stays 0
    }

    // r[len] is still 0 and acts as the sentinel, hence len + 1 symbols.
    build_sa(r, sa, len + 1, sum + 1);
    build_h(r, sa, len);

    // Feasibility is monotone in the length, so binary search the largest
    // length that check() accepts.
    int lo = 0, hi = len, ans = 0;
    while (lo <= hi) {
        int mid = (lo + hi) / 2;
        if (check(mid, len)) {
            ans = mid;
            lo = mid + 1;
        } else {
            hi = mid - 1;
        }
    }
    std::printf("%d\n", ans);
    return 0;
}
后缀自动机的思路:
这应该算是后缀自动机的经典应用了吧,我们先构造A的SAM,然后用A的SAM依次读入B的每一个字符,初始时状态在root处,此时最大匹配数为tmp=0,(这里的最大匹配数是指以当前读入的字符结尾,往前能匹配的最大长度),设当前到达的状态为p,最大匹配数为tmp,读入的字符为x,若p->go[x]!=NULL,则说明可从当前状态读入一个字符x到达下一个状态,则tmp++,p=p->go[x],否则,沿着后缀链接找到p的第一个满足s->go[x]!=NULL的祖先s,若s不存在,则说明以x结尾的字符串无法和A串的任何位置匹配,则设tmp=0,p=root。否则,设tmp=s->len+1(其中s->len是状态s所表示的最长子串的长度;因为我们不算x的话已经到达了状态p,这说明对于p的任意祖先已经匹配完毕),p=s->go[x]。我们求tmp所达到的最大值即为所求。
// Longest common substring of two lowercase strings A and B using a suffix
// automaton (SAM): build the SAM of A, then feed B through it while tracking
// the longest suffix of the consumed prefix of B that is a substring of A.

#include <algorithm>
#include <cstdio>

// Maximum length of one input string, with some slack.
int const N = 250000 + 1000;

// One automaton state. The array below has static storage duration, so every
// field starts at zero; index 0 doubles as "no state".
struct node {
    int len;     // length of the longest substring represented by this state
    int fa;      // suffix link (0 = none, only for the root)
    int ch[26];  // transitions by letter, 0 = no transition
};
node a[N << 1];  // a SAM of a string of length n has fewer than 2n states

char s[N];           // input buffer, reused for A and then B
int ls = 1;          // state representing the whole string added so far
int tot = 1;         // number of states in use; state 1 is the root

// Extends the automaton by one letter c in [0, 26).
void add(int c) {
    int p = ls;
    int np = ls = ++tot;
    a[np].len = a[p].len + 1;

    // Add the transition by c along the suffix links until a state that
    // already has one.
    for (; p && !a[p].ch[c]; p = a[p].fa) a[p].ch[c] = np;

    if (!p) {
        a[np].fa = 1;  // c never occurred before: link to the root
        return;
    }
    int q = a[p].ch[c];
    if (a[q].len == a[p].len + 1) {
        a[np].fa = q;  // q is exactly the required suffix state
        return;
    }
    // q also represents longer strings: split off a clone holding only the
    // strings of length <= a[p].len + 1 and redirect the affected transitions.
    int nq = ++tot;
    a[nq] = a[q];
    a[nq].len = a[p].len + 1;
    a[q].fa = a[np].fa = nq;
    for (; p && a[p].ch[c] == q; p = a[p].fa) a[p].ch[c] = nq;
}

// Reads one whitespace-delimited token of at most N - 1 characters into buf.
// Returns false on end of input or read failure.
static bool read_word(char *buf) {
    char fmt[32];
    std::snprintf(fmt, sizeof fmt, "%%%ds", N - 1);  // bounded "%s"
    return std::scanf(fmt, buf) == 1;
}

// Returns true when every character of w is in 'a'..'z', i.e. a valid index
// into node::ch after subtracting 'a'.
static bool is_lowercase_word(const char *w) {
    for (; *w; ++w)
        if (*w < 'a' || *w > 'z') return false;
    return true;
}

// Reads A and B and prints the length of their longest common substring
// ("0" when there is none or when a string is missing).
int main() {
    if (!read_word(s)) {
        std::printf("0\n");
        return 0;
    }
    if (!is_lowercase_word(s)) {
        std::fprintf(stderr, "input must consist of lowercase letters\n");
        return 1;
    }
    for (int i = 0; s[i]; i++) add(s[i] - 'a');

    // Without a second string nothing can be common; do not reuse A as B.
    if (!read_word(s)) {
        std::printf("0\n");
        return 0;
    }
    if (!is_lowercase_word(s)) {
        std::fprintf(stderr, "input must consist of lowercase letters\n");
        return 1;
    }

    int p = 1;    // current state
    int tmp = 0;  // length of the longest match ending at the current letter
    int ans = 0;
    for (int i = 0; s[i]; i++) {
        int k = s[i] - 'a';
        if (a[p].ch[k]) {
            // The current match can simply be extended.
            tmp++;
            p = a[p].ch[k];
        } else {
            // Shorten the match along the suffix links until it can be
            // extended by k, or give up at the root.
            while (p && !a[p].ch[k]) p = a[p].fa;
            if (p) {
                tmp = a[p].len + 1;
                p = a[p].ch[k];
            } else {
                tmp = 0;
                p = 1;
            }
        }
        ans = std::max(ans, tmp);
    }
    std::printf("%d\n", ans);
    return 0;
}