字符串匹配算法--两种基于双向比较的最长公共子串算法
LCSstrDP
/*
 * LCSstrDP - length of the longest common substring of S1 and S2, computed
 * with the classic dynamic-programming recurrence.
 *
 * Only one row of the DP table is kept: c[j] is the length of the longest
 * common suffix of S1[0..i] and S2[0..j].
 *
 * S1, L1: first byte string and its length (not modified).
 * S2, L2: second byte string and its length (not modified).
 *
 * Returns the length of the longest common substring, 0 when either length
 * is <= 0, or -1 when the work row cannot be allocated.
 *
 * Requires <stdlib.h> (malloc/free).
 */
int LCSstrDP(char *S1, int L1, char *S2, int L2)
{
    if (L1 <= 0 || L2 <= 0)
        return 0;

    int *c = (int *)malloc((size_t)L2 * sizeof *c);
    if (c == NULL)
        return -1;

    int lcs = 0;
    for (int i = 0; i < L1; i++) {
        /*
         * Walk j downwards so that c[j - 1] still holds the value from
         * row i - 1 when c[j] is updated for row i.
         */
        for (int j = L2 - 1; j >= 0; j--) {
            if (S1[i] == S2[j]) {
                if (i == 0 || j == 0)
                    c[j] = 1;
                else
                    c[j] = c[j - 1] + 1;
                if (c[j] > lcs)
                    lcs = c[j];
            } else {
                c[j] = 0;
            }
        }
    }

    free(c);
    return lcs;
}
LCSstrSeL
/*
 * LCSstrSeL - length of the longest common substring of S1 and S2, found by
 * sampling S1 and comparing in both directions.
 *
 * S1 is sampled every (lcs + 1) positions, where lcs is the best length
 * found so far: any common substring longer than lcs must cover at least one
 * sampled position. For every position of S2 holding the same byte as the
 * sample, the match is extended to the right and to the left.
 *
 * S1, L1: first byte string and its length (not modified).
 * S2, L2: second byte string and its length (not modified).
 *
 * Returns the length of the longest common substring, or 0 when either
 * length is <= 0.
 */
int LCSstrSeL(char *S1, int L1, char *S2, int L2)
{
    if (L1 <= 0 || L2 <= 0)
        return 0;

    int lcs = 0; /* length of the longest common substring found so far */

    for (int i = 0; i < L1; i += (lcs + 1)) {
        for (int j = L2 - 1; j >= 0; j--) {
            if (S1[i] != S2[j])
                continue;

            int k = 1; /* matched bytes at and to the right of (i, j) */
            int m = 0; /* matched bytes strictly to the left of (i, j) */
            int x, y;

            /* extend to the right */
            for (x = i, y = j; ++x < L1 && ++y < L2 && S1[x] == S2[y]; k++)
                ;
            /* extend to the left */
            for (x = i, y = j; --y >= 0 && --x >= 0 && S1[x] == S2[y]; m++)
                ;

            if (k + m > lcs)
                lcs = k + m;
        }
    }
    return lcs;
}
LCSstrSCel
/* Pack the two bytes at p[0], p[1] into one int so that two byte pairs can
 * be compared with a single ==, without misaligned or type-punned loads. */
static int lcs_pair(const char *p)
{
    return ((unsigned char)p[0] << 8) | (unsigned char)p[1];
}

/*
 * LCSstrSCel - length of the longest common substring of S1 and S2, using
 * sampling of S1, a per-byte index of S2 and bidirectional comparison.
 *
 * Preprocessing: S2 is split into maximal runs of equal bytes. For every
 * byte value the runs are recorded in s2pos in ascending position order:
 *   - a run of length 1 at position e is stored as the single slot  e;
 *   - a longer run [s, e] is stored as two slots  -e, s  (the negative
 *     value marks the pair; e > 0 always holds for such a run).
 * anum[b] is the number of slots used by byte b, ast[b] its first slot.
 *
 * Search: S1 is sampled every (lcs + 1) positions. The sample is widened to
 * the run [is, i] of equal bytes around it, and that run is compared against
 * every run of the same byte in S2, extending the match right of the run
 * ends and left of the run starts.
 *
 * S1, L1: first byte string and its length (not modified).
 * S2, L2: second byte string and its length (not modified).
 *
 * Returns the length of the longest common substring, 0 when either length
 * is <= 0, or -1 when the index cannot be allocated.
 *
 * Requires <stdlib.h> (malloc/free/abs).
 */
int LCSstrSCel(char *S1, int L1, char *S2, int L2)
{
    if (L1 <= 0 || L2 <= 0)
        return 0;

    int i, j, lcs = 0, is, w1;
    int anum[256] = {0}, ap[256] = {0}, ast[256] = {0};

    /* Preprocessing, pass 1: count the slots each byte value needs. */
    for (j = 0, i = 0; i < L2; i++) {
        if (i == L2 - 1 || S2[i] != S2[i + 1]) {
            anum[(unsigned char)S2[i]] += (i > j ? 2 : 1);
            j = i + 1;
        }
    }

    /* Lay the per-byte slot ranges out back to back. */
    for (j = 0, i = 0; i < 256; i++) {
        if (anum[i] > 0) {
            ast[i] = ap[i] = j;
            j += anum[i];
        }
    }

    /* L2 > 0 guarantees at least one run, so j >= 1 here. */
    int *s2pos = (int *)malloc((size_t)j * sizeof *s2pos);
    if (s2pos == NULL)
        return -1;

    /* Preprocessing, pass 2: record every run of S2 (j = start, i = end). */
    for (j = 0, i = 0; i < L2; i++) {
        if (i == L2 - 1 || S2[i] != S2[i + 1]) {
            int ch = (unsigned char)S2[i];
            if (i > j) {
                s2pos[ap[ch]++] = -i;
                s2pos[ap[ch]++] = j;
            } else {
                s2pos[ap[ch]++] = i;
            }
            j = i + 1;
        }
    }

    /* Search. */
    for (i = 0; i < L1; i += (lcs + 1)) {
        int ii = (unsigned char)S1[i];
        if (!anum[ii])
            continue; /* byte does not occur in S2: skip the sample */

        /* Widen the sample to the whole run [is, i] of equal bytes. */
        for (is = i; --is >= 0 && S1[is] == S1[i]; )
            ;
        is++;
        for (; ++i < L1 && S1[i] == S1[is]; )
            ;
        i--;
        w1 = i - is + 1;

        int LWh = 0, Sl = 0, Sr = 0;
        int *p0 = &s2pos[ast[ii]];      /* first slot of this byte value */
        int *px = p0 + anum[ii];        /* one past its last slot */
        int *pjx = p0 + anum[ii] - 1;   /* last slot usable by the skip test */

        if (w1 <= lcs && is && i < L1 - 1) {
            /*
             * The run alone cannot beat lcs; if neither neighbouring byte
             * occurs in S2 it cannot be extended either.
             */
            if (anum[(unsigned char)S1[is - 1]] == 0 &&
                anum[(unsigned char)S1[i + 1]] == 0)
                continue;

            /*
             * To beat lcs the match must extend by at least LWh bytes on
             * one side of the run.
             */
            LWh = ((lcs - w1) >> 1) + 1;
            if (anum[ii] > 7 && is - LWh >= 0 && (i + LWh) < L1 - 1) {
                /*
                 * Prepare the run-skipping filter (not worth it when S2 has
                 * few runs of this byte): remember the byte pair LWh to the
                 * left of the run start and LWh to the right of the run end.
                 */
                int limit = L2 - LWh - 1;

                Sl = lcs_pair(S1 + is - LWh);
                Sr = lcs_pair(S1 + i + LWh - 1);
                /*
                 * Drop trailing slots whose positions are so close to the
                 * end of S2 that the right-hand pair would be out of range.
                 */
                for (j = *(--pjx);
                     pjx > p0 && (abs(j) > limit || abs(*(pjx + 1)) > limit);
                     j = *(--pjx))
                    ;
            } else {
                LWh = 0;
            }
        }

        /* Visit every run [js, j] of this byte in S2. */
        int *p = p0;
        while (p < px) {
            int js;

            j = *p++;
            js = j;
            if (j < 0) {
                j = -j;
                js = *p++;
            }

            int k = w1, m = 0, x, y;
            /* extend to the right of the run ends */
            for (x = i, y = j; ++y < L2 && ++x < L1 && S1[x] == S2[y]; k++)
                ;
            /* extend to the left of the run starts */
            for (x = is, y = js; --y >= 0 && --x >= 0 && S1[x] == S2[y]; m++)
                ;

            if (lcs < m + k) {
                int lcs_cur = m + k, w2 = (j - js + 1);
                /*
                 * Runs of different width can only be aligned at one end,
                 * so only the longer of the two extensions counts.
                 */
                if (w1 != w2)
                    lcs_cur = (w1 < w2 ? w1 : w2) +
                              (m > (k - w1) ? m : (k - w1));
                if (lcs < lcs_cur)
                    lcs = lcs_cur;
            }

            if (LWh && p < pjx && js >= LWh) {
                /*
                 * Skip following S2 runs for which neither the left nor the
                 * right byte pair matches: they cannot give a longer match.
                 */
                while (p < pjx) {
                    int *pj = p;
                    int qe = *pj++;
                    int qs = qe;

                    if (qe < 0) {
                        qe = -qe;
                        qs = *pj++;
                    }
                    if (Sl == lcs_pair(S2 + qs - LWh) ||
                        Sr == lcs_pair(S2 + qe + LWh - 1))
                        break; /* candidate run: let the outer loop compare it */
                    p = pj;
                }
            }
        }
    }

    free(s2pos);
    return lcs;
}
Wang Kaiyun, Kong Siqi, Fu Yunsheng, Pan Zeyou, Ma Weidong, Zhao Qiang. Two Longest Common Substring Algorithms Based on Bi-Directional Comparison[J]. Journal of Computer Research and Development, 2013, 50(11): 2444-2454 (in Chinese). [王开云, 孔思淇, 付云生, 潘泽友, 马卫东, 赵强. 两种基于双向比较的最长公共子串算法[J]. 计算机研究与发展, 2013, 50(11): 2444-2454]