#include <stdio.h>
#include <string.h>
#include <malloc.h>
//LCS (longest common substring), brute force: loop over every pair of start characters
//time complexity is O(n*m*min(n,m)) in the worst case, where n and m are the lengths of the two strings
/*
 * Brute-force longest common substring.
 *
 * For every start position in str1 and every start position in str2, count
 * how many consecutive characters match, and remember the longest run seen.
 * On ties the run that starts earliest in str1 wins (only a strictly longer
 * run replaces the current best).
 *
 * Returns a newly allocated, NUL-terminated copy of the longest common
 * substring (the empty string "" when nothing matches). The caller owns the
 * buffer and must free() it. Returns NULL if either argument is NULL or the
 * allocation fails.
 */
char *maxsubstr(char* str1, char* str2)
{
    const char *best_start;
    size_t best_len = 0;
    char *substr;

    if (str1 == NULL || str2 == NULL) {
        return NULL;
    }

    /* Always points somewhere valid, even when no character matches. */
    best_start = str1;

    for (const char *p1 = str1; *p1 != '\0'; p1++) {
        for (const char *q1 = str2; *q1 != '\0'; q1++) {
            size_t len = 0;

            /*
             * Equal characters that are not NUL in p1 cannot be NUL in q1
             * either, so a single terminator check is enough.
             */
            while (p1[len] != '\0' && p1[len] == q1[len]) {
                len++;
            }
            if (len > best_len) {
                best_len = len;
                best_start = p1;
            }
        }
    }

    /* +1 for the terminator so the result is usable as a C string. */
    substr = malloc(best_len + 1);
    if (substr == NULL) {
        return NULL;
    }
    memcpy(substr, best_start, best_len);
    substr[best_len] = '\0';
    return substr;
}
/*
 * Demo driver: prints the two input strings and their longest common
 * substring. The strings come from the two command-line arguments when
 * exactly two are given; otherwise a built-in example pair is used.
 * Returns 0 on success, 1 if maxsubstr() could not produce a result.
 */
int main(int argc, char *args[])
{
    char *str1, *str2;
    char *lcs;

    if (argc == 3) {
        str1 = args[1];
        str2 = args[2];
    } else {
        str1 = "hello world happy birthday";
        str2 = "hello abcd happy birthday";
    }

    printf("str1: %s\n", str1);
    printf("str2: %s\n", str2);

    lcs = maxsubstr(str1, str2);
    if (lcs == NULL) {
        /* Passing NULL to %s is undefined, so report the failure instead. */
        fprintf(stderr, "maxsubstr failed\n");
        return 1;
    }
    printf("LCS: %s\n", lcs);
    free(lcs); /* maxsubstr() hands ownership of the buffer to the caller */
    return 0;
}
2. A cleverer approach is to use dynamic programming (DP).
The idea is as follows:
公共子串的元素必须相邻:
LCS问题就是求两个字符串最长公共子串的问题。解法就是用一个矩阵来记录两个字符串中所有位置的两个字符之间的匹配情况,若是匹配则为1,否则为0。然后求出对角线最长的1序列,其对应的位置就是最长匹配子串的位置.
但是在0和1的矩阵中找最长的1对角线序列又要花去一定的时间。通过改进矩阵的生成方式和设置标记变量,可以省去这部分时间。下面是新的矩阵生成方式:
s1: i love u
s2: ikonw i love u
    i k o n w   i   l o v e   u
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
i 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0
  0 0 0 0 0 0 1 0 2 0 0 0 0 1 0
l 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0
o 0 0 0 1 0 0 0 0 0 0 4 0 0 0 0
v 0 0 0 0 0 0 0 0 0 0 0 5 0 0 0
e 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0
  0 0 0 0 0 0 1 0 1 0 0 0 0 7 0
u 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8
LCS: i love u
当字符匹配的时候,我们并不是简单的给相应元素赋上1,而是赋上其左上角元素的值加一。我们用两个标记变量来标记矩阵中值最大的元素的位置,在矩阵生成的过程中来判断当前生成的元素的值是不是最大的,据此来改变标记变量的值,那么到矩阵完成的时候,最长匹配子串的位置和长度就已经出来了。
The source code of lcs_DP.c is as follows:
#include <stdio.h>
#include <string.h>
#include <malloc.h>
//DP_LCS
/*
 * Print the DP match table with str2 as the column header and the characters
 * of str1 as row labels.
 *
 * pf is declared as int ** to keep the existing signature, but it is treated
 * as the address of a contiguous row-major table of
 * (strlen(str1) + 1) x (strlen(str2) + 1) ints; row 0 and column 0 are the
 * border cells. It is therefore reinterpreted as a flat const int * before
 * indexing: indexing through int ** would step by pointer size and hand a
 * pointer to "%d".
 *
 * Does nothing if any argument is NULL.
 */
void print_table(char *str1,char *str2,int **pf)
{
    const int *cells = (const int *)pf;
    size_t row, col, i, j;

    if (str1 == NULL || str2 == NULL || pf == NULL) {
        return;
    }

    row = strlen(str1);
    col = strlen(str2);

    /* Header line: one character of str2 per table column. */
    printf(" ");
    for (j = 0; j < col; j++) {
        printf("%c ", str2[j]);
    }

    for (i = 0; i <= row; i++) {
        /* Each table row starts on a new line with its str1 label. */
        printf("\n");
        if (i) {
            printf("%c ", str1[i - 1]);
        } else {
            printf(" ");
        }
        for (j = 0; j <= col; j++) {
            printf("%d ", cells[i * (col + 1) + j]);
        }
    }
    printf("\n");
}//print_table
/*
 * Longest common substring via dynamic programming.
 *
 * Cell (row, col) of the table holds the length of the common run that ends
 * at str1[row-1] and str2[col-1]: on a match it is the upper-left neighbour
 * plus one, otherwise 0. The largest cell seen gives the length of the
 * answer, and its row gives where the answer ends in str1. On ties the first
 * maximum found in row-major order wins.
 *
 * The table is printed through print_table() before the result is built.
 *
 * Returns a newly allocated, NUL-terminated copy of the longest common
 * substring ("" when nothing matches); the caller must free() it. Returns
 * NULL if either argument is NULL or memory cannot be allocated.
 */
char* maxsubstr(char *str1, char *str2)
{
    size_t len1, len2, cols, row, col;
    size_t start = 0;   /* index in str1 where the best run begins */
    int max = 0;        /* length of the best run found so far */
    int *pf;            /* flat row-major (len1+1) x (len2+1) table */
    char *result;

    if (str1 == NULL || str2 == NULL) {
        return NULL;
    }

    len1 = strlen(str1);
    len2 = strlen(str2);
    cols = len2 + 1;

    /* Guard the cell-count multiplication against size_t overflow. */
    if (len1 + 1 > (size_t)-1 / cols) {
        return NULL;
    }

    /* Heap allocation (zeroed) instead of a VLA: long inputs would otherwise
     * overflow the stack. Zeroing also initialises the border row/column. */
    pf = calloc((len1 + 1) * cols, sizeof *pf);
    if (pf == NULL) {
        return NULL;
    }

    for (row = 1; row <= len1; row++) {
        for (col = 1; col <= len2; col++) {
            /* strlen() guarantees neither character here is the terminator. */
            if (str1[row - 1] == str2[col - 1]) {
                int run = pf[(row - 1) * cols + (col - 1)] + 1;

                pf[row * cols + col] = run;
                if (run > max) {
                    max = run;
                    start = row - (size_t)run;
                }
            }
            /* Non-matching cells stay 0 from calloc(). */
        }
    }

    /* print_table() takes the table address through an int ** parameter. */
    print_table(str1, str2, (int **)pf);
    free(pf);

    /* +1 for the terminator so the result is usable as a C string. */
    result = malloc((size_t)max + 1);
    if (result == NULL) {
        return NULL;
    }
    memcpy(result, str1 + start, (size_t)max);
    result[max] = '\0';
    return result;
}
/*
 * Demo driver for the DP version: prints the two input strings and their
 * longest common substring. The strings come from the two command-line
 * arguments when exactly two are given; otherwise a built-in example pair is
 * used. Returns 0 on success, 1 if maxsubstr() could not produce a result.
 */
int main(int argc, char**args){
    char *s1, *s2;
    char *lcs;

    if (argc == 3) {
        s1 = args[1];
        s2 = args[2];
    } else {
        s1 = "a hello world happy birthday";
        s2 = "b hello abcd happy birthday";
    }

    printf("s1: %s\n", s1);
    printf("s2: %s\n", s2);

    lcs = maxsubstr(s1, s2);
    if (lcs == NULL) {
        /* Passing NULL to %s is undefined, so report the failure instead. */
        fprintf(stderr, "maxsubstr failed\n");
        return 1;
    }
    printf("LCS: %s\n", lcs);
    free(lcs); /* the buffer returned by maxsubstr() belongs to the caller */
    return 0;
}
the result:
The time complexity of lcs_DP.c is O(n*m), where n and m are the lengths of the two strings (O(n^2) when the strings have similar lengths).