最长公共子序列(LCS)算法
输入:随机生成两条字符序列,长度最少为10000。
输出:求解时间,最长公共子序列及其长度。
算法思想:
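设 $X=\langle x_1,x_2,\ldots,x_m\rangle$ 和 $Y=\langle y_1,y_2,\ldots,y_n\rangle$ 为两个序列,并设 $Z=\langle z_1,z_2,\ldots,z_k\rangle$ 为X和Y的任意一个LCS(记 $X_i$ 为X的前i个元素构成的前缀):
1. 如果 $x_m=y_n$,那么 $z_k=x_m=y_n$,且 $Z_{k-1}$ 是 $X_{m-1}$ 和 $Y_{n-1}$ 的一个LCS
2. 如果 $x_m\neq y_n$,那么 $z_k\neq x_m$ 蕴含Z是 $X_{m-1}$ 和Y的一个LCS
3. 如果 $x_m\neq y_n$,那么 $z_k\neq y_n$ 蕴含Z是X和 $Y_{n-1}$ 的一个LCS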
在《算法导论》给出的LCS算法的基础上,做了以下几方面的优化:
1. 去掉用来构造最优解的数组,节省了O(mn)的存储空间
2. 将二维数组的类型从int改为short
3. 将LCS_Print()函数由递归改为非递归,解决了数据量过大时出现的栈溢出问题。
实现代码如下:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include<time.h>
/* Capacity of each input buffer in main and of each dimension of the
   dynamic-programming table C. */
#define MAXSIZE 10000
/* LCS length table: written by LCS_Length, read back by LCS_Print.
   The first index follows positions in S2, the second positions in S1.
   It holds MAXSIZE * MAXSIZE shorts (roughly 200 MB where short is 2 bytes),
   which is why it is a global rather than a local array. */
short C[MAXSIZE][MAXSIZE] = { 0 };
void LCS_Length(char *S1, char *S2){
int i, j;
int len1 = strlen(S1), len2 = strlen(S2);
for (i = 0; i <len2; i++)
C[i][0] = 0; //S2 column
for (i = 0; i <len1; i++)
C[0][i] = 0; //S1 row
for (i = 1; i < len2; i++)
for (j = 1; j < len1; j++){
if (S1[j] == S2[i]){
C[i][j] = C[i - 1][j - 1] + 1;
}
else if (C[i - 1][j] >= C[i][j - 1]){
C[i][j] = C[i - 1][j];
}
else{
C[i][j] = C[i][j - 1];
}
}
}
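/* Earlier recursive version of LCS_Print, kept for reference only. The
   iterative LCS_Print below replaces it because the recursion overflowed
   the stack on large inputs.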
void LCS_Print(char *S2, int i, char *S1, int j){
if (i == 0 || j == 0) return;
if (S1[j] == S2[i]){
LCS_Print(S2, i - 1,S1, j - 1);
printf("%c", S1[j]);
}
else if (C[i - 1][j] >= C[i][j - 1]){
LCS_Print(S2, i - 1,S1, j);
}
else
LCS_Print(S2, i,S1, j - 1);
}
*/
/*
 * Reverse the NUL-terminated string S in place.
 *
 * Strings of length 0 or 1 are returned unchanged. The length is kept as a
 * size_t, so an empty string does not depend on converting strlen(S) - 1
 * (which wraps to SIZE_MAX) to a signed int, and long strings are not
 * truncated to int range.
 */
void reverse(char *S){
    size_t n = strlen(S);
    char *lo;
    char *hi;

    if (n < 2) {
        return;
    }

    lo = S;
    hi = S + (n - 1);
    /* Swap from both ends toward the middle; the middle element stays put. */
    while (lo < hi) {
        char tmp = *lo;
        *lo++ = *hi;
        *hi-- = tmp;
    }
}
/*
 * Print one longest common subsequence and its length, walking the table C
 * backwards from cell (i, j) without recursion.
 *
 * S2, i: row string and the row index to start from.
 * S1, j: column string and the column index to start from.
 *
 * C must already hold the lengths for these strings (see LCS_Length).
 * Positions are 1-based; the walk stops as soon as either index reaches 0.
 * Output format: "<subsequence>\nLCS Length:<n>\n".
 */
void LCS_Print(const char *S2, int i, const char *S1, int j){
    char S[MAXSIZE];
    int len = 0;

    /* C has only MAXSIZE rows and columns; never start outside it. */
    if (i >= MAXSIZE) {
        i = MAXSIZE - 1;
    }
    if (j >= MAXSIZE) {
        j = MAXSIZE - 1;
    }

    /* "> 0" rather than "!= 0" so a negative index cannot walk off the table. */
    while (i > 0 && j > 0) {
        if (S1[j] == S2[i]) {
            /* Collected back to front; the buffer is reversed before printing. */
            S[len++] = S1[j];
            i--;
            j--;
        }
        else if (C[i - 1][j] >= C[i][j - 1]) {
            i--;
        }
        else {
            j--;
        }
    }
    S[len] = '\0';
    reverse(S);
    /* len is an int, so it matches %d; passing strlen()'s size_t did not. */
    printf("%s\nLCS Length:%d\n", S, len);
}
/*
 * Driver: builds two pseudo-random strings, computes and prints one LCS,
 * then reports the elapsed CPU time in milliseconds.
 *
 * Each string keeps a placeholder '0' at index 0 because LCS_Length and
 * LCS_Print treat positions as 1-based. rand() is never seeded, so every
 * run uses the same two strings. For a quick manual check the random fill
 * can be replaced by short literals that start with a placeholder
 * character, e.g. " ACCGGTCGAGTGCGCGGAAGCCGGCCGAA" and
 * " GTCGTTCGGAATGCCGTTGCTCTGTAAA".
 */
int main(void){
    char S1[MAXSIZE] = { '0' };
    char S2[MAXSIZE] = { '0' };
    clock_t start;
    clock_t duringtime;
    int j, len1, len2;

    /* 'A' + rand() % 25 yields letters 'A'..'Y'. */
    for (j = 1; j < MAXSIZE; j++) {
        S1[j] = (char)('A' + rand() % 25);
    }
    S1[MAXSIZE - 1] = '\0';
    for (j = 1; j < MAXSIZE; j++) {
        S2[j] = (char)('A' + rand() % 25);
    }
    S2[MAXSIZE - 1] = '\0';

    start = clock();
    LCS_Length(S1, S2);
    /* Index of the last real character in each string. */
    len1 = (int)strlen(S1) - 1;
    len2 = (int)strlen(S2) - 1;
    printf("LCS:");
    LCS_Print(S2, len2, S1, len1);
    duringtime = clock() - start;

    /* clock() counts ticks; scale by CLOCKS_PER_SEC to get milliseconds. */
    printf("Time:");
    printf("%f ms\n", (double)duringtime * 1000.0 / CLOCKS_PER_SEC);
    return 0;
}