求两个字符串最长公共子序列的算法优化（C++实现）

最新推荐文章于 2024-08-07 15:50:47 发布

时空霹雳

最新推荐文章于 2024-08-07 15:50:47 发布

阅读量5.4k

点赞数 4

分类专栏：算法学习文章标签： c++ 动态规划优化递归算法

本文链接：https://blog.csdn.net/u010189459/article/details/29908303

版权

算法学习专栏收录该内容

37 篇文章 0 订阅

订阅专栏

前几天写了一篇文章，利用动态规划的方法求两个字符串的最长公共子序列，参见：求两个字符串的最长公共子序列——动态规划（C++实现），而后又将该算法用Python进行了改写，参见：Python实现求两个字符串公共子序列的算法（Python实现）。对比它们的运行时间发现，递归方式实现的时候C++竟然比Python还慢，而且随着数据规模的不断扩大，这种差距越来越明显。这明显不合常理，肯定是算法哪里出现了问题，因此本文对C++算法在各方面进行优化，改进算法。

动态规划有两种实现方式，一种是递归，一种是非递归。通过对比发现，时间反常的是C++实现中的递归算法，非递归算法表现正常。因此，从C++程序中的递归算法入手，分别用不同的方式改写递归函数。考虑到可能是传参的方式影响了递归的性能，因此对递归函数的参数用不同的方式传递，以此来观察参数传递方式对递归的影响。

一、源程序中的六个函数的功能都是求两个字符串的最长公共子序列。

他们的区别为：

1.递归实现

1.1.字符串使用传值方式，辅助数组使用指针传递（最初的版本）

1.2.字符串使用传值方式，辅助数组使用全局变量

1.3.字符串使用指针方式，辅助数组使用指针传递

1.4.字符串使用全局变量，辅助数组使用全局变量

2.非递归实现（递推实现）

2.1.普通实现

2.2.优化了辅助数组的空间复杂度，由整个m*n数组降到只需要两行数组

下表是对这六种实现方式在不同的数据规模下的测试（不同的函数用他们的代号表示）：

字符串X长度	字符串Y长度	LCS长度	递归时间（ms）				非递归时间（ms）
			1.1	1.2	1.3	1.4	2.1	2.2
200	300	75	8000	7963	2	3	2	2
500	1000	229	70961	71156	32	30	18	21
2000	1000	459	280864	282681	121	127	76	77
2000	3000	799	—	—	432	438	199	188

从表中可以看出：

（1）对比1.1和1.2，发现辅助数组使用指针传递或者是使用全局变量区别不大

（2）对比1.1和1.3，发现字符串的不同传值方式决定了算法的时间效率。联系我们的算法可知，在递归函数中每次调用函数本身（递归过程），都要向新函数传递两个字符串的值，当字符串的长度比较长时，比如2000和1000的两个字符串，加起来是3000，大概要3KB的数据，函数每递归一次就要拷贝3KB的数据，这个过程大大减慢了函数的执行速度，而且还占用大量的栈空间。实验证明，当问题的规模为500和1000的两个字符串时，Linux为每个程序分配的默认的8MB的栈空间已经不够用了，必须自己手工调整栈的上限才能保证算法的正常运行。从算法的角度来讲，两个字符串是只读的，不会对它们进行写操作，因此每次递归都将两个字符串进行复制是没有必要的，所以完全可以用传指针的方式，程序中只有一份字符串的内容，每次递归时传给函数一个指向字符串首地址的指针。这样减少了函数执行时的读写操作，大大提高了执行效率。

（3）对比1.3和1.4，发现将字符串指针和辅助数组指针都用全局变量来代替的话，在性能上二者差不多，甚至全局变量性能还有些许下降，不知道是偶然因素还是必然结果。从程序功能的角度来看，使用全局变量会破坏函数的功能性，不便于函数的移植和复用。因此最好还是使用传指针的方式进行参数传递，尽量少用全局变量。

（4）对比1.3和2.1，函数的递归实现和非递归实现都是O（m*n），但由于递归过程会有一些创建函数、分配栈空间等操作，所以总体来看效率比非递归实现稍低，但相差不是太大，仍然处于同一数量级。

（5）对比2.1和2.2，对空间进行优化后，由于增加了一些取模运算，理论上时间应该大于优化前的，这里却表现得不是很明显，甚至相反，没弄明白。

二、源代码

#include <iostream>
#include <string>

#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
using namespace std;


// Compile-time upper bounds on the lengths of the two input strings; every
// buffer and table in this file is sized from them.
const int X = 2000, Y = 3000;


// Global state
// Raw character buffers that main() fills with random text.
char str_1[X+1]; 
char str_2[Y+1];
// Length table (tmp_11) and direction table (tmp_22) used by the variants
// that read their tables from globals (LCS_length_2 and LCS_length_4).
// Only element [0][0] receives the listed value; all other elements are
// zero-initialized.
// NOTE(review): a non-zero initializer on arrays this large presumably forces
// them into the executable's initialized-data image — confirm, and consider
// plain zero-initialization since tmp_11 is memset before every use.
int tmp_11[X+1][Y+1] = {1};
int tmp_22[X+1][Y+1] = {-2};
// Input strings and their lengths as read by LCS_length_4; set by test_4.
string a;
string b;
int alen = 0;
int blen = 0;

//--------------以下为算法实现部分---------------------------------------------------------------

// 1.1 Dynamic programming, recursive form (memoized search).
// Baseline variant: both strings are passed by value on every recursive call,
// which is exactly the cost this variant exists to measure.
//
// x, y        input strings
// tmp_1       length table; the caller must set row 0 and column 0 to 0 and
//             every other cell to a negative value (the tests use -1)
//             meaning "not computed yet"
// tmp_2       direction table filled for LCS_print_1: 0 = characters match
//             (diagonal step), 1 = value taken from [i][j-1],
//             -1 = value taken from [i-1][j]
// xlen, ylen  lengths of x and y
// i, j        prefix lengths of the sub-problem being solved
// Returns the LCS length of the first i characters of x and the first j
// characters of y, or 0 when (i, j) lies outside the table.
int LCS_length_1(string x, string y, int tmp_1[][Y+1], int tmp_2[][Y+1], int xlen, int ylen, int i, int j){
	if(i < 0 || j < 0 || i > xlen || j > ylen){
		return 0;
	}
	// Any non-negative cell is final. Testing ">= 0" rather than "> 0" also
	// memoizes sub-problems whose answer is 0; with "> 0" those were solved
	// again on every visit, which is exponential for inputs that share few
	// characters.
	if(i == 0 || j == 0 || tmp_1[i][j] >= 0){
		return tmp_1[i][j];
	}
	if(x[i-1] == y[j-1]){
		tmp_2[i][j] = 0;
		return tmp_1[i][j] = 1 + LCS_length_1(x, y, tmp_1, tmp_2, xlen, ylen, i-1, j-1);
	}
	// Solve both neighbours and store them, then keep the larger one
	// (ties go to the [i][j-1] neighbour).
	const int left = tmp_1[i][j-1] = LCS_length_1(x, y, tmp_1, tmp_2, xlen, ylen, i, j-1);
	const int up = tmp_1[i-1][j] = LCS_length_1(x, y, tmp_1, tmp_2, xlen, ylen, i-1, j);
	if(left >= up){
		tmp_2[i][j] = 1;
		return tmp_1[i][j] = left;
	}
	tmp_2[i][j] = -1;
	return tmp_1[i][j] = up;
}

// 1.2 Dynamic programming, recursive form (memoized search).
// Variant 1: the strings are still passed by value; the tables are the
// globals tmp_11 (lengths) and tmp_22 (directions) instead of parameters.
//
// Before the first call, tmp_11 must have row 0 and column 0 set to 0 and
// every other cell set to a negative value (the tests use -1) meaning
// "not computed yet".
//
// x, y        input strings
// xlen, ylen  lengths of x and y
// i, j        prefix lengths of the sub-problem being solved
// Returns the LCS length of the first i characters of x and the first j
// characters of y, or 0 when (i, j) lies outside the table.
int LCS_length_2(string x, string y, int xlen, int ylen, int i, int j){
	if(i < 0 || j < 0 || i > xlen || j > ylen){
		return 0;
	}
	// Any non-negative cell is final. Testing ">= 0" rather than "> 0" also
	// memoizes sub-problems whose answer is 0; with "> 0" those were solved
	// again on every visit, which is exponential for inputs that share few
	// characters.
	if(i == 0 || j == 0 || tmp_11[i][j] >= 0){
		return tmp_11[i][j];
	}
	if(x[i-1] == y[j-1]){
		tmp_22[i][j] = 0;
		return tmp_11[i][j] = 1 + LCS_length_2(x, y, xlen, ylen, i-1, j-1);
	}
	// Solve both neighbours and store them, then keep the larger one
	// (ties go to the [i][j-1] neighbour).
	const int left = tmp_11[i][j-1] = LCS_length_2(x, y, xlen, ylen, i, j-1);
	const int up = tmp_11[i-1][j] = LCS_length_2(x, y, xlen, ylen, i-1, j);
	if(left >= up){
		tmp_22[i][j] = 1;
		return tmp_11[i][j] = left;
	}
	tmp_22[i][j] = -1;
	return tmp_11[i][j] = up;
}

// 1.3 Dynamic programming, recursive form (memoized search).
// Variant 2: the strings are passed as character pointers, so a recursive
// call copies no string data. The tables are passed as parameters; an array
// parameter is itself a pointer to the first row, so no table is copied.
//
// x, y        input character buffers (only read)
// tmp_1       length table; the caller must set row 0 and column 0 to 0 and
//             every other cell to a negative value (the tests use -1)
//             meaning "not computed yet"
// tmp_2       direction table filled for LCS_print_1: 0 = characters match
//             (diagonal step), 1 = value taken from [i][j-1],
//             -1 = value taken from [i-1][j]
// xlen, ylen  number of characters in x and y
// i, j        prefix lengths of the sub-problem being solved
// Returns the LCS length of the first i characters of x and the first j
// characters of y, or 0 when (i, j) lies outside the table.
int LCS_length_3(char* x, char* y, int tmp_1[][Y+1], int tmp_2[][Y+1], int xlen, int ylen, int i, int j){
	if(i < 0 || j < 0 || i > xlen || j > ylen){
		return 0;
	}
	// Any non-negative cell is final. Testing ">= 0" rather than "> 0" also
	// memoizes sub-problems whose answer is 0; with "> 0" those were solved
	// again on every visit, which is exponential for inputs that share few
	// characters.
	if(i == 0 || j == 0 || tmp_1[i][j] >= 0){
		return tmp_1[i][j];
	}
	if(x[i-1] == y[j-1]){
		tmp_2[i][j] = 0;
		return tmp_1[i][j] = 1 + LCS_length_3(x, y, tmp_1, tmp_2, xlen, ylen, i-1, j-1);
	}
	// Solve both neighbours and store them, then keep the larger one
	// (ties go to the [i][j-1] neighbour).
	const int left = tmp_1[i][j-1] = LCS_length_3(x, y, tmp_1, tmp_2, xlen, ylen, i, j-1);
	const int up = tmp_1[i-1][j] = LCS_length_3(x, y, tmp_1, tmp_2, xlen, ylen, i-1, j);
	if(left >= up){
		tmp_2[i][j] = 1;
		return tmp_1[i][j] = left;
	}
	tmp_2[i][j] = -1;
	return tmp_1[i][j] = up;
}

// 1.4 Dynamic programming, recursive form (memoized search).
// Variant 3: everything comes from globals — the strings a and b, their
// lengths alen and blen, and the tables tmp_11 (lengths) and tmp_22
// (directions).
//
// Before the first call, tmp_11 must have row 0 and column 0 set to 0 and
// every other cell set to a negative value (the tests use -1) meaning
// "not computed yet".
//
// i, j  prefix lengths of the sub-problem being solved
// Returns the LCS length of the first i characters of a and the first j
// characters of b, or 0 when (i, j) lies outside the table.
int LCS_length_4(int i, int j){
	if(i < 0 || j < 0 || i > alen || j > blen){
		return 0;
	}
	// Any non-negative cell is final. Testing ">= 0" rather than "> 0" also
	// memoizes sub-problems whose answer is 0; with "> 0" those were solved
	// again on every visit, which is exponential for inputs that share few
	// characters.
	if(i == 0 || j == 0 || tmp_11[i][j] >= 0){
		return tmp_11[i][j];
	}
	if(a[i-1] == b[j-1]){
		tmp_22[i][j] = 0;
		return tmp_11[i][j] = 1 + LCS_length_4(i-1, j-1);
	}
	// Solve both neighbours and store them, then keep the larger one
	// (ties go to the [i][j-1] neighbour).
	const int left = tmp_11[i][j-1] = LCS_length_4(i, j-1);
	const int up = tmp_11[i-1][j] = LCS_length_4(i-1, j);
	if(left >= up){
		tmp_22[i][j] = 1;
		return tmp_11[i][j] = left;
	}
	tmp_22[i][j] = -1;
	return tmp_11[i][j] = up;
}

// 2.1 Dynamic programming, iterative (bottom-up) form using the full table.
// This is the plain version: cell [i][j] of tmp_1 holds the LCS length of the
// first i characters of x and the first j characters of y. It previously
// indexed rows with i%2 like the space-optimized 2.2 variant, so the two
// "different" variants were running the same algorithm.
//
// x, y        input strings
// tmp_1       length table with at least xlen+1 rows; no initialization
//             is required from the caller
// tmp_2       direction table filled for LCS_print_1: 0 = characters match
//             (diagonal step), 1 = value taken from [i][j-1],
//             -1 = value taken from [i-1][j]
// xlen, ylen  lengths of x and y
// Returns the LCS length of x and y (0 for negative lengths).
int LCS_length_5(string x, string y, int tmp_1[][Y+1], int tmp_2[][Y+1], int xlen, int ylen){
	if(xlen < 0 || ylen < 0){
		return 0;
	}
	// Column 0 and row 0 stand for an empty prefix, whose LCS length is 0.
	for(int i = 0; i <= xlen; i++){
		tmp_1[i][0] = 0;
	}
	for(int j = 0; j <= ylen; j++){
		tmp_1[0][j] = 0;
	}
	// Fill the table row by row.
	for(int i = 1; i <= xlen; i++){
		for(int j = 1; j <= ylen; j++){
			if(x[i-1] == y[j-1]){
				tmp_1[i][j] = tmp_1[i-1][j-1] + 1;
				tmp_2[i][j] = 0;
			}else if(tmp_1[i][j-1] >= tmp_1[i-1][j]){
				tmp_1[i][j] = tmp_1[i][j-1];
				tmp_2[i][j] = 1;
			}else{
				tmp_1[i][j] = tmp_1[i-1][j];
				tmp_2[i][j] = -1;
			}
		}
	}
	return tmp_1[xlen][ylen];
}

// 2.2 Dynamic programming, iterative (bottom-up) form, space-optimized.
// Only two rows of the length table are kept: row i%2 holds the values for
// prefix length i of x, and the other row holds those for i-1.
//
// x, y        input strings
// tmp_1       length table with at least 2 rows; no initialization is
//             required from the caller
// tmp_2       full-size direction table filled for LCS_print_1: 0 =
//             characters match (diagonal step), 1 = value taken from
//             [i][j-1], -1 = value taken from [i-1][j]
// xlen, ylen  lengths of x and y
// Returns the LCS length of x and y (0 for negative lengths).
int LCS_length_6(string x, string y, int tmp_1[][Y+1], int tmp_2[][Y+1], int xlen, int ylen){
	if(xlen < 0 || ylen < 0){
		return 0;
	}
	// Row 0 stands for the empty prefix of x and is indexed by positions in
	// y, so it must be cleared up to ylen. The previous bound was xlen, which
	// left columns xlen+1..ylen unset whenever x was the shorter string.
	for(int j = 0; j <= ylen; j++){
		tmp_1[0][j] = 0;
	}
	tmp_1[1][0] = 0;
	// Fill the table row by row, alternating between the two stored rows.
	for(int i = 1; i <= xlen; i++){
		const int cur = i % 2;
		const int prev = 1 - cur;
		tmp_1[cur][0] = 0;
		for(int j = 1; j <= ylen; j++){
			if(x[i-1] == y[j-1]){
				tmp_1[cur][j] = tmp_1[prev][j-1] + 1;
				tmp_2[i][j] = 0;
			}else if(tmp_1[cur][j-1] >= tmp_1[prev][j]){
				tmp_1[cur][j] = tmp_1[cur][j-1];
				tmp_2[i][j] = 1;
			}else{
				tmp_1[cur][j] = tmp_1[prev][j];
				tmp_2[i][j] = -1;
			}
		}
	}
	// Index the answer from the lengths rather than from the loop counters:
	// with xlen == 0 the counters used to produce column -1.
	return tmp_1[xlen % 2][ylen];
}


// Reconstructs one longest common subsequence by walking the direction table
// backwards from cell (xlen, ylen). Iterative.
//
// x           first input string; the subsequence characters are read from it
// result      output buffer; receives the subsequence in REVERSE order and is
//             not NUL-terminated, so the caller prints it back to front
// tmp_2       direction table written by one of the LCS_length_* functions
//             (0 = match, 1 = came from [i][j-1], -1 = came from [i-1][j])
// xlen, ylen  lengths of the two strings
// Returns the number of characters stored in result.
int LCS_print_1(string x, char *result, int tmp_2[][Y+1], int xlen, int ylen){
	int i = xlen, j = ylen, k = 0;
	while(i > 0 && j > 0){
		const int dir = tmp_2[i][j];
		if(dir == 0){
			// Match: record the character and step back diagonally.
			result[k] = x[i-1];
			k++;
			i--;
			j--;
		}else if(dir == 1){
			// Value came from [i][j-1]: step back one column.
			j--;
		}else if(dir == -1){
			// Value came from [i-1][j]: step back one row.
			i--;
		}else{
			// Not a valid direction marker (the table was not filled for
			// this cell). Neither index would move, so stop instead of
			// looping forever.
			break;
		}
	}
	return k;
}

//------------------------以下为测试代码---------------------------------

//1.1测试动态规划算法递归实现
void test_1(string x, string y){
	//2.1参数处理
	char result[X+1];
	int xlen = x.length();
	int ylen = y.length();
	int tmp_1[X+1][Y+1] = {1};
	int tmp_2[X+1][Y+1] = {-2};
	//2.2初始化辅助数组
	memset(tmp_1,-1,sizeof(tmp_1));
	for(int i = 0; i <= xlen; i++){
		tmp_1[i][0] = 0;
	}
	for(int j = 0; j <= ylen; j++){
		tmp_1[0][j] = 0;
	}
	//2.3执行计算
	int max_len = LCS_length_1(x, y, tmp_1, tmp_2, xlen, ylen, xlen, ylen);
	LCS_print_1(x, result, tmp_2, xlen, ylen);	
	//3.3打印结果
	cout << "1.1动态规划法递归求最长公共子序列：" << endl;
	cout << "  最大公共子序列长度为：" << max_len << " 序列为：";
	for(int j = max_len - 1; j >= 0; j--){
		cout << result[j];
	}
	cout << endl;
}

// 1.2 Test driver for the recursive variant that keeps its tables in the
// globals tmp_11 / tmp_22 (LCS_length_2). Computes the LCS of x and y and
// prints its length and text.
void test_2(string x, string y){
	const int rows = x.length();
	const int cols = y.length();

	// Reset the global length table: -1 everywhere, then 0 along the top row
	// and the left column.
	memset(tmp_11, -1, sizeof(tmp_11));
	int r = 0;
	while(r <= rows){
		tmp_11[r][0] = 0;
		++r;
	}
	int c = 0;
	while(c <= cols){
		tmp_11[0][c] = 0;
		++c;
	}

	// Compute the length, then rebuild the sequence from the direction table.
	char result[X+1];
	const int max_len = LCS_length_2(x, y, rows, cols, rows, cols);
	LCS_print_1(x, result, tmp_22, rows, cols);

	// result holds the sequence reversed, so emit it back to front.
	cout << "1.2动态规划法递归求最长公共子序列：" << endl;
	cout << "  最大公共子序列长度为：" << max_len << " 序列为：";
	int pos = max_len;
	while(pos-- > 0){
		cout << result[pos];
	}
	cout << endl;
}

//1.3测试动态规划算法递归实现（字符串引用）
void test_3(string x, string y){
	//2.1参数处理
	char result[X+1];
	int xlen = x.length();
	int ylen = y.length();
	int tmp_1[X+1][Y+1] = {1};
	int tmp_2[X+1][Y+1] = {-2};
	//2.2初始化辅助数组
	memset(tmp_1,-1,sizeof(tmp_1));
	for(int i = 0; i <= xlen; i++){
		tmp_1[i][0] = 0;
	}
	for(int j = 0; j <= ylen; j++){
		tmp_1[0][j] = 0;
	}
	//2.3执行计算
	int max_len = LCS_length_3(str_1, str_2, tmp_1, tmp_2, xlen, ylen, xlen, ylen);
	LCS_print_1(x, result, tmp_2, xlen, ylen);	
	//3.3打印结果
	cout << "1.3动态规划法递归求最长公共子序列：" << endl;
	cout << "  最大公共子序列长度为：" << max_len << " 序列为：";
	for(int j = max_len - 1; j >= 0; j--){
		cout << result[j];
	}
	cout << endl;
}
// 1.4 Test driver for the recursive variant that reads everything from
// globals (LCS_length_4). Publishes x and y through the globals a / b,
// computes their LCS and prints its length and text.
void test_4(string x, string y){
	// Hand the inputs to the recursion through the global variables.
	a = x;
	b = y;
	alen = a.length();
	blen = b.length();

	// Reset the global length table: -1 everywhere, then 0 along the top row
	// and the left column.
	memset(tmp_11, -1, sizeof(tmp_11));
	int r = 0;
	while(r <= alen){
		tmp_11[r][0] = 0;
		++r;
	}
	int c = 0;
	while(c <= blen){
		tmp_11[0][c] = 0;
		++c;
	}

	// Compute the length, then rebuild the sequence from the direction table.
	char result[X+1];
	const int max_len = LCS_length_4(alen, blen);
	LCS_print_1(a, result, tmp_22, alen, blen);

	// result holds the sequence reversed, so emit it back to front.
	cout << "1.4动态规划法递归求最长公共子序列：" << endl;
	cout << "  最大公共子序列长度为：" << max_len << " 序列为：";
	int pos = max_len;
	while(pos-- > 0){
		cout << result[pos];
	}
	cout << endl;
}

//2.1测试动态规划算法非递归实现
void test_5(string x, string y){
	//3.1参数处理
	char result[X+1];
	int xlen = x.length();
	int ylen = y.length();
	int tmp_1[X+1][Y+1] = {-2};
	int tmp_2[X+1][Y+1] = {-2};
	//3.2执行计算
	int max_len = LCS_length_5(x, y, tmp_1, tmp_2, xlen, ylen);
	LCS_print_1(x, result, tmp_2, xlen, ylen);
	//3.3输出结果
	cout << "2.1动态规划法非递归求最长公共子序列：" << endl;
	cout << "  最大公共子序列长度为：" << max_len << " 序列为：";
	for(int j = max_len - 1; j >= 0; j--){
		cout << result[j];
	}
	cout << endl;
}
//2.2测试动态规划算法非递归实现（空间优化）
void test_6(string x, string y){
	//3.1参数处理
	char result[X+1];
	int xlen = x.length();
	int ylen = y.length();
	int tmp_1[2][Y+1] = {-2};
	int tmp_2[X+1][Y+1] = {-2};
	//3.2执行计算
	int max_len = LCS_length_6(x, y, tmp_1, tmp_2, xlen, ylen);
	LCS_print_1(x, result, tmp_2, xlen, ylen);
	//3.3输出结果
	cout << "2.2动态规划法非递归求最长公共子序列：" << endl;
	cout << "  最大公共子序列长度为：" << max_len << " 序列为：";
	for(int j = max_len - 1; j >= 0; j--){
		cout << result[j];
	}
	cout << endl;
}

// Fills str with len random lowercase letters ('a'..'z') followed by a
// terminating '\0', and returns str. The buffer must hold at least len+1
// characters. A len of 0 or less produces an empty string.
//
// The generator is seeded from the clock only on the first call. Seeding on
// every call made two calls within the same second restart the identical
// sequence, so both strings came out the same.
char* rand_string(char* str, int len){
	static int seeded = 0;
	if(!seeded){
		srand((unsigned)time(NULL));
		seeded = 1;
	}
	int i = 0;
	for(i = 0; i < len; i++){
		str[i] = rand()%26 + 'a';
	}
	str[i] = '\0';
	return str;
}

// Reads the clock with gettimeofday() and returns the reading in
// milliseconds: the seconds field times 1000 plus the microseconds field
// divided by 1000 (integer division).
long getCurrentTime(){
	struct timeval now;
	gettimeofday(&now, NULL);
	return now.tv_usec / 1000 + now.tv_sec * 1000;
}

// Runs one test routine on (x, y) and prints how long it took, in
// milliseconds, as measured by getCurrentTime().
static void run_timed(void (*test)(string, string), const string &x, const string &y){
	const long started = getCurrentTime();
	test(x, y);
	const long finished = getCurrentTime();
	cout << "  用时：";
	cout << finished - started << " ms;" << endl;
}

// Generates two random strings of lengths X and Y, prints them, then runs and
// times the LCS variants on that pair.
int main()
{
	// The one-second pause keeps the two generation calls in different clock
	// seconds, so time-based seeding cannot give both buffers the same
	// character sequence.
	rand_string(str_1,X);
	sleep(1);
	rand_string(str_2,Y);
	string x(str_1);
	string y(str_2);

	cout << "string x = " << x << endl;
	cout << "  字符串长度为：" << x.length() << endl;
	cout << "string y = " << y << endl;
	cout << "  字符串长度为：" << y.length() << endl;

	// test_1 and test_2 (strings passed by value at every recursion level)
	// are deliberately left out of this run: they take minutes at much
	// smaller sizes. Pass them to run_timed to include them.
	run_timed(test_3, x, y);
	run_timed(test_4, x, y);
	run_timed(test_5, x, y);
	run_timed(test_6, x, y);

	// 0 reports success to the caller; the previous value 1 is read as failure.
	return 0;
}