算法上机2 —— 快速排序pivot选择 Hoare划分尾递归

最新推荐文章于 2024-01-13 00:12:14 发布

mysoulmq

最新推荐文章于 2024-01-13 00:12:14 发布

阅读量682

点赞数

分类专栏：算法上机作业文章标签：快速排序 pivot选择 Hoare 算法上机快排尾递归

本文链接：https://blog.csdn.net/mysoulmq/article/details/80317315

版权

算法上机作业专栏收录该内容

2 篇文章 0 订阅

订阅专栏

题目1

This project requires you to implement an optimized version of quicksort, and compare the performances of the following combinations:

(1) Cutoff values (边界值) from 0 to 20 (between 0 and 20) (that is, if n is less than or equal to the cutoff value, we adopt insertion sort instead of quicksort);

(2) Take pivot to be the 1st element, random, median of random three, and median of random five.

The tests must be done on the following three kinds of inputs:

(1) sorted input;

(2) reverse-ordered input;

(3) random input.

The size of input can be taken from 20000 to 100000 (the data are generated randomly between 1 and 1000). The run times must be plotted with respect to the sizes to illustrate the difference. (figure out using excel, matlab in the Report)

分析

题目要求在快速排序的过程中，当数组分割后的长度不大于20时换用插入排序。

插入排序：

插入排序是在有序序列的基础上往其中插入新的元素，并且保证新数组仍然有序。当待排数列有序时是最好情况，此时复杂度为Θ(n)；当待排数列倒序时是最坏情况，此时复杂度为Θ(n^2)；平均情况下复杂度是O(n^2)。

快速排序：

快速排序是在数组中选取了一个主元x后，将这个数组分为[<x], x, [>x]三个部分。然后再对划分后的小数组重复刚才的动作，直到数组有序。对于书本上选择数组最后一个数为主元的快速排序，当划分的两个子问题分别包含了n-1个元素和0个元素时，最坏情况发生，其复杂度为Θ(n^2)；当每次划分都是平均划分时，最好情况发生，其复杂度是Θ(nlgn)；平均情况下其复杂度是O(nlgn)。

很显然，只要不是每次划分都出现两个子问题分别包含n-1个元素和0个元素，就可以避免复杂度为Θ(n^2)的最坏情况的发生。因此，可以通过随机取一个值、随机取奇数个值的中间值等方法来灵活的选择主元x。

算法伪代码

//插入排序
InsertSort(A, p, r)
       for j = p to r
              key= A[j]
              i = j - 1
              while i>=p and A[i]>key
                     A[i+1]= A[i]
                     i= i - 1
              A[i+1] = key

//快速排序
Partition(A, p, r, pivot)
       if pivot == 0
              key= p
       if pivot == 1
              key= random index between p and r
       if pivot == 2
              key= index of the median of three random elements of A[p..r]
       if pivot == 3
              key= index of the median of five random elements of A[p..r]

       x = A[key]
       i = p - 1
       for j = p to r
              if A[j] <= x
                     i = i + 1
                     exchange(A[i] with A[j])
                     if j == key
                            key= i                          
       exchange(A[key] with A[i])
       return i

Quicksort(A, p, r, pivot)
       if p < r
              q = Partition(A, p, r, pivot)
              if q-p > 20
                     Quicksort(A, p, q-1, pivot)
              else
                     Insertsort(A,p, q-1)
              if r-q > 20
                     Quicksort(A,q+1, r, pivot)
              else
                     Insertsort(A, q+1, r)

总结

输入数据：长度为20000到100000、步长为2000、元素为0到1000的41个随机数组，以及它们排序后的有序数组和倒序数组。这样重复三次并记录数据，取各个长度的数组在各算法下的时间平均值。

用MATLAB处理的以数组长度为自变量，排序时间为因变量的拟合图像如下：

1. 随机数组

从数据中可以看出，对于随机数组来说，四种快排的效率差不多。但是结合图像来看，1st的快排相比于其他三种取随机数的快排在效率上有略微的优势，我认为是取随机值以及取中的过程消耗了一定的时间。

2. 有序数组

3. 倒序数组

去除1st曲线后：

在输入有序数组和倒序数组的情况下，使用1st的快排花费的时间远大于其他三种快排，因为出现了划分后两个子问题分别包含n-1个元素和0个元素的最坏情况。而在三种随机取值的快排中，三数取中的效率最高，我认为这是因为random快排中仍然会存在取头取尾的情况，而五数取中快排在随机取数和排序过程中会花费更多的时间。

综上所述，当排序数组是随机的时，采用1st的快排算法效率较高；当排序数组趋近于有序时，采用三数取中的快排算法效率较高。

算法源码

#include <iostream>
#include<cstring>
#include <fstream>
#include <sstream>
#include <string>
#include <time.h>
#include<stdlib.h>
using namespace std;

// Exchanges the values of the two ints referred to by a and b.
void swap(int &a, int &b)
{
	const int held = a;	// keep a's value while a is overwritten
	a = b;
	b = held;
}

// Converts a number to text using the default formatting of stream insertion.
string num2str(double i)
{
	ostringstream formatted;
	formatted << i;
	return formatted.str();
}

// Returns the length of `array`, computed as the byte size of the whole object
// divided by the byte size of its first element. The count is only meaningful
// when the argument is a built-in array passed by reference.
template <class ArrayType>
int getArrayLen(ArrayType& array)
{
	return static_cast<int>(sizeof(array) / sizeof(array[0]));
}

// Insertion sort
// Sorts the inclusive range A[p..r] into ascending order, in place.
// Nothing is touched outside A[p..r]; a range with r <= p is left as it is.
void Insertsort(int *A, int p, int r)
{
	for (int next = p + 1; next <= r; ++next)
	{
		const int value = A[next];
		int slot = next;

		// Shift every larger element of the sorted prefix A[p..next-1] one
		// step to the right until the hole reaches value's position.
		for (; slot > p && A[slot - 1] > value; --slot)
			A[slot] = A[slot - 1];

		A[slot] = value;
	}
}

// Reorders the three positions held in index[0..2] so that the values of A they
// select are in ascending order, then returns the middle position, i.e. the
// index in A of the median of the three selected values.
int Median3(int *A, int *index)
{
	int &low = index[0];
	int &mid = index[1];
	int &high = index[2];

	if (A[low] > A[mid])
		swap(low, mid);
	if (A[low] > A[high])
		swap(low, high);
	if (A[mid] > A[high])
		swap(mid, high);

	return mid;
}

// Sorts the five positions held in index[0..4] by the values of A they select
// (ascending, exchange sort) and returns the third one, i.e. the index in A of
// the median of the five selected values.
int Median5(int *A, int *index)
{
	const int kCount = 5;

	for (int slot = 0; slot + 1 < kCount; ++slot)
	{
		for (int probe = slot + 1; probe < kCount; ++probe)
		{
			// keep the position of the smallest remaining value in `slot`
			if (A[index[slot]] > A[index[probe]])
				swap(index[slot], index[probe]);
		}
	}

	return index[kCount / 2];
}

// Partitions the inclusive range A[p..r] around a pivot value and returns the
// pivot's final position q: afterwards A[p..q] <= A[q] and A[q+1..r] > A[q].
//
// pivot selects how the pivot element is chosen:
//   0 - the first element A[p]
//   1 - a random element of A[p..r]
//   2 - the median of three random elements of A[p..r]
//   3 - the median of five random elements of A[p..r]
// Any other value falls back to the first element (previously `key` was left
// uninitialised in that case).
//
// Requires p <= r.
int Partition(int *A, int p, int r, int pivot)
{
	// Seed rand() a single time. Re-seeding with time(0) on every call restarts
	// the generator with the same seed for a whole second, so every "random"
	// pick made during that second was the same offset into the range.
	static bool seeded = false;
	if (!seeded)
	{
		srand(static_cast<unsigned int>(time(0)));
		seeded = true;
	}

	const int count = r - p + 1;
	int key = p;	// the 1st element; also the fallback for unknown modes

	if (pivot == 1)
	{
		// random
		key = (rand() % count) + p;
	}
	else if (pivot == 2)
	{
		// median of random three
		int index[3];
		for (int k = 0; k < 3; k++)
			index[k] = (rand() % count) + p;
		key = Median3(A, index);
	}
	else if (pivot == 3)
	{
		// median of random five
		int index[5];
		for (int k = 0; k < 5; k++)
			index[k] = (rand() % count) + p;
		key = Median5(A, index);
	}

	const int x = A[key];
	int i = p - 1;	// A[p..i] holds the elements <= x found so far
	for (int j = p; j <= r; j++)
	{
		if (A[j] <= x)
		{
			i++;
			swap(A[i], A[j]);
			// the pivot element itself was just moved to position i: follow it
			if (j == key)
				key = i;
		}
	}
	// move the pivot to the end of the "<= x" region so it sits at the boundary
	swap(A[key], A[i]);
	return i;
}

// Sorts the inclusive range A[p..r] into ascending order.
//
// pivot  - pivot-selection mode, passed through to Partition.
// cutoff - a range holding at most this many elements is finished with
//          Insertsort instead of being partitioned further. Defaults to 20,
//          the value that used to be hard-coded, so existing calls are
//          unaffected; pass 0..20 to compare cutoff values.
//
// The smaller side of every partition is sorted recursively and the larger side
// is handled by the loop. This bounds the recursion depth by O(log n) even when
// every split is maximally unbalanced (first-element pivot on sorted or
// reverse-ordered input), where recursing on both sides went n levels deep.
void Quicksort(int *A, int p, int r, int pivot, int cutoff = 20)
{
	while (p < r)
	{
		if (r - p + 1 <= cutoff)
		{
			Insertsort(A, p, r);
			return;
		}

		const int q = Partition(A, p, r, pivot);

		if (q - p < r - q)
		{
			// left side is smaller: recurse into it, keep looping on the right
			Quicksort(A, p, q-1, pivot, cutoff);
			p = q + 1;
		}
		else
		{
			// right side is smaller (or equal): recurse into it, loop on the left
			Quicksort(A, q+1, r, pivot, cutoff);
			r = q - 1;
		}
	}
}

// Generates a random array.
// n is the array length. Returns an array allocated with new[] whose n elements
// are rand() % 1001, i.e. values in [0, 1000]. The array is not freed here, so
// the caller has to delete[] it. Also writes three newlines to cout.
int * CreatArray(int n)
{
	// Seed only on the first call: seeding with time(0) every time makes all
	// arrays created within the same second start with identical data.
	static bool seeded = false;
	if (!seeded)
	{
		srand(static_cast<unsigned int>(time(0)));
		seeded = true;
	}

	int *p = new int[n];
	for(int i=0; i<n; i++)
	{
		p[i] = rand()%1001;
	}
	cout<<"\n\n\n";

	return p;
}

// Copies the first `length` elements of A into B, front to back.
// Nothing is copied when length is zero or negative.
void CopyArray(int *A, int *B, int length)
{
	const int *source = A;
	int *target = B;

	for (int remaining = length; remaining > 0; --remaining)
		*target++ = *source++;
}

// Times Quicksort on a private copy of A[0..length-1] once for each pivot mode
// 0, 1, 2, 3 (in that order). Each elapsed clock() tick count is printed to cout
// on its own line and written to myfile; the four values form one line of
// myfile, separated by tabs. A itself is not modified.
void test(int *A, int length, ofstream &myfile)
{
	const int kModes = 4;
	int *B = new int[length];

	for (int mode = 0; mode < kModes; ++mode)
	{
		// start every run from the same unsorted data
		CopyArray(A, B, length);

		// clock() returns clock_t, so measure with clock_t rather than time_t
		const clock_t begin = clock();
		Quicksort(B, 0, length-1, mode);
		const clock_t end = clock();

		cout<<end-begin<<"\n";
		myfile<<end-begin<<(mode + 1 < kModes ? "\t" : "\n");
	}

	// the scratch copy used to be leaked on every call
	delete[] B;
}

// Reverses the order of A[0..length-1] in place.
// Works from both ends towards the middle, so no temporary array is needed
// (the previous version allocated a full copy and never freed it).
// Nothing happens when length is 0, 1 or negative.
void reverse(int *A, int length)
{
	for (int left = 0, right = length - 1; left < right; ++left, --right)
	{
		const int held = A[left];
		A[left] = A[right];
		A[right] = held;
	}
}

// Runs the whole experiment three times, writing the timings of run k to
// "data<k>.txt". For every length from 20000 to 100000 in steps of 2000 a random
// array is created and test() is run on it three times: as generated, after
// sorting it, and after reversing the sorted array.
int main()
{
	ofstream myfile;

	for(int i=0; i<3; i++)
	{
		const string filename = "data" + num2str(i+1) + ".txt";
		myfile.open(filename.c_str(), ios::out);
		if (!myfile)
		{
			// timings are still printed to cout, but make the lost file visible
			cerr<<"warning: cannot open "<<filename<<" for writing\n";
		}

		for (int length=20000; length<=100000; length+=2000)
		{
			int *A = CreatArray(length);

			// random input
			test(A, length, myfile);

			// sorted input
			Quicksort(A, 0, length-1, 1);
			test(A, length, myfile);

			// reverse-ordered input
			reverse(A, length);
			test(A, length, myfile);

			// each array used to be leaked; release it before the next length
			delete[] A;
		}

		myfile.close();
	}

	return 0;
}

题目2

Implement Hoare’s algorithm and compare it with our algorithm in the textbook.

(体会有重复数据情况下，算法之间的优劣)。

The input is also taken from 20000 to 100000 (the data are generated randomly between 1 and 1000), and the tests should be done on the random input. The run times must be plotted with respect to the sizes to illustrate the difference. (figure out using excel, matlab in the Report)

分析

普通快排：

选取最后一个元素A[r]为主元后，进行划分，结果以q为划分分为[p, q - 1]和[q+ 1, r]。

Hoare划分快排：

选取第一个元素A[p]为主元，进行划分，结果为A[p..j]和A[j+1..r]两部分，主元可能放入某一个之中。

算法伪代码

//Hoare
Hoare_Partition(A, p, r)
	x = A[p]
	i = p - 1
	j = r + 1
	
	while True
		repeat
			j = j - 1
		until A[j]<=x 
		
		repeat
			i = i + 1
		until A[i]>=x
		
		if i < j
			exchange A[i] with A[j]
		else
			return j

Hoare_Quicksort(A, p, r)
	if p < r
		q = Hoare_Partition(A, p, r)
		Hoare_Quicksort(A, p, q)
		Hoare_Quicksort(A, q+1, r)

//Origin
Partition(A, p, r)
	x = A[r]
	i = p - 1
	
	for j=p to r-1
		if A[j]<=x
			i = i + 1
			exchange A[i] with A[j]
			
	exchange A[i+1] with A[r]
	return i + 1

Quicksort(A, p, r)
	if p < r
		q = Partition(A, p, r);
		Quicksort(A, p, q-1);
		Quicksort(A, q+1, r);

总结

用MATLAB处理的以数组长度为自变量，排序时间为因变量的图像如下：

1. 折线图

2. 拟合图

从以上两个图像可以看出，随着数组长度的增加，重复数据增多，Hoare划分快排比较于普通快排在效率上有了更大的优势。

在Hoare划分的过程中，与主元相等的元素既可以位于A[p..j]中，也可以位于A[j+1..r]中。所以，当数组中有大量重复数据时，就可以减少与主元相等的元素的移动，从而提升效率。

算法源码

#include <iostream>
#include<cstring>
#include <fstream>
#include <sstream>
#include <string>
#include <time.h>
#include <stdlib.h>
using namespace std;

// Swaps the two ints referred to by a and b.
void swap(int &a, int &b)
{
	int previousB = b;	// b is overwritten first, so remember it
	b = a;
	a = previousB;
}

// Returns the text produced by inserting the number into a string stream
// with default formatting.
string num2str(double i)
{
	stringstream buffer;
	buffer << i;
	string text = buffer.str();
	return text;
}

// Copies A[0..length-1] into B[0..length-1], lowest index first.
// A non-positive length copies nothing.
void CopyArray(int *A, int *B, int length)
{
	if (length <= 0)
		return;

	int at = 0;
	do
	{
		B[at] = A[at];
	} while (++at < length);
}

// Hoare partition of the inclusive range A[p..r] using A[p] as the pivot value.
// Two cursors move towards each other from outside both ends; each pair of
// out-of-place elements they stop on is exchanged. Returns the position j where
// the cursors meet or cross: every element of A[p..j] is <= every element of
// A[j+1..r]. The pivot value may end up on either side.
int Hoare_Partition(int *A, int p, int r)
{
	const int pivotValue = A[p];
	int left = p - 1;
	int right = r + 1;

	for (;;)
	{
		// step right cursor down to an element that is not above the pivot
		do
		{
			--right;
		} while (A[right] > pivotValue);

		// step left cursor up to an element that is not below the pivot
		do
		{
			++left;
		} while (A[left] < pivotValue);

		if (left >= right)
			return right;

		swap(A[left], A[right]);
	}
}

// Quicksort of the inclusive range A[p..r] built on Hoare_Partition.
// The split position belongs to the left part, so the two recursive calls
// cover A[p..split] and A[split+1..r].
void Hoare_Quicksort(int *A, int p, int r)
{
	// ranges with fewer than two elements are already sorted
	if (p >= r)
		return;

	const int split = Hoare_Partition(A, p, r);
	Hoare_Quicksort(A, p, split);
	Hoare_Quicksort(A, split + 1, r);
}

// Textbook partition of the inclusive range A[p..r] using the last element A[r]
// as the pivot value. Returns the pivot's final position q; afterwards
// A[p..q-1] <= A[q] and A[q+1..r] > A[q].
int Text_Partition(int *A, int p, int r)
{
	const int pivotValue = A[r];
	int boundary = p;	// A[p..boundary-1] holds the elements <= pivotValue seen so far

	for (int scan = p; scan < r; ++scan)
	{
		if (A[scan] > pivotValue)
			continue;

		swap(A[boundary], A[scan]);
		++boundary;
	}

	// drop the pivot between the two regions
	swap(A[boundary], A[r]);
	return boundary;
}

// Textbook quicksort of the inclusive range A[p..r] built on Text_Partition.
// The pivot position is excluded from both recursive calls.
void Text_Quicksort(int *A, int p, int r)
{
	const bool hasAtLeastTwo = p < r;
	if (!hasAtLeastTwo)
		return;

	const int pivotAt = Text_Partition(A, p, r);
	Text_Quicksort(A, p, pivotAt - 1);
	Text_Quicksort(A, pivotAt + 1, r);
}

// Generates a random array.
// n is the array length. Returns an array allocated with new[] whose n elements
// are rand() % 1001, i.e. values in [0, 1000]. The array is not freed here, so
// the caller has to delete[] it. Also writes three newlines to cout.
int * CreatArray(int n)
{
	// Seed only on the first call: seeding with time(0) every time makes all
	// arrays created within the same second start with identical data.
	static bool seeded = false;
	if (!seeded)
	{
		srand(static_cast<unsigned int>(time(0)));
		seeded = true;
	}

	int *p = new int[n];
	for(int i=0; i<n; i++)
	{
		p[i] = rand()%1001;
	}
	cout<<"\n\n\n";

	return p;
}

// Times Hoare_Quicksort and then Text_Quicksort on private copies of
// A[0..length-1]. Each elapsed clock() tick count is printed to cout on its own
// line; in myfile the two values form one line, tab-separated (Hoare first).
// A itself is not modified.
void test(int *A, int length, ofstream &myfile)
{
	int *B = new int[length];

	// clock() returns clock_t, so measure with clock_t rather than time_t
	CopyArray(A, B, length);
	clock_t begin = clock();
	Hoare_Quicksort(B, 0, length-1);
	clock_t end = clock();
	cout<<end-begin<<"\n";
	myfile<<end-begin<<"\t";

	// restore the unsorted data before timing the second algorithm
	CopyArray(A, B, length);
	begin = clock();
	Text_Quicksort(B, 0, length-1);
	end = clock();
	cout<<end-begin<<"\n";
	myfile<<end-begin<<"\n";

	// the scratch copy used to be leaked on every call
	delete[] B;
}

// Runs the comparison three times, writing the timings of run k to
// "data<k>.txt". For every length from 20000 to 100000 in steps of 2000 a random
// array is created and handed to test().
int main()
{
	ofstream myfile;

	for(int i=0; i<3; i++)
	{
		const string filename = "data" + num2str(i+1) + ".txt";
		myfile.open(filename.c_str(), ios::out);
		if (!myfile)
		{
			// timings are still printed to cout, but make the lost file visible
			cerr<<"warning: cannot open "<<filename<<" for writing\n";
		}

		for(int length=20000; length<=100000; length+=2000)
		{
			int *A = CreatArray(length);
			test(A, length, myfile);
			// each array used to be leaked; release it before the next length
			delete[] A;
		}

		myfile.close();
	}

	return 0;
}

题目3

Implement quicksort algorithm using tail recursion and compare it with the original quicksort algorithm.

分析

采用了循环控制结构，用循环代替了普通快排的第二次递归。

算法伪代码

Partition(A, p, r)
	x = A[r]
	i = p - 1
	
	for j=p to r-1
		if A[j]<=x
			i = i + 1
			exchange A[i] with A[j]
			
	exchange A[i+1] with A[r]
	return i + 1

Taile_Recursive_Quicksort(A, p, r)
	while p < r
		q = Partition(A, p, r)
		Taile_Recursive_Quicksort(A, p, q - 1)
		p = q + 1

总结

用MATLAB处理的以数组长度为自变量，排序时间为因变量的图像如下：

1. 折线图

2. 拟合图

关闭了编译器的自动优化以后，尾递归快排相比于普通快排只有非常微小的优势（不排除实际上没有关闭编译优化的可能性）。在尾递归快排中，第二次递归并没有在实际上被省去，只是以循环控制结构的形式进行，整个排序过程中的递归深度也没有发生变化。所以我猜测，尾递归的实际意义可能不在时间效率上，而是体现在空间效率中。

算法源码

#include <iostream>
#include<cstring>
#include <fstream>
#include <fstream>
#include <sstream>
#include <time.h>
#include<stdlib.h>
using namespace std;

// Exchanges the contents of the two referenced ints.
void swap(int &a, int &b)
{
	const int first = a;
	const int second = b;

	a = second;
	b = first;
}

// Renders the number as text with the stream's default number formatting.
string num2str(double i)
{
	ostringstream out;
	out << i;
	const string rendered(out.str());
	return rendered;
}



// Partitions the inclusive range A[p..r] around the value of its last element.
// Returns the pivot's final position q; afterwards A[p..q-1] <= A[q] and
// A[q+1..r] > A[q].
int Partition(int *A, int p, int r)
{
	const int pivotValue = A[r];
	int lastSmall = p - 1;	// end of the region of elements <= pivotValue
	int cursor = p;

	while (cursor < r)
	{
		if (A[cursor] <= pivotValue)
		{
			++lastSmall;
			swap(A[lastSmall], A[cursor]);
		}
		++cursor;
	}

	// the pivot goes right behind the region of small elements
	const int pivotAt = lastSmall + 1;
	swap(A[pivotAt], A[r]);
	return pivotAt;
}

// Plain recursive quicksort of the inclusive range A[p..r]: partition, then
// sort the part before the pivot and the part after it with two recursive calls.
void Quicksort(int *A, int p, int r)
{
	if (r <= p)
		return;	// zero or one element: nothing to sort

	const int pivotAt = Partition(A, p, r);
	Quicksort(A, p, pivotAt - 1);
	Quicksort(A, pivotAt + 1, r);
}

// Quicksort of the inclusive range A[p..r] with the second recursive call
// replaced by a loop: the part before the pivot is sorted recursively, then the
// loop carries on with the part after the pivot.
void Taile_Recursive_Quicksort(int *A, int p, int r)
{
	for (int low = p; low < r; )
	{
		const int pivotAt = Partition(A, low, r);
		Taile_Recursive_Quicksort(A, low, pivotAt - 1);
		low = pivotAt + 1;	// continue with the right part instead of recursing
	}
}

// Copies the leading `length` elements of A to the same positions of B,
// in increasing index order. Does nothing for a length of zero or less.
void CopyArray(int *A, int *B, int length)
{
	int at = 0;
	while (at < length)
	{
		B[at] = A[at];
		++at;
	}
}

// Generates a random array.
// n is the array length. Returns an array allocated with new[] whose n elements
// are rand() % 1001, i.e. values in [0, 1000]. The array is not freed here, so
// the caller has to delete[] it. Also writes three newlines to cout.
int * CreatArray(int n)
{
	// Seed only on the first call: seeding with time(0) every time makes all
	// arrays created within the same second start with identical data.
	static bool seeded = false;
	if (!seeded)
	{
		srand(static_cast<unsigned int>(time(0)));
		seeded = true;
	}

	int *p = new int[n];
	for (int i = 0; i<n; i++)
	{
		p[i] = rand() % 1001;
	}
	cout << "\n\n\n";

	return p;
}

// Times Taile_Recursive_Quicksort and then Quicksort on private copies of
// A[0..length-1]. Each elapsed clock() tick count is printed to cout on its own
// line; in myfile the two values form one line, tab-separated (tail-recursive
// version first). A itself is not modified.
void test(int *A, int length, ofstream &myfile)
{
	int *B = new int[length];

	// clock() returns clock_t, so measure with clock_t rather than time_t
	CopyArray(A, B, length);
	clock_t begin = clock();
	Taile_Recursive_Quicksort(B, 0, length - 1);
	clock_t end = clock();
	cout << end - begin << "\n";
	myfile << end - begin << "\t";

	// restore the unsorted data before timing the second algorithm
	CopyArray(A, B, length);
	begin = clock();
	Quicksort(B, 0, length - 1);
	end = clock();
	cout << end - begin << "\n";
	myfile << end - begin << "\n";

	// the scratch copy used to be leaked on every call
	delete[] B;
}

// Runs the comparison three times, writing the timings of run k to
// "data<k>.txt". For every length from 700000 to 800000 in steps of 2000 a
// random array is created and handed to test().
int main()
{
	ofstream myfile;

	for (int i = 0; i < 3; i++)
	{
		const string filename = "data" + num2str(i + 1) + ".txt";
		myfile.open(filename.c_str(), ios::out);
		if (!myfile)
		{
			// timings are still printed to cout, but make the lost file visible
			cerr << "warning: cannot open " << filename << " for writing\n";
		}

		for (int length = 700000; length <= 800000; length += 2000)
		{
			int *A = CreatArray(length);
			test(A, length, myfile);
			// each array used to be leaked; release it before the next length
			delete[] A;
		}

		myfile.close();
	}

	return 0;
}