各种排序的实现与分析
我实现了7种排序算法:冒泡、选择、插入、希尔、归并、快排、堆排序。
我在linux平台下进行了测试,调用了linux中的时间函数,精确到微秒。如果想要在windows下编译,只需要将计算时间的模块更改为windows下的API即可。
数据采用rand函数自动生成,根据数据集的大小和排序算法的个数进行生成。保证每个排序使用的数据集一致,避免偶然性。
以下是实现代码:
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <ctime>
#include <sys/time.h>
using namespace std;
// Number of elements in every dataset; initData also uses it as the modulus
// applied to rand(), so generated values lie in [0, MAX).
const long long MAX = 1e8;
// Table of datasets: data[k] points to the k-th array of MAX elements.
// Allocated by initData and released by freeData.
// NOTE(review): because of `using namespace std`, this name may be ambiguous
// with std::data when built as C++17 or later - confirm with the compiler in use.
long long **data;
// Number of datasets main asks initData to allocate; main hands one dataset
// to each sort it runs.
const int sortNum = 4;
// Scratch buffer main uses to build the label passed to showSpendTime.
char buf[128];
// Allocate `num` datasets of MAX elements each and fill them with one shared
// pseudo-random sequence, so that every sort is timed on identical input.
// The generator is seeded from the current time. Always returns 0.
int initData(int num)
{
    srand((unsigned)time(NULL));

    // Pointer table first, then one MAX-element array per dataset.
    data = new long long *[num];
    for(int set = 0; set < num; ++set)
        data[set] = new long long [MAX];

    for(int pos = 0; pos < MAX; ++pos)
    {
        // One draw per position, replicated into every dataset.
        const long long value = rand() % MAX;
        for(int set = 0; set < num; ++set)
            data[set][pos] = value;
    }
    return 0;
}
// Release the `num` datasets and the pointer table allocated by initData(num).
// Does nothing when no table is allocated (data is NULL), and resets the
// pointers it frees so that a repeated call cannot double-delete.
// Always returns 0.
int freeData(int num)
{
    // `data` has static storage, so it is NULL until initData assigns it.
    if(data == NULL)
        return 0;

    for(int i = 0; i < num; i++)
    {
        delete[] data[i];
        data[i] = NULL;
    }
    delete[] data;
    data = NULL;
    return 0;
}
// Print the first `num` datasets side by side: one output row per element
// index, one tab-separated column per dataset.
void display(int num)
{
    for(int row = 0; row < MAX; ++row)
    {
        cout << "id : " << row << "\t\t";
        int col = 0;
        while(col < num)
        {
            cout << data[col][row] << "\t";
            ++col;
        }
        cout << endl;
    }
}
// Print the wall-clock time elapsed since `oldTime`, labelled with `info`.
// The elapsed time is shown as whole seconds plus the sub-second remainder
// split into milliseconds and microseconds. `oldTime` is only read.
void showSpendTime(struct timeval &oldTime, char *info)
{
    struct timeval now;
    gettimeofday(&now, NULL);

    // Borrow one second when the microsecond difference would be negative.
    if(now.tv_usec < oldTime.tv_usec)
    {
        now.tv_sec--;
        now.tv_usec += 1000000;
    }

    // time_t and suseconds_t are not guaranteed to be `long`; convert
    // explicitly so the arguments match the %ld conversions below.
    const long sec  = (long)(now.tv_sec - oldTime.tv_sec);
    const long usec = (long)(now.tv_usec - oldTime.tv_usec);

    printf("%s : \n%10ld seconds\t%4ld milliseconds\t%4ld microseconds\n\n",
           info, sec, usec / 1000, usec % 1000);
}
// bubbling sort
// Sorts dataset `id` into ascending order in place. Each pass walks the
// unsorted prefix and swaps adjacent out-of-order pairs, which moves the
// largest remaining value to the end of that prefix. Always returns 0.
int sortBubbling(int id)
{
    long long *arr = data[id];
    for(int pass = 0; pass < MAX - 1; ++pass)
    {
        // Elements after index `last` are already in their final place.
        const long long last = MAX - pass - 1;
        for(long long k = 0; k < last; ++k)
        {
            if(arr[k] > arr[k + 1])
            {
                const long long hold = arr[k + 1];
                arr[k + 1] = arr[k];
                arr[k] = hold;
            }
        }
    }
    return 0;
}
// selection sort
// Sorts dataset `id` into ascending order in place. For each slot, finds the
// first smallest value in the remaining suffix and swaps it into the slot.
// Always returns 0.
int sortSelection(int id)
{
    long long *arr = data[id];
    for(int slot = 0; slot < MAX - 1; ++slot)
    {
        int smallest = slot;
        for(int probe = slot + 1; probe < MAX; ++probe)
        {
            // Strict comparison keeps the earliest of equal minima.
            if(arr[smallest] > arr[probe])
                smallest = probe;
        }
        const long long hold = arr[slot];
        arr[slot] = arr[smallest];
        arr[smallest] = hold;
    }
    return 0;
}
// insertion sort
// Sorts dataset `id` into ascending order in place. Each element is lifted
// out, larger elements of the sorted prefix are shifted one slot right, and
// the element is dropped into the hole that opens up. Always returns 0.
int sortInsertion(int id)
{
    long long *arr = data[id];
    for(int next = 1; next < MAX; ++next)
    {
        const long long key = arr[next];
        int hole = next;
        // Stop at the first element that is not greater than the key.
        while(hole > 0 && key < arr[hole - 1])
        {
            arr[hole] = arr[hole - 1];
            --hole;
        }
        arr[hole] = key;
    }
    return 0;
}
// shell sort
// Sorts dataset `id` into ascending order in place. Runs a gapped insertion
// sort for gaps MAX/2, MAX/4, ..., 1 (the gap is halved after every round).
// Always returns 0.
int sortShell(int id)
{
    long long *arr = data[id];
    for(long long gap = MAX >> 1; gap > 0; gap >>= 1)
    {
        for(long long pos = gap; pos < MAX; ++pos)
        {
            const long long key = arr[pos];
            long long hole = pos;
            // Shift larger elements of this gap-chain one gap to the right.
            while(hole >= gap && key < arr[hole - gap])
            {
                arr[hole] = arr[hole - gap];
                hole -= gap;
            }
            arr[hole] = key;
        }
    }
    return 0;
}
// merge sort
// Sorts dataset `id` into ascending order using a bottom-up merge sort.
// Runs of width 1, 2, 4, ... are merged pairwise into a scratch array of MAX
// elements, which is copied back after every round. Always returns 0.
int sortMerge(int id)
{
    long long *arr = data[id];
    long long *scratch = new long long[MAX];

    for(long long width = 1; width < MAX; width <<= 1)
    {
        long long *out = scratch;
        for(long long lo = 0; lo < MAX; lo += width << 1)
        {
            // [lo, mid) and [mid, hi) are the two runs; clamp both to the array.
            long long mid = lo + width;
            long long hi = mid + width;
            if(mid > MAX)
                mid = MAX;
            if(hi > MAX)
                hi = MAX;

            long long left = lo;
            long long right = mid;
            while(left < mid && right < hi)
            {
                // Take from the right run only when it is strictly smaller.
                if(arr[left] > arr[right])
                    *out++ = arr[right++];
                else
                    *out++ = arr[left++];
            }
            while(left < mid)
                *out++ = arr[left++];
            while(right < hi)
                *out++ = arr[right++];
        }

        // Copy the merged round back so the next round reads from arr again.
        for(long long k = 0; k < MAX; ++k)
            arr[k] = scratch[k];
    }

    delete[] scratch;
    return 0;
}
// quick sort
// Sorts elements [begin, end] (both inclusive) of dataset `id` into ascending
// order in place, using the first element of the range as the pivot.
// Only the smaller partition is handled by recursion; the larger one is
// handled by the loop. This keeps the recursion depth at about log2(n) even
// on inputs that partition badly (already sorted, many equal values), where
// recursing into both sides could go n levels deep and exhaust the stack.
// Always returns 0.
int sortQuick(int id, int begin, int end)
{
    long long *bData = data[id];
    while(begin < end)
    {
        const long long pivot = bData[begin];
        int i = begin, j = end;
        // The pivot's slot is a hole that alternates between the two ends.
        while(i < j)
        {
            while(i < j && bData[j] > pivot)j--;
            bData[i] = bData[j];
            while(i < j && bData[i] <= pivot)i++;
            bData[j] = bData[i];
        }
        bData[i] = pivot;

        if(i - begin < end - i)
        {
            sortQuick(id, begin, i - 1);
            begin = i + 1;
        }
        else
        {
            sortQuick(id, i + 1, end);
            end = i - 1;
        }
    }
    return 0;
}
// Print dataset `id` as a binary heap laid out in an array: one output line
// per tree level (1, 2, 4, ... values), values separated by tabs.
void dispHeap(int id)
{
    long long *arr = data[id];
    int levelWidth = 1;   // number of slots on the current level
    int nextBreak = 1;    // index of the first element of the next level
    for(int idx = 0; idx < MAX; ++idx)
    {
        if(idx == nextBreak)
        {
            cout << endl;
            levelWidth <<= 1;
            nextBreak += levelWidth;
        }
        cout << arr[idx] << "\t";
    }
    cout << endl;
}
// adjust max head
// Sift the element at index `now` of dataset `id` down until neither child
// is larger than it. Only indices up to and including `end` are treated as
// part of the heap.
void adjustMaxHead(int id, int now, int end)
{
    long long *heap = data[id];
    for(;;)
    {
        const int lhs = (now << 1) + 1;
        if(lhs > end)
            return;                       // no children inside the heap

        // Pick the larger child; the right one wins ties when it exists.
        int big = lhs;
        const int rhs = lhs + 1;
        if(rhs <= end && !(heap[lhs] > heap[rhs]))
            big = rhs;

        if(!(heap[big] > heap[now]))
            return;                       // heap property already holds here

        const long long hold = heap[big];
        heap[big] = heap[now];
        heap[now] = hold;
        now = big;
    }
}
// heap sort
// Sorts dataset `id` into ascending order in place: first arranges the array
// as a max-heap, then repeatedly swaps the root (the maximum) with the last
// element of the shrinking heap. Always returns 0.
int sortHeap(int id)
{
    long long *bData = data[id], tem;

    // Build the heap bottom-up starting at the last node that has a child.
    // Indices from MAX/2 upward are leaves, and sifting a leaf does nothing,
    // so skipping them produces the same heap with half the calls.
    for(int i = (MAX >> 1) - 1; i >= 0; i--)
    {
        adjustMaxHead(id, i, MAX - 1);
    }

    for(int end = MAX - 1; end > 0; end--)
    {
        // Restore the heap over [0, end], then move its maximum to `end`.
        adjustMaxHead(id, 0, end);
        tem = bData[0];
        bData[0] = bData[end];
        bData[end] = tem;
    }
    return 0;
}
// Benchmark driver: builds sortNum identical datasets, runs each enabled sort
// on its own dataset and prints the time each step took.
// `id` is the index of the next unused dataset. It is incremented as part of
// each sort call, so the number printed in a label is the 1-based ordinal of
// the sort among those that ran, not the dataset index it used.
int main(int argc, char **argv)
{
    struct timeval oldTime;
    int id = 0;
    // initialize data array
    gettimeofday(&oldTime, NULL);
    initData(sortNum);
    snprintf(buf, sizeof(buf), "initData %lld", MAX);
    showSpendTime(oldTime, buf);
    /*
    The three quadratic sorts are disabled. Enabling them makes seven sorts
    run, each needing its own dataset, so sortNum must be raised to 7 as well.
    // bubbling sort
    gettimeofday(&oldTime, NULL);
    sortBubbling(id++);
    snprintf(buf, sizeof(buf), "%d bubbling sort %lld", id, MAX);
    showSpendTime(oldTime, buf);
    // selection sort
    gettimeofday(&oldTime, NULL);
    sortSelection(id++);
    snprintf(buf, sizeof(buf), "%d selection sort %lld", id, MAX);
    showSpendTime(oldTime, buf);
    // insertion sort
    gettimeofday(&oldTime, NULL);
    sortInsertion(id++);
    snprintf(buf, sizeof(buf), "%d insertion sort %lld", id, MAX);
    showSpendTime(oldTime, buf);
    */
    // shell sort (dataset 0)
    gettimeofday(&oldTime, NULL);
    sortShell(id++);
    snprintf(buf, sizeof(buf), "%d shell sort %lld", id, MAX);
    showSpendTime(oldTime, buf);
    // merge sort (dataset 1)
    gettimeofday(&oldTime, NULL);
    sortMerge(id++);
    snprintf(buf, sizeof(buf), "%d merge sort %lld", id, MAX);
    showSpendTime(oldTime, buf);
    // quick sort (dataset 2)
    gettimeofday(&oldTime, NULL);
    sortQuick(id++, 0, MAX - 1);
    snprintf(buf, sizeof(buf), "%d quick sort %lld", id, MAX);
    showSpendTime(oldTime, buf);
    // heap sort (dataset 3)
    gettimeofday(&oldTime, NULL);
    sortHeap(id++);
    snprintf(buf, sizeof(buf), "%d heap sort %lld", id, MAX);
    showSpendTime(oldTime, buf);
    // display(sortNum);
    freeData(sortNum);
    return 0;
}
下面进行算法测试,不同机器的耗时不一样。同样的思想不同的代码描述也会导致效率不同。
数据集为:1e5个long long型
运行效果如下:
数据集为:1e8个long long型
运行效果如下:
总结发现,快排是最快的,归并排序排第二,还有基数排序、计数排序、桶排序我没有实现。
因为代码具体实现不一样,所以我测试的效率只能说明我的代码的效率。
我还使用了另外两种方式实现堆排序,效果更差,没有体现出堆的特点,所以效率不能提升。最后这个堆排序算法实现了堆的特点,但是效率还是不够理想。关于各个排序算法的具体思想可以查看别的博客,我这里只对以上算法进行实现与分析。
如果是内排序,使用快速排序比较好用。代码实现简单,效率很高。
如果是外排序,使用归并排序比较好用。它可以对文件进行读块,对文件指针偏移,一次读取可以存放的信息量。完成对每两个块的合并,块大小在排序完一次后乘2,两个文件反转进行下一次归并排序,直至整个文件为一块。边合并边向另一个文件写入。放回原文件。
如果要求稳定性,也最好使用归并排序。希尔、快排、堆排序都不稳定。