众所周知,著名的八大排序算法相信大家都看过,但我唯独对归并排序情有独钟。因为这个算法可以轻松愉快地改造成并行排序,而且归并排序是稳定的。当数据量达到一定级别时,再优秀的算法也要消耗相当的时间,这时就必须利用多线程来加速计算。但多线程又是极其难以控制的东西:必须把整体划分为彼此绝对互不影响的N个部分,分别计算之后再统一合并。归并排序恰恰具有这个性质,而且划分还是均分的。归并的思想是什么呢?是分治:把一个大问题划分成N个小问题,分别解决之后再合并,这样可以使问题变得简单,或者获得更快的速度。
废话不多说,先看看测试。我的测试是对2600万左右的数据进行排序(跑亿级的数据,2个线程估计还是得费点时间)。为了简单,我只做了一个例程,设计上只开2个线程与单线程做对比,看看有没有问题;实际应用中这种设计肯定是不行的。即使算上创建线程等额外耗时,两者基本上也有2倍左右的差距。
#include <Windows.h>
#include <process.h>

#include <atomic>
#include <csignal>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <map>
#include <string>
using namespace std;
// Per-worker completion flags: main() clears both entries to 0, each worker
// thread stores 1 into the slot selected by its MultiArith_::identity, and
// handler() only performs the final merge once both slots read 1.
int threadIdentity[2];
// Untyped registry that main() uses to hand its state to handler().
// Keys written by main(): "a" (int* data buffer), "tmp" (int* scratch buffer),
// "size" (int* pointing at main's element count), "t1" (clock_t* pointing at
// main's start timestamp). Values are raw void* and are cast back by the reader.
map<string, void*> _map;
/**
 * Merges two adjacent, already sorted runs of src into one sorted run.
 *
 * The runs are src[start..mid] and src[mid+1..end] (all bounds inclusive).
 * The merged result is built in tmp[start..end] and then copied back into
 * src[start..end]; elements outside that index range are not touched in
 * either array.
 *
 * @param src   array holding both runs; receives the merged result
 * @param start first index of the left run
 * @param mid   last index of the left run
 * @param end   last index of the right run
 * @param tmp   scratch array, must be valid for indices start..end
 */
void Merge(int* src,int start,int mid,int end,int* tmp)
{
    int left = start;     // next unread element of the left run
    int right = mid + 1;  // next unread element of the right run
    int out = start;      // next free slot in tmp

    while (left <= mid && right <= end)
    {
        // "<=" (not "<"): on equal keys take the element from the left run
        // first, so equal elements keep their original relative order and
        // the merge is stable.
        if (src[left] <= src[right])
        {
            tmp[out++] = src[left++];
        }
        else
        {
            tmp[out++] = src[right++];
        }
    }

    // At most one of the two runs still has elements left; append them.
    while (left <= mid)
    {
        tmp[out++] = src[left++];
    }
    while (right <= end)
    {
        tmp[out++] = src[right++];
    }

    // Publish the merged run back into the source array.
    for (int idx = start; idx <= end; ++idx)
    {
        src[idx] = tmp[idx];
    }
}
/**
 * Recursive top-down merge sort of src[start..end] (bounds inclusive).
 *
 * The range is split at its midpoint, both halves are sorted recursively and
 * then combined with Merge(). Ranges with fewer than two elements
 * (start >= end) are left untouched.
 *
 * @param src   array to sort in place
 * @param start first index of the range
 * @param end   last index of the range
 * @param tmp   scratch array handed to Merge(); must be valid for indices
 *              start..end
 */
void MergeSort(int* src,int start,int end,int* tmp)
{
    if (start >= end)
    {
        return;  // zero or one element: already sorted
    }

    // Overflow-safe midpoint: start + end can exceed INT_MAX for very large
    // ranges, while end - start cannot when start <= end and both are
    // non-negative. For such ranges this is the same split point as
    // (start + end) / 2.
    const int mid = start + (end - start) / 2;

    MergeSort(src, start, mid, tmp);
    MergeSort(src, mid + 1, end, tmp);
    Merge(src, start, mid, end, tmp);
}
/**
 * Debug helper: writes src[start..end] (bounds inclusive) to standard output.
 *
 * Every element is followed by a single space; the line is then terminated
 * and followed by one empty line. An empty range (end < start) prints only
 * the two line breaks.
 */
void print(int* src,int start,int end)
{
    int pos = start;
    while (pos <= end)
    {
        std::cout << src[pos];
        std::cout << ' ';
        ++pos;
    }
    std::cout << std::endl;
    std::cout << std::endl;
}
// Work description handed to one sorting worker thread (see multiArith).
struct MultiArith_
{
// Slot in the global threadIdentity array that the worker marks when done.
int identity;
// Array to sort; the worker sorts src[start..end] in place.
int* src;
// Scratch array passed through to MergeSort/Merge for the same index range.
int* tmp;
// First index (inclusive) of the range this worker sorts.
int start;
// Last index (inclusive) of the range this worker sorts.
int end;
};
/**
 * Thread entry point (signature as required by _beginthreadex): sorts the
 * slice described by a MultiArith_ and announces completion.
 *
 * After sorting, the worker marks its slot in threadIdentity. Only the worker
 * that finishes LAST raises SIGINT, so the installed handler is invoked
 * exactly once, after every slot has been set.
 *
 * Previously every worker raised SIGINT after an unsynchronised flag write.
 * When both workers finished at about the same time, both handler
 * invocations could observe both flags set and run the final merge twice,
 * concurrently, on the same buffers. A second raise could also arrive while
 * the signal disposition was temporarily reset to the default action.
 *
 * @param arg pointer to the MultiArith_ describing this worker's job; it must
 *            stay valid until the thread has finished
 * @return always 0
 */
unsigned _stdcall multiArith(void* arg)
{
    MultiArith_* job = static_cast<MultiArith_*>(arg);
    MergeSort(job->src, job->start, job->end, job->tmp);

    // Number of workers that have completed their slice so far.
    static std::atomic<int> finishedWorkers(0);
    const int workerCount =
        static_cast<int>(sizeof(threadIdentity) / sizeof(threadIdentity[0]));

    threadIdentity[job->identity] = 1;

    // The sequentially consistent read-modify-write orders each worker's
    // flag write and sorted data before the last worker's handler call, so
    // the handler sees every flag and every sorted slice.
    const int finishedNow = finishedWorkers.fetch_add(1) + 1;
    if (finishedNow == workerCount)
    {
        raise(SIGINT);
    }
    return 0;
}
/**
 * SIGINT handler that doubles as the "worker finished" notification.
 *
 * While at least one threadIdentity slot is still unset, the handler only
 * re-installs itself. Once both slots are set it, exactly once:
 *   1. merges the two sorted halves of the buffer registered under "a",
 *   2. prints the clock() ticks elapsed since the timestamp registered
 *      under "t1" (labelled as milliseconds),
 *   3. refills the buffer with rand() values and times a single-threaded
 *      MergeSort of the whole buffer for comparison.
 *
 * NOTE(review): this does far more than is safe in an asynchronous signal
 * handler (stream I/O, map lookups). It relies on raise() invoking the
 * handler synchronously on the calling thread — confirm against the CRT
 * documentation before reusing this pattern.
 *
 * @param sig signal number (unused)
 */
void handler(int sig)
{
    (void)sig;

    // Re-arm immediately. The Microsoft CRT documents that the disposition
    // is reset to SIG_DFL before the handler runs, so re-installing only at
    // the end would leave a long window in which another SIGINT terminates
    // the process.
    signal(SIGINT, handler);

    if (threadIdentity[0] != 1 || threadIdentity[1] != 1)
    {
        return;  // at least one worker is still sorting
    }

    // One-shot guard: two invocations racing past the check above (or a
    // later Ctrl+C) must not merge and benchmark a second time.
    static std::atomic_flag reported = ATOMIC_FLAG_INIT;
    if (reported.test_and_set())
    {
        return;
    }

    int* src = static_cast<int*>(_map["a"]);
    int* tmp = static_cast<int*>(_map["tmp"]);
    const int size = *static_cast<int*>(_map["size"]);
    const int mid = (size - 1) / 2;  // same split point the workers were given

    // Debug aids:
    // print(src, 0, mid);
    // print(src, mid + 1, size - 1);
    Merge(src, 0, mid, size - 1, tmp);
    // print(src, 0, size - 1);
    cout << "Finished!2 Thread Use Time(ms):" << clock() - *static_cast<clock_t*>(_map["t1"]) << endl;

    // Baseline: same amount of fresh random data, sorted on this thread only.
    for (int i = 0; i < size; ++i)
    {
        src[i] = rand();
    }
    const clock_t t1 = clock();
    MergeSort(src, 0, size - 1, tmp);
    cout << "Finished!Use Time(ms):" << clock() - t1 << endl;

    // Everything is reported; restore the default action so a later Ctrl+C
    // terminates the process instead of re-entering this handler.
    signal(SIGINT, SIG_DFL);
}
int main()
{
signal(SIGINT, handler);
srand(time(0));
int size = 1024/4*1024*1024/10;
int* a = (int*)malloc(sizeof(int)*size);
int* tmp = (int*)malloc(sizeof(int)*size);
int mid = (size - 1) / 2;
for (int i = 0; i<size; ++i)
{
a[i] = rand();
}
//print(a, 0, size - 1);
_map["a"] = a;
_map["tmp"] = tmp;
_map["size"] = &size;
MultiArith_ ma[2];
ma[0].src = a;
ma[0].tmp = tmp;
ma[0].start = 0;
ma[0].end = mid;
ma[0].identity = 0;
ma[1].src = a;
ma[1].tmp = tmp;
ma[1].start = mid+1;
ma[1].end = size-1;
ma[1].identity = 1;
threadIdentity[0] = 0;
threadIdentity[1] = 0;
clock_t t1 = clock();
_map["t1"] = &t1;
_beginthreadex(0, 0, multiArith, &ma[0], 0, 0);
_beginthreadex(0, 0, multiArith, &ma[1], 0, 0);
Sleep(INFINITE);
}