用多线程加速快排(方式一)

目的:

在双核处理器上用多线程将快排运行速度加倍。

硬件条件:

电脑必须是双核及以上,这样两个线程才能真正分布到两个处理器上运行。

开发环境:

VC6.0


经典的快排如下(我做了一个非常小的修改,见程序中的注释):

#include<iostream>
#include<time.h>
#include<windows.h>
#include<process.h>
using namespace std;
int a[1000]={
1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
};

void quicksort(int p,int r);

// Partitions the global array a over the inclusive index range [p, r].
// a[r] is taken as the pivot; every element <= pivot is moved in front of
// it and the pivot is then swapped into its final slot.
// Returns the index at which the pivot ends up.
int partition(int p,int r)
{
    const int pivot = a[r];
    int tmp;
    int boundary = p;   // next slot for an element that is <= pivot

    for (int scan = p; scan < r; ++scan)
    {
        if (a[scan] <= pivot)
        {
            tmp = a[boundary];
            a[boundary] = a[scan];
            a[scan] = tmp;
            ++boundary;
        }
    }

    // The only difference from the textbook quicksort: these two nested
    // empty loops merely make the program run longer so that its running
    // time is easy to measure. The multithreaded version further down does
    // the same thing.
    // NOTE(review): an optimizing build may remove empty loops altogether.
    for (int outer = 0; outer < 1000; ++outer)
    {
        for (int inner = 0; inner < 1000; ++inner)
        {
        }
    }

    // Drop the pivot between the two regions.
    tmp = a[boundary];
    a[boundary] = a[r];
    a[r] = tmp;
    return boundary;
}

void quicksort(int p,int r)
{
if(p<r) 
{
int q = partition(p,r);
quicksort(p,q-1);
quicksort(q+1,r);
}
}

// Single-threaded benchmark: sorts all 1000 elements of a on the calling
// thread and prints how long the sort took.
int main()
{
    const clock_t start = clock();
    quicksort(0, 999);
    const clock_t finish = clock();

    // Convert clock ticks to milliseconds. With the Microsoft C runtime
    // CLOCKS_PER_SEC is 1000, so the printed number is unchanged there.
    const double elapsed_ms = 1000.0 * (double)(finish - start) / CLOCKS_PER_SEC;
    cout<<"  time is : "<<elapsed_ms<<endl;

    // Uncomment to print the first 100 sorted values for a quick check:
    // for(int r=0;r<100;r++)
    // cout<<a[r]<<endl;
    return 0;
}
好了,上面是经典版的快排,我要开始改了。
我的改动有两处:

第一,增加了一个
unsigned __stdcall  quicksort_thread(void *),它的排序逻辑和经典版本的void quicksort(int p,int r)完全一样,只是排序区间改为从全局变量first_thread_p、first_thread_r读取,并在结束时把thread_end置为1。之所以要有这个函数,只是因为需要一个函数作为所创建子线程的入口函数;子线程只会通过它进入一次,之后的递归都走正常的void quicksort(int p,int r)。

第二,main函数也要改动一点。在main函数中先用partition(int p,int r)将原数据分成两部分,前一部分交给子线程处理,后一部分交给主线程处理(经典版是把全部数据都交给主线程处理。虽然经典版中程序员没有创建新线程,但进程本身的执行也是一个线程,为了和程序员手动创建的线程相区分,我把它叫做主线程。总之,经典版快排只有一个主线程,多线程版则有一个主线程和一个子线程)。注意,如果主线程中这次partition(int p,int r)操作没能把数据大致平均地分开(也就是划分点不在中间附近),那么采用多线程的效果会大打折扣。比如说,如果划分的结果是前一部分只有1个元素,剩下的全部作为第二部分给主线程处理,那么前一部分很快就被子线程处理完,而子线程处理完后并不会去帮主线程一起处理第二部分的数据,这样效率几乎没有提高。下面的多线程版代码中,数组最后一个元素是20而不是经典版中的534:partition以最后一个元素为基准,而数组中约52%的元素不大于20,所以第一次划分的划分点大致落在中间。

代码如下:

#include<iostream>
#include<time.h>
#include<windows.h>
#include<process.h>
using namespace std;

int first_thread_p,first_thread_r;
int thread_end=0;//子线程执行完后,将这个置为1,主线程就知道子线程也执行完了,这个变量是用来同步的。
int a[1000]={
1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,20
};

void quicksort(int p,int r);

// Rearranges the global array a within the inclusive range [p, r] around
// the pivot value a[r]: elements <= pivot end up before the pivot, the
// remaining elements after it. Returns the pivot's final index.
int partition(int p,int r)
{
    const int pivotValue = a[r];
    int held;
    int store = p;      // where the next element <= pivotValue is placed

    for (int cur = p; cur != r; ++cur)
    {
        if (!(a[cur] > pivotValue))
        {
            held = a[store];
            a[store] = a[cur];
            a[cur] = held;
            store = store + 1;
        }
    }

    // Artificial delay (1000 x 1000 empty iterations) so that run time is
    // large enough to measure.
    // NOTE(review): an optimizing build may remove empty loops altogether.
    for (int row = 0; row < 1000; ++row)
    {
        for (int col = 0; col < 1000; ++col)
        {
        }
    }

    // Put the pivot between the two regions.
    held = a[store];
    a[store] = a[r];
    a[r] = held;
    return store;
}


unsigned __stdcall  quicksort_thread(void *)
{
int p=first_thread_p;
int r=first_thread_r;
if(p<r) 
{
int q = partition(p,r);
quicksort(p,q-1);
quicksort(q+1,r);
}
thread_end=1;
return 0;
}


void quicksort(int p,int r)
{
if(p<r) 
{
int q = partition(p,r);
quicksort(p,q-1);
quicksort(q+1,r);
}
}


int main()
{
HANDLE hThread2;
unsigned int threadID1;
clock_t start, end,t;
int j=0,k=0;
double total_time;
start=clock();

t=partition(0,999);
first_thread_p=0;first_thread_r=t-1;
hThread2 = (HANDLE)_beginthreadex(NULL, 0, &quicksort_thread, NULL, 0, &threadID1);
quicksort(t+1,999);
while(thread_end==0)
{
}
end=clock();
total_time=(double)(end-start);
cout<<"total  time is : "<<total_time<<endl;
return 0;
}


运行结果:

经典快排的运行时间是(运行了3次,得到了3组数据(事实上,还得到了更多的数据,大部分都是这个数据,我去掉了个别和大部分数据差异特别大的)):

3992
3993
3971

而经过简单改造成的多线程版本的快排的运行时间是( 运行了3次,3组数据):

1883 

2005

1883

通过比较可以看出,节省了近一半的时间。如果在运行时查看资源管理器,也可以看到,经典版的程序CPU利用率

只有50%左右,说明只有一个处理器在工作,另一个处理器在睡大觉。运行多线程版(在这里其实是2个线程)时CPU利用率是100%,说明

此时两个处理器都在运行。——结语:多线程充分利用了多处理器的硬件条件,加速了快排的执行。

这是快排多线程版本一,它的局限在于在主线程中第一次用partition(int p,int r)时,必须将原数据分为大小尽可能相等的两部分,原因上面解释过了。

我还有个快排的多线程的版本二,那是另一种思路,可以避开这个问题。

版本二的思路会比较复杂,但解决问题的思路更有意思一些,见我的另一篇博客《用多线程加速快排(方式二)》。





  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值