奇偶排序使用CUDA加速

奇偶排序

可以证明奇偶排序是一种使序列逆序数降为0(全局最优)的迭代算法,当连续一轮奇排列和偶排列完成后,交换的元素个数都为0个,则排序已收敛到全局最优。
考虑到每次奇或偶排列需要LENGTH/2组比较,且在每次排序中,组之间没有依赖关系,于是在序列长度较大时,使用CUDA对其加速成为可能,将每一组比较对作为一个CUDA线程执行,于是奇排序有如下代码:

int curLocation = *tid % *length;
if(!(curLocation % 2) && curLocation != *length-1)
	if(array[curLocation] > array[curLocation+1])
	{
        int temp = array[curLocation];
        atomicExch(&array[curLocation], array[curLocation+1]);
        atomicExch(&array[curLocation+1], temp);
        atomicAdd(vOdd, 1);
	 }

偶排序有如下代码:

int curLocation = *tid % *length;
if(curLocation % 2 == 1 && curLocation != *length-1)
    if(array[curLocation] > array[curLocation+1])
    {
        int temp = array[curLocation];
        atomicExch(&array[curLocation], array[curLocation+1]);
        atomicExch(&array[curLocation+1], temp);
        atomicAdd(vEven, 1);
    }

每个线程执行一组奇、偶排序,考虑到偶排序需要在奇排序完成之后才能执行,因此需要使用线程同步函数,由于GRID中只设置了一个BLOCK,于是线程同步函数就可以保证所有分组比较全部完成,代码如下:

//printf("4\n");
int tId = threadIdx.x + blockIdx.x * blockDim.x;
//printf("tID: %d\n", tId);
oddSort(array, length, &tId, vOdd);
// 等待Block内所有线程完成奇排序
__syncthreads();  
evenSort(array, length, &tId, vEven);
// 等待Block内所有线程完成偶排序
__syncthreads();

于是整个CUDA并行加速奇偶排序算法如下所示:

#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <iostream>

using namespace std;

#define LENGTH 1024

__device__ void oddSort(int *array, int *length, int* tid, int *vOdd)
{
    int curLocation = *tid % *length;
    if(!(curLocation % 2) && curLocation != *length-1)
        if(array[curLocation] > array[curLocation+1])
        {
            int temp = array[curLocation];
            atomicExch(&array[curLocation], array[curLocation+1]);
            atomicExch(&array[curLocation+1], temp);
            atomicAdd(vOdd, 1);
        }
}

__device__ void evenSort(int *array, int *length, int* tid, int *vEven)
{
    int curLocation = *tid % *length;
    if(curLocation % 2 == 1 && curLocation != *length-1)
        if(array[curLocation] > array[curLocation+1])
        {
            int temp = array[curLocation];
            atomicExch(&array[curLocation], array[curLocation+1]);
            atomicExch(&array[curLocation+1], temp);
            atomicAdd(vEven, 1);
        }
}


__global__ void oddEvenSort(int *array, int *length, int *vOdd, int* vEven)
{
    //printf("4\n");
    int tId = threadIdx.x + blockIdx.x * blockDim.x;
    //printf("tID: %d\n", tId);
    oddSort(array, length, &tId, vOdd);
    // 等待Block内所有线程完成奇排序
    __syncthreads();  
    evenSort(array, length, &tId, vEven);
    // 等待Block内所有线程完成偶排序
    __syncthreads();
}


extern "C" void hostOddEvenSort(int *array, int length, int odd, int even)
{
    int *arrayDev, *lengthDev, *vOdd, *vEven;
    cudaMalloc(&arrayDev, length*sizeof(int));
    cudaMemcpy(arrayDev, array, length*sizeof(int), cudaMemcpyHostToDevice);
    cudaMalloc(&lengthDev, sizeof(int));
    cudaMemcpy(lengthDev, &length, sizeof(int), cudaMemcpyHostToDevice);
    cudaMalloc(&vOdd, sizeof(int));
    cudaMalloc(&vEven, sizeof(int));

    int *arrayHost, *vOddHost, *vEvenHost;
    arrayHost = (int*)malloc(length*sizeof(int));
    vOddHost = (int*)malloc(sizeof(int));
    vEvenHost = (int*)malloc(sizeof(int));
    do
    {
        cudaMemcpy(vOdd, &odd, sizeof(int), cudaMemcpyHostToDevice);
        cudaMemcpy(vEven, &even, sizeof(int), cudaMemcpyHostToDevice);
        //cout<< 2<< endl;
        oddEvenSort<<<1, LENGTH>>>(arrayDev, lengthDev, vOdd, vEven);
        //cout<< 3<< endl;
        cudaMemcpy(vOddHost, vOdd, sizeof(int), cudaMemcpyDeviceToHost);
        cudaMemcpy(vEvenHost, vEven, sizeof(int), cudaMemcpyDeviceToHost);
        cout<< "odd: "<< *vOddHost<< "even: "<< *vEvenHost<< endl;
    }while(*vOddHost!=0 || *vEvenHost!=0);
    cudaMemcpy(arrayHost, arrayDev, length*sizeof(int), cudaMemcpyDeviceToHost);
    for(int i=0; i<length; i++)
        cout<< arrayHost[i]<< ",";
    cout<< "."<< endl;
}


int main()
{
    cout<< "begin: "<< endl;
    int *array = new int[LENGTH];
    // cout<< -1<< endl;
    // 逆序数输入
    for(int i=0; i<LENGTH; i++)
        array[i] = LENGTH - i;
    // cout<< 0<< endl;
    // 奇偶排序后顺序输出
    hostOddEvenSort(array, LENGTH, 0, 0);
    return 0;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值