奇偶排序使用CUDA加速

最新推荐文章于 2022-11-03 18:12:00 发布

我长得很帅

最新推荐文章于 2022-11-03 18:12:00 发布

阅读量540

点赞数 2

分类专栏： CUDA 文章标签： cuda 排序算法

本文链接：https://blog.csdn.net/weixin_42773994/article/details/108392206

版权

CUDA 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

奇偶排序

可以证明，奇偶排序是一种能使序列逆序数降为 0（即序列完全有序）的迭代算法：当连续的一轮奇排序和一轮偶排序中发生交换的元素对数均为 0 时，序列已经有序，排序收敛。
每次奇排序或偶排序大约需要 LENGTH/2 组相邻元素的比较，并且在同一次排序中，各组之间没有依赖关系。因此在序列长度较大时，可以使用 CUDA 对其加速：把每一组比较交给一个 CUDA 线程执行。奇排序（比较下标为偶数的元素与其右侧相邻元素）有如下代码:

// Odd phase: the thread whose index is even compares element i with i+1.
// The index is used directly (no modulo wrap): wrapping would make threads
// with tid >= length work on a pair that another thread already owns.
int curLocation = *tid;
// Skip odd indices, out-of-range indices and the last element (no right neighbour).
if(!(curLocation % 2) && curLocation >= 0 && curLocation < *length-1)
    if(array[curLocation] > array[curLocation+1])
    {
        // Each pair is owned by exactly one thread in this phase, so a plain
        // swap is sufficient; only the shared swap counter needs an atomic.
        int temp = array[curLocation];
        array[curLocation] = array[curLocation+1];
        array[curLocation+1] = temp;
        atomicAdd(vOdd, 1);
    }

偶排序（比较下标为奇数的元素与其右侧相邻元素）有如下代码:

// Even phase: the thread whose index is odd compares element i with i+1.
// The index is used directly (no modulo wrap): wrapping would make threads
// with tid >= length work on a pair that another thread already owns.
int curLocation = *tid;
// Skip even indices, out-of-range indices and the last element (no right neighbour).
if(curLocation % 2 == 1 && curLocation < *length-1)
    if(array[curLocation] > array[curLocation+1])
    {
        // Each pair is owned by exactly one thread in this phase, so a plain
        // swap is sufficient; only the shared swap counter needs an atomic.
        int temp = array[curLocation];
        array[curLocation] = array[curLocation+1];
        array[curLocation+1] = temp;
        atomicAdd(vEven, 1);
    }

每个线程先执行奇排序，再执行偶排序。偶排序必须在所有线程的奇排序都完成之后才能开始，因此两者之间需要调用线程同步函数。__syncthreads() 只能同步同一个 BLOCK 内的线程；由于这里的 GRID 中只设置了一个 BLOCK，它就足以保证所有分组比较全部完成，代码如下:

// First index handled by this thread, and the distance to its next index.
int tId = threadIdx.x + blockIdx.x * blockDim.x;
int stride = blockDim.x * gridDim.x;
// Odd phase. The loop bound keeps every index inside the array, and the
// stride lets one block cover arrays longer than the block itself.
for(int i = tId; i < *length; i += stride)
    oddSort(array, length, &i, vOdd);
// Wait until every thread of the block has finished the odd phase.
// The barrier is outside the loops so that all threads reach it.
__syncthreads();
// Even phase, same index assignment.
for(int i = tId; i < *length; i += stride)
    evenSort(array, length, &i, vEven);
// Wait until every thread of the block has finished the even phase.
__syncthreads();

于是整个CUDA并行加速奇偶排序算法如下所示:

#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <iostream>

using namespace std;

#define LENGTH 1024

/**
 * Odd phase of odd-even transposition sort for a single index.
 *
 * If *tid is an even index that has a right-hand neighbour, compares
 * array[*tid] with array[*tid + 1] and swaps them when they are out of
 * ascending order. Any other index (odd, negative, last element, or beyond
 * the array) is ignored.
 *
 * @param array  Array being sorted in place.
 * @param length Pointer to the number of elements in array.
 * @param tid    Pointer to the index this call is responsible for.
 * @param vOdd   Swap counter; incremented atomically once per swap.
 */
__device__ void oddSort(int *array, int *length, int* tid, int *vOdd)
{
    // Use the index as-is. Wrapping it with a modulo would map indices
    // >= *length onto pairs owned by other threads (a data race), and would
    // divide by zero for an empty array.
    const int curLocation = *tid;
    if (curLocation < 0 || curLocation % 2 != 0 || curLocation >= *length - 1)
        return;

    const int left = array[curLocation];
    const int right = array[curLocation + 1];
    if (left > right)
    {
        // The pair (i, i+1) belongs to exactly one index in this phase, so a
        // plain swap is enough; only the shared counter needs an atomic.
        array[curLocation] = right;
        array[curLocation + 1] = left;
        atomicAdd(vOdd, 1);
    }
}

/**
 * Even phase of odd-even transposition sort for a single index.
 *
 * If *tid is an odd index that has a right-hand neighbour, compares
 * array[*tid] with array[*tid + 1] and swaps them when they are out of
 * ascending order. Any other index (even, negative, last element, or beyond
 * the array) is ignored.
 *
 * @param array  Array being sorted in place.
 * @param length Pointer to the number of elements in array.
 * @param tid    Pointer to the index this call is responsible for.
 * @param vEven  Swap counter; incremented atomically once per swap.
 */
__device__ void evenSort(int *array, int *length, int* tid, int *vEven)
{
    // Use the index as-is. Wrapping it with a modulo would map indices
    // >= *length onto pairs owned by other threads (a data race), and would
    // divide by zero for an empty array.
    const int curLocation = *tid;
    // "% 2 != 1" also rejects negative indices, whose remainder is 0 or -1.
    if (curLocation % 2 != 1 || curLocation >= *length - 1)
        return;

    const int left = array[curLocation];
    const int right = array[curLocation + 1];
    if (left > right)
    {
        // The pair (i, i+1) belongs to exactly one index in this phase, so a
        // plain swap is enough; only the shared counter needs an atomic.
        array[curLocation] = right;
        array[curLocation + 1] = left;
        atomicAdd(vEven, 1);
    }
}


/**
 * Runs one odd phase followed by one even phase of odd-even transposition
 * sort over the whole array.
 *
 * Launch layout: 1-D, and it must be a single block, because the two phases
 * are separated by __syncthreads(), which only synchronises the threads of
 * one block. Any block size works: each thread walks the array with a
 * stride of the total thread count, so arrays longer than the block are
 * still fully covered and surplus threads simply do nothing.
 *
 * @param array  Array to sort in place.
 * @param length Pointer to the number of elements in array.
 * @param vOdd   Counter that receives the number of swaps of the odd phase.
 * @param vEven  Counter that receives the number of swaps of the even phase.
 */
__global__ void oddEvenSort(int *array, int *length, int *vOdd, int* vEven)
{
    const int n = *length;
    const int tId = threadIdx.x + blockIdx.x * blockDim.x;
    const int stride = blockDim.x * gridDim.x;

    // Odd phase. Bounding the loop by n keeps every index inside the array,
    // so no thread is ever handed an index that belongs to another thread.
    for (int i = tId; i < n; i += stride)
        oddSort(array, length, &i, vOdd);

    // Wait until every thread of the block has finished the odd phase.
    // The barrier sits outside the loops so that all threads reach it.
    __syncthreads();

    // Even phase, same index assignment.
    for (int i = tId; i < n; i += stride)
        evenSort(array, length, &i, vEven);

    // Wait until every thread of the block has finished the even phase.
    __syncthreads();
}


// Reports a failed CUDA runtime call on stderr; returns true when it succeeded.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    cerr<< "CUDA error in "<< what<< ": "<< cudaGetErrorString(status)<< endl;
    return false;
}

/**
 * Sorts a copy of `array` on the GPU with odd-even transposition sort and
 * prints the progress of every round and the sorted result to stdout.
 *
 * The caller's `array` is only read: the sorted data is printed, not copied
 * back into it.
 *
 * @param array  Host array to sort.
 * @param length Number of elements in array; a NULL array or a non-positive
 *               length prints an empty result.
 * @param odd    Start value of the odd-phase swap counter for every round.
 * @param even   Start value of the even-phase swap counter for every round.
 *
 * A round consists of one kernel launch (one odd and one even phase). The
 * rounds stop as soon as neither counter moved away from its start value,
 * i.e. no swap happened. On any CUDA or allocation failure an error is
 * written to stderr, all buffers are released and the function returns
 * without printing a result.
 */
extern "C" void hostOddEvenSort(int *array, int length, int odd, int even)
{
    // Nothing to sort: print the empty result without touching the device.
    if (array == NULL || length <= 0)
    {
        cout<< "."<< endl;
        return;
    }

    const size_t bytes = length*sizeof(int);
    int *arrayHost = (int*)malloc(bytes);
    if (arrayHost == NULL)
    {
        cerr<< "hostOddEvenSort: out of host memory"<< endl;
        return;
    }

    // Pointers start as NULL so the cleanup below is valid on every path.
    int *arrayDev = NULL, *lengthDev = NULL, *vOdd = NULL, *vEven = NULL;
    bool ok = cudaOk(cudaMalloc(&arrayDev, bytes), "cudaMalloc(array)")
        && cudaOk(cudaMemcpy(arrayDev, array, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(array -> device)")
        && cudaOk(cudaMalloc(&lengthDev, sizeof(int)), "cudaMalloc(length)")
        && cudaOk(cudaMemcpy(lengthDev, &length, sizeof(int), cudaMemcpyHostToDevice), "cudaMemcpy(length -> device)")
        && cudaOk(cudaMalloc(&vOdd, sizeof(int)), "cudaMalloc(vOdd)")
        && cudaOk(cudaMalloc(&vEven, sizeof(int)), "cudaMalloc(vEven)");

    int oddCount = odd, evenCount = even;
    while (ok)
    {
        // Reset both swap counters to their start values for this round.
        ok = cudaOk(cudaMemcpy(vOdd, &odd, sizeof(int), cudaMemcpyHostToDevice), "cudaMemcpy(vOdd -> device)")
            && cudaOk(cudaMemcpy(vEven, &even, sizeof(int), cudaMemcpyHostToDevice), "cudaMemcpy(vEven -> device)");
        if (!ok)
            break;

        oddEvenSort<<<1, LENGTH>>>(arrayDev, lengthDev, vOdd, vEven);

        // cudaGetLastError catches a bad launch; the blocking copies below
        // wait for the kernel and report any fault raised while it ran.
        ok = cudaOk(cudaGetLastError(), "oddEvenSort launch")
            && cudaOk(cudaMemcpy(&oddCount, vOdd, sizeof(int), cudaMemcpyDeviceToHost), "cudaMemcpy(vOdd -> host)")
            && cudaOk(cudaMemcpy(&evenCount, vEven, sizeof(int), cudaMemcpyDeviceToHost), "cudaMemcpy(vEven -> host)");
        if (!ok)
            break;

        cout<< "odd: "<< oddCount<< "even: "<< evenCount<< endl;

        // Compare against the start values rather than 0, so a non-zero
        // odd/even argument cannot turn this into an endless loop.
        if (oddCount == odd && evenCount == even)
            break;
    }

    if (ok && cudaOk(cudaMemcpy(arrayHost, arrayDev, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(array -> host)"))
    {
        for(int i=0; i<length; i++)
            cout<< arrayHost[i]<< ",";
        cout<< "."<< endl;
    }

    // Release everything; cudaFree and free both accept NULL.
    cudaFree(vEven);
    cudaFree(vOdd);
    cudaFree(lengthDev);
    cudaFree(arrayDev);
    free(arrayHost);
}


/**
 * Demo entry point: builds a strictly descending array of LENGTH integers
 * (LENGTH, LENGTH-1, ..., 1) and hands it to hostOddEvenSort, which sorts a
 * copy on the GPU and prints the result.
 */
int main()
{
    cout<< "begin: "<< endl;
    int *array = new int[LENGTH];
    // Fill with a fully reversed sequence as input.
    for(int i=0; i<LENGTH; i++)
        array[i] = LENGTH - i;
    // Run the odd-even sort; the sorted sequence is printed by the callee.
    hostOddEvenSort(array, LENGTH, 0, 0);
    // Release the input buffer, which the original never freed.
    delete[] array;
    return 0;
}