奇偶冒泡排序的CUDA实现

最新推荐文章于 2022-04-13 15:34:33 发布

maowenge

最新推荐文章于 2022-04-13 15:34:33 发布

阅读量1.2k

点赞数

分类专栏：算法

算法专栏收录该内容

53 篇文章 0 订阅

订阅专栏

奇偶冒泡排序（奇偶交换排序）把比较分为奇数下标和偶数下标两个阶段：在同一个阶段（奇或偶）中，每隔一个元素处理一对相邻元素，各对元素互不重叠、互不影响，因此可以并行处理。事实上，奇偶冒泡排序在串行 CPU 上没有任何优势，它就是为并行而生的。

[cpp]view plaincopy 
   
 #include <cuda.h>  
 #include <time.h>  
 #include <stdio.h>  
 #include <stdlib.h>  
 #include <math.h>  
 #include <pz_cpy>  
 #include "cuPrintf.cu"  
 /*
  * Enumerates the CUDA devices reported by the runtime and prints the name
  * of each one on its own line.
  *
  * Returns true only if at least one device is reported and every per-device
  * call succeeds; returns false otherwise.
  *
  * Side effect: cudaSetDevice() is called for every index in turn, so the
  * last enumerated device is the current device when this returns true.
  */
 bool InitGPUSet()
 {
     int num = 0;
     // A failed query or an empty device list both mean there is nothing to run on.
     if(cudaSuccess != cudaGetDeviceCount(&num) || num <= 0)  return false;
     for(int i = 0; i < num; ++ i)
     {
         cudaDeviceProp tCard;
         if(cudaSuccess != cudaSetDevice(i))  return false;
         // Only print the name once the properties were actually filled in.
         if(cudaSuccess != cudaGetDeviceProperties(&tCard, i))  return false;
         printf("GPU: %s\n", tCard.name);
     }
     return true;
 }
 /*
  * Initialises the cuPrintf device-side print facility.
  *
  * Returns true when cudaPrintfInit() reports cudaSuccess, false otherwise.
  * The status code is compared directly instead of matching the text of
  * cudaGetErrorString(), which is meant for display and is not a stable key.
  */
 bool InitCuPrint()
 {
     return cudaSuccess == cudaPrintfInit();
 }
 /*
  * Even phase of odd-even transposition sort.
  *
  * Every thread with an even global index tid compares the pair
  * (ary[tid], ary[tid + 1]) and swaps it into ascending order. Pairs handled
  * in this phase never overlap, so the swaps do not interfere with each other.
  *
  * ary  : array of `size` ints, dereferenced in device code.
  * size : number of elements in ary.
  *
  * Launch layout: 1D grid and 1D blocks; each thread handles only its own
  * index, so the launch must provide at least `size` threads in total to
  * cover the whole array. No shared memory is used.
  *
  * Debug output goes through cuPrintf. The array dumps read elements that
  * other threads may be swapping at the same time, so a dump can show a
  * partially updated array.
  */
 __global__ void even_sort(int* ary, int size)
 {
     // Global thread index, numbered from 0 (equals threadIdx.x for a one-block launch).
     int tid = blockIdx.x * blockDim.x + threadIdx.x;
     if(0 == tid % 2)  // even index: this thread owns the pair (tid, tid + 1)
     {
         cuPrintf("before swap %d\n", tid);
         for(int i = 0; i < size; ++ i)  cuPrintf("%d ", ary[i]);
         cuPrintf("\n");
         // The bounds check must come first so ary[tid + 1] is never read past the end.
         if(tid + 1 < size && ary[tid] > ary[tid + 1])
         {
             int tp = ary[tid];
             ary[tid] = ary[tid + 1];
             ary[tid + 1] = tp;
             cuPrintf("after swap %d\n", tid);
             for(int i = 0; i < size; ++ i)  cuPrintf("%d ", ary[i]);
             cuPrintf("\n");
         }
     }
 }
 /*
  * Odd phase of odd-even transposition sort.
  *
  * Every thread with an odd global index tid compares the pair
  * (ary[tid], ary[tid + 1]) and swaps it into ascending order. Pairs handled
  * in this phase never overlap, so the swaps do not interfere with each other.
  *
  * ary  : array of `size` ints, dereferenced in device code.
  * size : number of elements in ary.
  *
  * Launch layout: 1D grid and 1D blocks; each thread handles only its own
  * index, so the launch must provide at least `size` threads in total to
  * cover the whole array. No shared memory is used.
  */
 __global__ void odd_sort(int* ary, int size)
 {
     // Global thread index, numbered from 0 (equals threadIdx.x for a one-block launch).
     int tid = blockIdx.x * blockDim.x + threadIdx.x;
     if(1 == tid % 2)  // odd index: this thread owns the pair (tid, tid + 1)
     {
         // The bounds check must come first so ary[tid + 1] is never read past the end.
         if(tid + 1 < size && ary[tid] > ary[tid + 1])
         {
             int tp = ary[tid];
             ary[tid] = ary[tid + 1];
             ary[tid + 1] = tp;
         }
     }
 }
 /*
  * Demo driver for the GPU odd-even transposition sort.
  *
  * Fills a 27-element host array with random values in [0, 100), prints it,
  * sorts it on the device by alternating even_sort and odd_sort launches,
  * then copies it back and prints the result.
  *
  * Returns EXIT_SUCCESS when the sort completed, EXIT_FAILURE when device
  * or cuPrintf initialisation fails or any CUDA call reports an error.
  */
 int main(void)
 {
     const int SIZE = 27;
     const int THREAD_SIZE = SIZE;  // one thread per element, in a single block
     // Each round runs one even and one odd phase. Odd-even transposition
     // sort needs at most SIZE phases, so ceil(SIZE / 2) rounds are enough.
     const int ROUNDS = (SIZE + 1) / 2;
     if(!InitGPUSet())
     {
         puts("GPU failed");
         return EXIT_FAILURE;
     }
     if(!InitCuPrint())
     {
         puts("cuPrintf failed");
         return EXIT_FAILURE;
     }

     int ary[SIZE];
     srand((unsigned int) time(NULL));
     for(int i = 0; i < SIZE; ++ i)  ary[i] = rand() % 100;
     for(int i = 0; i < SIZE; ++ i)  printf("%d ", ary[i]);
     puts("");

     int* gary = NULL;
     cudaError_t err = cudaMalloc((void**) &gary, SIZE * sizeof(int));
     if(cudaSuccess == err)
         err = cudaMemcpy(gary, ary, SIZE * sizeof(int), cudaMemcpyHostToDevice);
     for(int i = 0; cudaSuccess == err && i < ROUNDS; ++ i)
     {
         even_sort<<<1, THREAD_SIZE, 0>>>(gary, SIZE);
         odd_sort<<<1, THREAD_SIZE, 0>>>(gary, SIZE);
         // Launches return no status themselves; pick up launch errors here.
         err = cudaGetLastError();
     }
     // This blocking copy waits for the kernels and reports their execution errors.
     if(cudaSuccess == err)
         err = cudaMemcpy(ary, gary, SIZE * sizeof(int), cudaMemcpyDeviceToHost);

     if(cudaSuccess == err)
     {
         for(int i = 0; i < SIZE; ++ i)  printf("%d ", ary[i]);
         puts("");
     }
     else
     {
         fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
     }

     cudaFree(gary);    // a no-op when the allocation itself failed (gary is NULL)
     cudaPrintfEnd();   // pairs with the cudaPrintfInit() done in InitCuPrint()
     return cudaSuccess == err ? EXIT_SUCCESS : EXIT_FAILURE;
 }

maowenge

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
奇偶冒泡排序的CUDA实现

奇偶冒泡排序（奇偶交换排序）把比较分为奇数下标和偶数下标两个阶段：在同一个阶段（奇或偶）中，每隔一个元素处理一对相邻元素，各对元素互不重叠、互不影响，因此可以并行处理。事实上，奇偶冒泡排序在串行 CPU 上没有任何优势，它就是为并行而生的。[cpp] view plaincopy #include #include #include
复制链接

扫一扫