问题描述
#include <stdio.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/sort.h>
#define SIZE 6
__global__ void kernel1(int* key, int* value)
{
for(int i = 0; i < SIZE; i++)
{
printf("key[%d] = %d value[%d] = %d\n", i, key[i], i, value[i]);
}
printf("\n");
}
int main(){
int key[SIZE] = {24, 43, 27, 42, 25, 40};
int value[SIZE] = {0, 1, 2, 3, 4, 5};
int *d_key, *d_value;
cudaMalloc((void**)&d_key, sizeof(int) * SIZE);
cudaMalloc((void**)&d_value, sizeof(int) * SIZE);
cudaMemcpy(d_key, key, sizeof(int) * SIZE, cudaMemcpyHostToDevice);
cudaMemcpy(d_value, value, sizeof(int) * SIZE, cudaMemcpyHostToDevice);
kernel1<<<1,1>>>(d_key, d_value);
cudaDeviceSynchronize();
//- sort
thrust::device_ptr<int> t_key(d_key);
thrust::device_ptr<int> t_value(d_value);
thrust::sort_by_key(t_value, t_value + SIZE, t_key);
cudaDeviceSynchronize();
kernel1<<<1,1>>>(d_key, d_value);
cudaDeviceSynchronize();
return 0;
}
sort
前后的输出结果分别为:
#- 预期结果 运行结果
key[0] = 24 value[0] = 0 key[0] = 24 value[0] = 0
key[1] = 43 value[1] = 1 key[1] = 43 value[1] = 1
key[2] = 27 value[2] = 2 key[2] = 27 value[2] = 2
key[3] = 42 value[3] = 3 key[3] = 42 value[3] = 3
key[4] = 25 value[4] = 4 key[4] = 25 value[4] = 4
key[5] = 40 value[5] = 5 key[5] = 40 value[5] = 5
key[0] = 24 value[0] = 0 key[0] = 24 value[0] = 0
key[1] = 25 value[1] = 4 key[1] = 43 value[1] = 1
key[2] = 27 value[2] = 2 key[2] = 27 value[2] = 2
key[3] = 40 value[3] = 5 key[3] = 42 value[3] = 3
key[4] = 42 value[4] = 3 key[4] = 25 value[4] = 4
key[5] = 43 value[5] = 1 key[5] = 40 value[5] = 5