Use thrust reduce_by_key with raw pointers instead of device pointers

Thrust is a powerful, easy-to-use library that harnesses NVIDIA GPUs to speed up programs that are heavy on memory traffic and computation.

Thrust is largely self-contained: its API functions usually expect Thrust's own device pointer (device_ptr) or a device_vector as input arguments.

As a result, it is sometimes painful to use Thrust with native CUDA raw pointers allocated by cudaMalloc.

My previous blog post briefly discusses the conversion between CUDA raw pointers and Thrust's device pointers.

http://blog.csdn.net/niuqingpeng/article/details/12489807

This blog post gives a short, self-contained example of how that conversion helps; it also serves as an example of how to use Thrust's reduce_by_key API.

#include<cuda.h>
#include<stdio.h>
#include<thrust/device_ptr.h>
#include<thrust/reduce.h>

// Runs thrust::reduce_by_key on buffers addressed by raw device pointers.
//
// Each raw pointer is wrapped in a thrust::device_ptr so that Thrust treats
// it as device memory, then the default (equality / plus) reduce_by_key
// overload is invoked.
//
//   d_key      - first input key
//   d_key_last - one past the last input key
//   d_value    - first input value (same element count as the key range)
//   d_okey     - destination for the reduced keys
//   d_ovalue   - destination for the reduced values
//
// Returns the number of keys written to d_okey.
// NOTE(review): Thrust documents that the input ranges shall not overlap
// either output range - callers should pass distinct output buffers.
template <typename Key, typename Value>
int reduce_by_key_with_raw_pointers(Key* d_key, Key* d_key_last, Value* d_value, 
    Key* d_okey, Value* d_ovalue) {
  typedef thrust::device_ptr<Key> KeyPtr;
  typedef thrust::device_ptr<Value> ValuePtr;

  // Inputs.
  const KeyPtr keys_begin = thrust::device_pointer_cast(d_key);
  const KeyPtr keys_end = thrust::device_pointer_cast(d_key_last);
  const ValuePtr values_begin = thrust::device_pointer_cast(d_value);

  // Outputs.
  const KeyPtr out_keys = thrust::device_pointer_cast(d_okey);
  const ValuePtr out_values = thrust::device_pointer_cast(d_ovalue);

  const thrust::pair<KeyPtr, ValuePtr> out_end = thrust::reduce_by_key(
      keys_begin, keys_end, values_begin, out_keys, out_values);

  // Distance from the start of the key output to its new end.
  return static_cast<int>(out_end.first - out_keys);
}

// Prints `msg` on its own line, then the first `count` ints of the device
// array `d_array` on one line, each followed by a tab.
//
// The raw pointer is wrapped in a thrust::device_ptr so that elements can be
// read from host code; every element access goes through the device_ptr
// (convenient for a demo, not meant for large arrays).
void output_device_array_with_raw_pointer(int* d_array, int count, const char* msg) {
  printf("%s\n", msg);

  const thrust::device_ptr<int> elements = thrust::device_pointer_cast(d_array);
  int idx = 0;
  while (idx < count) {
    // Reading through the device_ptr yields a host-side copy of the element.
    const int host_value = elements[idx];
    printf("%d\t", host_value);
    ++idx;
  }

  printf("\n");
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cuda_call_ok(cudaError_t status, const char* what) {
  if (status == cudaSuccess) {
    return true;
  }
  fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
  return false;
}

// Demo driver: uploads 7 keys/values, prints them, reduces consecutive equal
// keys with reduce_by_key_with_raw_pointers, and prints the reduced result.
//
// Returns 0 on success and 1 if any CUDA runtime call fails.
int main() {
  const int N = 7;
  const size_t bytes = N * sizeof(int);
  int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys
  int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values
  int* d_A = NULL; // device keys
  int* d_B = NULL; // device values
  int* d_C = NULL; // scratch: reduced keys
  int* d_D = NULL; // scratch: reduced values
  int exit_code = 1;

  // Single-pass block so that every early exit still reaches the cleanup.
  do {
    if (!cuda_call_ok(cudaMalloc((void**)&d_A, bytes), "cudaMalloc(d_A)")) break;
    if (!cuda_call_ok(cudaMalloc((void**)&d_B, bytes), "cudaMalloc(d_B)")) break;
    if (!cuda_call_ok(cudaMalloc((void**)&d_C, bytes), "cudaMalloc(d_C)")) break;
    if (!cuda_call_ok(cudaMalloc((void**)&d_D, bytes), "cudaMalloc(d_D)")) break;
    if (!cuda_call_ok(cudaMemcpy(d_A, A, bytes, cudaMemcpyHostToDevice),
                      "cudaMemcpy(d_A <- A)")) break;
    if (!cuda_call_ok(cudaMemcpy(d_B, B, bytes, cudaMemcpyHostToDevice),
                      "cudaMemcpy(d_B <- B)")) break;

    output_device_array_with_raw_pointer(d_A, N, "d_A");
    output_device_array_with_raw_pointer(d_B, N, "d_B");

    // thrust::reduce_by_key requires that the input ranges do not overlap the
    // output ranges, so reduce into the scratch buffers rather than in place.
    int n = reduce_by_key_with_raw_pointers<int, int>(
        d_A, d_A + N, d_B, d_C, d_D);

    // Move the n reduced entries back so the result lives in d_A / d_B.
    if (!cuda_call_ok(cudaMemcpy(d_A, d_C, n * sizeof(int), cudaMemcpyDeviceToDevice),
                      "cudaMemcpy(d_A <- d_C)")) break;
    if (!cuda_call_ok(cudaMemcpy(d_B, d_D, n * sizeof(int), cudaMemcpyDeviceToDevice),
                      "cudaMemcpy(d_B <- d_D)")) break;

    output_device_array_with_raw_pointer(d_A, n, "d_A");
    output_device_array_with_raw_pointer(d_B, n, "d_B");
    // The first four keys in d_A are now {1, 3, 2, 1} and n is 4.
    // The first four values in d_B are now {9, 21, 9, 3}.
    exit_code = 0;
  } while (0);

  // cudaFree on a null pointer performs no operation, so this is safe on
  // every path.
  cudaFree(d_D);
  cudaFree(d_C);
  cudaFree(d_B);
  cudaFree(d_A);
  return exit_code;
}


Output results will be

d_A
1	3	3	3	2	2	1	
d_B
9	8	7	6	5	4	3	
d_A
1	3	2	1	
d_B
9	21	9	3	



评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值