本人 CUDA小白一枚,要是有什么不对,还望各位大佬指点。
本文及后面的几篇将分别从几个方面来大概阐述一下Thrust的一些接口。原来的网址在这里。
1.Algorithms
1.4 Reductions
1.4.1 reduce
// Prototype listing for thrust::reduce (declarations only, as quoted from the API reference).
// Fixes relative to the original listing: the template parameter is spelled
// `DerivedPolicy` (it was declared as `DerviedPolicy` but used as `DerivedPolicy`),
// and the last overload now declares the `BinaryFunction` parameter it uses.

// (1) Reduce [first, last) under the execution policy `exec`, using the default operation (sum).
template<typename DerivedPolicy, typename InputIterator>
__host__ __device__ thrust::iterator_traits< InputIterator >::value_type thrust::reduce(
    const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
    InputIterator first,
    InputIterator last
);

// (2) Same as (1), without an explicit execution policy.
template<typename InputIterator>
thrust::iterator_traits< InputIterator >::value_type thrust::reduce(
    InputIterator first,
    InputIterator last
);

// (3) Reduce [first, last) under `exec`, starting from the initial value `init`.
template<typename DerivedPolicy, typename InputIterator, typename T>
__host__ __device__ T thrust::reduce(
    const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
    InputIterator first,
    InputIterator last,
    T init
);

// (4) Same as (3), without an explicit execution policy.
template<typename InputIterator, typename T>
T thrust::reduce(
    InputIterator first,
    InputIterator last,
    T init
);

// (5) Reduce [first, last) under `exec`, starting from `init` and combining
//     values with the caller-supplied `binary_op` instead of the default sum.
template<typename DerivedPolicy, typename InputIterator, typename T, typename BinaryFunction>
__host__ __device__ T thrust::reduce(
    const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
    InputIterator first,
    InputIterator last,
    T init,
    BinaryFunction binary_op
);

// (6) Same as (5), without an explicit execution policy.
template<typename InputIterator, typename T, typename BinaryFunction>
T thrust::reduce(
    InputIterator first,
    InputIterator last,
    T init,
    BinaryFunction binary_op
);
对数组进行一定的操作得到一个值的输出,可以指定起始值,可以指定特定的操作,默认的操作是求和
例子:
// Input shared by the three reduce calls below.
int data[6] = {1, 0, 2, 2, 1, 3};
// Default reduction (sum) with an explicit host execution policy.
int result1 = thrust::reduce(thrust::host, data, data + 6);
// Equivalent call without a policy: int result1 = thrust::reduce(data, data + 6);
// result1 == 9 (1 + 0 + 2 + 2 + 1 + 3)
// Sum with an initial value of 1.
int result2 = thrust::reduce(thrust::host, data, data + 6, 1);
// Equivalent call without a policy: int result2 = thrust::reduce(data, data + 6, 1);
// result2 == 10 (initial value 1 plus the element sum 9)
// Maximum instead of sum, with an initial value of 1.
int result3 = thrust::reduce(thrust::host, data, data + 6, 1, thrust::maximum<int>());
// Equivalent call without a policy: int result3 = thrust::reduce(data, data + 6, 1, thrust::maximum<int>());
// result3 == 3 (largest of the initial value and all elements)
1.4.2 reduce_by_key
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator1, typename OutputIterator2>
__host__ __device__ thrust::pair< OutputIterator1, OutputIterator2 > reduce_by_key(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 keys_first,
InputIterator1 keys_last,
InputIterator2 values_first,
OutputIterator1 keys_output,
OutputIterator2 values_output
);
template <typename InputIterator1, typename InputIterator2, typename OutputIterator1, typename OutputIterator2>
thrust::pair< OutputIterator1, OutputIterator2 > reduce_by_key(
InputIterator1 keys_first,
InputIterator1 keys_last,
InputIterator2 values_first,
OutputIterator1 keys_output,
OutputIterator2 values_output
);
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator1, typename OutputIterator2, typename BinaryPredicate>
__host__ __device__ thrust::pair< OutputIterator1, OutputIterator2 > reduce_by_key(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 keys_first,
InputIterator1 keys_last,
InputIterator2 values_first,
OutputIterator1 keys_output,
OutputIterator2 values_output,
BinaryPredicate binary_pred
);
template <typename InputIterator1, typename InputIterator2, typename OutputIterator1, typename OutputIterator2, typename BinaryPredicate>
thrust::pair< OutputIterator1, OutputIterator2 > reduce_by_key(
InputIterator1 keys_first,
InputIterator1 keys_last,
InputIterator2 values_first,
OutputIterator1 keys_output,
OutputIterator2 values_output,
BinaryPredicate binary_pred
);
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator1, typename OutputIterator2, typename BinaryPredicate, typename BinaryFunction>
__host__ __device__ thrust::pair< OutputIterator1, OutputIterator2 > reduce_by_key(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 keys_first,
InputIterator1 keys_last,
InputIterator2 values_first,
OutputIterator1 keys_output,
OutputIterator2 values_output,
BinaryPredicate binary_pred,
BinaryFunction binary_op
);
template <typename InputIterator1, typename InputIterator2, typename OutputIterator1, typename OutputIterator2, typename BinaryPredicate, typename BinaryFunction>
thrust::pair< OutputIterator1, OutputIterator2 > reduce_by_key(
InputIterator1 keys_first,
InputIterator1 keys_last,
InputIterator2 values_first,
OutputIterator1 keys_output,
OutputIterator2 values_output,
BinaryPredicate binary_pred,
BinaryFunction binary_op
);
按照key对values进行分组归约:对于keys中连续出现的相同key,将它们对应的value做reduce,输出每一组的key以及该组的归约结果。可以指定归约操作binary_op(默认求和),也可以指定用来判断相邻两个key是否属于同一组的条件binary_pred(默认判断是否相等)。返回值是一对迭代器,分别指向输出的keys和values的末尾。
例子:
// Keys and values processed by the three reduce_by_key calls below.
const int N = 7;
int A[N] = {1, 3, 3, 3, 2, 2, 1}; // keys
int B[N] = {9, 8, 7, 6, 5, 4, 3}; // values
int C[N]; // output keys
int D[N]; // output values (one reduced value per run of equal keys)
thrust::pair<int *, int *> new_end;
// Default behaviour: consecutive equal keys form a group, values in a group are summed.
new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D);
// Equivalent call without a policy: new_end = thrust::reduce_by_key(A, A + N, B, C, D);
// C = {1, 3, 2, 1}   new_end.first - C == 4
// D = {9, 21, 9, 3}  new_end.second - D == 4
int C1[N];
int D1[N];
thrust::pair<int *, int *> new_end1;
thrust::equal_to<int> binary_pred;
// Same reduction with the key-comparison predicate passed explicitly.
new_end1 = thrust::reduce_by_key(thrust::host, A, A + N, B, C1, D1, binary_pred);
// Equivalent call without a policy: new_end1 = thrust::reduce_by_key(A, A + N, B, C1, D1, binary_pred);
// The default binary_pred already tests for equality, so C1, D1 and new_end1
// hold the same results as C, D and new_end.
int C2[N];
int D2[N];
thrust::pair<int *, int *> new_end2;
thrust::plus<int> binary_op;
// Same reduction with both the predicate and the reduction operator passed explicitly.
new_end2 = thrust::reduce_by_key(thrust::host, A, A + N, B, C2, D2, binary_pred, binary_op);
// Equivalent call without a policy: new_end2 = thrust::reduce_by_key(A, A + N, B, C2, D2, binary_pred, binary_op);
// The default binary_op already is addition, so C2, D2 and new_end2
// hold the same results as C, D and new_end.
1.4.3 equal
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2>
__host__ __device__ bool thrust::equal(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2
);
template <typename InputIterator1, typename InputIterator2>
bool thrust::equal(
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2
);
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename BinaryPredicate>
__host__ __device__ bool thrust::equal(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
BinaryPredicate binary_pred
);
template <typename InputIterator1, typename InputIterator2, typename BinaryPredicate>
bool thrust::equal(
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
BinaryPredicate binary_pred
);
比较两个数组是否相同,可以指定对应判断条件
例子:
// Binary predicate: two ints compare equal when their remainders modulo 2 match.
struct compare_modulo_two {
    __host__ __device__
    bool operator()(int x, int y) const {
        return (x % 2) == (y % 2);
    }
};  // the original snippet omitted this terminating ';'

int A1[7] = {3, 1, 4, 1, 5, 9, 3};
int A2[7] = {3, 1, 4, 2, 8, 5, 7};

// Element-wise comparison using the default equality test.
bool result = thrust::equal(thrust::host, A1, A1 + 7, A2);
// Equivalent call without a policy: bool result = thrust::equal(A1, A1 + 7, A2);
// result == false (the arrays first differ at index 3: 1 vs 2)

// Element-wise comparison using the custom predicate.
// A distinct variable name is used so both calls can live in the same scope.
bool result_mod2 = thrust::equal(thrust::host, A1, A1 + 7, A2, compare_modulo_two());
// Equivalent call without a policy: bool result_mod2 = thrust::equal(A1, A1 + 7, A2, compare_modulo_two());
// result_mod2 == false (index 3: 1 is odd, 2 is even)
// Author's note: the result stated in the official documentation's example appears to be wrong.
1.4.4 count 和 count_if
// Prototype listing for thrust::count and thrust::count_if (declarations only).
// Fixes relative to the original listing: the predicate-taking overloads are named
// thrust::count_if (thrust::count only takes a value to compare against), and the
// last overload declares the `Predicate` template parameter that it actually uses.

// (1) Count the elements of [first, last) equal to `value`, under the execution policy `exec`.
template <typename DerivedPolicy, typename InputIterator, typename EqualityComparable>
__host__ __device__ thrust::iterator_traits< InputIterator >::difference_type thrust::count(
    const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
    InputIterator first,
    InputIterator last,
    const EqualityComparable & value
);

// (2) Same as (1), without an explicit execution policy.
template <typename InputIterator, typename EqualityComparable>
thrust::iterator_traits< InputIterator >::difference_type thrust::count(
    InputIterator first,
    InputIterator last,
    const EqualityComparable & value
);

// (3) Count the elements of [first, last) for which `pred` is true, under `exec`.
template <typename DerivedPolicy, typename InputIterator, typename Predicate>
__host__ __device__ thrust::iterator_traits< InputIterator >::difference_type thrust::count_if(
    const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
    InputIterator first,
    InputIterator last,
    Predicate pred
);

// (4) Same as (3), without an explicit execution policy.
template <typename InputIterator, typename Predicate>
thrust::iterator_traits< InputIterator >::difference_type thrust::count_if(
    InputIterator first,
    InputIterator last,
    Predicate pred
);
计数:count统计与指定值value相等的元素个数,count_if统计满足条件pred的一类元素的个数
例子:
// Unary predicate: true when the lowest bit of x is set, i.e. x is odd.
struct is_odd {
    __host__ __device__
    bool operator()(const int &x) const {
        return x & 1;
    }
};

// Five zero-initialised elements, then three of them set to 1: vec = {0, 1, 0, 1, 1}.
thrust::device_vector<int> vec(5, 0);   // was misspelled `device_vecotr`
vec[1] = 1;
vec[3] = 1;
vec[4] = 1;

// count: number of elements equal to the value 1.
int result = thrust::count(thrust::device, vec.begin(), vec.end(), 1);
// Equivalent call without a policy: int result = thrust::count(vec.begin(), vec.end(), 1);
// result == 3

// count_if: number of elements satisfying the predicate.
// The original snippet called thrust::count and separated the last two
// arguments with '.' instead of ','; neither compiles.
int result_odd = thrust::count_if(thrust::device, vec.begin(), vec.end(), is_odd());
// Equivalent call without a policy: int result_odd = thrust::count_if(vec.begin(), vec.end(), is_odd());
// result_odd == 3
1.4.5 min_element 和 max_element
template <typename DerivedPolicy, typename ForwardIterator>
__host__ __device__ ForwardIterator thrust::min_element(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last
);
template <typename ForwardIterator>
ForwardIterator thrust::min_element(
ForwardIterator first,
ForwardIterator last
);
template <typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
__host__ __device__ ForwardIterator thrust::min_element(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
BinaryPredicate comp
);
template <typename ForwardIterator, typename BinaryPredicate>
ForwardIterator thrust::min_element(
ForwardIterator first,
ForwardIterator last,
BinaryPredicate comp
);
找到一组元素中的最小值,返回指向该元素的迭代器,可以通过comp指定比较方式。max_element的接口与min_element一一对应,返回的是指向最大元素的迭代器。
例子:
// --- min_element with the default comparison ---
int data[6] = {1, 0, 2, 2, 1, 3};
int *result = thrust::min_element(thrust::host, data, data + 6);
// Equivalent call without a policy: int *result = thrust::min_element(data, data + 6);
// result points at data[1]; *result == 0

// --- min_element with a custom comparison ---
// Simple key/value record.
struct key_value {
    int key;
    int value;
};

// Orders key_value records by their key only.
struct compare_key_value {
    __host__ __device__
    bool operator()(key_value lhs, key_value rhs) const {
        return lhs.key < rhs.key;
    }
};

// Distinct variable names are used so both examples can live in the same scope.
key_value kv_data[4] = {{4, 5}, {0, 7}, {2, 3}, {6, 1}};
// The comparator must be passed as an object: compare_key_value(), not the bare type name.
key_value *kv_result = thrust::min_element(thrust::host, kv_data, kv_data + 4, compare_key_value());
// Equivalent call without a policy: key_value *kv_result = thrust::min_element(kv_data, kv_data + 4, compare_key_value());
// *kv_result == {0, 7} (the record with the smallest key)
1.4.6 minmax_element
template <typename DerivedPolicy, typename ForwardIterator>
__host__ __device__ thrust::pair< ForwardIterator, ForwardIterator > thrust::minmax_element(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last
);
template <typename ForwardIterator>
thrust::pair< ForwardIterator, ForwardIterator > thrust::minmax_element(
ForwardIterator first,
ForwardIterator last
);
template <typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
__host__ __device__ thrust::pair< ForwardIterator, ForwardIterator > thrust::minmax_element(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
BinaryPredicate comp
);
template <typename ForwardIterator, typename BinaryPredicate>
thrust::pair< ForwardIterator, ForwardIterator > thrust::minmax_element(
ForwardIterator first,
ForwardIterator last,
BinaryPredicate comp
);
同时查找最小元素和最大元素,返回一对迭代器:first指向最小元素,second指向最大元素
例子:
// --- minmax_element with the default comparison ---
int data[6] = {1, 0, 2, 2, 1, 3};
// `thrust::host` needs the scope operator '::' (the original had a single ':').
thrust::pair<int *, int *> result = thrust::minmax_element(thrust::host, data, data + 6);
// Equivalent call without a policy: thrust::pair<int *, int *> result = thrust::minmax_element(data, data + 6);
// result.first  points at the minimum: *result.first  == 0
// result.second points at the maximum: *result.second == 3

// --- minmax_element with a custom comparison ---
// Simple key/value record.
struct key_value {
    int key;
    int value;
};

// Orders key_value records by their key only.
struct compare_key_value {
    __host__ __device__
    bool operator()(key_value lhs, key_value rhs) const {
        return lhs.key < rhs.key;
    }
};

// Distinct variable names are used so both examples can live in the same scope.
key_value kv_data[4] = {{4, 5}, {0, 7}, {2, 3}, {6, 1}};
thrust::pair<key_value *, key_value *> kv_result = thrust::minmax_element(thrust::host, kv_data, kv_data + 4, compare_key_value());
// Equivalent call without a policy: thrust::pair<key_value *, key_value *> kv_result = thrust::minmax_element(kv_data, kv_data + 4, compare_key_value());
// *kv_result.first == {0, 7} (smallest key), *kv_result.second == {6, 1} (largest key)
1.4.7 all_of
template <typename DerivedPolicy, typename InputIterator, typename Predicate>
__host__ __device__ bool thrust::all_of(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
Predicate pred
);
template <typename InputIterator, typename Predicate>
bool thrust::all_of(
InputIterator first,
InputIterator last,
Predicate pred
);
判断指定范围内的元素是否全部满足pred。如果指定范围为空,则返回true(此时没有任何元素不满足pred,与std::all_of的约定一致)
例子:
bool A[3] = {true, true, false};
// First two elements only: both are true.
thrust::all_of(thrust::host, A, A + 2, thrust::identity<bool>());
// Equivalent call without a policy: thrust::all_of(A, A + 2, thrust::identity<bool>());
// returns true
// All three elements: the last one is false.
thrust::all_of(thrust::host, A, A + 3, thrust::identity<bool>());
// Equivalent call without a policy: thrust::all_of(A, A + 3, thrust::identity<bool>());
// returns false
1.4.8 any_of
template <typename DerivedPolicy, typename InputIterator, typename Predicate>
__host__ __device__ bool thrust::any_of(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
Predicate pred
);
template <typename InputIterator, typename Predicate>
bool thrust::any_of(
InputIterator first,
InputIterator last,
Predicate pred
);
判断指定范围内是否存在满足pred的元素,如果指定范围为空,则返回false
例子:
bool A[3] = {true, true, false};
// First two elements: at least one is true.
thrust::any_of(thrust::host, A, A + 2, thrust::identity<bool>());
// Equivalent call without a policy: thrust::any_of(A, A + 2, thrust::identity<bool>());
// returns true
// Last element only: it is false.
thrust::any_of(thrust::host, A + 2, A + 3, thrust::identity<bool>());
// Equivalent call without a policy: thrust::any_of(A + 2, A + 3, thrust::identity<bool>());
// returns false
1.4.9 none_of
template <typename DerivedPolicy, typename InputIterator, typename Predicate>
__host__ __device__ bool thrust::none_of(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
Predicate pred
);
template <typename InputIterator, typename Predicate>
bool thrust::none_of(
InputIterator first,
InputIterator last,
Predicate pred
);
判断指定范围内是否不存在满足pred的元素(即所有元素都不满足pred),如果指定范围为空,则返回true
例子:
bool A[3] = {true, true, false};
// First two elements: both are true, so "none" does not hold.
thrust::none_of(thrust::host, A, A + 2, thrust::identity<bool>());
// Equivalent call without a policy: thrust::none_of(A, A + 2, thrust::identity<bool>());
// returns false
// Last element only: it is false, so no element satisfies the predicate.
thrust::none_of(thrust::host, A + 2, A + 3, thrust::identity<bool>());
// Equivalent call without a policy: thrust::none_of(A + 2, A + 3, thrust::identity<bool>());
// returns true
1.4.10 is_partitioned
template <typename DerivedPolicy, typename InputIterator, typename Predicate>
__host__ __device__ bool thrust::is_partitioned(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
Predicate pred
);
template <typename InputIterator, typename Predicate>
bool thrust::is_partitioned(
InputIterator first,
InputIterator last,
Predicate pred
);
判断指定范围内数据是否按照特定的规则进行划分。
例子:
// Unary predicate: true when x is divisible by 2.
struct is_even {
    __host__ __device__
    bool operator()(const int &x) const {
        return (x % 2) == 0;
    }
};  // the original snippet omitted this terminating ';'

// A: every even value precedes every odd value.
int A[] = {2, 4, 6, 8, 10, 1, 3, 5, 7, 9};
// B: even and odd values alternate.
int B[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

thrust::is_partitioned(thrust::host, A, A + 10, is_even()); // returns true
// Equivalent call without a policy: thrust::is_partitioned(A, A + 10, is_even()); // returns true
thrust::is_partitioned(thrust::host, B, B + 10, is_even()); // returns false
// Equivalent call without a policy: thrust::is_partitioned(B, B + 10, is_even()); // returns false
1.4.11 is_sorted
template <typename DerivedPolicy, typename ForwardIterator>
__host__ __device__ bool thrust::is_sorted(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last
);
template <typename ForwardIterator>
__host__ __device__ bool thrust::is_sorted(
ForwardIterator first,
ForwardIterator last
);
判断指定范围内的元素是否有序(默认按非递减顺序),也可以通过comp指定比较方式
例子:
// v = {1, 4, 2, 8, 5, 7}
thrust::device_vector<int> v(6);
v[0] = 1;
v[1] = 4;
v[2] = 2;
v[3] = 8;
v[4] = 5;
v[5] = 7;

// Default ordering: 4 is followed by 2, so the range is not sorted.
bool result = thrust::is_sorted(thrust::device, v.begin(), v.end());
// Equivalent call without a policy: bool result = thrust::is_sorted(v.begin(), v.end());
// result == false

// Ordering given by thrust::greater<int>: 1 is followed by 4, so not sorted either.
// The comparator was declared as `copm` but used as `comp` in the original snippet.
thrust::greater<int> comp;
// A distinct variable name is used so both calls can live in the same scope.
bool result_comp = thrust::is_sorted(thrust::device, v.begin(), v.end(), comp);
// Equivalent call without a policy: bool result_comp = thrust::is_sorted(v.begin(), v.end(), comp);
// result_comp == false
1.4.12 is_sorted_until
template <typename DerivedPolicy, typename ForwardIterator>
__host__ __device__ ForwardIterator thrust::is_sorted_until(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last
);
template <typename ForwardIterator>
ForwardIterator thrust::is_sorted_until(
ForwardIterator first,
ForwardIterator last
);
template <typename DerivedPolicy, typename ForwardIterator, typename Compare>
__host__ __device__ ForwardIterator thrust::is_sorted_until(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
Compare comp
);
template <typename ForwardIterator, typename Compare>
ForwardIterator thrust::is_sorted_until(
ForwardIterator first,
ForwardIterator last,
Compare comp
);
返回一个迭代器i,使得[first, i)是从first开始的最长有序区间(默认按非递减顺序),同样可以通过comp指定排序的方式
例子:
int A[8] = {0, 1, 2, 3, 0, 1, 2, 3};
// Default ordering: the sorted prefix ends where 3 is followed by 0.
int *B = thrust::is_sorted_until(thrust::host, A, A + 8);
// Equivalent call without a policy: int *B = thrust::is_sorted_until(A, A + 8);
// B - A == 4; the half-open range [A, B) is sorted in ascending order
thrust::greater<int> comp;
// Ordering given by thrust::greater<int>: 0 is immediately followed by the larger 1.
int *C = thrust::is_sorted_until(thrust::host, A, A + 8, comp);
// Equivalent call without a policy: int *C = thrust::is_sorted_until(A, A + 8, comp);
// C - A == 1; the half-open range [A, C) is sorted in descending order
1.4.13 inner_product
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputType>
__host__ __device__ OutputType thrust::inner_product(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputType init
);
template <typename InputIterator1, typename InputIterator2, typename OutputType>
OutputType thrust::inner_product(
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputType init
);
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputType, typename BinaryFunction1, typename BinaryFunction2>
__host__ __device__ OutputType thrust::inner_product(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputType init,
BinaryFunction1 binary_op1,
BinaryFunction2 binary_op2
);
template <typename InputIterator1, typename InputIterator2, typename OutputType, typename BinaryFunction1, typename BinaryFunction2>
OutputType thrust::inner_product(
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputType init,
BinaryFunction1 binary_op1,
BinaryFunction2 binary_op2
);
两个向量按位相乘求和,可以指定初始值,也可以指定对应位置元素的操作(默认相乘)及针对每一位的结果需要进行的操作(默认求和)。
例子:
float vec1[3] = {1.0f, 2.0f, 5.0f};
// The second vector was declared as `vec1` a second time in the original snippet;
// the calls below refer to it as `vec2`.
float vec2[3] = {4.0f, 1.0f, 5.0f};
float result = 0.0f;

// Explicit spellings of the default operators, used by the commented alternatives:
// binary_op1 combines the per-element results (sum),
// binary_op2 combines each pair of corresponding elements (product).
thrust::plus<float> binary_op1;
thrust::multiplies<float> binary_op2;

// Dot product with an initial value of 0.
result = thrust::inner_product(thrust::host, vec1, vec1 + 3, vec2, 0.0f);
// Equivalent alternatives:
// result = thrust::inner_product(vec1, vec1 + 3, vec2, 0.0f);
// result = thrust::inner_product(thrust::host, vec1, vec1 + 3, vec2, 0.0f, binary_op1, binary_op2);
// result = thrust::inner_product(vec1, vec1 + 3, vec2, 0.0f, binary_op1, binary_op2);
// All four calls give the same result: 1*4 + 2*1 + 5*5 == 31
1.4.14 transform_reduce
template <typename DerivedPolicy, typename InputIterator, typename UnaryFunction, typename OutputType, typename BinaryFunction>
__host__ __device__ OutputType thrust::transform_reduce(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
UnaryFunction unary_op,
OutputType init,
BinaryFunction binary_op
);
template <typename InputIterator, typename UnaryFunction, typename OutputType, typename BinaryFunction>
OutputType thrust::transform_reduce(InputIterator first,
InputIterator last,
UnaryFunction unary_op,
OutputType init,
BinaryFunction binary_op);
将transform和reduce结合:首先根据unary_op对[first, last)中的数据进行transform,然后以init为初始值,用binary_op对转换之后的数值进行reduce。
例子:
// Unary functor returning the absolute value of its argument.
// The original snippet used T without declaring it; the struct must be a template
// because it is instantiated below as absolute_value<int>. The base class is
// qualified with thrust:: so the name resolves without a using-directive.
template <typename T>
struct absolute_value : public thrust::unary_function<T, T> {
    __host__ __device__ T operator()(const T &x) const {
        return x < T(0) ? -x : x;
    }
};

int data[6] = {-1, 0, -2, -2, 1, -3};
// Transform every element to its absolute value, then take the maximum,
// starting from an initial value of 0.
int result = thrust::transform_reduce(thrust::host, data, data + 6, absolute_value<int>(), 0, thrust::maximum<int>());
// Equivalent call without a policy: int result = thrust::transform_reduce(data, data + 6, absolute_value<int>(), 0, thrust::maximum<int>());
// result == 3 (largest absolute value in data)