本人 CUDA小白一枚,要是有什么不对,还望各位大佬指点。
本文及后面的几篇将分别从几个方面来大概阐述一下Thrust的一些接口。原来的网址在这里。
1.Algorithms
1.3 Prefix Sums
1.3.1 inclusive_scan
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator>
__host__ __device__ OutputIterator thrust::inclusive_scan(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator result
);
template <typename InputIterator, typename OutputIterator>
OutputIterator thrust::inclusive_scan(
InputIterator first,
InputIterator last,
OutputIterator result
);
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename AssociativeOperator>
__host__ __device__ OutputIterator thrust::inclusive_scan(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator result,
AssociativeOperator binary_op
);
template <typename InputIterator, typename OutputIterator, typename AssociativeOperator>
OutputIterator thrust::inclusive_scan(
InputIterator first,
InputIterator last,
OutputIterator result,
AssociativeOperator binary_op
);
对[first, last)中的每个位置index,result中对应位置的结果为对区间[first, index]内的所有数值进行累积操作的结果(包含index本身)。默认操作为求和,也可以通过binary_op指定其他满足结合律的操作。
例子:
int data[6] = {1, 0, 2, 2, 1, 3};
thrust::inclusive_scan(thrust::host, data, data + 6, data);
// thrust::inclusive_scan(data, data + 6, data);
// data中的结果为{1, 1, 3, 5, 6, 9}
int data2[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
thrust::maximum<int> binary_op; // 取最大值
thrust::inclusive_scan(thrust::host, data2, data2 + 10, data2, binary_op);
// thrust::inclusive_scan(data2, data2 + 10, data2, binary_op);
// data2中的结果为{-5, 0, 2, 2, 2, 4, 4, 4, 4, 8}
1.3.2 exclusive_scan
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator>
__host__ __device__ OutputIterator thrust::exclusive_scan(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator result
);
template <typename InputIterator, typename OutputIterator>
OutputIterator thrust::exclusive_scan(
InputIterator first,
InputIterator last,
OutputIterator result
);
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename T>
__host__ __device__ OutputIterator thrust::exclusive_scan(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator result,
T init
);
template <typename InputIterator, typename OutputIterator, typename T>
OutputIterator thrust::exclusive_scan(
InputIterator first,
InputIterator last,
OutputIterator result,
T init
);
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename T, typename AssociativeOperator>
__host__ __device__ OutputIterator thrust::exclusive_scan(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator result,
T init,
AssociativeOperator binary_op
);
template <typename InputIterator, typename OutputIterator, typename T, typename AssociativeOperator>
OutputIterator thrust::exclusive_scan(
InputIterator first,
InputIterator last,
OutputIterator result,
T init,
AssociativeOperator binary_op
);
对[first, last)中的每个位置index,result中对应位置的结果为对区间[first, index)内的所有数值进行累积操作的结果(不包含index本身)。默认操作为求和,可以指定其他操作,也可以指定初始值init(默认为0)。与inclusive_scan唯一的区别在于当前index对应的数值是否包含在内。指定的初始值相当于在原数组最前面增加一个值,因此result的第一个元素即为init。
int data[6] = {1, 0, 2, 2, 1, 3};
thrust::exclusive_scan(thrust::host, data, data + 6, data);
// thrust::exclusive_scan(data, data + 6, data);
// data输出 {0, 1, 1, 3, 5, 6}
int data2[6] = {1, 0, 2, 2, 1, 3};
thrust::exclusive_scan(thrust::host, data2, data2 + 6, data2, 4);
// thrust::exclusive_scan(data2, data2 + 6, data2, 4);
// data2输出 {4, 5, 5, 7, 9, 10}
int data3[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
thrust::maximum<int> binary_op;
thrust::exclusive_scan(thrust::host, data3, data3 + 10, data3, 1, binary_op);
// thrust::exclusive_scan(data3, data3 + 10, data3, 1, binary_op);
// data3输出 {1, 1, 1, 2, 2, 2, 4, 4, 4, 4}
1.3.3 inclusive_scan_by_key
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator>
__host__ __device__ OutputIterator thrust::inclusive_scan_by_key(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputIterator result
);
template <typename InputIterator1, typename InputIterator2, typename OutputIterator>
OutputIterator thrust::inclusive_scan_by_key(
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputIterator result
);
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename BinaryPredicate>
__host__ __device__ OutputIterator thrust::inclusive_scan_by_key(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputIterator result,
BinaryPredicate binary_pred
);
template <typename InputIterator1, typename InputIterator2, typename OutputIterator, typename BinaryPredicate>
OutputIterator thrust::inclusive_scan_by_key(
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputIterator result,
BinaryPredicate binary_pred
);
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename BinaryPredicate, typename AssociativeOperator>
__host__ __device__ OutputIterator thrust::inclusive_scan_by_key(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputIterator result,
BinaryPredicate binary_pred,
AssociativeOperator binary_op
);
template <typename InputIterator1, typename InputIterator2, typename OutputIterator, typename BinaryPredicate, typename AssociativeOperator>
OutputIterator thrust::inclusive_scan_by_key(
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputIterator result,
BinaryPredicate binary_pred,
AssociativeOperator binary_op
);
[first1, last1)为key序列,其中连续相同的key构成一个segment;first2开始的序列为与key一一对应的value序列。在每一个segment内部分别对value执行inclusive_scan的操作。可以通过binary_pred指定判断相邻key是否属于同一个segment的方法(默认为判断相等),也可以通过binary_op指定同一个segment中需要完成的操作(默认为求和)。
例子:
int A[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
int B[10] = {2, 1, 2, 2, 3, 4, 5, 5, 5, 5};
thrust::inclusive_scan_by_key(thrust::host, B, B + 10, A, A);
// thrust::inclusive_scan_by_key(B, B + 10, A, A);
// A的结果为 {1, 1, 1, 2, 1, 1, 1, 2, 3, 4}
// 如果 A[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
// 则对应的输出结果为 {1, 0, 1, 1, 1, 0, 1, 1, 2, 2}
int C[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
thrust::equal_to<int> binary_pred;
thrust::inclusive_scan_by_key(thrust::host, B, B + 10, C, C, binary_pred);
// thrust::inclusive_scan_by_key(B, B + 10, C, C, binary_pred);
// C的结果为{1, 0, 1, 1, 1, 0, 1, 1, 2, 2}
int D[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
thrust::plus<int> binary_op;
thrust::inclusive_scan_by_key(thrust::host, B, B + 10, D, D, binary_pred, binary_op);
// thrust::inclusive_scan_by_key(B, B + 10, D, D, binary_pred, binary_op);
// D的结果为{1, 0, 1, 1, 1, 0, 1, 1, 2, 2}
1.3.4 exclusive_scan_by_key
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator>
__host__ __device__ OutputIterator thrust::exclusive_scan_by_key(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputIterator result
);
template <typename InputIterator1, typename InputIterator2, typename OutputIterator>
OutputIterator thrust::exclusive_scan_by_key(
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputIterator result
);
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename T>
__host__ __device__ OutputIterator thrust::exclusive_scan_by_key(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputIterator result,
T init
);
template <typename InputIterator1, typename InputIterator2, typename OutputIterator, typename T>
OutputIterator thrust::exclusive_scan_by_key(
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputIterator result,
T init
);
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename T, typename BinaryPredicate>
__host__ __device__ OutputIterator thrust::exclusive_scan_by_key(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputIterator result,
T init,
BinaryPredicate binary_pred
);
template <typename InputIterator1, typename InputIterator2, typename OutputIterator, typename T, typename BinaryPredicate>
OutputIterator thrust::exclusive_scan_by_key(
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputIterator result,
T init,
BinaryPredicate binary_pred
);
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename T, typename BinaryPredicate, typename AssociativeOperator>
__host__ __device__ OutputIterator thrust::exclusive_scan_by_key(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputIterator result,
T init,
BinaryPredicate binary_pred,
AssociativeOperator binary_op
);
template <typename InputIterator1, typename InputIterator2, typename OutputIterator, typename T, typename BinaryPredicate, typename AssociativeOperator>
OutputIterator thrust::exclusive_scan_by_key(
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputIterator result,
T init,
BinaryPredicate binary_pred,
AssociativeOperator binary_op
);
原理同inclusive_scan_by_key,同样可以指定segment的划分方法,同一个segment中需要执行的操作,以及同一个segment中初始的值。
例子:
int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3};
int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
thrust::exclusive_scan_by_key(thrust::host, keys, keys + 10, vals, vals);
// thrust::exclusive_scan_by_key(keys, keys + 10, vals, vals);
// vals的值为 {0, 1, 2, 0, 1, 0, 0, 1, 2, 3}
int vals2[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
int init = 5;
thrust::exclusive_scan_by_key(thrust::host, keys, keys + 10, vals2, vals2, init);
// thrust::exclusive_scan_by_key(keys, keys + 10, vals2, vals2, init);
// vals2的值为 {5, 6, 7, 5, 6, 5, 5, 6, 7, 8}
int vals3[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
thrust::equal_to<int> binary_pred; // 用于判断相邻的key是否属于同一个segment
thrust::exclusive_scan_by_key(thrust::host, keys, keys + 10, vals3, vals3, init, binary_pred);
// thrust::exclusive_scan_by_key(keys, keys + 10, vals3, vals3, init, binary_pred);
// vals3的值为 {5, 6, 7, 5, 6, 5, 5, 6, 7, 8}
1.3.5 transform_inclusive_scan
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename UnaryFunction, typename AssociativeOperator>
__host__ __device__ OutputIterator thrust::transform_inclusive_scan(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator result,
UnaryFunction unary_op,
AssociativeOperator binary_op
);
template <typename InputIterator, typename OutputIterator, typename UnaryFunction, typename AssociativeOperator>
OutputIterator thrust::transform_inclusive_scan(
InputIterator first,
InputIterator last,
OutputIterator result,
UnaryFunction unary_op,
AssociativeOperator binary_op
);
在原先inclusive_scan的基础上增加了transform的操作。
例子:
int data[6] = {1, 0, 2, 2, 1, 3};
thrust::negate<int> unary_op;
thrust::plus<int> binary_op;
thrust::transform_inclusive_scan(thrust::host, data, data + 6, data, unary_op, binary_op);
// thrust::transform_inclusive_scan(data, data + 6, data, unary_op, binary_op);
// data {-1, -1, -3, -5, -6, -9}
1.3.6 transform_exclusive_scan
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename UnaryFunction, typename T, typename AssociativeOperator>
__host__ __device__ OutputIterator thrust::transform_exclusive_scan(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator result,
UnaryFunction unary_op,
T init,
AssociativeOperator binary_op
);
template <typename InputIterator, typename OutputIterator, typename UnaryFunction, typename T, typename AssociativeOperator>
OutputIterator thrust::transform_exclusive_scan(
InputIterator first,
InputIterator last,
OutputIterator result,
UnaryFunction unary_op,
T init,
AssociativeOperator binary_op
);
在exclusive_scan的基础上增加了transform的操作。
例子:
int data[6] = {1, 0, 2, 2, 1, 3};
thrust::negate<int> unary_op;
thrust::plus<int> binary_op;
thrust::transform_exclusive_scan(thrust::host, data, data + 6, data, unary_op, 4, binary_op);
// thrust::transform_exclusive_scan(data, data + 6, data, unary_op, 4, binary_op);
// data的值为 {4, 3, 3, 1, -1, -2}