CUDA提供的函数评估Occupancy

#include <iostream>                                                                                                                                 
#include <cuda_runtime.h>
#include <cuda_occupancy.h>

// Device code
__global__ void MyKernel(int *d, int *a, int *b)
{
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
    d[idx] = a[idx] * b[idx];
}
// Host code
int main()
{
    int numBlocks; // Occupancy in terms of active blocks
    int blockSize = 32;
    // These variables are used to convert occupancy to warps
    int device;
    cudaDeviceProp prop;
    int activeWarps;
    int maxWarps;
    cudaGetDevice(&device);
    cudaGetDeviceProperties(&prop, device);
    cudaOccupancyMaxActiveBlocksPerMultiprocessor(
    &numBlocks,
    MyKernel,
    blockSize,
    0);
    activeWarps = numBlocks * blockSize / prop.warpSize;
            maxWarps = prop.maxThreadsPerMultiProcessor / prop.warpSize;
    std::cout << "Occupancy: " << (double)activeWarps / maxWarps * 100 << "%" << std::endl;
return 0;
}

其实occupancy可以用nvpprof来获取的;倒是可以从这个例子看下occupancy的计算过程。

NV提供的例子选择合适的blocks, threads

// Device code
__global__ void MyKernel(int *array, int arrayCount)
{
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
    if (idx < arrayCount) {
    array[idx] *= array[idx];
    }
}

// Host code
int launchMyKernel(int *array, int arrayCount)
{
    int blockSize; // The launch configurator returned block size
    int minGridSize; // The minimum grid size needed to achieve the
    // maximum occupancy for a full device
    // launch
    int gridSize; // The a
    cudaOccupancyMaxPotentialBlockSize(
    &minGridSize,
    &blockSize,
    (void*)MyKernel,
    0,arrayCount);
    // Round up according to array size
    gridSize = (arrayCount + blockSize - 1) / blockSize;
    MyKernel<<<gridSize, blockSize>>>(array, arrayCount);
    cudaDeviceSynchronize();
    // If interested, the occupancy can be calculated with
    // cudaOccupancyMaxActiveBlocksPerMultiprocessor
    return 0;
}

感觉这些API使用的应该不多正常情况下,自己可以测试下不同的blocks和grids的比例。

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值