CUDA Preemption Mode Test

  • By default the GPU runs in the DEFAULT (preemptible) compute mode; a kernel launched in the background saturates utilization (a small attribute-query sketch follows this list)
  • Running another kernel and profiling smsp__warps_restored shows non-zero values, i.e. warps were preempted and later restored
  • After switching to EXCLUSIVE_PROCESS mode with nvidia-smi -c 3, creating multiple contexts fails with: CUDA-capable device(s) is/are busy or unavailable!
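To double-check the first two claims on a given machine, the compute-preemption and compute-mode attributes can be queried directly from the runtime API. Below is a minimal sketch assuming device 0; check_mode.cpp and its build line are illustrative and not part of the original test:

tee check_mode.cpp<<-'EOF'
#include <stdio.h>
#include <cuda_runtime.h>

int main()
{
    int preemption=0;
    int mode=0;
    //1 if the device can preempt running kernels (Pascal and newer)
    cudaDeviceGetAttribute(&preemption,cudaDevAttrComputePreemptionSupported,0);
    //0=DEFAULT, 1=EXCLUSIVE_THREAD(deprecated), 2=PROHIBITED, 3=EXCLUSIVE_PROCESS
    cudaDeviceGetAttribute(&mode,cudaDevAttrComputeMode,0);
    printf("compute preemption supported:%d, compute mode:%d\n",preemption,mode);
    return 0;
}
EOF
g++ check_mode.cpp -o check_mode -I /usr/local/cuda/include -L /usr/local/cuda/lib64 -lcudart
./check_mode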

1. Reference Links

2. Reproduction Steps

tee preemption.cu<<-'EOF'
#include <cuda_runtime.h>
#include <iostream>

//two identical kernels with different names, so each thread/context's launches
//can be told apart in the profiler output
__global__ void Kernel_v1(float *data) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
    for(int i=0;i<1;i++)
    {
        atomicAdd(&data[idx], idx*i);
    }
}

__global__ void Kernel_v2(float *data) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
    for(int i=0;i<1;i++)
    {
        atomicAdd(&data[idx], idx*i);
    }
}
EOF

/usr/local/cuda/bin/nvcc -std=c++17 -lineinfo -arch=sm_86 -ptx preemption.cu -o preemption.ptx
/usr/local/cuda/bin/nvcc -arch=sm_86 preemption.ptx -cubin -o preemption.cubin
/usr/local/cuda/bin/nvcc -arch=sm_86 preemption.cubin -fatbin -o preemption.fatbin
cat preemption.ptx
/usr/local/cuda/bin/cuobjdump --dump-sass preemption.fatbin

tee preemption_main.cpp<<-'EOF'
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cuda_runtime.h>
#include <cuda.h>
#include <thread>

int run(const char *kernel_name,int loop)
{
    CUresult error;
    cuInit(0);
    CUdevice cuDevice;
    int deviceCount = 0;
    error = cuDeviceGetCount(&deviceCount);
    error = cuDeviceGet(&cuDevice, 0);
    if(error!=CUDA_SUCCESS)
    {
        printf("Error happened in get device!\n");
        return -1;
    }
    CUcontext cuContext;
    error = cuCtxCreate(&cuContext, 0, cuDevice);
    if(error!=CUDA_SUCCESS)
    {
        const char *error_str=nullptr;
        cuGetErrorString(error,&error_str);  //returns a pointer to a static string owned by the driver; do not free it
        printf("Error happened in create context:%s!\n",error_str);
        return -1;
    }
    int block_count=100000;int block_size=1024;
    int thread_size=block_count*block_size;
    int data_size=sizeof(float)*thread_size;

    float *input_ptr=nullptr;
    cudaError_t cudaStatus = cudaMalloc((void**)&input_ptr, data_size);
    if(cudaStatus!=cudaSuccess)
    {
        printf("Error happened in cudaMalloc!\n");
        return -1;
    }
    void *kernelParams[]= {(void*)&input_ptr};

    CUmodule module;
    CUfunction function;
    const char* module_file = "preemption.fatbin";
    error = cuModuleLoad(&module, module_file);
    if(error!=CUDA_SUCCESS)
    {
        printf("Error happened in load moudle %d!\n",error);
        return -1;
    }
    error = cuModuleGetFunction(&function, module, kernel_name);
    if(error!=CUDA_SUCCESS)
    {
        printf("get double_function error!\n");
        return -1;
    }
    do
    {
        //loop==1: launch forever to keep the GPU busy in the background
        //loop==0: launch each kernel once (the process being profiled by ncu)
        cuLaunchKernel(function,
                        block_count, 1, 1,
                        block_size, 1, 1,
                        0,0,kernelParams, 0);
        cuCtxSynchronize();
    }while(loop);
    cudaFree(input_ptr);
    cuModuleUnload(module);
    cuCtxDestroy(cuContext);
    printf("finished\n");
    return 0;
}

int main(int argc,char *argv[])
{
    if(argc<2)
    {
        printf("usage: %s <loop:0|1>\n",argv[0]);
        return -1;
    }
    int loop=atoi(argv[1]);
    int value=0;
    //query whether the device supports compute preemption
    cudaDeviceGetAttribute(&value,cudaDevAttrComputePreemptionSupported,0);
    printf("cudaDevAttrComputePreemptionSupported:%d\n",value);
    //kernel names are the mangled symbols as they appear in the PTX (see cat preemption.ptx)
    std::thread t0(run,"_Z9Kernel_v1Pf",loop);
    std::thread t1(run,"_Z9Kernel_v2Pf",loop);
    t0.join();
    t1.join();
    return 0;
}
EOF
# stop any background instance left over from a previous run
killall -9 preemption_main
g++ preemption_main.cpp -o preemption_main -I /usr/local/cuda/include -L /usr/local/cuda/lib64 -lcudart -lcuda -lpthread
# background process: loops forever and keeps the GPU busy
./preemption_main 1 &
# profiled process: launches each kernel once while ncu collects the preemption-restore metric
/usr/local/NVIDIA-Nsight-Compute/ncu --metrics smsp__warps_restored  ./preemption_main 0
  • Output

smsp__warps_restored    Counter         warp            # of warp preemption-restore events

  Kernel_v2(float *) (100000, 1, 1)x(1024, 1, 1), Context 2, Stream 7, Device 0, CC 8.6
    Section: Command line profiler metrics
    ------------------------ ----------- ------------
    Metric Name              Metric Unit Metric Value
    ------------------------ ----------- ------------
    smsp__warps_restored.avg        warp            0
    smsp__warps_restored.max        warp            0
    smsp__warps_restored.min        warp            0
    smsp__warps_restored.sum        warp            0
    ------------------------ ----------- ------------

  Kernel_v1(float *) (100000, 1, 1)x(1024, 1, 1), Context 1, Stream 14, Device 0, CC 8.6
    Section: Command line profiler metrics
    ------------------------ ----------- ------------
    Metric Name              Metric Unit Metric Value
    ------------------------ ----------- ------------
    smsp__warps_restored.avg        warp         0.58
    smsp__warps_restored.max        warp            8
    smsp__warps_restored.min        warp            0
    smsp__warps_restored.sum        warp           65
    ------------------------ ----------- ------------
  • EXCLUSIVE_PROCESS mode test (a compute-mode check sketch follows this block)
# set the compute mode
nvidia-smi -c 3
Set compute mode to EXCLUSIVE_PROCESS for GPU 00000000:03:00.0.
All done.

# error message reported by preemption_main
Error happened in create context:CUDA-capable device(s) is/are busy or unavailable!

# restore DEFAULT mode
nvidia-smi -c 0
Compute mode is already set to DEFAULT for GPU 00000000:03:00.0.
All done.
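
The failure can also be detected programmatically: the driver API exposes the compute mode as a device attribute, so a process can report EXCLUSIVE_PROCESS before cuCtxCreate fails. Below is a minimal sketch assuming device 0; check_ctx.cpp and its build line are illustrative and not part of the original test:

tee check_ctx.cpp<<-'EOF'
#include <stdio.h>
#include <cuda.h>

int main()
{
    cuInit(0);
    CUdevice dev;
    cuDeviceGet(&dev, 0);

    int mode = 0;
    //CU_COMPUTEMODE_DEFAULT=0, CU_COMPUTEMODE_PROHIBITED=2, CU_COMPUTEMODE_EXCLUSIVE_PROCESS=3
    cuDeviceGetAttribute(&mode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, dev);
    if(mode == CU_COMPUTEMODE_EXCLUSIVE_PROCESS)
        printf("EXCLUSIVE_PROCESS: only one context, owned by a single process, may exist on the device\n");

    CUcontext ctx;
    CUresult err = cuCtxCreate(&ctx, 0, dev);
    if(err != CUDA_SUCCESS)
    {
        const char *msg = nullptr;
        cuGetErrorString(err, &msg);
        printf("cuCtxCreate failed: %s\n", msg);
        return -1;
    }
    cuCtxDestroy(ctx);
    printf("context created and destroyed\n");
    return 0;
}
EOF
g++ check_ctx.cpp -o check_ctx -I /usr/local/cuda/include -L /usr/local/cuda/lib64 -lcuda
./check_ctx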