cuda实践之一(hello world)

1.实践点

  • cuda核函数调用方式:<<< ... >>>

  • Grid和Block的内部三维索引

blockIdx.x, blockIdx.y, blockIdx.z; 
threadIdx.x, threadIdx.y, threadIdx.z
  • 核函数的定义:使用 __global__ 限定符声明,返回类型必须为 void

2. 代码

  • common.h

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#ifndef _COMMON_H
#define _COMMON_H

/*
 * CHECK(call): evaluate a CUDA runtime call exactly once. If the result is
 * not cudaSuccess, print the source file, line, numeric error code and the
 * cudaGetErrorString() text to stderr, then terminate with exit(1).
 *
 * The body is wrapped in do { ... } while (0) so that `CHECK(x);` expands to
 * a single statement and remains valid inside an unbraced if/else. The local
 * variable has a trailing underscore so it does not shadow an identifier
 * that appears in the `call` expression.
 */
#define CHECK(call)                                                            \
    do                                                                         \
    {                                                                          \
        const cudaError_t check_err_ = (call);                                 \
        if (check_err_ != cudaSuccess)                                         \
        {                                                                      \
            fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);             \
            fprintf(stderr, "code: %d, reason: %s\n", (int)check_err_,         \
                    cudaGetErrorString(check_err_));                           \
            exit(1);                                                           \
        }                                                                      \
    } while (0)

/*
 * CHECK_CUBLAS(call): evaluate a cuBLAS call exactly once. If the returned
 * cublasStatus_t is not CUBLAS_STATUS_SUCCESS, print the numeric status with
 * the source file and line to stderr, then terminate with exit(1).
 *
 * Wrapped in do { ... } while (0) so `CHECK_CUBLAS(x);` is one statement and
 * is safe inside an unbraced if/else.
 */
#define CHECK_CUBLAS(call)                                                     \
    do                                                                         \
    {                                                                          \
        const cublasStatus_t check_err_ = (call);                              \
        if (check_err_ != CUBLAS_STATUS_SUCCESS)                               \
        {                                                                      \
            fprintf(stderr, "Got CUBLAS error %d at %s:%d\n",                  \
                    (int)check_err_, __FILE__, __LINE__);                      \
            exit(1);                                                           \
        }                                                                      \
    } while (0)

/*
 * CHECK_CURAND(call): evaluate a cuRAND call exactly once. If the returned
 * curandStatus_t is not CURAND_STATUS_SUCCESS, print the numeric status with
 * the source file and line to stderr, then terminate with exit(1).
 *
 * Wrapped in do { ... } while (0) so `CHECK_CURAND(x);` is one statement and
 * is safe inside an unbraced if/else.
 */
#define CHECK_CURAND(call)                                                     \
    do                                                                         \
    {                                                                          \
        const curandStatus_t check_err_ = (call);                              \
        if (check_err_ != CURAND_STATUS_SUCCESS)                               \
        {                                                                      \
            fprintf(stderr, "Got CURAND error %d at %s:%d\n",                  \
                    (int)check_err_, __FILE__, __LINE__);                      \
            exit(1);                                                           \
        }                                                                      \
    } while (0)

/*
 * CHECK_CUFFT(call): evaluate a cuFFT call exactly once. If the returned
 * cufftResult is not CUFFT_SUCCESS, print the numeric status with the source
 * file and line to stderr, then terminate with exit(1).
 *
 * Wrapped in do { ... } while (0) so `CHECK_CUFFT(x);` is one statement and
 * is safe inside an unbraced if/else.
 */
#define CHECK_CUFFT(call)                                                      \
    do                                                                         \
    {                                                                          \
        const cufftResult check_err_ = (call);                                 \
        if (check_err_ != CUFFT_SUCCESS)                                       \
        {                                                                      \
            fprintf(stderr, "Got CUFFT error %d at %s:%d\n",                   \
                    (int)check_err_, __FILE__, __LINE__);                      \
            exit(1);                                                           \
        }                                                                      \
    } while (0)

/*
 * CHECK_CUSPARSE(call): evaluate a cuSPARSE call exactly once. If the
 * returned cusparseStatus_t is not CUSPARSE_STATUS_SUCCESS, print the numeric
 * status with the source file and line to stderr. It then queries
 * cudaGetLastError() and, if a CUDA runtime error is also pending, prints its
 * description too. Finally it terminates with exit(1).
 *
 * Wrapped in do { ... } while (0) so `CHECK_CUSPARSE(x);` is one statement
 * and is safe inside an unbraced if/else.
 */
#define CHECK_CUSPARSE(call)                                                   \
    do                                                                         \
    {                                                                          \
        const cusparseStatus_t check_err_ = (call);                            \
        if (check_err_ != CUSPARSE_STATUS_SUCCESS)                             \
        {                                                                      \
            fprintf(stderr, "Got error %d at %s:%d\n", (int)check_err_,        \
                    __FILE__, __LINE__);                                       \
            const cudaError_t check_cuda_err_ = cudaGetLastError();            \
            if (check_cuda_err_ != cudaSuccess)                                \
            {                                                                  \
                fprintf(stderr, "  CUDA error \"%s\" also detected\n",         \
                        cudaGetErrorString(check_cuda_err_));                  \
            }                                                                  \
            exit(1);                                                           \
        }                                                                      \
    } while (0)

/*
 * Return the current wall-clock time in seconds, as reported by
 * gettimeofday(), with the microsecond field folded into the fraction.
 * Intended for coarse host-side timing by subtracting two readings.
 *
 * Returns 0.0 if gettimeofday() reports failure, rather than reading an
 * uninitialised timeval.
 */
inline double seconds()
{
    struct timeval tp;

    /* The timezone argument is obsolete; POSIX callers pass a null pointer. */
    if (gettimeofday(&tp, NULL) != 0)
    {
        return 0.0;
    }

    /* tv_usec counts microseconds, so scale by 1e-6 before adding. */
    return (double)tp.tv_sec + (double)tp.tv_usec * 1.e-6;
}

#endif // _COMMON_H
  • hello.cu

    #include "../common/common.h"
    #include <stdio.h>
    
    /*
     * A simple introduction to programming in CUDA. This program prints a
     * "Hello World from GPU" line from each of the 20 CUDA threads it launches
     * on the GPU.
     */
    
    /*
     * Kernel: every launched thread prints one line containing its block index
     * and its thread index within the block (x, y and z components of each).
     * Takes no arguments and does not depend on the launch configuration.
     *
     * Device-side printf is meant for debugging: output is collected in a
     * device-side buffer and displayed by the host at synchronization points
     * such as cudaDeviceSynchronize(), so the host must synchronize before
     * it exits or the lines may never appear.
     */
    __global__ void helloFromGPU()
    {
        // The blockIdx / threadIdx components are unsigned int, hence %u.
        printf("Hello World from GPU block(%u, %u, %u) thread (%u, %u, %u)!\n",
               blockIdx.x, blockIdx.y, blockIdx.z,
               threadIdx.x, threadIdx.y, threadIdx.z);
    }
    
    /*
     * Host entry point: prints a greeting from the CPU, launches helloFromGPU
     * on one block of 20 threads, and waits for the kernel to finish.
     *
     * argc/argv are accepted but unused. Returns 0 on success; CHECK terminates
     * the process with exit(1) if any CUDA call reports an error.
     */
    int main(int argc, char **argv)
    {
        printf("Hello World from CPU!\n");

        // grid(1,1,1): a single block; 20 threads in that block (20,1,1)
        helloFromGPU<<<1, 20>>>();

        // A kernel launch has no return value, so query the launch status
        // explicitly to catch an invalid configuration.
        CHECK(cudaGetLastError());

        // Block until the kernel has finished; errors raised while it ran are
        // reported here.
        CHECK(cudaDeviceSynchronize());
        return 0;
    }
  • Makefile

# Build every CUDA sample listed in APPS with nvcc.
# NVCC and NVCCFLAGS can be overridden on the command line, for example:
#   make NVCCFLAGS="-O2 -arch=sm_75"
APPS = hello
NVCC ?= nvcc
NVCCFLAGS ?= -O2 -arch=sm_60

# all and clean are commands, not files to be built.
.PHONY: all clean

all: $(APPS)

# Pattern rule: build <name> from <name>.cu.
# Recipe lines must begin with a TAB character, not spaces.
%: %.cu
	$(NVCC) $(NVCCFLAGS) -o $@ $<

clean:
	rm -f $(APPS)

3. QA:

  • 核函数是在GPU设备上执行的,为什么能执行printf函数,其实现原理是什么样的?

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

敩科炼技堂

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值