cuda实践之一（hello world）

最新推荐文章于 2024-07-26 20:25:06 发布

敩科炼技堂

最新推荐文章于 2024-07-26 20:25:06 发布

阅读量318

点赞数 1

分类专栏：异构计算之cuda实践篇　文章标签：c++、机器学习、深度学习

本文链接：https://blog.csdn.net/johnpub/article/details/126752596

版权

异构计算之cuda实践篇专栏收录该内容

1 篇文章 0 订阅

订阅专栏

1.实践点

cuda核函数调用方式：kernel_name<<<grid, block>>>(args)，其中 grid 指定网格中线程块的数量，block 指定每个线程块中线程的数量

Grid和Block的内部三维索引

blockIdx.x, blockIdx.y, blockIdx.z; 
threadIdx.x, threadIdx.y, threadIdx.z

核函数的定义：使用 __global__ 限定符声明，返回类型必须为 void，例如 __global__ void kernel_name(args) { ... }

2. 代码

common.h

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#ifndef _COMMON_H
#define _COMMON_H

/*
 * CHECK(call): evaluate a CUDA runtime API call and abort on failure.
 *
 * `call` is any expression yielding a cudaError_t. If the result is not
 * cudaSuccess, the call site (__FILE__/__LINE__), the numeric error code and
 * the text returned by cudaGetErrorString() are written to stderr and the
 * process exits with status 1.
 *
 * The body is wrapped in do { ... } while (0) so that `CHECK(x);` is exactly
 * one statement and is safe inside an unbraced if/else; it must be followed
 * by a semicolon. The argument is parenthesized and evaluated exactly once.
 * The local uses a trailing-underscore name so it does not capture a caller
 * variable called `error`.
 */
#define CHECK(call)                                                            \
    do                                                                         \
    {                                                                          \
        const cudaError_t check_err_ = (call);                                 \
        if (check_err_ != cudaSuccess)                                         \
        {                                                                      \
            fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);             \
            fprintf(stderr, "code: %d, reason: %s\n", (int)check_err_,         \
                    cudaGetErrorString(check_err_));                           \
            exit(1);                                                           \
        }                                                                      \
    } while (0)

/*
 * CHECK_CUBLAS(call): evaluate a cuBLAS call and abort on failure.
 *
 * `call` is any expression yielding a cublasStatus_t. If the result is not
 * CUBLAS_STATUS_SUCCESS, the numeric status and the call site are written to
 * stderr and the process exits with status 1.
 *
 * Wrapped in do { ... } while (0) so the macro expands to a single statement
 * (safe in an unbraced if/else); it must be followed by a semicolon. The
 * argument is evaluated exactly once.
 */
#define CHECK_CUBLAS(call)                                                     \
    do                                                                         \
    {                                                                          \
        const cublasStatus_t cublas_err_ = (call);                             \
        if (cublas_err_ != CUBLAS_STATUS_SUCCESS)                              \
        {                                                                      \
            fprintf(stderr, "Got CUBLAS error %d at %s:%d\n",                  \
                    (int)cublas_err_, __FILE__, __LINE__);                     \
            exit(1);                                                           \
        }                                                                      \
    } while (0)

/*
 * CHECK_CURAND(call): evaluate a cuRAND call and abort on failure.
 *
 * `call` is any expression yielding a curandStatus_t. If the result is not
 * CURAND_STATUS_SUCCESS, the numeric status and the call site are written to
 * stderr and the process exits with status 1.
 *
 * Wrapped in do { ... } while (0) so the macro expands to a single statement
 * (safe in an unbraced if/else); it must be followed by a semicolon. The
 * argument is evaluated exactly once.
 */
#define CHECK_CURAND(call)                                                     \
    do                                                                         \
    {                                                                          \
        const curandStatus_t curand_err_ = (call);                             \
        if (curand_err_ != CURAND_STATUS_SUCCESS)                              \
        {                                                                      \
            fprintf(stderr, "Got CURAND error %d at %s:%d\n",                  \
                    (int)curand_err_, __FILE__, __LINE__);                     \
            exit(1);                                                           \
        }                                                                      \
    } while (0)

/*
 * CHECK_CUFFT(call): evaluate a cuFFT call and abort on failure.
 *
 * `call` is any expression yielding a cufftResult. If the result is not
 * CUFFT_SUCCESS, the numeric status and the call site are written to stderr
 * and the process exits with status 1.
 *
 * Wrapped in do { ... } while (0) so the macro expands to a single statement
 * (safe in an unbraced if/else); it must be followed by a semicolon. The
 * argument is evaluated exactly once.
 */
#define CHECK_CUFFT(call)                                                      \
    do                                                                         \
    {                                                                          \
        const cufftResult cufft_err_ = (call);                                 \
        if (cufft_err_ != CUFFT_SUCCESS)                                       \
        {                                                                      \
            fprintf(stderr, "Got CUFFT error %d at %s:%d\n",                   \
                    (int)cufft_err_, __FILE__, __LINE__);                      \
            exit(1);                                                           \
        }                                                                      \
    } while (0)

/*
 * CHECK_CUSPARSE(call): evaluate a cuSPARSE call and abort on failure.
 *
 * `call` is any expression yielding a cusparseStatus_t. If the result is not
 * CUSPARSE_STATUS_SUCCESS, the numeric status and the call site are written
 * to stderr. The macro then also queries cudaGetLastError() and, if a CUDA
 * runtime error is pending, prints its description as well. The process
 * exits with status 1.
 *
 * Wrapped in do { ... } while (0) so the macro expands to a single statement
 * (safe in an unbraced if/else); it must be followed by a semicolon. The
 * argument is evaluated exactly once.
 */
#define CHECK_CUSPARSE(call)                                                   \
    do                                                                         \
    {                                                                          \
        const cusparseStatus_t cusparse_err_ = (call);                         \
        if (cusparse_err_ != CUSPARSE_STATUS_SUCCESS)                          \
        {                                                                      \
            fprintf(stderr, "Got error %d at %s:%d\n", (int)cusparse_err_,     \
                    __FILE__, __LINE__);                                       \
            const cudaError_t cuda_err_ = cudaGetLastError();                  \
            if (cuda_err_ != cudaSuccess)                                      \
            {                                                                  \
                fprintf(stderr, "  CUDA error \"%s\" also detected\n",         \
                        cudaGetErrorString(cuda_err_));                        \
            }                                                                  \
            exit(1);                                                           \
        }                                                                      \
    } while (0)

/*
 * Return the current wall-clock time in seconds as a double.
 *
 * The value is tv_sec + tv_usec * 1e-6 as reported by gettimeofday(), so it
 * carries microsecond granularity. Intended for coarse host-side timing by
 * subtracting two readings.
 */
inline double seconds()
{
    /* Zero-initialized so the result is well defined even if the call fails. */
    struct timeval tp = {0, 0};

    /* The timezone argument is obsolete; callers are expected to pass NULL. */
    gettimeofday(&tp, NULL);

    return ((double)tp.tv_sec + (double)tp.tv_usec * 1.e-6);
}

#endif // _COMMON_H

hello.cu

#include "../common/common.h"
#include <stdio.h>

/*
 * A simple introduction to programming in CUDA. This program prints a
 * "Hello World from GPU" line from each of 20 CUDA threads running on the GPU.
 */

/*
 * Kernel: every launched thread prints one line containing its 3-D block
 * index and its 3-D thread index within the block.
 *
 * Launch layout: any grid/block shape; takes no arguments and uses no shared
 * memory.
 *
 * Device-side printf is provided by the CUDA runtime: output is staged in a
 * device buffer and emitted on the host at synchronization points, so the
 * host must synchronize (e.g. cudaDeviceSynchronize()) before exiting or the
 * output may be lost. It is slow and meant for debugging only.
 */
__global__ void helloFromGPU()
{
    // blockIdx/threadIdx components are unsigned, so they are printed with %u.
    printf("Hello World from GPU block(%u, %u, %u) thread (%u, %u, %u)!\n",
           blockIdx.x, blockIdx.y, blockIdx.z,
           threadIdx.x, threadIdx.y, threadIdx.z);
}

/*
 * Host entry point: prints a greeting from the CPU, launches helloFromGPU on
 * a single block of 20 threads, and waits for the kernel to finish.
 *
 * Returns 0 on success. Any CUDA error aborts the process through CHECK.
 */
int main(int argc, char **argv)
{
    // Command-line arguments are not used by this sample.
    (void)argc;
    (void)argv;

    printf("Hello World from CPU!\n");

    // grid (1,1,1): one block; block (20,1,1): 20 threads in that block.
    helloFromGPU<<<1, 20>>>();

    // A kernel launch returns no status itself; an invalid launch
    // configuration is only reported through cudaGetLastError().
    CHECK(cudaGetLastError());

    // Wait for the kernel to complete. This also surfaces errors raised
    // during execution and lets the device-side printf output reach the host.
    CHECK(cudaDeviceSynchronize());
    return 0;
}

Makefile

# Build every CUDA sample listed in APPS with nvcc.
# The compiler, target architecture and flags can be overridden on the
# command line, e.g. `make CUDA_ARCH=sm_75`.
APPS=hello
NVCC ?= nvcc
CUDA_ARCH ?= sm_60
NVCCFLAGS ?= -O2 -arch=$(CUDA_ARCH)

# all/clean name actions, not files; keep them working even if a file with
# one of those names exists in the directory.
.PHONY: all clean

all: ${APPS}

# Pattern rule: build executable X from X.cu.
# Recipe lines must begin with a TAB character, not spaces.
%: %.cu
	$(NVCC) $(NVCCFLAGS) -o $@ $<

clean:
	rm -f ${APPS}

3. QA：

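核函数是在GPU设备上执行的，为什么能执行printf函数，其实现原理是什么样的？
简要回答：设备端的 printf 由 CUDA 运行时提供支持。各线程把格式串和参数写入设备内存中一个固定大小的缓冲区，真正的格式化和输出由主机端在同步点（例如 cudaDeviceSynchronize()）完成。因此主机若在同步之前退出，输出可能丢失；该功能速度较慢，仅适合调试使用。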

敩科炼技堂

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录