CLION配置CUDA环境

最新推荐文章于 2024-06-07 18:34:39 发布

衍生动物

最新推荐文章于 2024-06-07 18:34:39 发布

阅读量2.6k

点赞数 3

分类专栏：并行计算 CUDA环境

本文链接：https://blog.csdn.net/zmh657628320/article/details/126443355

版权

并行计算同时被 2 个专栏收录

3 篇文章 0 订阅

订阅专栏

CUDA环境

2 篇文章 0 订阅

订阅专栏

花了很长的时间终于陆陆续续把CLION环境整合完毕, 踩了无数的坑，在这里记录一下

Visual Studio + CUDA + CLION + Windows

1. CUDA环境，这个比较简单，直接到英伟达官网下载对应的CUDA版本即可。目前CUDA只支持NVIDIA的显卡，如果不是NVIDIA的显卡，建议学习OpenCL这类同样用于并行计算的技术。先查看本机支持的最高CUDA版本（例如在命令行运行nvidia-smi），再去英伟达官网下载对应版本即可。CUDA的学习推荐这篇博客(https://face2ai.com/program-blog/#GPU%E7%BC%96%E7%A8%8B%EF%BC%88CUDA%EF%BC%89)
2. Visual Studio，我的电脑上装了两个版本，2017和2019。我这里出了很多错误，所以把2019卸载了。这里需要十分注意VS的环境：需要安装Win10 SDK，注意SDK的路径，还要注意MSVC版本与CUDA版本的对应关系，环境变量也需要配置。这里有个巨大的坑，就是Windows环境变量里会出现莫名其妙的双引号！首先使用Visual Studio Installer安装“使用C++的桌面开发”（Desktop development with C++）工作负载，然后设置环境变量

这里需要设置很多环境变量。首先需要在命令行执行 set path，查看本机的环境变量中是否有双引号，这一点十分重要，否则nvcc会出现编译错误。如果路径中有双引号，直接去修改环境变量、把双引号删掉即可

下面是需要设置的环境变量及其值
VS_INCLUDE_PATH

C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include
D:\Windows Kits\10\Include\10.0.17763.0\ucrt

VS_LIB_PATH
这个路径需要和CUDA的位数对应：64位对应x64，32位对应x86。这个地方没有一篇文章是说清楚的，我一开始路径选了x86，结果出现了一堆错误，一百多个编译错误，直接心态崩了

C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\lib\x64
D:\Windows Kits\10\Lib\10.0.17763.0\ucrt\x64

VS_PATH

C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\bin\Hostx64\x64

环境变量配置好以后，在Path中直接用%VS_PATH%这样的形式来引用即可；配置完以后需要重启电脑

本机环境变量
3. CLION
需要在CLION里面配置Visual Studio工具链。如果出现Visual Studio路径找不到，那多半是Visual Studio的环境变量有问题，这就回到之前我说的双引号问题。我这边CUDA是64位的，所以架构选择的是amd64（x64）。这个选项和CUDA的位数有关，不要听其他博客人云亦云，说什么一定要选某一种架构；许多百度的结果都不说明白理由，这些问题都是我去Stack Overflow上才发现的
基本上能找到就可以了
4. 愉快的开始coding
我的cmake

# Build script for the single-source CUDA device-information executable.
cmake_minimum_required(VERSION 3.17)

# Declaring CUDA as a project language makes CMake locate nvcc and compile
# .cu sources natively. The deprecated FindCUDA module (find_package(CUDA))
# is therefore not needed: none of its variables or macros are used here.
project(CUDA_project CUDA)

# Request C++14 for CUDA sources and fail at configure time if the toolchain
# cannot provide it, rather than silently falling back to an older standard.
set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

add_executable(CUDA_project information.cu)

# Header search path scoped to this target only, so it does not leak into
# other targets defined in the same directory.
target_include_directories(CUDA_project PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)

# Enable separate compilation of device code for this target.
set_target_properties(
        CUDA_project
        PROPERTIES
        CUDA_SEPARABLE_COMPILATION ON)

#include <cuda_runtime.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Reports a failed CUDA runtime call on stdout.
 *
 * status   - value returned by the runtime call.
 * callName - name of the call, used as the prefix of the diagnostic line.
 *
 * Returns 0 when status is cudaSuccess (nothing is printed), 1 otherwise.
 */
static int reportCudaFailure(cudaError_t status,const char* callName)
{
    if(status==cudaSuccess)
    {
        return 0;
    }
    printf("%s returned %d\n ->%s\n",
           callName,(int)status,cudaGetErrorString(status));
    printf("Result = FAIL\n");
    return 1;
}

/*
 * Queries CUDA device 0 and prints its properties to stdout.
 *
 * Returns EXIT_SUCCESS after printing the report, or when no CUDA device is
 * present (there is nothing to report in that case). Returns EXIT_FAILURE
 * when any CUDA runtime call fails.
 */
int main(int argc,char** argv)
{
    /* argv[0] is a null pointer when argc is 0, so never pass it to %s blindly. */
    printf("%s Starting ...\n",(argc>0 && argv[0]) ? argv[0] : "(unknown)");
    int deviceCount = 0;
    if(reportCudaFailure(cudaGetDeviceCount(&deviceCount),"cudaGetDeviceCount"))
    {
        return EXIT_FAILURE;
    }
    if(deviceCount==0)
    {
        /* Without a device the queries below would fail or print the contents
           of an unfilled property struct, so stop here. */
        printf("There are no available device(s) that support CUDA\n");
        return EXIT_SUCCESS;
    }
    printf("Detected %d CUDA Capable device(s)\n",deviceCount);

    int dev=0,driverVersion=0,runtimeVersion=0;
    cudaDeviceProp deviceProp;
    if(reportCudaFailure(cudaSetDevice(dev),"cudaSetDevice")
       || reportCudaFailure(cudaGetDeviceProperties(&deviceProp,dev),"cudaGetDeviceProperties"))
    {
        return EXIT_FAILURE;
    }
    printf("Device %d:\"%s\"\n",dev,deviceProp.name);

    if(reportCudaFailure(cudaDriverGetVersion(&driverVersion),"cudaDriverGetVersion")
       || reportCudaFailure(cudaRuntimeGetVersion(&runtimeVersion),"cudaRuntimeGetVersion"))
    {
        return EXIT_FAILURE;
    }
    /* Versions are encoded as 1000*major + 10*minor; taking the remainder
       modulo 1000 keeps two-digit minor versions intact. */
    printf("  CUDA Driver Version / Runtime Version         %d.%d  /  %d.%d\n",
           driverVersion/1000,(driverVersion%1000)/10,
           runtimeVersion/1000,(runtimeVersion%1000)/10);
    printf("  CUDA Capability Major/Minor version number:   %d.%d\n",
           deviceProp.major,deviceProp.minor);
    /* size_t fields are printed with %zu: %lu is too narrow on 64-bit Windows
       and %llu is not the type of size_t on LP64 platforms. The GByte figure
       is computed in double so large sizes are not rounded to float first. */
    printf("  Total amount of global memory:                %.2f GBytes (%zu bytes)\n",
           (double)deviceProp.totalGlobalMem/(1024.0*1024.0*1024.0),
           deviceProp.totalGlobalMem);
    /* The scale factors assume clockRate is in kHz -- confirm against the
       CUDA version in use. */
    printf("  GPU Clock rate:                               %.0f MHz (%0.2f GHz)\n",
           deviceProp.clockRate*1e-3f,deviceProp.clockRate*1e-6f);
    printf("  Memory Bus width:                             %d-bits\n",
           deviceProp.memoryBusWidth);
    if (deviceProp.l2CacheSize)
    {
        printf("  L2 Cache Size:                            	%d bytes\n",
               deviceProp.l2CacheSize);
    }
    printf("  Max Texture Dimension Size (x,y,z)            1D=(%d),2D=(%d,%d),3D=(%d,%d,%d)\n",
           deviceProp.maxTexture1D,deviceProp.maxTexture2D[0],deviceProp.maxTexture2D[1]
            ,deviceProp.maxTexture3D[0],deviceProp.maxTexture3D[1],deviceProp.maxTexture3D[2]);
    printf("  Max Layered Texture Size (dim) x layers       1D=(%d) x %d,2D=(%d,%d) x %d\n",
           deviceProp.maxTexture1DLayered[0],deviceProp.maxTexture1DLayered[1],
           deviceProp.maxTexture2DLayered[0],deviceProp.maxTexture2DLayered[1],
           deviceProp.maxTexture2DLayered[2]);
    printf("  Total amount of constant memory               %zu bytes\n",
           deviceProp.totalConstMem);
    printf("  Total amount of shared memory per block:      %zu bytes\n",
           deviceProp.sharedMemPerBlock);
    printf("  Total number of registers available per block:%d\n",
           deviceProp.regsPerBlock);
    printf("  Wrap size:                                    %d\n",deviceProp.warpSize);
    printf("  Maximun number of thread per multiprocesser:  %d\n",
           deviceProp.maxThreadsPerMultiProcessor);
    printf("  Maximun number of thread per block:           %d\n",
           deviceProp.maxThreadsPerBlock);
    printf("  Maximun size of each dimension of a block:    %d x %d x %d\n",
           deviceProp.maxThreadsDim[0],deviceProp.maxThreadsDim[1],deviceProp.maxThreadsDim[2]);
    printf("  Maximun size of each dimension of a grid:     %d x %d x %d\n",
           deviceProp.maxGridSize[0],
           deviceProp.maxGridSize[1],
           deviceProp.maxGridSize[2]);
    printf("  Maximu memory pitch                           %zu bytes\n",deviceProp.memPitch);
    printf("----------------------------------------------------------\n");
    printf("Number of multiprocessors:                      %d\n", deviceProp.multiProcessorCount);
    printf("Total amount of constant memory:                %4.2f KB\n",
           deviceProp.totalConstMem/1024.0);
    printf("Total amount of shared memory per block:        %4.2f KB\n",
           deviceProp.sharedMemPerBlock/1024.0);
    printf("Total number of registers available per block:  %d\n",
           deviceProp.regsPerBlock);
    printf("Warp size                                       %d\n", deviceProp.warpSize);
    printf("Maximum number of threads per block:            %d\n", deviceProp.maxThreadsPerBlock);
    printf("Maximum number of threads per multiprocessor:  %d\n",
           deviceProp.maxThreadsPerMultiProcessor);
    /* Use the warp size reported by the device rather than a hard-coded 32. */
    printf("Maximum number of warps per multiprocessor:     %d\n",
           deviceProp.maxThreadsPerMultiProcessor/deviceProp.warpSize);
    return EXIT_SUCCESS;

}