CMake:构建CUDA项目_cmake包含cuda-CSDN博客

本文链接：https://blog.csdn.net/weixin_42849849/article/details/127112060

CMake构建CUDA项目

使用CMake构建CUDA项目

kernel.cuh

#pragma once

/**
 * Element-wise vector addition: c[i] = a[i] + b[i] for every i in [0, n).
 *
 * Launch layout: one-dimensional; only the .x components of threadIdx,
 * blockIdx, blockDim and gridDim are used. Each thread starts at its global
 * thread index and advances by the total number of launched threads, so any
 * grid/block size covers all n elements.
 *
 * @param n  number of elements to process; n <= 0 makes the kernel a no-op
 * @param a  first input array, at least n floats, read-only
 * @param b  second input array, at least n floats, read-only
 * @param c  output array, at least n floats
 *
 * The pointers are declared __restrict__, so the three arrays must not overlap.
 */
__global__ void
gpu_add(int n, const float * __restrict__ a, const float * __restrict__ b, float * __restrict__ c)
{
      // 64-bit index arithmetic: the 32-bit product blockIdx.x*blockDim.x can
      // wrap for very large grids, and "i += stride" on a 32-bit signed index
      // can overflow when n is close to INT_MAX.
      const long long first  = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
      const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;

      for (long long i = first; i < n; i += stride)
      {
            c[i] = a[i] + b[i];
      }
}

kernel.cu

#include "kernel.cuh"

#include <thrust/host_vector.h>
#include <thrust/device_vector.h>

#include <algorithm>
#include <iostream>


// Number of float elements in each test vector.
static int const N=4096;

/**
 * Runs one vector addition on the GPU and prints element 100 of the result.
 *
 * Builds two host vectors of N floats (all 1.0f and all 2.0f), copies them to
 * device vectors, launches gpu_add with 32 blocks of 32 threads, copies the
 * result back to the host and writes h_c[100] to std::cout.
 *
 * If the kernel launch itself is rejected, the CUDA error string is written to
 * std::cerr and the function returns without printing a result.
 */
void run_test()
{
    // Fill at construction instead of a hand-written loop; this also removes
    // the signed/unsigned comparison against size().
    thrust::host_vector<float> h_a(N, 1.0f);
    thrust::host_vector<float> h_b(N, 2.0f);

    // Host -> device copies of the inputs, plus an N-element output buffer.
    thrust::device_vector<float> d_a=h_a;
    thrust::device_vector<float> d_b=h_b;
    thrust::device_vector<float> d_c(N);

    // 32 x 32 = 1024 threads for 4096 elements; the kernel's strided loop
    // makes each thread handle several elements.
    dim3 grid(32);
    dim3 block(32);

    gpu_add<<<grid,block>>>(N, thrust::raw_pointer_cast(d_a.data()),
                               thrust::raw_pointer_cast(d_b.data()),
                               thrust::raw_pointer_cast(d_c.data()));

    // A kernel launch returns no status; launch failures are only visible
    // through cudaGetLastError().
    cudaError_t launch_status = cudaGetLastError();
    if (launch_status != cudaSuccess)
    {
        std::cerr<<"gpu_add launch failed: "<<cudaGetErrorString(launch_status)<<std::endl;
        return;
    }

    // Device -> host copy of the result.
    thrust::host_vector<float> h_c=d_c;
    std::cout<<h_c[100]<<std::endl;
}

main.cpp

#include <iostream>

// Implemented in kernel.cu; declared here so this translation unit does not
// need any CUDA headers.
void run_test();

/**
 * Program entry point: runs the GPU demo once and exits with status 0.
 * The command-line arguments are accepted but not used.
 */
int main(int argc, char **argv)
{
  (void)argc;
  (void)argv;

  run_test();

  return 0;
}

CMakeLists.txt

cmake_minimum_required (VERSION 3.18)

project(Demo LANGUAGES C CXX CUDA)

#=======================================================================
# CUDA toolkit discovery and validation of the requested architectures.

find_package(CUDAToolkit 11.0 REQUIRED)

# Compute capabilities accepted for CUDA_ARCH.
SET(CUDA_ALLOW_ARCH "35;52;60;70;80")

if(DEFINED CUDA_ARCH)
    # User passed a CUDA_ARCH so check it matches
    # Error if incorrect CUDA_ARCH passed
    FOREACH(ARCH IN LISTS CUDA_ARCH)
        message(STATUS "Checking if arch " ${ARCH} " is supported...")
        # Test by variable name so an empty list element cannot break the if().
        IF(NOT ARCH IN_LIST CUDA_ALLOW_ARCH)
            # SEND_ERROR (not STATUS): every bad entry is reported, processing
            # continues through the loop, and generation is skipped.
            message(SEND_ERROR
                "Chosen CUDA_ARCH ${ARCH} not expected for current CUDA version. "
                "Please choose one or more of ${CUDA_ALLOW_ARCH}.")
        ENDIF()
    ENDFOREACH()
ELSE()

    # Set a default
    SET(CUDA_ARCH "${CUDA_ALLOW_ARCH}" CACHE STRING "Target Architectures (SM60 SM70 SM80), multiple are allowed")

ENDIF()

# NOTE(review): nothing in this file consumes CUDA_ARCH after validation;
# presumably it is meant to feed the target's CUDA_ARCHITECTURES property.
# Confirm before wiring it in, since the default list includes 35.

#==============================================
# Optional: OpenMP. Only the detected C flags are printed.
find_package(OpenMP)
message("OpenMP FLAGS:" ${OpenMP_C_FLAGS})
#===============================================
# Optional: MPI. Only the detection result is printed.
find_package(MPI)
message("Thist is a MPI build:" ${MPI_FOUND})
#===============================================

# Single executable built from the C++ entry point and the CUDA source.
add_executable(Demo main.cpp kernel.cu kernel.cuh)
target_compile_features(Demo PUBLIC cxx_std_14)
# Compile device code as relocatable so it can be linked across CUDA sources.
set_target_properties(Demo PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
# Imported targets provided by find_package(CUDAToolkit).
target_link_libraries(Demo CUDA::cublas CUDA::cusparse CUDA::cusolver CUDA::nppc CUDA::nvToolsExt)