CMake构建CUDA项目
使用CMake构建CUDA项目
kernel.cuh
#pragma once
/**
 * Element-wise vector addition: c[i] = a[i] + b[i] for every i in [0, n).
 *
 * Each thread starts at its global 1-D index and advances by the total number
 * of launched threads (blockDim.x * gridDim.x), so any 1-D launch
 * configuration covers all n elements. When n <= 0 no element is touched.
 *
 * The __restrict__ qualifiers promise the compiler that a, b and c do not
 * overlap.
 *
 * NOTE(review): this kernel is defined (not only declared) in a header;
 * including the header from more than one translation unit would produce
 * duplicate definitions.
 *
 * @param n  number of elements to add
 * @param a  first input array, at least n floats readable from device code
 * @param b  second input array, at least n floats readable from device code
 * @param c  output array, at least n floats writable from device code
 */
__global__ void
gpu_add(int n, const float * __restrict__ a, const float * __restrict__ b, float * __restrict__ c)
{
    // Index arithmetic is done in 64 bits: with a plain int, `i += stride`
    // overflows (undefined behaviour) once n is within one stride of INT_MAX,
    // and the unsigned product blockIdx.x * blockDim.x can wrap on very
    // large grids.
    const long long stride = static_cast<long long>(blockDim.x) * gridDim.x;
    const long long first  = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;

    for (long long i = first; i < n; i += stride)
    {
        c[i] = a[i] + b[i];
    }
}
kernel.cu
#include "kernel.cuh"
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <algorithm>
#include <iostream>
// Number of elements in each test vector.
static int const N=4096;

/**
 * Runs the gpu_add kernel on two constant vectors and prints one result.
 *
 * Builds two host vectors of N floats (all 1.0f and all 2.0f), copies them to
 * the device, launches gpu_add with 32 blocks of 32 threads, copies the
 * result back and writes element 100 to std::cout.
 *
 * If the launch or the kernel execution reports a CUDA error, the error
 * string is written to std::cerr and nothing is printed to std::cout.
 */
void run_test()
{
    // Fill-constructed host inputs: every element of h_a is 1.0f, of h_b 2.0f.
    thrust::host_vector<float> h_a(N, 1.0f), h_b(N, 2.0f);

    // Constructing a device_vector from a host_vector copies host -> device.
    thrust::device_vector<float> d_a=h_a;
    thrust::device_vector<float> d_b=h_b;
    thrust::device_vector<float> d_c(N);

    // 32 x 32 = 1024 threads for N elements; the kernel loops over the rest.
    dim3 grid(32);
    dim3 block(32);
    gpu_add<<<grid,block>>>(N, thrust::raw_pointer_cast(d_a.data()),
                            thrust::raw_pointer_cast(d_b.data()),
                            thrust::raw_pointer_cast(d_c.data()));

    // A kernel launch has no return value: launch failures are only visible
    // through cudaGetLastError(), execution failures at the next sync.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess)
    {
        status = cudaDeviceSynchronize();
    }
    if (status != cudaSuccess)
    {
        std::cerr<<"gpu_add failed: "<<cudaGetErrorString(status)<<std::endl;
        return;
    }

    // Constructing a host_vector from a device_vector copies device -> host.
    thrust::host_vector<float> h_c=d_c;
    std::cout<<h_c[100]<<std::endl;
}
main.cpp
#include <iostream>
void run_test();
/**
 * Program entry point: runs the demo in run_test() and exits with status 0.
 *
 * The command-line arguments are accepted but not used.
 */
int main(int argc, char **argv)
{
    // Silence unused-parameter warnings; the demo takes no options.
    (void)argc;
    (void)argv;

    run_test();

    return 0;
}
CMakeLists.txt
cmake_minimum_required (VERSION 3.18)
project(Demo LANGUAGES C CXX CUDA)
#=======================================================================
# CUDA toolkit and target GPU architectures.
#
# CUDA_ARCH is a semicolon-separated list of compute capabilities written
# without the dot, e.g. -DCUDA_ARCH="70;80". When it is not given, it defaults
# to every architecture in CUDA_ALLOW_ARCH. The selection is forwarded to
# CMAKE_CUDA_ARCHITECTURES, so it takes precedence over a value passed for
# that variable directly.
find_package(CUDAToolkit 11.0 REQUIRED)

# Architectures this project expects to build for.
set(CUDA_ALLOW_ARCH "35;52;60;70;80")
# Newer toolkits can no longer compile for the oldest entries, so drop them
# from the expected (and default) list.
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0)
  list(REMOVE_ITEM CUDA_ALLOW_ARCH 35)
endif()
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 13.0)
  list(REMOVE_ITEM CUDA_ALLOW_ARCH 52 60 70)
endif()

if(DEFINED CUDA_ARCH)
  # The user (or the cache) supplied CUDA_ARCH: warn about every entry that is
  # not in the expected list. This is a warning rather than an error so that
  # valid architectures missing from the list can still be requested.
  foreach(ARCH IN LISTS CUDA_ARCH)
    message(STATUS "Checking if arch " ${ARCH} " is supported...")
    # Pass the variable name, not ${ARCH}, so if() does not evaluate it twice.
    if(NOT ARCH IN_LIST CUDA_ALLOW_ARCH)
      message(WARNING
        "Chosen CUDA_ARCH ${ARCH} not expected for current CUDA version. "
        "Please choose one or more of ${CUDA_ALLOW_ARCH}.")
    endif()
  endforeach()
else()
  # Set a default
  set(CUDA_ARCH "${CUDA_ALLOW_ARCH}" CACHE STRING "Target Architectures (SM60 SM70 SM80), multiple are allowed")
endif()

# Apply the selection: targets created after this line take their
# CUDA_ARCHITECTURES property from this variable.
set(CMAKE_CUDA_ARCHITECTURES "${CUDA_ARCH}")
#==============================================
# Optional dependency: look for OpenMP (no REQUIRED, so configuration
# continues if it is missing) and print the C compiler flags it reported.
# Nothing in the visible part of this file attaches OpenMP to a target.
find_package(OpenMP)
message("OpenMP FLAGS:" ${OpenMP_C_FLAGS})
#===============================================
# Optional dependency: look for MPI and print whether it was found.
# Nothing in the visible part of this file attaches MPI to a target.
find_package(MPI)
message("Thist is a MPI build:" ${MPI_FOUND})
#===============================================
# Demo executable: host entry point plus the CUDA translation unit. The header
# is listed as a source as well (it is not compiled on its own).
add_executable(Demo
  main.cpp
  kernel.cu
  kernel.cuh)

# Require at least C++14 for the C++ sources.
target_compile_features(Demo PUBLIC cxx_std_14)

# Compile device code as relocatable objects with a separate device-link step.
set_property(TARGET Demo PROPERTY CUDA_SEPARABLE_COMPILATION ON)

# CUDA toolkit libraries, via the imported targets of find_package(CUDAToolkit).
target_link_libraries(Demo
  CUDA::cublas
  CUDA::cusparse
  CUDA::cusolver
  CUDA::nppc
  CUDA::nvToolsExt)