这里记录了在同一个项目中，如何在 .cpp 文件中调用 .cu 文件里实现的函数（通过头文件声明主机端的包装函数）。
cpp文件 和 cu 文件的相互引用
utils.hpp
#ifndef __UTILS_HPP__
#define __UTILS_HPP__
#include <cuda_runtime.h>
#include <stdio.h>
#include <stdlib.h>
#include <system_error>
// CUDA error checks are conventionally written as a macro so that
// __FILE__ and __LINE__ expand at the call site.
//
// CUDA_CHECK(call) evaluates `call` exactly once. If the result is not
// cudaSuccess, it prints the file, line, numeric error code and the
// cudaGetErrorString() text to stderr, then terminates with exit(1).
//
// The body is wrapped in do { ... } while (0) so the macro behaves like a
// single statement: `if (x) CUDA_CHECK(a); else ...` compiles as expected.
// Always follow the macro with a semicolon.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        const cudaError_t cuda_check_err_ = (call);                        \
        if (cuda_check_err_ != cudaSuccess) {                              \
            fprintf(stderr, "ERROR: %s:%d, ", __FILE__, __LINE__);         \
            fprintf(stderr, "CODE:%d, DETAIL:%s\n",                        \
                    static_cast<int>(cuda_check_err_),                     \
                    cudaGetErrorString(cuda_check_err_));                  \
            exit(1);                                                       \
        }                                                                  \
    } while (0)
#endif //__UTILS_HPP__
print_index.h
#ifndef __PRINT_INDEX_HPP
#define __PRINT_INDEX_HPP
#include <cuda_runtime.h>
// Host-side launchers implemented in print_index.cu. They are ordinary host
// functions, so a .cpp file can call them after including this header.
// Each one launches the matching print kernel with the given launch
// configuration (grid first, block second) and then synchronizes the device.

// Prints the 3-D block index and 3-D thread index of every thread.
void print_idx_device(dim3 grid, dim3 block);
// Prints the grid dimensions and block dimensions from every thread.
void print_dim_device(dim3 grid, dim3 block);
// Prints the 3-D block index and the flattened thread index within its block.
void print_thread_idx_per_block_device(dim3 grid, dim3 block);
// Prints the flattened block index, the flattened thread index within the
// block, and the flattened thread index across the whole grid.
void print_thread_idx_device(dim3 grid, dim3 block);
#endif //__PRINT_INDEX_HPP
print_index.cu
#include <cuda_runtime.h>
#include "utils.hpp"
#include <stdio.h>
// Every thread prints its own block index and thread index.
// Components are printed in (z, y, x) order.
__global__ void print_idx_kernel() {
    const unsigned int bz = blockIdx.z;
    const unsigned int by = blockIdx.y;
    const unsigned int bx = blockIdx.x;
    const unsigned int tz = threadIdx.z;
    const unsigned int ty = threadIdx.y;
    const unsigned int tx = threadIdx.x;

    printf("block idx: (%3d, %3d, %3d), thread idx: (%3d, %3d, %3d)\n",
           bz, by, bx,
           tz, ty, tx);
}
// Every thread prints the launch configuration it sees: the grid dimensions
// followed by the block dimensions, each in (z, y, x) order.
__global__ void print_dim_kernel() {
    const unsigned int gz = gridDim.z;
    const unsigned int gy = gridDim.y;
    const unsigned int gx = gridDim.x;
    const unsigned int bz = blockDim.z;
    const unsigned int by = blockDim.y;
    const unsigned int bx = blockDim.x;

    printf("grid dimension: (%3d, %3d, %3d), thread dimension: (%3d, %3d, %3d)\n",
           gz, gy, gx,
           bz, by, bx);
}
// Every thread prints its block index in (z, y, x) order together with its
// flattened index inside the block (x varies fastest, then y, then z).
__global__ void print_thread_idx_per_block_kernel() {
    // Nested form of z * (dimX * dimY) + y * dimX + x.
    const int localTid =
        (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;

    printf("block idx: (%3d, %3d, %3d), thread idx: %3d\n",
           blockIdx.z, blockIdx.y, blockIdx.x,
           localTid);
}
// Every thread prints three flattened indices: its block's index within the
// grid, its own index within the block, and its index across the whole grid.
// In each flattening x varies fastest, then y, then z.
__global__ void print_thread_idx_kernel() {
    // Number of threads in one block.
    const int threadsPerBlock = blockDim.x * blockDim.y * blockDim.z;

    // Nested form of z * (dimX * dimY) + y * dimX + x for the block and the thread.
    const int blockLinear =
        (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
    const int threadLinear =
        (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;

    const int globalLinear = blockLinear * threadsPerBlock + threadLinear;

    printf("block idx: %3d, thread idx in block: %3d, thread idx: %3d\n",
           blockLinear, threadLinear, globalLinear);
}
// Launches print_idx_kernel with the given configuration and waits for it.
//   grid:  number of blocks in each dimension.
//   block: number of threads per block in each dimension.
// On any CUDA error CUDA_CHECK prints a diagnostic and exits the process.
void print_idx_device(dim3 grid, dim3 block) {
    print_idx_kernel<<<grid, block>>>();
    // A kernel launch returns nothing; a rejected launch (for example an
    // invalid configuration) is only visible through cudaGetLastError().
    CUDA_CHECK(cudaGetLastError());
    // Wait for the kernel to finish and report errors raised while it ran.
    CUDA_CHECK(cudaDeviceSynchronize());
}
// Launches print_dim_kernel with the given configuration and waits for it.
//   grid:  number of blocks in each dimension.
//   block: number of threads per block in each dimension.
// On any CUDA error CUDA_CHECK prints a diagnostic and exits the process.
void print_dim_device(dim3 grid, dim3 block) {
    print_dim_kernel<<<grid, block>>>();
    // A kernel launch returns nothing; a rejected launch (for example an
    // invalid configuration) is only visible through cudaGetLastError().
    CUDA_CHECK(cudaGetLastError());
    // Wait for the kernel to finish and report errors raised while it ran.
    CUDA_CHECK(cudaDeviceSynchronize());
}
// Launches print_thread_idx_per_block_kernel with the given configuration
// and waits for it.
//   grid:  number of blocks in each dimension.
//   block: number of threads per block in each dimension.
// On any CUDA error CUDA_CHECK prints a diagnostic and exits the process.
void print_thread_idx_per_block_device(dim3 grid, dim3 block) {
    print_thread_idx_per_block_kernel<<<grid, block>>>();
    // A kernel launch returns nothing; a rejected launch (for example an
    // invalid configuration) is only visible through cudaGetLastError().
    CUDA_CHECK(cudaGetLastError());
    // Wait for the kernel to finish and report errors raised while it ran.
    CUDA_CHECK(cudaDeviceSynchronize());
}
// Launches print_thread_idx_kernel with the given configuration and waits
// for it.
//   grid:  number of blocks in each dimension.
//   block: number of threads per block in each dimension.
// On any CUDA error CUDA_CHECK prints a diagnostic and exits the process.
void print_thread_idx_device(dim3 grid, dim3 block) {
    print_thread_idx_kernel<<<grid, block>>>();
    // A kernel launch returns nothing; a rejected launch (for example an
    // invalid configuration) is only visible through cudaGetLastError().
    CUDA_CHECK(cudaGetLastError());
    // Wait for the kernel to finish and report errors raised while it ran.
    CUDA_CHECK(cudaDeviceSynchronize());
}
main.cpp
#include <stdio.h>
#include <cuda_runtime.h>
#include "print_index.h"
// Runs the thread-index print demo on a one-dimensional launch.
//   inputSize: total number of threads wanted along x.
//   blockSize: number of threads per block along x.
// The block count is inputSize / blockSize with truncating division, so a
// remainder is not covered by any block.
void print_one_dim(int inputSize, int blockSize) {
    // Avoid dividing by zero and launching an empty grid.
    if (blockSize <= 0 || inputSize < blockSize) {
        fprintf(stderr, "print_one_dim: invalid sizes (inputSize=%d, blockSize=%d)\n",
                inputSize, blockSize);
        return;
    }

    int gridSize = inputSize / blockSize;
    dim3 block(blockSize);
    dim3 grid(gridSize);

    // The launchers are declared as (dim3 grid, dim3 block): grid goes first.
    // print_idx_device(grid, block);
    // print_dim_device(grid, block);
    // print_thread_idx_per_block_device(grid, block);
    print_thread_idx_device(grid, block);
}
// Runs the thread-index print demo on a two-dimensional (square) launch.
//   inputSize: number of threads wanted along each of x and y.
//   blockSize: number of threads per block along each of x and y, so one
//              block holds blockSize * blockSize threads.
// The per-axis block count is inputSize / blockSize with truncating
// division, so a remainder is not covered by any block.
void print_two_dim(int inputSize, int blockSize) {
    // Avoid dividing by zero and launching an empty grid.
    if (blockSize <= 0 || inputSize < blockSize) {
        fprintf(stderr, "print_two_dim: invalid sizes (inputSize=%d, blockSize=%d)\n",
                inputSize, blockSize);
        return;
    }

    int gridSize = inputSize / blockSize;
    dim3 block(blockSize, blockSize);
    dim3 grid(gridSize, gridSize);

    // The launchers are declared as (dim3 grid, dim3 block): grid goes first.
    // print_idx_device(grid, block);
    // print_dim_device(grid, block);
    // print_thread_idx_per_block_device(grid, block);
    print_thread_idx_device(grid, block);
}
// Entry point of the demo: runs the two-dimensional print test with an
// 8-wide input and 4-wide blocks. The one-dimensional test is kept below,
// commented out, for manual switching.
int main() {
    /* one-dimension test */
    // const int inputSize = 32;
    // const int blockSize = 4;
    // print_one_dim(inputSize, blockSize);

    /* two-dimension test */
    const int inputSize = 8;
    const int blockSize = 4;
    print_two_dim(inputSize, blockSize);

    return 0;
}