程序名字为main.cu
//#include "cuda_check.h"
#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/cudaarithm.hpp>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cuda.h>
#include <device_functions.h>
//================= CUDA grayscale conversion =================
// Converts an interleaved 8-bit, 3-channel image to 8-bit grayscale,
// one thread per pixel.
//
// Launch layout: 2D grid of 2D blocks; thread (x, y) handles pixel
// (column x, row y). The grid may overshoot the image; out-of-range
// threads exit without touching memory. No shared memory is used.
//
// Parameters:
//   d_in      - device buffer of imgheight * imgwidth pixels, tightly
//               packed row-major (no row padding). Channel order is
//               BGR as produced by cv::imread: x = blue, y = green,
//               z = red.
//   d_out     - device buffer of imgheight * imgwidth bytes receiving
//               the gray value of each pixel, same row-major layout.
//   imgheight - number of image rows.
//   imgwidth  - number of image columns.
__global__ void BGR2GRAYincuda(uchar3* const d_in, unsigned char* const d_out,
uint imgheight, uint imgwidth)
{
    const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int idy = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard the grid tail: the image size is rarely a multiple of the block size.
    if (idx >= imgwidth || idy >= imgheight)
    {
        return;
    }

    // Flat index computed in size_t so very large images cannot wrap 32 bits.
    const size_t pixel = static_cast<size_t>(idy) * imgwidth + idx;
    const uchar3 bgr = d_in[pixel];

    // ITU-R BT.601 luma: 0.299 R + 0.587 G + 0.114 B.
    // The input is BGR, so blue is .x and red is .z. The weights sum to 1,
    // so the result stays within [0, 255]; the cast truncates toward zero.
    d_out[pixel] = static_cast<unsigned char>(
        0.114f * bgr.x + 0.587f * bgr.y + 0.299f * bgr.z);
}
// Loads a colour image, converts it to grayscale on the GPU with
// BGR2GRAYincuda, and shows the result in a window.
//
// Usage: pro_cuda_opencv [image_path]
//   image_path - optional; when omitted the original hard-coded default
//                path is used.
// Returns 0 on success, 1 if the image cannot be read or any CUDA call fails.
int main(int argc, char** argv)
{
    const char* imagePath = (argc > 1)
        ? argv[1]
        : "H:\\opencv_project\\opencv_cuda学习\\image\\hist_02.jpg";

    cv::Mat img1 = cv::imread(imagePath);
    if (img1.empty())
    {
        std::cerr << "Failed to read image: " << imagePath << std::endl;
        return 1;
    }
    // The kernel indexes the buffer as tightly packed rows, so make sure
    // the host data has no row padding before copying it.
    if (!img1.isContinuous())
    {
        img1 = img1.clone();
    }

    const int imgHeight = img1.rows;
    const int imgWidth = img1.cols;
    const size_t pixelCount = static_cast<size_t>(imgHeight) * static_cast<size_t>(imgWidth);
    const size_t inBytes = pixelCount * sizeof(uchar3);
    const size_t outBytes = pixelCount * sizeof(unsigned char);

    // Host image that receives the result copied back from the GPU.
    cv::Mat grayImg(imgHeight, imgWidth, CV_8UC1, cv::Scalar(255));

    // Reports a failed CUDA call on stderr; returns true when the call succeeded.
    auto cudaOk = [](cudaError_t status, const char* what) -> bool {
        if (status == cudaSuccess)
        {
            return true;
        }
        std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
        return false;
    };

    // Device buffers; initialised to nullptr so the cudaFree calls below
    // are safe no-ops when an allocation failed.
    uchar3* d_in1 = nullptr;
    unsigned char* d_out = nullptr;

    // Allocate and upload. && short-circuits, so nothing runs after a failure.
    bool ok = cudaOk(cudaMalloc((void**)&d_in1, inBytes), "cudaMalloc(d_in1)")
        && cudaOk(cudaMalloc((void**)&d_out, outBytes), "cudaMalloc(d_out)")
        && cudaOk(cudaMemcpy(d_in1, img1.data, inBytes, cudaMemcpyHostToDevice),
                  "cudaMemcpy(host -> device)");

    if (ok)
    {
        // 2D grid of 16x16 blocks, rounded up so the whole image is covered.
        dim3 block2(16, 16);
        dim3 grid2((imgWidth + block2.x - 1) / block2.x, (imgHeight + block2.y - 1) / block2.y);

        BGR2GRAYincuda<<<grid2, block2>>>(d_in1, d_out, imgHeight, imgWidth);

        // cudaGetLastError reports launch-configuration errors; the blocking
        // copy that follows waits for the kernel and reports execution errors.
        ok = cudaOk(cudaGetLastError(), "BGR2GRAYincuda launch")
            && cudaOk(cudaMemcpy(grayImg.data, d_out, outBytes, cudaMemcpyDeviceToHost),
                      "cudaMemcpy(device -> host)");
    }

    // Release GPU memory on both the success and the failure path.
    cudaFree(d_in1);
    cudaFree(d_out);

    if (!ok)
    {
        return 1;
    }

    cv::namedWindow("mm", cv::WINDOW_NORMAL);
    cv::imshow("mm", grayImg);
    cv::waitKey(0);
    return 0;
}
工程名为pro_cuda_opencv，对应的CMakeLists.txt内容如下
# Minimum CMake version required
cmake_minimum_required(VERSION 3.3)

# Default to g++ unless the user chose a compiler (e.g. -DCMAKE_CXX_COMPILER=...).
# The compiler must be selected before project(), which is where CMake
# detects and tests it.
if(NOT DEFINED CMAKE_CXX_COMPILER)
  set(CMAKE_CXX_COMPILER g++)
endif()

# CMakeLists.txt for the pro_cuda_opencv project
project(pro_cuda_opencv)

# Append instead of overwrite so flags supplied by the user or a toolchain survive.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")

# Packages
find_package(CUDA REQUIRED)

# Location of the OpenCV build tree; override with -DOpenCV_DIR=<path>.
if(NOT DEFINED OpenCV_DIR)
  set(OpenCV_DIR /home/opencv-4.1.2/build)
endif()
find_package(OpenCV REQUIRED)
message(STATUS "OpenCV_LIBS: ${OpenCV_LIBS}")

# Make the OpenCV headers visible to the nvcc-compiled sources as well.
include_directories(${OpenCV_INCLUDE_DIRS})

# Link against the shared CUDA runtime.
set(CUDA_USE_STATIC_CUDA_RUNTIME OFF)

# nvcc flags: target sm_52; -G/-g enable device/host debug information
# (note that -G disables device-code optimisation).
set(CUDA_NVCC_FLAGS -gencode arch=compute_52,code=sm_52;-G;-g)

CUDA_ADD_EXECUTABLE(pro_cuda_opencv main.cu)

# Plain (keyword-less) signature on purpose: CUDA_ADD_EXECUTABLE links the
# CUDA libraries with the plain signature, and CMake forbids mixing the two.
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS})
参考文章
- https://blog.csdn.net/zhangdaoliang1/article/details/122170669
- https://www.freesion.com/article/6226191106/