第一个CUDA代码

爱吃橙子的哈士奇

已于 2023-09-04 10:47:10 修改

阅读量115

点赞数

文章标签： opencv 人工智能 计算机视觉

于 2023-07-04 10:35:18 首次发布

本文链接：https://blog.csdn.net/qq_34176467/article/details/131529702

版权

继配置完CUDA环境后，开始写CUDA代码，此处记录一下。
查看显卡的详细信息：
cd C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.0\extras\demo_suite
deviceQuery
调用CUDA脚本：（原文此处为一张图片）

首先推荐看一篇博客：CUDA编程：与Opencv的结合
.cu

#include <iostream>
#include <opencv.hpp>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
/**
 * Range-threshold kernel for a single-channel 8-bit image.
 *
 * Each thread handles one pixel: the output is 255 when the input value lies
 * in the inclusive range [threadDown, threadUP], and 0 otherwise.
 *
 * Launch layout: a 2D grid of 2D blocks where x maps to columns and y maps to
 * rows. The grid must cover at least imageWidth x imageHeight threads; surplus
 * threads at the right/bottom edges exit through the bounds guard. No shared
 * memory is used.
 *
 * @param d_in        input pixels, indexed as row * imageWidth + column
 * @param d_out       output pixels, same indexing as d_in
 * @param imageHeight number of rows
 * @param imageWidth  number of columns (also used as the row stride)
 * @param threadDown  inclusive lower bound of the accepted range
 * @param threadUP    inclusive upper bound of the accepted range
 */
__global__ void THRESH_BINARY_CUDA(unsigned char* d_in, unsigned char* d_out, int imageHeight, int imageWidth, int threadDown,int threadUP)
{
	const int xIndex = threadIdx.x + blockIdx.x * blockDim.x;
	const int yIndex = threadIdx.y + blockIdx.y * blockDim.y;
	// Threads that fall outside the image (grid rounded up to block size) do nothing.
	if (xIndex >= imageWidth || yIndex >= imageHeight)
	{
		return;
	}
	// Compute the flat offset in size_t so row * width cannot overflow int on large images.
	const size_t index = static_cast<size_t>(yIndex) * imageWidth + xIndex;
	// Read the pixel once instead of issuing two global loads.
	const int pixel = d_in[index];
	d_out[index] = (pixel >= threadDown && pixel <= threadUP) ? 255 : 0;
}
/**
 * Host-side launcher for THRESH_BINARY_CUDA.
 *
 * Launches one thread per pixel using 16x16 blocks and blocks the calling
 * host thread until the kernel has finished. Launch and execution errors are
 * reported on std::cerr (the function returns void, so they cannot be
 * returned to the caller).
 *
 * @param d_in        input pixels; passed straight to the kernel, so it must
 *                    be addressable from device code
 * @param d_out       output pixels; same requirement as d_in
 * @param imageHeight number of rows
 * @param imageWidth  number of columns
 * @param threadDown  inclusive lower bound of the accepted range
 * @param threadUP    inclusive upper bound of the accepted range
 */
extern "C" void Binary_Image_CUDA(unsigned char* d_in, unsigned char* d_out, int imageHeight, int imageWidth, int threadDown, int threadUP)
{
	// An empty image would give a zero-sized grid, which is an invalid launch
	// configuration; there is nothing to compute, so return early.
	if (imageHeight <= 0 || imageWidth <= 0)
	{
		return;
	}

	const dim3 block(16, 16);
	// Ceil-divide so partial tiles at the right/bottom edges are still covered.
	const dim3 grid((imageWidth + block.x - 1) / block.x, (imageHeight + block.y - 1) / block.y);
	THRESH_BINARY_CUDA<<<grid, block>>>(d_in, d_out, imageHeight, imageWidth, threadDown, threadUP);

	// Launch-configuration errors are only visible through cudaGetLastError().
	cudaError_t status = cudaGetLastError();
	if (status != cudaSuccess)
	{
		std::cerr << "Binary_Image_CUDA: kernel launch failed: " << cudaGetErrorString(status) << std::endl;
		return;
	}

	// cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize();
	// it also surfaces faults raised while the kernel was executing.
	status = cudaDeviceSynchronize();
	if (status != cudaSuccess)
	{
		std::cerr << "Binary_Image_CUDA: kernel execution failed: " << cudaGetErrorString(status) << std::endl;
	}
}

.cpp

#include <iostream>
#include <chrono>
#include <opencv2/opencv.hpp>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
using namespace cv;
using namespace std;
// Forward declaration of the kernel launcher implemented in the .cu file.
// It is declared extern "C" to match the C-linkage definition there.
// d_in and d_out are forwarded to the kernel, so they must be device-accessible
// buffers holding imageHeight * imageWidth bytes each. Pixels whose value lies
// in the inclusive range [threadDown, threadUP] become 255; all others become 0.
extern "C" void Binary_Image_CUDA(unsigned char* d_in, unsigned char* d_out, int imageHeight, int imageWidth, int threadDown, int threadUP);
int main() 
{
	cv::Mat img = cv::imread("05.png", 0);
	if (img.empty())
	{
		return 0;
	}
	int imgHeight = img.rows;
	int imgWidth = img.cols;
	int length = imgHeight * imgWidth;
	unsigned char* d_in;
	unsigned char* d_out;
	cv::Mat dstImg(imgHeight, imgWidth, CV_8UC1, cv::Scalar(60));
	//======================创建gpu内存=================
	cudaMalloc((void**)&d_in, imgHeight * imgWidth * sizeof(unsigned char));
	cudaMalloc((void**)&d_out, imgHeight * imgWidth * sizeof(unsigned char));
	//==================host内存到device设备内存进行拷贝===========
	cudaMemcpy(d_in, img.data, imgHeight * imgWidth * sizeof(unsigned char), cudaMemcpyHostToDevice);
	//=================创建线程格和线程块==============
	auto starttime = std::chrono::system_clock::now();
	Binary_Image_CUDA(d_in, d_out, imgHeight, imgWidth, 0,60);
	std::chrono::duration<double> diff = std::chrono::system_clock::now() - starttime;
	cout << "CUDA耗时：" << diff.count()*1000 << "ms" << endl;
	cudaMemcpy(dstImg.data, d_out, imgHeight * imgWidth * sizeof(unsigned char), cudaMemcpyDeviceToHost);

	//====================释放GPU内存================
	cudaFree(d_in);
	cudaFree(d_out);
	cv::namedWindow("cuda", cv::WINDOW_NORMAL);
	cv::imshow("cuda", dstImg);
	cv::waitKey(0);
	return 0;
}