使用步骤
step1: 实例化类对象cudnnConvObject
step2: 调用初始化函数initParam
step3: 调用creatKernel创建卷积核
step4: 调用performConOperat方法执行卷积计算
头文件代码
#pragma once
#include <iostream>
#include <opencv.hpp>
#include <cudnn.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include<time.h>
// Wrapper around a single-channel cuDNN 2D convolution ("same" padding, stride 1).
// Usage: initParam() -> creatKernel() -> performConOperat().
class cudnnConvObject
{
public:
    cudnnConvObject() = default;
    // The object owns raw host/device buffers and cuDNN descriptors;
    // copying would double-free them in the destructor, so forbid it.
    cudnnConvObject(const cudnnConvObject&) = delete;
    cudnnConvObject& operator=(const cudnnConvObject&) = delete;
    ~cudnnConvObject();
    /*
    @brief: Initialize parameters and allocate the host/device buffers.
    @param[in] imageH      height of the input image
    @param[in] imageW      width of the input image
    @param[in] batchSize   batch size of the input tensor
    @param[in] templateH   height of the template images used to build the kernel
    @param[in] templateW   width of the template images used to build the kernel
    @param[in] templateNum number of template images; each template must be a
                           single-channel image
    @return true on success, false on any allocation/initialization failure
    */
    bool initParam(int imageH, int imageW, int batchSize, int templateH, int templateW, int templateNum);
    /*
    @brief: Create the convolution kernel and upload the template data to the device.
    @param[in] weldTeamplateList weld template images; every template must hold
                                 float (CV_32F) data
    @return true on success, false otherwise
    */
    bool creatKernel(std::vector<cv::Mat>& weldTeamplateList);
    /*
    @brief: Run the forward convolution.
    @param[in]  convImage       input image (converted to single-channel float internally)
    @param[out] convResultList  one normalized CV_32FC1 result image per output channel
    @return true on success, false otherwise
    */
    bool performConOperat(cv::Mat convImage, std::vector<cv::Mat>& convResultList);
    /*
    @brief: Accessors for the configured image size and output channel count.
    */
    int getImageH() { return m_height; }
    int getImageW() { return m_width; }
    int getOutChannels() { return m_outChannelNum; }
private:
    bool initConvParam();                           // set up cuDNN descriptors/algorithm/workspace
    // --- convolution-related state ---
    // All handles/descriptors are nullptr-initialized so the destructor is safe
    // even when the object is destroyed before initParam() succeeds.
    cudaError_t m_error = cudaSuccess;
    cudnnStatus_t m_status = CUDNN_STATUS_SUCCESS;  // last cuDNN return status
    cudnnHandle_t m_hcudnn = nullptr;               // cuDNN context handle
    cudnnTensorDescriptor_t m_tensorIn = nullptr;   // input tensor descriptor
    cudnnTensorDescriptor_t m_tensorOut = nullptr;  // output tensor descriptor
    cudnnFilterDescriptor_t m_kernel = nullptr;     // filter (kernel) descriptor
    cudnnConvolutionDescriptor_t m_conv = nullptr;  // convolution descriptor
    cudnnConvolutionFwdAlgoPerf_t m_convAlgorithm{};// chosen forward algorithm
    size_t m_workspaceSize = 0;                     // workspace size required by the algorithm
    void* m_cworkSpace = nullptr;                   // device workspace for the convolution
    // --- data buffers ---
    float* m_inHostBuffer = nullptr;                // host staging buffer for the input image
    float* m_outHostBuffer = nullptr;               // host staging buffer for the convolution result
    float* m_kernelHostBuffer = nullptr;            // host staging buffer for the kernel weights
    float* m_kernelDeviceBuffer = nullptr;          // device buffer for the kernel weights
    float* m_inDeviceBuffer = nullptr;              // device buffer for the input image
    float* m_outDeviceBuffer = nullptr;             // device buffer for the output feature maps
    int m_inChannelNum = 1;                         // input channels (equals image channels, default 1)
    int m_outChannelNum = 3;                        // output channels (equals number of weld templates)
    int m_templateH = 0;                            // weld template height
    int m_templateW = 0;                            // weld template width
    int m_batchSize = 1;                            // batch size of the input tensor, default 1
    int m_height = 5120;                            // input image height
    int m_width = 5120;                             // input image width
};
cpp文件代码
#include "cudnnConvObject.h"
// Release every resource this object may own. Each release is guarded so a
// partially initialized object (initParam failed part-way) tears down cleanly.
cudnnConvObject::~cudnnConvObject()
{
    // Host staging buffers (allocated with malloc in initParam).
    if (m_inHostBuffer != nullptr) {
        free(m_inHostBuffer);
    }
    if (m_outHostBuffer != nullptr) {
        free(m_outHostBuffer);
    }
    if (m_kernelHostBuffer != nullptr) {
        free(m_kernelHostBuffer);
    }
    // Device buffers (allocated with cudaMalloc in initParam).
    if (m_kernelDeviceBuffer != nullptr) {
        cudaFree(m_kernelDeviceBuffer);
    }
    if (m_inDeviceBuffer != nullptr) {
        cudaFree(m_inDeviceBuffer);
    }
    if (m_outDeviceBuffer != nullptr) {
        cudaFree(m_outDeviceBuffer);
    }
    // cuDNN descriptor objects and context handle.
    if (m_tensorIn != nullptr) {
        cudnnDestroyTensorDescriptor(m_tensorIn);
    }
    if (m_tensorOut != nullptr) {
        cudnnDestroyTensorDescriptor(m_tensorOut);
    }
    if (m_kernel != nullptr) {
        cudnnDestroyFilterDescriptor(m_kernel);
    }
    if (m_conv != nullptr) {
        cudnnDestroyConvolutionDescriptor(m_conv);
    }
    if (m_hcudnn != nullptr) {
        cudnnDestroy(m_hcudnn);
    }
    // Convolution workspace.
    if (m_cworkSpace != nullptr) {
        cudaFree(m_cworkSpace);
    }
}
/*
@brief: Store the configuration and allocate all host/device buffers, then
        initialize the cuDNN descriptors via initConvParam().
@return true on success; false on invalid arguments or any allocation failure.
        On failure the destructor still releases whatever was allocated.
*/
bool cudnnConvObject::initParam(int imageH, int imageW, int batchSize, int templateH, int templateW, int templateNum)
{
    // Reject non-positive dimensions up front: they would produce zero-sized
    // allocations and invalid cuDNN descriptors.
    if (imageH <= 0 || imageW <= 0 || batchSize <= 0 || templateH <= 0 || templateW <= 0 || templateNum <= 0) {
        std::cout << "Invalid initialization parameters: all dimensions must be positive!" << std::endl;
        return false;
    }
    m_outChannelNum = templateNum;   // output channels == number of weld templates
    m_inChannelNum = 1;              // input channels == image channels, fixed to 1
    m_templateH = templateH;         // weld template height
    m_templateW = templateW;         // weld template width
    m_height = imageH;               // input image height
    m_width = imageW;                // input image width
    m_batchSize = batchSize;
    // Buffer sizes. Widen to size_t before multiplying to avoid int overflow,
    // and include the batch dimension: the tensor descriptors are created with
    // N = m_batchSize, so the buffers must cover the whole batch as well.
    size_t bySizeImage = static_cast<size_t>(m_batchSize) * m_height * m_width * sizeof(float);
    size_t bySizeKernel = static_cast<size_t>(templateH) * templateW * templateNum * sizeof(float);
    m_inHostBuffer = (float*)malloc(bySizeImage);
    m_outHostBuffer = (float*)malloc(bySizeImage * m_outChannelNum);
    m_kernelHostBuffer = (float*)malloc(bySizeKernel);
    if (m_inHostBuffer == nullptr || m_outHostBuffer == nullptr || m_kernelHostBuffer == nullptr) {
        std::cout << "Failed to request Host space for the convolution buffers!" << std::endl;
        return false;
    }
    m_error = cudaMalloc((void**)&m_kernelDeviceBuffer, bySizeKernel);
    if (m_error != cudaSuccess) {
        std::cout << "Failed to request Device space for the kernels!" << std::endl;
        return false;
    }
    m_error = cudaMalloc((void**)&m_inDeviceBuffer, bySizeImage);
    if (m_error != cudaSuccess) {
        std::cout << "Failed to request Device space for the input of convolutional computation!" << std::endl;
        return false;
    }
    m_error = cudaMalloc((void**)&m_outDeviceBuffer, bySizeImage * m_outChannelNum);
    if (m_error != cudaSuccess) {
        std::cout << "Failed to request Device space for the output of convolutional computation!" << std::endl;
        return false;
    }
    // Set up the cuDNN descriptors, algorithm choice and workspace.
    if (!initConvParam()) {
        std::cout << "Failed to initialize convolutional description object!" << std::endl;
        return false;
    }
    return true;
}
/*
@brief: Create and configure the cuDNN context, tensor/filter/convolution
        descriptors, pick a forward algorithm and allocate its workspace.
        Requires the members set by initParam().
@return true on success, false on any cuDNN/CUDA failure.
*/
bool cudnnConvObject::initConvParam()
{
    // Create the cuDNN context handle.
    if (cudnnCreate(&m_hcudnn) != CUDNN_STATUS_SUCCESS) {
        return false;
    }
    // ---- input tensor: NCHW float, N = batch, C = input channels ----
    m_status = cudnnCreateTensorDescriptor(&m_tensorIn);
    if (m_status != CUDNN_STATUS_SUCCESS) {
        return false;
    }
    m_status = cudnnSetTensor4dDescriptor(m_tensorIn, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, m_batchSize, m_inChannelNum, m_height, m_width);
    if (m_status != CUDNN_STATUS_SUCCESS) {
        return false;
    }
    // ---- output tensor: same spatial size ("same" padding below), C = templates ----
    m_status = cudnnCreateTensorDescriptor(&m_tensorOut);
    if (m_status != CUDNN_STATUS_SUCCESS) {
        return false;
    }
    m_status = cudnnSetTensor4dDescriptor(m_tensorOut, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, m_batchSize, m_outChannelNum, m_height, m_width);
    if (m_status != CUDNN_STATUS_SUCCESS) {
        return false;
    }
    // ---- filter descriptor: out_channels, in_channels, kernel_h, kernel_w ----
    m_status = cudnnCreateFilterDescriptor(&m_kernel);
    if (m_status != CUDNN_STATUS_SUCCESS) {
        return false;
    }
    m_status = cudnnSetFilter4dDescriptor(m_kernel, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, m_outChannelNum, 1, m_templateH, m_templateW);
    if (m_status != CUDNN_STATUS_SUCCESS) {
        return false;
    }
    // ---- convolution descriptor ----
    m_status = cudnnCreateConvolutionDescriptor(&m_conv);
    if (m_status != CUDNN_STATUS_SUCCESS) {
        return false;
    }
    // Integer division already floors, so no std::floor is needed. With odd
    // kernel sizes this gives "same" padding, matching the output descriptor.
    int pad_height = m_templateH / 2;
    int pad_width = m_templateW / 2;
    // pad_h, pad_w, stride_v, stride_h, dilation_h, dilation_w, mode, computeType
    m_status = cudnnSetConvolution2dDescriptor(m_conv, pad_height, pad_width, 1, 1, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT);
    if (m_status != CUDNN_STATUS_SUCCESS) {
        return false;
    }
    // ---- pick a forward algorithm (best-ranked candidate) ----
    int cnt = 0;
    cudnnGetConvolutionForwardAlgorithmMaxCount(m_hcudnn, &cnt);
    int ret_cnt = 0;
    m_status = cudnnGetConvolutionForwardAlgorithm_v7(m_hcudnn, m_tensorIn, m_kernel, m_conv, m_tensorOut, 1, &ret_cnt, &m_convAlgorithm);
    if (m_status != CUDNN_STATUS_SUCCESS) {
        return false;
    }
    // Guard against an empty result or a candidate that itself reports failure:
    // m_convAlgorithm would otherwise be used uninitialized/invalid.
    if (ret_cnt < 1 || m_convAlgorithm.status != CUDNN_STATUS_SUCCESS) {
        return false;
    }
    // ---- allocate the workspace required by the chosen algorithm ----
    m_status = cudnnGetConvolutionForwardWorkspaceSize(m_hcudnn, m_tensorIn, m_kernel, m_conv, m_tensorOut, m_convAlgorithm.algo, &m_workspaceSize);
    if (m_status != CUDNN_STATUS_SUCCESS) {
        return false;
    }
    m_error = cudaMalloc(&m_cworkSpace, m_workspaceSize);
    if (m_error != cudaSuccess) {
        return false;
    }
    return true;
}
/*
@brief: Pack the weld template images into the host kernel buffer and upload
        them to the device kernel buffer.
@param[in] weldTeamplateList template images; each must be single-channel
           CV_32F with size m_templateW x m_templateH, and there must be
           exactly m_outChannelNum of them (as configured in initParam).
@return true on success, false otherwise.
*/
bool cudnnConvObject::creatKernel(std::vector<cv::Mat>& weldTeamplateList)
{
    // initParam() must have run successfully first — it allocates the buffers.
    if (m_kernelHostBuffer == nullptr || m_kernelDeviceBuffer == nullptr) {
        std::cout << "Kernel buffers are not allocated; call initParam first!" << std::endl;
        return false;
    }
    // The filter descriptor was built for m_outChannelNum templates.
    if (weldTeamplateList.size() != static_cast<size_t>(m_outChannelNum)) {
        std::cout << "The number of convolution kernel template images does not match templateNum!" << std::endl;
        return false;
    }
    for (size_t imgIndex = 0; imgIndex < weldTeamplateList.size(); imgIndex++)
    {
        const cv::Mat& temWeldTemplate = weldTeamplateList[imgIndex];
        // Enforce the documented precondition: single-channel float templates
        // of the configured size. ptr<float>() on any other type reads garbage.
        if (temWeldTemplate.type() != CV_32FC1 || temWeldTemplate.rows != m_templateH || temWeldTemplate.cols != m_templateW) {
            std::cout << "Incorrect properties of the template image used to create the convolution kernel!" << std::endl;
            return false;
        }
        // Copy row by row so non-continuous Mats (ROIs) are handled correctly.
        for (int row = 0; row < temWeldTemplate.rows; row++) {
            const float* temData = temWeldTemplate.ptr<float>(row);
            memcpy(m_kernelHostBuffer + imgIndex * m_templateH * m_templateW + row * m_templateW, temData, m_templateW * sizeof(float));
        }
    }
    // Upload the packed kernel weights to the device.
    m_error = cudaMemcpy((void*)m_kernelDeviceBuffer, (void*)m_kernelHostBuffer, static_cast<size_t>(m_outChannelNum) * m_templateH * m_templateW * sizeof(float), cudaMemcpyHostToDevice);
    if (m_error != cudaSuccess) {
        std::cout << "Failed to transfer convolutional kernel data saved on host to Device!" << std::endl;
        return false;
    }
    return true;
}
/*
@brief: Run the forward convolution on one image and return one min-max
        normalized CV_32FC1 image per output channel.
@param[in]  convImage      input image; BGR images are converted to grayscale,
                           then to float. Must match the size given to initParam.
@param[out] convResultList cleared and filled with m_outChannelNum deep-copied
                           result images (they do NOT alias internal buffers).
@return true on success, false otherwise.
*/
bool cudnnConvObject::performConOperat(cv::Mat convImage, std::vector<cv::Mat>& convResultList)
{
    // initParam()/creatKernel() must have run first.
    if (m_inHostBuffer == nullptr || m_outHostBuffer == nullptr || m_hcudnn == nullptr) {
        std::cout << "Convolution object is not initialized; call initParam first!" << std::endl;
        return false;
    }
    if (convImage.empty()) {
        std::cout << "The image fed into the convolution calculation is empty!" << std::endl;
        return false;
    }
    // Normalize the input to single-channel float (convertTo/cvtColor allocate
    // fresh data, so the caller's Mat is not modified).
    if (convImage.channels() == 3) {
        cv::cvtColor(convImage, convImage, cv::COLOR_BGR2GRAY);
    }
    convImage.convertTo(convImage, CV_32FC1);
    if (convImage.rows != m_height || convImage.cols != m_width || convImage.channels() != m_inChannelNum) {
        std::cout << "The image attributes fed into the convolution do not match the initialized image parameters!" << std::endl;
        return false;
    }
    // Stage the image into the host buffer row by row (handles ROI Mats).
    for (int row = 0; row < convImage.rows; row++) {
        const float* imgData = convImage.ptr<float>(row);
        memcpy(m_inHostBuffer + row * convImage.cols, imgData, convImage.cols * sizeof(float));
    }
    m_error = cudaMemcpy((void*)m_inDeviceBuffer, (void*)m_inHostBuffer, static_cast<size_t>(m_height) * m_width * sizeof(float), cudaMemcpyHostToDevice);
    if (m_error != cudaSuccess) {
        std::cout << "Failed to transfer image data from Host to Device!" << std::endl;
        return false;
    }
    // y = alpha * conv(x, w) + beta * y
    float alpha = 1.f;
    float beta = 0.f;
    m_status = cudnnConvolutionForward(m_hcudnn, &alpha, m_tensorIn, m_inDeviceBuffer, m_kernel, m_kernelDeviceBuffer, m_conv,
        m_convAlgorithm.algo, m_cworkSpace, m_workspaceSize, &beta, m_tensorOut, m_outDeviceBuffer);
    if (m_status != CUDNN_STATUS_SUCCESS) {
        std::cout << "Failure to perform convolutional computation. Make sure that the convolution object has been created and not released first!!" << std::endl;
        return false;
    }
    // cudaMemcpy is blocking, so the results are complete once it returns.
    m_error = cudaMemcpy((void*)m_outHostBuffer, (void*)m_outDeviceBuffer, static_cast<size_t>(m_outChannelNum) * m_height * m_width * sizeof(float), cudaMemcpyDeviceToHost);
    if (m_error != cudaSuccess) {
        std::cout << "Transferring convolution results at device to host fails!" << std::endl;
        return false;
    }
    // Wrap each channel, then clone BEFORE handing it out: a Mat constructed
    // over m_outHostBuffer does not own the data, so without the clone every
    // result would alias an internal buffer that is overwritten on the next
    // call and freed in the destructor (dangling pointers for the caller).
    convResultList.clear();
    for (int num = 0; num < m_outChannelNum; num++) {
        cv::Mat channelView(m_height, m_width, CV_32FC1, m_outHostBuffer + num * (m_height * m_width));
        cv::Mat convImg = channelView.clone();
        cv::normalize(convImg, convImg, 1., 0., cv::NORM_MINMAX);
        convResultList.emplace_back(convImg);
    }
    return true;
}