【CUDA】灰度图像处理——使用CUDA来加速图像的灰度化处理

最新推荐文章于 2024-09-22 17:02:28 发布

水瓶子里不装水

最新推荐文章于 2024-09-22 17:02:28 发布

阅读量759

点赞数 15

文章标签：图像处理人工智能深度学习

本文链接：https://blog.csdn.net/m0_74152439/article/details/141025102

版权

在RGB彩色图像中，一种颜色由R（红色）、G（绿色）、B（蓝色）三原色按比例混合而成。图像的基本单元是像素，一个像素需要3个分量来表示，分别代表R、G、B；如果用8位表示一个分量，就用0-255来区分该原色的不同亮度。

灰度图像用不同深浅的灰色来表示每个图像点，比如用8位的0-255数字表示“灰色”程度。每个像素点只需要一个灰度值，8位即可，这样一个3x3的灰度图只需要9个byte就能保存。RGB值到灰度的转换，实际上是人眼对彩色的感觉到亮度感觉的转换，这是一个心理学问题，有一个经验公式：

Grey = 0.299*R + 0.587*G + 0.114*B

根据这个公式，依次读取每个像素点的R，G，B值，进行计算灰度值（转换为整型数），将灰度值赋值给新图像的相应位置，所有像素点遍历一遍后完成转换。

函数定义与具体实现

//graygpu.cu

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include "graygpu.h"

using namespace cv;
using namespace std;

#define THREAD_NUM 16

// Kernel: converts a packed 3-channel 8-bit image to 8-bit grayscale, one thread per pixel.
//
// Launch layout: 2D grid of 2D blocks; x covers columns, y covers rows. Threads that
// fall outside the image (grid tail) do nothing.
//
//   dataIn    - device pointer to imgHeight * imgWidth tightly packed pixels. The pixels
//               come from cv::imread, which stores channels in B,G,R order, so
//               .x = blue, .y = green, .z = red.
//   dataOut   - device pointer to imgHeight * imgWidth output bytes.
//   imgHeight - number of rows.
//   imgWidth  - number of columns.
__global__ void rgb2grayInCuda(uchar3* dataIn, unsigned char* dataOut, int imgHeight, int imgWidth)
{
    int xIndex = threadIdx.x + blockIdx.x * blockDim.x;
    int yIndex = threadIdx.y + blockIdx.y * blockDim.y;
    if (xIndex < imgWidth && yIndex < imgHeight)
    {
        // Compute the flat index in size_t so very large images cannot overflow int.
        const size_t pixelIndex = static_cast<size_t>(yIndex) * imgWidth + xIndex;
        const uchar3 bgr = dataIn[pixelIndex];

        // Grey = 0.299*R + 0.587*G + 0.114*B, with R taken from .z and B from .x
        // because of the B,G,R channel order. Adding 0.5f rounds to nearest instead
        // of truncating, so float rounding error cannot push a value down by one.
        dataOut[pixelIndex] = static_cast<unsigned char>(
            0.299f * bgr.z + 0.587f * bgr.y + 0.114f * bgr.x + 0.5f);
    }
}

// Host entry point: loads an image, converts it to grayscale on the GPU and writes
// the result next to the input.
//
//   inputfilename - path of the image to read. The output path is this string with
//                   "gray.png" appended (e.g. "a.png" -> "a.pnggray.png").
//
// Returns 0 on success and -1 on any failure (null path, image could not be loaded,
// any CUDA call or the kernel failed, or the output image could not be written).
extern "C" int CUDAfunc(const char* inputfilename) {
    if (inputfilename == nullptr) {
        return -1;
    }

    // Load the image; imread returns an empty Mat when the file is missing or unreadable.
    Mat srcImg = imread(inputfilename, IMREAD_COLOR);
    if (srcImg.empty() || srcImg.type() != CV_8UC3) {
        return -1;
    }
    // The device buffer is a dense pixel array, so the host rows must be contiguous too.
    if (!srcImg.isContinuous()) {
        srcImg = srcImg.clone();
    }

    const int imgHeight = srcImg.rows;
    const int imgWidth = srcImg.cols;

    // Do the size arithmetic in size_t so large images cannot overflow int.
    const size_t pixelCount = static_cast<size_t>(imgHeight) * static_cast<size_t>(imgWidth);
    const size_t inBytes = pixelCount * sizeof(uchar3);
    const size_t outBytes = pixelCount * sizeof(unsigned char);

    Mat grayImg(imgHeight, imgWidth, CV_8UC1, Scalar(0)); // host-side output image

    // Device buffers; initialised to nullptr so the cleanup below is always safe.
    uchar3* d_in = nullptr;
    unsigned char* d_out = nullptr;

    // Each step runs only if every previous step succeeded.
    cudaError_t status = cudaMalloc((void**)&d_in, inBytes);
    if (status == cudaSuccess) {
        status = cudaMalloc((void**)&d_out, outBytes);
    }
    if (status == cudaSuccess) {
        // Upload the source pixels.
        status = cudaMemcpy(d_in, srcImg.data, inBytes, cudaMemcpyHostToDevice);
    }
    if (status == cudaSuccess) {
        // One thread per pixel; ceil-divide so the grid covers the whole image.
        dim3 threadsPerBlock(THREAD_NUM, THREAD_NUM);
        dim3 blocksPerGrid((imgWidth + THREAD_NUM - 1) / THREAD_NUM, (imgHeight + THREAD_NUM - 1) / THREAD_NUM);

        rgb2grayInCuda<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out, imgHeight, imgWidth);
        // A kernel launch returns nothing; launch-configuration errors show up here.
        status = cudaGetLastError();
    }
    if (status == cudaSuccess) {
        // Errors raised while the kernel executes show up at the next synchronisation.
        status = cudaDeviceSynchronize();
    }
    if (status == cudaSuccess) {
        // Download the grayscale result.
        status = cudaMemcpy(grayImg.data, d_out, outBytes, cudaMemcpyDeviceToHost);
    }

    // Release device memory on every path; cudaFree(nullptr) is a no-op.
    cudaFree(d_in);
    cudaFree(d_out);

    if (status != cudaSuccess) {
        return -1;
    }

    // Write the grayscale image.
    string outputFilename = string(inputfilename) + "gray.png";
    if (!imwrite(outputFilename, grayImg)) {
        return -1;
    }

    return 0;
}

CUDA核函数 (`rgb2grayInCuda`)

这个函数是在GPU上并行执行的核心部分。核函数接收一个彩色图像的输入数据 dataIn（每个像素由三个通道的 uchar3 表示），并输出一个灰度图像 dataOut（每个像素为一个 unsigned char）。

核函数中，每个线程处理一个像素，计算该像素的灰度值。它使用了线性加权的方法，这是一种标准的将RGB值转换为灰度值的方法。也就是上面我们说的Grey = 0.299*R + 0.587*G + 0.114*B这个公式。

// Kernel: converts a packed 3-channel 8-bit image to 8-bit grayscale, one thread per pixel.
//
// Launch layout: 2D grid of 2D blocks; x covers columns, y covers rows. Threads that
// fall outside the image (grid tail) do nothing.
//
//   dataIn    - device pointer to imgHeight * imgWidth tightly packed pixels. The pixels
//               come from cv::imread, which stores channels in B,G,R order, so
//               .x = blue, .y = green, .z = red.
//   dataOut   - device pointer to imgHeight * imgWidth output bytes.
//   imgHeight - number of rows.
//   imgWidth  - number of columns.
__global__ void rgb2grayInCuda(uchar3* dataIn, unsigned char* dataOut, int imgHeight, int imgWidth)
{
    int xIndex = threadIdx.x + blockIdx.x * blockDim.x;
    int yIndex = threadIdx.y + blockIdx.y * blockDim.y;
    if (xIndex < imgWidth && yIndex < imgHeight)
    {
        // Compute the flat index in size_t so very large images cannot overflow int.
        const size_t pixelIndex = static_cast<size_t>(yIndex) * imgWidth + xIndex;
        const uchar3 bgr = dataIn[pixelIndex];

        // Grey = 0.299*R + 0.587*G + 0.114*B, with R taken from .z and B from .x
        // because of the B,G,R channel order. Adding 0.5f rounds to nearest instead
        // of truncating, so float rounding error cannot push a value down by one.
        dataOut[pixelIndex] = static_cast<unsigned char>(
            0.299f * bgr.z + 0.587f * bgr.y + 0.114f * bgr.x + 0.5f);
    }
}

但是由于CUDA中的计算是以浮点进行的，而结果需要存储在无符号字符（unsigned char）中，这可能导致精度损失。因此需要将浮点结果显式转换为 unsigned char 类型：

// Excerpt from the kernel body. `bgr` is the uchar3 pixel loaded from dataIn; cv::imread
// stores channels in B,G,R order, so .z is red and .x is blue. Adding 0.5f rounds to
// nearest before the cast, which would otherwise truncate.
dataOut[yIndex * imgWidth + xIndex] = static_cast<unsigned char>(
    0.299f * bgr.z + 0.587f * bgr.y + 0.114f * bgr.x + 0.5f
);

CUDA接口函数 (`CUDAfunc`)

这个函数作为CUDA处理的接口，其主要工作是：

使用OpenCV的 imread 函数加载输入图片。
分配CUDA内存来存储输入图像和输出图像。
将输入图像数据从主机内存复制到GPU内存。
配置CUDA核函数的执行参数（线程块大小和网格大小）。
调用核函数 rgb2grayInCuda 来执行实际的灰度化处理。
将处理后的灰度图像数据从GPU内存复制回主机内存。
释放CUDA内存。
使用OpenCV的 imwrite 函数将灰度图像保存到磁盘。

// Host entry point: loads an image, converts it to grayscale on the GPU and writes
// the result next to the input.
//
//   inputfilename - path of the image to read. The output path is this string with
//                   "gray.png" appended (e.g. "a.png" -> "a.pnggray.png").
//
// Returns 0 on success and -1 on any failure (null path, image could not be loaded,
// any CUDA call or the kernel failed, or the output image could not be written).
extern "C" int CUDAfunc(const char* inputfilename) {
    if (inputfilename == nullptr) {
        return -1;
    }

    // Load the image; imread returns an empty Mat when the file is missing or unreadable.
    Mat srcImg = imread(inputfilename, IMREAD_COLOR);
    if (srcImg.empty() || srcImg.type() != CV_8UC3) {
        return -1;
    }
    // The device buffer is a dense pixel array, so the host rows must be contiguous too.
    if (!srcImg.isContinuous()) {
        srcImg = srcImg.clone();
    }

    const int imgHeight = srcImg.rows;
    const int imgWidth = srcImg.cols;

    // Do the size arithmetic in size_t so large images cannot overflow int.
    const size_t pixelCount = static_cast<size_t>(imgHeight) * static_cast<size_t>(imgWidth);
    const size_t inBytes = pixelCount * sizeof(uchar3);
    const size_t outBytes = pixelCount * sizeof(unsigned char);

    Mat grayImg(imgHeight, imgWidth, CV_8UC1, Scalar(0)); // host-side output image

    // Device buffers; initialised to nullptr so the cleanup below is always safe.
    uchar3* d_in = nullptr;
    unsigned char* d_out = nullptr;

    // Each step runs only if every previous step succeeded.
    cudaError_t status = cudaMalloc((void**)&d_in, inBytes);
    if (status == cudaSuccess) {
        status = cudaMalloc((void**)&d_out, outBytes);
    }
    if (status == cudaSuccess) {
        // Upload the source pixels.
        status = cudaMemcpy(d_in, srcImg.data, inBytes, cudaMemcpyHostToDevice);
    }
    if (status == cudaSuccess) {
        // One thread per pixel; ceil-divide so the grid covers the whole image.
        dim3 threadsPerBlock(THREAD_NUM, THREAD_NUM);
        dim3 blocksPerGrid((imgWidth + THREAD_NUM - 1) / THREAD_NUM, (imgHeight + THREAD_NUM - 1) / THREAD_NUM);

        rgb2grayInCuda<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out, imgHeight, imgWidth);
        // A kernel launch returns nothing; launch-configuration errors show up here.
        status = cudaGetLastError();
    }
    if (status == cudaSuccess) {
        // Errors raised while the kernel executes show up at the next synchronisation.
        status = cudaDeviceSynchronize();
    }
    if (status == cudaSuccess) {
        // Download the grayscale result.
        status = cudaMemcpy(grayImg.data, d_out, outBytes, cudaMemcpyDeviceToHost);
    }

    // Release device memory on every path; cudaFree(nullptr) is a no-op.
    cudaFree(d_in);
    cudaFree(d_out);

    if (status != cudaSuccess) {
        return -1;
    }

    // Write the grayscale image.
    string outputFilename = string(inputfilename) + "gray.png";
    if (!imwrite(outputFilename, grayImg)) {
        return -1;
    }

    return 0;
}

头文件对核函数以及接口函数的声明

声明了 CUDAfunc 函数，它接受一个 const char* 类型的参数，这个参数是输入图像文件的路径。int 返回类型意味着函数将返回一个整数，通常用来表示函数调用是否成功。
声明了 __global__ 函数 rgb2grayInCuda，这是一个CUDA核函数，它在GPU上并行执行。它接受一个 uchar3* 类型的输入图像数据，一个 unsigned char* 类型的输出图像数据，以及图像的高度和宽度。__global__ 关键字告诉CUDA编译器，这个函数将在GPU上执行，但可以从CPU代码中调用。

graygpu.h

#ifndef GRAYGPU_H
#define GRAYGPU_H

/*
 * Public interface of the GPU grayscale converter.
 *
 * The declarations use C linkage so that the entry point can be linked from C as
 * well as C++ translation units.
 */
#ifdef __cplusplus
extern "C" {
#endif

/*
 * Converts the image at `inputfilename` to grayscale on the GPU and writes the result
 * to disk. Returns an int status; the caller in main.cu treats any non-zero value as
 * failure.
 */
int CUDAfunc(const char* inputfilename);

/*
 * The kernel declaration uses CUDA-only syntax (__global__, uchar3), so it is exposed
 * only when the including file is compiled by nvcc. Plain C/C++ compilers skip it and
 * can still include this header for CUDAfunc.
 *
 * One thread handles one pixel: dataIn holds imgHeight * imgWidth packed 3-byte pixels,
 * dataOut receives imgHeight * imgWidth gray bytes.
 */
#ifdef __CUDACC__
__global__ void rgb2grayInCuda(uchar3* dataIn, unsigned char* dataOut, int imgHeight, int imgWidth);
#endif

#ifdef __cplusplus
}
#endif

#endif // GRAYGPU_H

其中#ifdef __cplusplus 和 extern "C" 是用来处理C++和C编译器之间的差异。当我们在C++代码中包含这个头文件时，__cplusplus 宏会被定义，因此 extern "C" 会被使用。extern "C" 告诉C++编译器这些函数应该使用C语言的链接方式。这是为了防止C++编译器改变函数名（即名称修饰），确保如果其他C语言代码需要链接这些函数时，能够正确找到它。

主函数

主函数是程序的入口点。它定义了要处理的图像的文件路径，并调用 CUDAfunc 来处理该图像。如果 CUDAfunc 返回非零值，表示处理失败，主函数将打印错误消息。

main.cu

#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include "graygpu.h"

using namespace std;

// Program entry point: converts one image to grayscale via CUDAfunc.
//
// Usage: <program> [image-path]
//   image-path - optional; when omitted, the built-in default path below is used.
//
// Returns 0 on success and -1 when CUDAfunc reports a failure (non-zero result).
int main(int argc, char* argv[]) {
    // Default image to process; a path given on the command line takes precedence.
    string inputfilename = "C:\\Users\\Admin\\Desktop\\zhangshuhan\\LearnCUDA\\CUDAtest3\\Picture\\wuhu.png";
    if (argc > 1) {
        inputfilename = argv[1];
    }

    cout << "Waiting: " << inputfilename << endl;
    if (CUDAfunc(inputfilename.c_str()) != 0) {
        cerr << "CUDA processing failed." << endl;
        return -1;
    }

    return 0;
}

CMake 配置文件

这段代码是一个 CMake 配置文件，用于设置和构建一个使用CUDA和OpenCV的C++项目。CMake是一个跨平台的自动化构建系统，它使用CMakeLists.txt文件来配置编译和链接参数。

CMakeLists.txt

# Minimum CMake version
cmake_minimum_required(VERSION 3.10)

# Project name and version
project(MyCUDAProject VERSION 1.0)

# Directory that receives the built executable. The default is the author's
# machine-specific location; override it with -DEXECUTABLE_OUTPUT_PATH=<dir>.
if(NOT DEFINED EXECUTABLE_OUTPUT_PATH)
    set(EXECUTABLE_OUTPUT_PATH "C:\\Users\\Admin\\Desktop\\zhangshuhan\\LearnCUDA\\CUDAtest3\\graygpu\\bin")
endif()

# Require C++11
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED True)

# Locate the CUDA toolkit.
# NOTE: the FindCUDA module is deprecated in favour of CMake's first-class CUDA
# language support; it is kept here because the cuda_add_executable() call below
# depends on it.
find_package(CUDA REQUIRED)

# Locate OpenCV. The default search directory is machine-specific; override it with
# -DOpenCV_DIR=<dir containing OpenCVConfig.cmake>.
if(NOT DEFINED OpenCV_DIR)
    set(OpenCV_DIR "E:/OpenCV-3.4/opencv/build/x64/vc14/lib")
endif()
find_package(OpenCV REQUIRED)

# Tell nvcc to skip its host-compiler version check, so the build proceeds even when
# the installed host compiler is not one this CUDA toolkit officially supports.
list(APPEND CUDA_NVCC_FLAGS -allow-unsupported-compiler)

# Header search paths: CUDA, the project's own include/ directory, and OpenCV
include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/include ${OpenCV_INCLUDE_DIRS} )


# Print the include paths that were found
message(STATUS "CUDA_INCLUDE_DIRS: ${CUDA_INCLUDE_DIRS}")
message(STATUS "OpenCV_INCLUDE_DIRS: ${OpenCV_INCLUDE_DIRS}")

# CUDA source files
set(CUDA_SOURCES "${CMAKE_SOURCE_DIR}/src/main.cu" "${CMAKE_SOURCE_DIR}/src/graygpu.cu")

# Optional CUDA library target (disabled)
# cuda_add_library(graycpu "src/graycpu.cu")

# Build the CUDA sources into an executable
cuda_add_executable(MyCUDAExecutable ${CUDA_SOURCES})

# Link against the CUDA and OpenCV libraries
target_link_libraries(MyCUDAExecutable ${CUDA_LIBRARIES} ${OpenCV_LIBS} ${CUDA_cudart_LIBRARY})

# Print the CUDA libraries that were found
message(STATUS "CUDA_LIBRARIES: ${CUDA_LIBRARIES}")

这个CMake配置文件为使用CUDA和OpenCV的项目提供了一个基本的构建环境。通过运行CMake，它将自动检测环境、查找必要的依赖，并设置编译器和链接器参数，以便构建项目。