CUDA+OpenCV简单处理图像

最新推荐文章于 2024-07-19 16:16:34 发布

YaoJiawei329

最新推荐文章于 2024-07-19 16:16:34 发布

阅读量2.7k

点赞数 3

文章标签： opencv cuda

本文链接：https://blog.csdn.net/YaoJiawei329/article/details/108078241

版权

CUDA+OpenCV简单处理图像

本文所使用的环境：Ubuntu18.04+CUDA10.1+OpenCV3.4.11+QtCreator5
本文针对的是在OpenCV中最常用的图片类型：CV_8UC1和CV_8UC3。在cpu上，可以用cv::Mat::at或者cv::Mat::ptr或者迭代器来对图像进行逐个像素的访问和处理，但是想要移植到gpu里，又不使用cv::cuda模块，就只能用基本数据类型(比如uchar和uchar3)，关键在于指针的传递。为了照顾初学者，先不使用shared memory和cuda stream等概念，只用global memory和默认流。

查到的资料说，无论图片是什么类型的，它的指针，即cv::Mat::data，默认都是uchar* 类型(我做过测试验证过，没出毛病，应该跟数据存储方式有关)，用的时候可以用强制类型转换成需要的类型，比如uchar3* 。

先上原图

35.jpg

//这是用uchar*类型的指针进行传递    BGR变成RGB
#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/core.hpp>
#include <cuda.h>
#include <cuda_runtime.h>

using namespace std;


// Copies an interleaved 3-channel 8-bit image from d_in to d_out while
// exchanging channel 0 and channel 2 of every pixel (BGR -> RGB).
//
// d_in / d_out : buffers of width * height * 3 bytes, rows stored back to back.
// width/height : image size in pixels.
//
// Launch layout: any 2D grid of 2D blocks. Each thread starts at its global
// (x, y) position and advances by the total number of threads in that
// dimension, so the whole image is covered regardless of the launch size.
// No shared memory is used.
__global__ void deal_image1(const uchar* d_in, uchar* d_out, int width, int height)
{
    const int firstRow = blockDim.y * blockIdx.y + threadIdx.y;
    const int firstCol = blockDim.x * blockIdx.x + threadIdx.x;
    const int rowStep = gridDim.y * blockDim.y;
    const int colStep = gridDim.x * blockDim.x;

    for(int y = firstRow; y < height; y += rowStep)
    {
        for(int x = firstCol; x < width; x += colStep)
        {
            // byte offset of the first channel of pixel (y, x)
            const int base = (y * width + x) * 3;
            const uchar* src = d_in + base;
            uchar* dst = d_out + base;

            dst[0] = src[2];
            dst[1] = src[1];
            dst[2] = src[0];
        }
    }
}

// Loads ../../pictures/35.jpg, resizes it to 540x810, swaps the B and R
// channels on the GPU with deal_image1, then shows the result and writes it
// to ../../pictures/35_RGB.jpg.
//
// Returns 0 on success, 1 when the image cannot be read or a CUDA call fails.
int main()
{
    cv::Mat image1 = cv::imread("../../pictures/35.jpg");
    if(image1.empty())
    {
        // imread returns an empty Mat when the file is missing or unreadable
        cerr << "failed to load ../../pictures/35.jpg" << endl;
        return 1;
    }

    cv::Size size(540, 810);
    cv::resize(image1, image1, size); // the source picture is too large, shrink it
    int height = image1.rows;
    int width = image1.cols;
    int channel = image1.channels(); // deal_image1 assumes 3 interleaved channels
    size_t image1_size = sizeof(uchar) * height * width * channel;

    if(!image1.isContinuous())
    {
        cout << "img1 is not continuous." << endl;
        // the flat cudaMemcpy below needs rows without padding;
        // clone() produces a freshly allocated, continuous copy
        image1 = image1.clone();
    }

    // host image that receives the processed pixels, same size as image1
    cv::Mat image1_(height, width, CV_8UC3);

    uchar* d_in = NULL;
    uchar* d_out = NULL;

    // Reports a failed CUDA call and tells the caller whether to continue.
    auto cudaOk = [](cudaError_t status, const char* what) -> bool
    {
        if(status != cudaSuccess)
        {
            cerr << what << " failed: " << cudaGetErrorString(status) << endl;
            return false;
        }
        return true;
    };

    bool ok = cudaOk(cudaMalloc((void**)&d_in, image1_size), "cudaMalloc(d_in)")
           && cudaOk(cudaMalloc((void**)&d_out, image1_size), "cudaMalloc(d_out)")
           && cudaOk(cudaMemcpy(d_in, image1.data, image1_size, cudaMemcpyHostToDevice),
                     "cudaMemcpy host->device");

    if(ok)
    {
        dim3 dimGrid(8, 8, 1);
        dim3 dimBlock(32, 32, 1);

        deal_image1<<<dimGrid, dimBlock>>>(d_in, d_out, width, height);

        // a launch does not return a status itself; query it explicitly.
        // The blocking device->host copy then surfaces any execution error.
        ok = cudaOk(cudaGetLastError(), "deal_image1 launch")
          && cudaOk(cudaMemcpy(image1_.data, d_out, image1_size, cudaMemcpyDeviceToHost),
                    "cudaMemcpy device->host");
    }

    // cudaFree accepts NULL, so this is safe on every path
    cudaFree(d_in);
    cudaFree(d_out);

    if(!ok)
    {
        return 1;
    }

    cv::imshow("image1_", image1_);
    cv::imwrite("../../pictures/35_RGB.jpg",image1_);
    cv::waitKey();

    return 0;
}

效果图
在这里插入图片描述
再是用uchar3*类型指针进行传递的代码

//这是强制类型转换成uchar3*再进行传递
#include <iostream>
#include <opencv2/opencv.hpp>
#include "opencv2/core.hpp"
#include "cuda.h"
#include "cuda_runtime.h"

using namespace std;


// Copies a 3-channel 8-bit image from d_in to d_out, one uchar3 per pixel,
// exchanging the .x and .z components (BGR -> RGB).
//
// d_in / d_out : buffers of width * height uchar3 elements, rows back to back.
// width/height : image size in pixels.
//
// Launch layout: any 2D grid of 2D blocks. Each thread starts at its global
// (x, y) position and advances by the total thread count of that dimension,
// so the image is fully covered for any launch size. No shared memory.
__global__ void deal_image1(const uchar3* d_in, uchar3* d_out, int width, int height)
{
    const int firstRow = blockDim.y * blockIdx.y + threadIdx.y;
    const int firstCol = blockDim.x * blockIdx.x + threadIdx.x;
    const int rowStep = gridDim.y * blockDim.y;
    const int colStep = gridDim.x * blockDim.x;

    for(int y = firstRow; y < height; y += rowStep)
    {
        for(int x = firstCol; x < width; x += colStep)
        {
            const int idx = y * width + x;
            const uchar3& src = d_in[idx];
            uchar3& dst = d_out[idx];

            dst.x = src.z;
            dst.y = src.y;
            dst.z = src.x;
        }
    }
}

// Loads ../../pictures/35.jpg, resizes it to 540x810, swaps the B and R
// channels on the GPU with the uchar3 version of deal_image1 and shows the
// result.
//
// Returns 0 on success, 1 when the image cannot be read or a CUDA call fails.
int main()
{
    cv::Mat image1 = cv::imread("../../pictures/35.jpg");
    if(image1.empty())
    {
        // imread returns an empty Mat when the file is missing or unreadable
        cerr << "failed to load ../../pictures/35.jpg" << endl;
        return 1;
    }

    cv::Size size(540, 810);
    cv::resize(image1, image1, size);
    int height = image1.rows;
    int width = image1.cols;
    size_t image1_size = sizeof(uchar3) * height * width;

    // note: the negation must be the ASCII '!'; a full-width '！' does not compile
    if(!image1.isContinuous())
    {
        cout << "img1 is not continuous." << endl;
        // the flat cudaMemcpy below needs rows without padding;
        // clone() produces a freshly allocated, continuous copy
        image1 = image1.clone();
    }

    // host image that receives the processed pixels, same size as image1
    cv::Mat image1_(height, width, CV_8UC3);
    uchar3* d_in = NULL;
    uchar3* d_out = NULL;
    // view the host pixel bytes as packed 3-byte pixels
    uchar3* h_out = (uchar3*)image1_.data;

    // Reports a failed CUDA call and tells the caller whether to continue.
    auto cudaOk = [](cudaError_t status, const char* what) -> bool
    {
        if(status != cudaSuccess)
        {
            cerr << what << " failed: " << cudaGetErrorString(status) << endl;
            return false;
        }
        return true;
    };

    bool ok = cudaOk(cudaMalloc((void**)&d_in, image1_size), "cudaMalloc(d_in)")
           && cudaOk(cudaMalloc((void**)&d_out, image1_size), "cudaMalloc(d_out)")
           && cudaOk(cudaMemcpy(d_in, (uchar3*)image1.data, image1_size, cudaMemcpyHostToDevice),
                     "cudaMemcpy host->device");

    if(ok)
    {
        dim3 dimGrid(8, 8, 1);
        dim3 dimBlock(32, 32, 1);

        deal_image1<<<dimGrid, dimBlock>>>(d_in, d_out, width, height);

        // a launch does not return a status itself; query it explicitly.
        // The blocking device->host copy then surfaces any execution error.
        ok = cudaOk(cudaGetLastError(), "deal_image1 launch")
          && cudaOk(cudaMemcpy(h_out, d_out, image1_size, cudaMemcpyDeviceToHost),
                    "cudaMemcpy device->host");
    }

    // cudaFree accepts NULL, so this is safe on every path
    cudaFree(d_in);
    cudaFree(d_out);

    if(!ok)
    {
        return 1;
    }

    cv::imshow("image1_", image1_);
    cv::waitKey();

    return 0;
}

效果是一样的。灰度图只需要用到uchar*指针，我这里就不赘述了。最后写一个BGR换成RGB，并且把图片向左旋转90度的例子，其实都差不多，只是有点不一样而已。

//BGR转RGB，并左转90度
#include <iostream>
#include <opencv2/opencv.hpp>
#include "opencv2/core.hpp"
#include "cuda.h"
#include "cuda_runtime.h"

using namespace std;


// Swaps the .x and .z components of every pixel (BGR -> RGB) and rotates the
// image 90 degrees to the left (counter-clockwise).
//
// d_in   : input image, height rows of width uchar3 pixels, rows back to back.
// d_out  : output image, width rows of height uchar3 pixels (width * height
//          elements in total). Must not overlap d_in.
// width  : number of columns of the INPUT image.
// height : number of rows of the INPUT image.
//
// Input pixel (row, col) is written to output pixel (width - 1 - col, row).
// The "- 1" matters: without it column 0 would map to output row `width`,
// one row past the end of d_out, and output row 0 would never be written.
//
// Launch layout: any 2D grid of 2D blocks; the grid-stride loops cover the
// whole image regardless of the launch size. No shared memory is used.
__global__ void deal_image1(const uchar3* d_in, uchar3* d_out, int width, int height)
{
    for(int row = blockDim.y * blockIdx.y + threadIdx.y; row < height; row += gridDim.y * blockDim.y)
        for(int col = blockDim.x * blockIdx.x + threadIdx.x; col < width; col += gridDim.x * blockDim.x)
        {
            const uchar3 src = d_in[row * width + col];
            // output has `height` pixels per row; valid output rows are 0 .. width - 1
            const int dst = (width - 1 - col) * height + row;
            d_out[dst] = make_uchar3(src.z, src.y, src.x);
        }
}

// Loads ../../pictures/35.jpg, converts it from BGR to RGB and rotates it
// 90 degrees to the left on the GPU with deal_image1, then shows the result
// and writes it to ../../pictures/35_RGB_90.jpg.
//
// Returns 0 on success, 1 when the image cannot be read or a CUDA call fails.
int main()
{
    cv::Mat image1 = cv::imread("../../pictures/35.jpg");
    if(image1.empty())
    {
        // imread returns an empty Mat when the file is missing or unreadable
        cerr << "failed to load ../../pictures/35.jpg" << endl;
        return 1;
    }
    if(!image1.isContinuous())
    {
        // the flat cudaMemcpy below needs rows without padding;
        // clone() produces a freshly allocated, continuous copy
        image1 = image1.clone();
    }

    int width = image1.cols;
    int height = image1.rows;
    size_t size_image1 = sizeof(uchar3) * width * height;
    // rotated output: rows and columns are exchanged compared to the input
    cv::Mat image1_(width, height, CV_8UC3);

    uchar3* d_in = NULL;
    uchar3* d_out = NULL;
    // view the host pixel bytes as packed 3-byte pixels
    uchar3* h_out = (uchar3*)image1_.data;

    // Reports a failed CUDA call and tells the caller whether to continue.
    auto cudaOk = [](cudaError_t status, const char* what) -> bool
    {
        if(status != cudaSuccess)
        {
            cerr << what << " failed: " << cudaGetErrorString(status) << endl;
            return false;
        }
        return true;
    };

    bool ok = cudaOk(cudaMalloc((void**)&d_in, size_image1), "cudaMalloc(d_in)")
           && cudaOk(cudaMalloc((void**)&d_out, size_image1), "cudaMalloc(d_out)")
           && cudaOk(cudaMemcpy(d_in, (uchar3*)image1.data, size_image1, cudaMemcpyHostToDevice),
                     "cudaMemcpy host->device");

    if(ok)
    {
        dim3 dimGrid(8, 8, 1);
        dim3 dimBlock(32, 32, 1);
        deal_image1<<<dimGrid, dimBlock>>>(d_in, d_out, width, height);

        // a launch does not return a status itself; query it explicitly.
        // The blocking device->host copy then surfaces any execution error.
        ok = cudaOk(cudaGetLastError(), "deal_image1 launch")
          && cudaOk(cudaMemcpy(h_out, d_out, size_image1, cudaMemcpyDeviceToHost),
                    "cudaMemcpy device->host");
    }

    // cudaFree accepts NULL, so this is safe on every path
    cudaFree(d_in);
    cudaFree(d_out);

    if(!ok)
    {
        return 1;
    }

    cv::imshow("image1_", image1_);
    cv::imwrite("../../pictures/35_RGB_90.jpg",image1_);
    cv::waitKey();

    return 0;
}

效果图

总结一下，如果是CV_8UC3类型的图片，可以用uchar* 或者uchar3* 类型的指针进行传递；但是如果是CV_8UC1类型的图片，即单通道，只能用uchar* 类型的指针进行传递，不能用uchar3* 。另外可以看出，如果不用cv::cuda模块，自己写的代码量会很大，如果图片要进行很复杂的处理，将是个大工程！

最后的最后，附上CMakeLists.txt

# Builds the single-file CUDA + OpenCV example (main.cu) with the FindCUDA
# module's CUDA_ADD_EXECUTABLE and links it against OpenCV.
cmake_minimum_required(VERSION 3.5)

project(cuda_c_test_9 LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Locate the packages first: CUDA_INCLUDE_DIRS, OpenCV_INCLUDE_DIRS,
# OpenCV_LIBRARY_DIRS and OpenCV_LIBS are only defined after these calls.
# find_package(CUDA) already loads CMake's FindCUDA module, so no
# machine-specific include of FindCUDA.cmake is needed.
find_package(CUDA REQUIRED)
find_package(OpenCV REQUIRED)

include_directories(include
${CUDA_INCLUDE_DIRS}
${OpenCV_INCLUDE_DIRS}
)

link_directories(${OpenCV_LIBRARY_DIRS})

# Debug build of the device code. FindCUDA treats CUDA_NVCC_FLAGS as a list,
# so every flag must be its own element rather than one "-g -G" string.
list(APPEND CUDA_NVCC_FLAGS -g -G)

CUDA_ADD_EXECUTABLE(cuda_c_test_9 main.cu)

target_link_libraries(cuda_c_test_9 ${OpenCV_LIBS})

主要参考
https://blog.csdn.net/kelvin_yan/article/details/48315175
https://www.cnblogs.com/dwdxdy/p/3528711.html
https://blog.csdn.net/lingsuifenfei123/article/details/83444159

YaoJiawei329

关注

3
点赞
踩
12

收藏

觉得还不错? 一键收藏
0
评论
CUDA+OpenCV简单处理图像

CUDA+OpenCV简单处理图像本文用到的环境：Ubuntu18.04+CUDA10.1+OpenCV3.4.11+QtCreator5本文针对的是在OpenCV中最常用的图片类型：CV_8UC1和CV_8UC3。在cpu上，可以用cv::Mat::at或者cv::Mat::ptr来对图像进行逐个像素的访问和处理，但是想要移植到gpu里，又不使用cv::cuda模块，关键在于指针的传递。...
复制链接

扫一扫