CUDA NPP/核函数图像灰度处理

dllmayday

已于 2024-03-29 17:39:47 修改

阅读量142

点赞数 2

分类专栏： CUDA 文章标签： opencv 计算机视觉 图像处理

于 2024-03-29 17:38:56 首次发布

本文链接：https://blog.csdn.net/sinat_34665848/article/details/137148389

版权

CUDA 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

NPP

#include <iostream>
#include <opencv2/opencv.hpp>
#include <npp.h>
#include <nppi.h>

/**
 * Loads "input_image.jpg", converts it to grayscale on the GPU with NPP and
 * displays the original and the result in two OpenCV windows.
 *
 * cv::imread delivers pixels in B,G,R channel order, so the conversion uses
 * nppiColorToGray_8u_C3C1R with the luma weights listed in B,G,R order rather
 * than nppiRGBToGray_8u_C3C1R, which would apply the red weight to blue.
 *
 * Returns 0 on success, -1 if the image cannot be read or a CUDA/NPP call fails.
 */
int main() {
    cv::Mat inputImage = cv::imread("input_image.jpg", cv::IMREAD_COLOR);

    if (inputImage.empty()) {
        std::cerr << "Error: Could not read the image file." << std::endl;
        return -1;
    }

    // The flat host-to-device copy below requires tightly packed rows.
    if (!inputImage.isContinuous()) {
        inputImage = inputImage.clone();
    }

    const int width = inputImage.cols;
    const int height = inputImage.rows;
    const int channels = inputImage.channels();

    // The source holds `channels` bytes per pixel, the grayscale result one.
    const size_t srcBytes = static_cast<size_t>(width) * height * channels;
    const size_t dstBytes = static_cast<size_t>(width) * height;

    // Let cv::Mat own the host-side result so no manual delete[] is needed.
    cv::Mat outputImage(height, width, CV_8UC1);

    unsigned char *dev_inputImage = nullptr;
    unsigned char *dev_outputImage = nullptr;

    // cudaFree(nullptr) is a no-op, so this is safe on every exit path.
    auto releaseDevice = [&]() {
        cudaFree(dev_inputImage);
        cudaFree(dev_outputImage);
    };

    // Reports a failed CUDA runtime call; returns true when the call succeeded.
    auto cudaOk = [](cudaError_t status, const char *what) -> bool {
        if (status != cudaSuccess) {
            std::cerr << "CUDA error (" << what << "): "
                      << cudaGetErrorString(status) << std::endl;
            return false;
        }
        return true;
    };

    if (!cudaOk(cudaMalloc(&dev_inputImage, srcBytes), "cudaMalloc input") ||
        !cudaOk(cudaMalloc(&dev_outputImage, dstBytes), "cudaMalloc output") ||
        !cudaOk(cudaMemcpy(dev_inputImage, inputImage.data, srcBytes, cudaMemcpyHostToDevice),
                "cudaMemcpy host to device")) {
        releaseDevice();
        return -1;
    }

    // Row strides in bytes for the packed device buffers.
    const int srcStep = width * channels;
    const int dstStep = width * static_cast<int>(sizeof(unsigned char));

    // Convert to grayscale using NPP; weights are given in B, G, R order to
    // match the channel layout produced by cv::imread.
    const NppiSize roiSize = { width, height };
    const Npp32f bgrLumaWeights[3] = { 0.114f, 0.587f, 0.299f };
    const NppStatus nppStatus = nppiColorToGray_8u_C3C1R(
        dev_inputImage, srcStep, dev_outputImage, dstStep, roiSize, bgrLumaWeights);
    if (nppStatus != NPP_SUCCESS) {
        std::cerr << "NPP error (nppiColorToGray_8u_C3C1R): status "
                  << static_cast<int>(nppStatus) << std::endl;
        releaseDevice();
        return -1;
    }

    if (!cudaOk(cudaMemcpy(outputImage.data, dev_outputImage, dstBytes, cudaMemcpyDeviceToHost),
                "cudaMemcpy device to host")) {
        releaseDevice();
        return -1;
    }

    cv::imshow("Input Image", inputImage);
    cv::imshow("Output Image (Grayscale)", outputImage);
    cv::waitKey(0);

    releaseDevice();

    return 0;
}

## 核函数

#include <iostream>
#include <opencv2/opencv.hpp>
 //核函数
/**
 * Converts an interleaved 3-channel 8-bit image to single-channel grayscale.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel; threads that fall
 * outside width x height do nothing, so the grid may overshoot the image.
 *
 * @param inputImage  Device pointer to width*height*3 bytes, tightly packed,
 *                    channels in B,G,R order (the layout cv::imread produces).
 * @param outputImage Device pointer to width*height bytes receiving the result.
 *                    Must not alias inputImage.
 * @param width       Image width in pixels.
 * @param height      Image height in pixels.
 */
__global__ void grayscale(const unsigned char *__restrict__ inputImage, unsigned char *__restrict__ outputImage, int width, int height) {
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= width || y >= height) {
        return;
    }

    // Index in size_t so that 3 * pixel cannot overflow int on large images.
    const size_t pixel = static_cast<size_t>(y) * width + x;
    const unsigned char *bgr = inputImage + 3 * pixel;

    // ITU-R BT.601 luma weights, applied in B, G, R channel order.
    const float luma = 0.114f * bgr[0] + 0.587f * bgr[1] + 0.299f * bgr[2];

    // Round to nearest instead of truncating; the weights sum to 1, so the
    // rounded value stays within 0..255.
    outputImage[pixel] = static_cast<unsigned char>(luma + 0.5f);
}

/**
 * Loads "input_image.jpg", converts it to grayscale on the GPU with the
 * grayscale kernel and displays the original and the result in two OpenCV
 * windows.
 *
 * Returns 0 on success, -1 if the image cannot be read or a CUDA call or the
 * kernel launch fails.
 */
int main() {
    cv::Mat inputImage = cv::imread("input_image.jpg", cv::IMREAD_COLOR);

    if (inputImage.empty()) {
        std::cerr << "Error: Could not read the image file." << std::endl;
        return -1;
    }

    // The flat host-to-device copy below requires tightly packed rows.
    if (!inputImage.isContinuous()) {
        inputImage = inputImage.clone();
    }

    const int width = inputImage.cols;
    const int height = inputImage.rows;

    // The source holds channels() bytes per pixel, the grayscale result one.
    const size_t srcBytes = static_cast<size_t>(width) * height * inputImage.channels();
    const size_t dstBytes = static_cast<size_t>(width) * height;

    // Let cv::Mat own the host-side result so no manual delete[] is needed.
    cv::Mat outputImage(height, width, CV_8UC1);

    unsigned char *dev_inputImage = nullptr;
    unsigned char *dev_outputImage = nullptr;

    // cudaFree(nullptr) is a no-op, so this is safe on every exit path.
    auto releaseDevice = [&]() {
        cudaFree(dev_inputImage);
        cudaFree(dev_outputImage);
    };

    // Reports a failed CUDA runtime call; returns true when the call succeeded.
    auto cudaOk = [](cudaError_t status, const char *what) -> bool {
        if (status != cudaSuccess) {
            std::cerr << "CUDA error (" << what << "): "
                      << cudaGetErrorString(status) << std::endl;
            return false;
        }
        return true;
    };

    /* Allocate device memory and upload the source image. */
    if (!cudaOk(cudaMalloc(&dev_inputImage, srcBytes), "cudaMalloc input") ||
        !cudaOk(cudaMalloc(&dev_outputImage, dstBytes), "cudaMalloc output") ||
        !cudaOk(cudaMemcpy(dev_inputImage, inputImage.data, srcBytes, cudaMemcpyHostToDevice),
                "cudaMemcpy host to device")) {
        releaseDevice();
        return -1;
    }

    /* One thread per pixel; ceil-divide so partial tiles at the edges are covered. */
    const dim3 threadsPerBlock(16, 16);
    const dim3 numBlocks((width + threadsPerBlock.x - 1) / threadsPerBlock.x,
                         (height + threadsPerBlock.y - 1) / threadsPerBlock.y);
    grayscale<<<numBlocks, threadsPerBlock>>>(dev_inputImage, dev_outputImage, width, height);

    /* A kernel launch returns no status itself; query it explicitly. */
    if (!cudaOk(cudaGetLastError(), "grayscale kernel launch")) {
        releaseDevice();
        return -1;
    }

    /* Blocking copy back to the host; execution errors from the kernel surface here. */
    if (!cudaOk(cudaMemcpy(outputImage.data, dev_outputImage, dstBytes, cudaMemcpyDeviceToHost),
                "cudaMemcpy device to host")) {
        releaseDevice();
        return -1;
    }

    cv::imshow("Input Image", inputImage);
    cv::imshow("Output Image (Grayscale)", outputImage);
    cv::waitKey(0);

    /* Release device memory. */
    releaseDevice();

    return 0;
}