高斯滤波 从C++多线程到CUDA

原理在这里不做解释,
有兴趣的读者可以参考其他人的文章,讲得都很好。
下面直接上代码。

C++


//Created by pumao on 2021/5/8.

#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <cmath>

using namespace std;
using namespace cv;

void gaussian(const Mat &img_in,Mat &img_out,const int &size,const float &sigma)
{
    //comp array
    img_out = Mat::zeros(img_in.size(),CV_8UC1);
    float arr[size * size];
    const auto size_2 = size >> 1;
    auto getGuassionArray=[&]()
            {
                double sum = 0.0;
                auto sigma_2 = sigma * sigma;
                for(int i{}; i < size; ++i)
                {
                    auto dx = i - size;
                    for(int j{}; j < size; ++j)
                    {
                        auto dy = j - size;
                        arr[i * size + j] = exp(-(dx*dx+dy*dy) / (sigma_2 * 2));
                        sum += arr[i * size + j];
                    }
                }
                for(size_t i{}; i < size; ++i)
                {
                    for(size_t j{}; j < size; ++j)
                    {
                        arr[i * size + j] /= sum;
                    }
                }
            };

    getGuassionArray();

    for(size_t i{};i<size;++i)
    {
        for(size_t j{};j<size;++j)
        {
        cout << arr[i * size + j] << " ";
        }
        cout<<endl;
    }

    for(auto i{size_2}; i < img_in.rows - size_2; ++i)
    {
        auto out_p = &img_out.data[i * img_in.cols];
        for(auto j{size_2}; j < img_in.cols - size_2; ++j)
        {
            float sum = 0.0;
            for(int y{}; y < size; ++y)
            {
                auto in_p = &img_in.data[(i+y) * img_in.cols + j];
                for(int x{}; x < size; ++x)
                {
                    sum += *(in_p + x) * arr[x * size + y];
                }
            }
            *(out_p + j)=(char)sum;
        }
    }
}

int main()
{
    auto img =imread("../123.jpg");
    Mat img_gray;
    cvtColor(img, img_gray, CV_BGR2GRAY);
    Mat _gaussian;
    gaussian(img_gray, _gaussian,5,100);
    imwrite("../gaussian.jpg",_gaussian);
}

C++多线程

//
// Created by pumao on 2021/5/8.
//
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <cmath>
#include <thread>
#include <vector>

using namespace std;
using namespace cv;

void gaussian_thread(const Mat &img_in,Mat &img_out,const int &size,const float &sigma)
{
    img_out = Mat::zeros(img_in.size(),CV_8UC1);
    float arr[size * size];
    const auto size_2 = size >> 1;
    const auto max_thread = thread::hardware_concurrency();
    vector<thread> thread_bar;
    const auto t_rows = img_in.rows / (max_thread);
    auto getGuassionArray=[&]()
    {
        double sum = 0.0;
        auto sigma_2 = sigma * sigma;
        for(int i{}; i < size; ++i)
        {
            auto dx = i - size;
            for(int j{}; j < size; ++j)
            {
                auto dy = j - size;
                arr[i * size + j] = exp(-(dx*dx+dy*dy) / (sigma_2 * 2));
                sum += arr[i * size + j];
            }
        }
        for(size_t i{}; i < size; ++i)
        {
            for(size_t j{}; j < size; ++j)
            {
                arr[i * size + j] /= sum;
            }
        }
    };
    getGuassionArray();

    auto compGuassion_thread = [&](const int thread_id)
    {
        for(auto i{ t_rows * (thread_id - 1)}; i < t_rows *thread_id; ++i)
        {
            auto out_p = &img_out.data[i * img_in.cols];
            for(auto j{size_2}; j < img_in.cols - size_2; ++j)
            {
                float sum = 0.0;
                for(int y{}; y < size; ++y)
                {
                    auto in_p = &img_in.data[(i+y) * img_in.cols + j];
                    for(int x{}; x < size; ++x)
                    {
                        sum += *(in_p + x) * arr[x * size + y];
                    }
                }
                *(out_p + j)=(char)sum;
            }
        }
    };
    for(int thread_id = 1; thread_id <= max_thread; ++thread_id)
    {
        thread_bar.emplace_back(compGuassion_thread, thread_id);
    }
    for(auto &i : thread_bar)
        i.join();
}

int main()
{
    auto img =imread("../123.jpg");
    Mat img_gray;
    cvtColor(img, img_gray, CV_BGR2GRAY);
    Mat _gaussian;
    gaussian_thread(img_gray, _gaussian,7,100);
    imwrite("../gaussian_thread.jpg",_gaussian);
}

CUDA

#include<opencv2/opencv.hpp>
#include<iostream>
#include<cuda_runtime.h>
#include<device_launch_parameters.h>
#include<cmath>

using namespace cv;
using namespace std;


namespace
{
// Owns one device allocation and frees it on scope exit, so early exits and
// exceptions do not leak device memory. cudaFree(nullptr) is a no-op.
struct DeviceBuffer
{
    void *ptr = nullptr;

    DeviceBuffer() = default;
    DeviceBuffer(const DeviceBuffer &) = delete;
    DeviceBuffer &operator=(const DeviceBuffer &) = delete;
    ~DeviceBuffer() { cudaFree(ptr); }
};

// Turns a failed CUDA runtime call into a cv::Exception naming the call.
void check_cuda(const cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
        CV_Error(cv::Error::StsError, cv::format("%s failed: %s", what, cudaGetErrorString(status)));
}
} // namespace

// Gaussian blur kernel: one thread per output pixel.
//
// Launch layout: 2D blocks and a 2D grid where
//   column = blockIdx.x * blockDim.x + threadIdx.x
//   row    = blockIdx.y * blockDim.y + threadIdx.y
// and the grid covers at least img_cols x img_rows threads.
//
// d_img_in  : densely packed 8-bit image (row stride == img_cols).
// d_img_out : output image with the same layout; only pixels whose whole
//             window lies inside the image are written.
// d_arr     : size x size normalised weights, row-major.
// size      : kernel side length (odd).
// Uses no shared memory.
__global__ void gaussian_kernel(const uchar *__restrict__ d_img_in, uchar *__restrict__ d_img_out,
                                const float *__restrict__ d_arr,
                                const int img_cols, const int img_rows, const int size)
{
    const int radius = size / 2;
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Skip threads outside the image and pixels whose window would leave it.
    if (col < radius || row < radius || col >= img_cols - radius || row >= img_rows - radius)
        return;

    float sum = 0.0f;
    for (int y = 0; y < size; ++y)
    {
        // size_t arithmetic keeps the flat index valid for very large images.
        const size_t row_offset = static_cast<size_t>(row - radius + y) * img_cols + (col - radius);
        for (int x = 0; x < size; ++x)
        {
            sum += d_arr[y * size + x] * d_img_in[row_offset + x];
        }
    }
    // Round to nearest and clamp to the 8-bit range.
    d_img_out[static_cast<size_t>(row) * img_cols + col] =
        static_cast<uchar>(fminf(fmaxf(sum + 0.5f, 0.0f), 255.0f));
}


// Blur a single-channel 8-bit image with a size x size Gaussian kernel on the GPU.
//
// img_in     : source image, must be CV_8UC1 (an empty image yields an empty output).
// img_out    : receives the blurred image, same size as img_in. Pixels closer
//              than size/2 to any edge are set to 0.
// size       : kernel side length, must be a positive odd number.
// sigma      : standard deviation of the Gaussian, must be > 0.
// block_size : side length of the square thread block; clamped to [1, 32]
//              because a block may hold at most 1024 threads.
//
// Throws cv::Exception when an argument is invalid or a CUDA call fails.
void gaussian_cuda(const Mat &img_in, Mat &img_out, const int &size, const float &sigma, int block_size = 16)
{
    CV_Assert(size > 0 && size % 2 == 1);
    CV_Assert(sigma > 0);
    if (img_in.empty())
    {
        img_out.release();
        return;
    }
    CV_Assert(img_in.type() == CV_8UC1);

    // The device copy assumes densely packed rows; clone ROI / padded inputs.
    const Mat src = img_in.isContinuous() ? img_in : img_in.clone();

    const size_t img_bytes = static_cast<size_t>(src.cols) * src.rows * sizeof(uchar);
    const size_t kernel_bytes = static_cast<size_t>(size) * size * sizeof(float);
    const int radius = size / 2;

    // Build the kernel centred on (radius, radius); sum in double for accuracy.
    Mat_<float> weights(size, size);
    const double two_sigma_sq = 2.0 * sigma * sigma;
    double total = 0.0;
    for (int ky = 0; ky < size; ++ky)
    {
        const int dy = ky - radius;
        for (int kx = 0; kx < size; ++kx)
        {
            const int dx = kx - radius;
            const double w = std::exp(-(dx * dx + dy * dy) / two_sigma_sq);
            weights(ky, kx) = static_cast<float>(w);
            total += w;
        }
    }
    // total >= 1 because the centre weight is exp(0), so the division is safe.
    for (int ky = 0; ky < size; ++ky)
        for (int kx = 0; kx < size; ++kx)
            weights(ky, kx) = static_cast<float>(weights(ky, kx) / total);

    // TODO: keep the weights in shared memory.
    DeviceBuffer d_arr;
    DeviceBuffer d_img_in;
    DeviceBuffer d_img_out;
    check_cuda(cudaMalloc(&d_arr.ptr, kernel_bytes), "cudaMalloc(kernel)");
    check_cuda(cudaMalloc(&d_img_in.ptr, img_bytes), "cudaMalloc(input image)");
    check_cuda(cudaMalloc(&d_img_out.ptr, img_bytes), "cudaMalloc(output image)");
    check_cuda(cudaMemcpy(d_arr.ptr, weights.ptr<float>(), kernel_bytes, cudaMemcpyHostToDevice),
               "cudaMemcpy(kernel)");
    check_cuda(cudaMemcpy(d_img_in.ptr, src.data, img_bytes, cudaMemcpyHostToDevice),
               "cudaMemcpy(input image)");
    // cudaMalloc does not initialise memory and the kernel leaves the border
    // untouched, so clear the output before it is copied back.
    check_cuda(cudaMemset(d_img_out.ptr, 0, img_bytes), "cudaMemset(output image)");

    const int side = block_size < 1 ? 1 : (block_size > 32 ? 32 : block_size);
    const dim3 block(static_cast<unsigned int>(side), static_cast<unsigned int>(side));
    const dim3 grid(static_cast<unsigned int>((src.cols + side - 1) / side),
                    static_cast<unsigned int>((src.rows + side - 1) / side));

    gaussian_kernel<<<grid, block>>>(static_cast<const uchar *>(d_img_in.ptr),
                                     static_cast<uchar *>(d_img_out.ptr),
                                     static_cast<const float *>(d_arr.ptr),
                                     src.cols, src.rows, size);
    // Launch-configuration errors are only visible through cudaGetLastError().
    check_cuda(cudaGetLastError(), "gaussian_kernel launch");

    // A freshly allocated Mat is continuous, so one flat copy is valid. The
    // blocking cudaMemcpy also surfaces errors raised while the kernel ran.
    Mat result(src.size(), CV_8UC1);
    check_cuda(cudaMemcpy(result.data, d_img_out.ptr, img_bytes, cudaMemcpyDeviceToHost),
               "cudaMemcpy(output image)");

    result.copyTo(img_out);
}

// Load ../123.jpg as grayscale, blur it on the GPU with a 7x7 Gaussian
// (sigma = 100) and write the result to ../gaussian_cuda.jpg.
// Returns 0 on success and 1 when the image cannot be read or written.
int main()
{
    const auto img = imread("../123.jpg", IMREAD_GRAYSCALE);
    if (img.empty())
    {
        // imread reports failure by returning an empty Mat rather than throwing.
        cerr << "Failed to read ../123.jpg" << endl;
        return 1;
    }
    Mat gaussian;
    gaussian_cuda(img, gaussian, 7, 100);
    if (!imwrite("../gaussian_cuda.jpg", gaussian))
    {
        cerr << "Failed to write ../gaussian_cuda.jpg" << endl;
        return 1;
    }
    return 0;
}

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值