高斯滤波的原理这里不做解释,
有兴趣的可以看其他人的文章,讲得都很好。
下面直接上代码。
C++
//Created by pumao on 2021/5/8.
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <cmath>
using namespace std;
using namespace cv;
void gaussian(const Mat &img_in,Mat &img_out,const int &size,const float &sigma)
{
//comp array
img_out = Mat::zeros(img_in.size(),CV_8UC1);
float arr[size * size];
const auto size_2 = size >> 1;
auto getGuassionArray=[&]()
{
double sum = 0.0;
auto sigma_2 = sigma * sigma;
for(int i{}; i < size; ++i)
{
auto dx = i - size;
for(int j{}; j < size; ++j)
{
auto dy = j - size;
arr[i * size + j] = exp(-(dx*dx+dy*dy) / (sigma_2 * 2));
sum += arr[i * size + j];
}
}
for(size_t i{}; i < size; ++i)
{
for(size_t j{}; j < size; ++j)
{
arr[i * size + j] /= sum;
}
}
};
getGuassionArray();
for(size_t i{};i<size;++i)
{
for(size_t j{};j<size;++j)
{
cout << arr[i * size + j] << " ";
}
cout<<endl;
}
for(auto i{size_2}; i < img_in.rows - size_2; ++i)
{
auto out_p = &img_out.data[i * img_in.cols];
for(auto j{size_2}; j < img_in.cols - size_2; ++j)
{
float sum = 0.0;
for(int y{}; y < size; ++y)
{
auto in_p = &img_in.data[(i+y) * img_in.cols + j];
for(int x{}; x < size; ++x)
{
sum += *(in_p + x) * arr[x * size + y];
}
}
*(out_p + j)=(char)sum;
}
}
}
int main()
{
auto img =imread("../123.jpg");
Mat img_gray;
cvtColor(img, img_gray, CV_BGR2GRAY);
Mat _gaussian;
gaussian(img_gray, _gaussian,5,100);
imwrite("../gaussian.jpg",_gaussian);
}
C++多线程
//
// Created by pumao on 2021/5/8.
//
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <cmath>
#include <thread>
#include <vector>
using namespace std;
using namespace cv;
void gaussian_thread(const Mat &img_in,Mat &img_out,const int &size,const float &sigma)
{
img_out = Mat::zeros(img_in.size(),CV_8UC1);
float arr[size * size];
const auto size_2 = size >> 1;
const auto max_thread = thread::hardware_concurrency();
vector<thread> thread_bar;
const auto t_rows = img_in.rows / (max_thread);
auto getGuassionArray=[&]()
{
double sum = 0.0;
auto sigma_2 = sigma * sigma;
for(int i{}; i < size; ++i)
{
auto dx = i - size;
for(int j{}; j < size; ++j)
{
auto dy = j - size;
arr[i * size + j] = exp(-(dx*dx+dy*dy) / (sigma_2 * 2));
sum += arr[i * size + j];
}
}
for(size_t i{}; i < size; ++i)
{
for(size_t j{}; j < size; ++j)
{
arr[i * size + j] /= sum;
}
}
};
getGuassionArray();
auto compGuassion_thread = [&](const int thread_id)
{
for(auto i{ t_rows * (thread_id - 1)}; i < t_rows *thread_id; ++i)
{
auto out_p = &img_out.data[i * img_in.cols];
for(auto j{size_2}; j < img_in.cols - size_2; ++j)
{
float sum = 0.0;
for(int y{}; y < size; ++y)
{
auto in_p = &img_in.data[(i+y) * img_in.cols + j];
for(int x{}; x < size; ++x)
{
sum += *(in_p + x) * arr[x * size + y];
}
}
*(out_p + j)=(char)sum;
}
}
};
for(int thread_id = 1; thread_id <= max_thread; ++thread_id)
{
thread_bar.emplace_back(compGuassion_thread, thread_id);
}
for(auto &i : thread_bar)
i.join();
}
int main()
{
auto img =imread("../123.jpg");
Mat img_gray;
cvtColor(img, img_gray, CV_BGR2GRAY);
Mat _gaussian;
gaussian_thread(img_gray, _gaussian,7,100);
imwrite("../gaussian_thread.jpg",_gaussian);
}
CUDA
#include<opencv2/opencv.hpp>
#include<iostream>
#include<cuda_runtime.h>
#include<device_launch_parameters.h>
#include<cmath>
using namespace cv;
using namespace std;
/**
 * Convolves an 8-bit single-channel image with a size x size float kernel.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per output pixel that has a
 * complete window. Thread (x0, y0) reads the window whose top-left corner is
 * (x0, y0) and writes the pixel at the window centre (x0 + size/2,
 * y0 + size/2). Threads whose window would leave the image exit without
 * writing, so border pixels of d_img_out are not touched here.
 *
 * @param d_img_in  Device pointer to img_rows * img_cols tightly packed bytes.
 * @param d_img_out Device pointer to an output buffer of the same layout.
 * @param d_arr     Device pointer to size * size row-major kernel weights.
 * @param img_cols  Image width in pixels.
 * @param img_rows  Image height in pixels.
 * @param size      Side length of the square kernel.
 */
__global__ void gaussian_kernel(const uchar *__restrict__ d_img_in, uchar *__restrict__ d_img_out,
                                const float *__restrict__ d_arr,
                                const int img_cols, const int img_rows, const int size)
{
    const int x0 = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x);
    const int y0 = static_cast<int>(blockIdx.y * blockDim.y + threadIdx.y);
    // Signed comparison: also correct when the image is smaller than the kernel.
    if (x0 > img_cols - size || y0 > img_rows - size)
    {
        return;
    }

    float sum = 0.0f;
    for (int ky = 0; ky < size; ++ky)
    {
        const size_t row_offset = static_cast<size_t>(y0 + ky) * img_cols + x0;
        for (int kx = 0; kx < size; ++kx)
        {
            sum += d_arr[ky * size + kx] * d_img_in[row_offset + kx];
        }
    }

    const int anchor = size / 2;
    // Round to nearest and clamp to the 8-bit range.
    const int rounded = min(max(__float2int_rn(sum), 0), 255);
    d_img_out[static_cast<size_t>(y0 + anchor) * img_cols + (x0 + anchor)] = static_cast<uchar>(rounded);
}

// Throws cv::Exception (via CV_Error) describing `what` when a CUDA runtime
// call did not return cudaSuccess.
static void checkCuda(const cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        CV_Error(cv::Error::StsError, cv::format("%s: %s", what, cudaGetErrorString(status)));
    }
}

/**
 * Gaussian blur of an 8-bit single-channel image on the GPU.
 *
 * @param img_in     Source image; must be CV_8UC1.
 * @param img_out    Receives a CV_8UC1 image of the same size. Pixels whose
 *                   window would leave the image are zero.
 * @param size       Side length of the square kernel (must be > 0).
 * @param sigma      Standard deviation of the Gaussian (must be > 0).
 * @param block_size Edge length of the square thread block;
 *                   block_size * block_size must not exceed 1024.
 *
 * Throws cv::Exception when an argument check or a CUDA runtime call fails.
 */
void gaussian_cuda(const Mat &img_in, Mat &img_out, const int &size, const float &sigma, int block_size = 16)
{
    CV_Assert(img_in.type() == CV_8UC1);
    CV_Assert(size > 0 && sigma > 0);
    CV_Assert(block_size > 0 && block_size * block_size <= 1024);

    // The device buffer is tightly packed, so work on a continuous copy when
    // the input has row padding (e.g. an ROI view).
    const Mat src = img_in.isContinuous() ? img_in : img_in.clone();
    img_out = Mat::zeros(src.size(), CV_8UC1);

    // Number of pixels per axis that have a complete window.
    const int valid_cols = src.cols - size + 1;
    const int valid_rows = src.rows - size + 1;
    if (valid_cols <= 0 || valid_rows <= 0)
    {
        return; // empty image or image smaller than the kernel
    }

    // Host-side kernel weights; offsets are measured from the kernel centre
    // so the weights are symmetric.
    Mat weights(size, size, CV_32FC1);
    float *arr = weights.ptr<float>();
    const float center = (size - 1) / 2.0f;
    const float denom = 2.0f * sigma * sigma;
    double total = 0.0;
    for (int i = 0; i < size; ++i)
    {
        const float dy = i - center;
        for (int j = 0; j < size; ++j)
        {
            const float dx = j - center;
            const float w = std::exp(-(dx * dx + dy * dy) / denom);
            arr[i * size + j] = w;
            total += w;
        }
    }
    for (int k = 0; k < size * size; ++k)
    {
        arr[k] = static_cast<float>(arr[k] / total);
    }

    const size_t img_bytes = static_cast<size_t>(src.cols) * src.rows * sizeof(uchar);
    const size_t arr_bytes = static_cast<size_t>(size) * size * sizeof(float);

    float *d_arr = nullptr; // TODO: move the weights to shared/constant memory
    uchar *d_img_in = nullptr;
    uchar *d_img_out = nullptr;
    // cudaFree(nullptr) is a no-op, so this is safe after a partial allocation.
    auto release = [&]()
    {
        cudaFree(d_arr);
        cudaFree(d_img_in);
        cudaFree(d_img_out);
    };

    try
    {
        checkCuda(cudaMalloc(&d_arr, arr_bytes), "cudaMalloc(d_arr)");
        checkCuda(cudaMalloc(&d_img_in, img_bytes), "cudaMalloc(d_img_in)");
        checkCuda(cudaMalloc(&d_img_out, img_bytes), "cudaMalloc(d_img_out)");
        checkCuda(cudaMemcpy(d_arr, arr, arr_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_arr)");
        checkCuda(cudaMemcpy(d_img_in, src.data, img_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_img_in)");
        // The kernel never writes border pixels; clear the buffer so they are
        // copied back as zero rather than as uninitialised device memory.
        checkCuda(cudaMemset(d_img_out, 0, img_bytes), "cudaMemset(d_img_out)");

        const dim3 block(static_cast<unsigned>(block_size), static_cast<unsigned>(block_size));
        const dim3 grid(static_cast<unsigned>((valid_cols + block_size - 1) / block_size),
                        static_cast<unsigned>((valid_rows + block_size - 1) / block_size));
        gaussian_kernel<<<grid, block>>>(d_img_in, d_img_out, d_arr, src.cols, src.rows, size);
        // A kernel launch returns no status itself; fetch launch errors here.
        checkCuda(cudaGetLastError(), "gaussian_kernel launch");

        // Blocking copy; also surfaces errors raised while the kernel ran.
        checkCuda(cudaMemcpy(img_out.data, d_img_out, img_bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(img_out)");
    }
    catch (...)
    {
        release();
        throw;
    }
    release();
}
// Loads ../123.jpg as grayscale, blurs it on the GPU with a 7x7 kernel
// (sigma = 100) and writes the result to ../gaussian_cuda.jpg.
// Returns 0 on success and 1 when the image cannot be read or written.
int main()
{
    const auto img = imread("../123.jpg", IMREAD_GRAYSCALE);
    if (img.empty())
    {
        // imread signals failure by returning an empty Mat.
        cerr << "failed to read ../123.jpg" << endl;
        return 1;
    }
    Mat gaussian;
    gaussian_cuda(img, gaussian, 7, 100);
    if (!imwrite("../gaussian_cuda.jpg", gaussian))
    {
        cerr << "failed to write ../gaussian_cuda.jpg" << endl;
        return 1;
    }
    return 0;
}