CUDA编程-立体匹配算法

最新推荐文章于 2024-08-23 10:01:00 发布

??.?ieC

最新推荐文章于 2024-08-23 10:01:00 发布

阅读量606

点赞数

文章标签：算法

本文链接：https://blog.csdn.net/qq_46054965/article/details/125000957

版权

#include "opencv2/opencv.hpp"
#include "cuda_runtime.h"
using namespace cv;
using namespace std;
// Number of disparity candidates evaluated: main() builds one cost slice per
// disparity in [0, dpMax) and kernel_wta picks the cheapest slice per pixel.
const int dpMax = 20;
// Mean absolute difference between two pixels.
//   channels : number of consecutive uchar samples that make up one pixel
//   left     : pointer to the first sample of the left pixel
//   right    : pointer to the first sample of the right pixel
// Returns the per-channel absolute differences summed and divided (integer
// division) by `channels`.
__device__ int ad(int channels, uchar *left, uchar *right) {
   int total = 0;
   int c = 0;
   while (c < channels) {
       // uchar operands are promoted to int, so the difference may be negative.
       int diff = left[c] - right[c];
       total += (diff < 0) ? -diff : diff;
       ++c;
   }
   return total / channels;
}
// Computes the absolute-difference matching cost for one disparity `d`.
// Expects a 2D launch in which each thread handles pixel (x, y); the grid may
// overhang the image, surplus threads exit immediately.
//   d        : disparity; left pixel (x, y) is compared with right pixel (x - d, y)
//   rows     : image height in pixels
//   cols     : image width in pixels
//   channels : interleaved samples per pixel in `left` / `right`
//   left     : left image, rows*cols*channels bytes
//   right    : right image, rows*cols*channels bytes
//   out      : single-channel cost image, rows*cols bytes
__global__ void kernel_ad(int d, int rows, int cols, int channels, uchar *left, uchar *right, uchar *out) {
   int x = threadIdx.x + blockIdx.x*blockDim.x;
   int y = threadIdx.y + blockIdx.y*blockDim.y;
   // Valid coordinates are [0, cols) x [0, rows); x == cols or y == rows is
   // already outside the image and must not touch memory.
   if (x >= cols || y >= rows) return;
   int offset = x + y * cols;
   // When the matching pixel x - d falls outside the right image there is
   // nothing to compare against. Store the maximum cost instead of leaving
   // whatever a previous launch wrote into `out`.
   if (x < d || x - d >= cols) {
       out[offset] = 255;
       return;
   }
   out[offset] = ad(channels, &left[offset*channels], &right[(offset - d)*channels]);
}
// Winner-takes-all disparity selection.
// Expects a 2D launch in which each thread handles pixel (x, y); the grid may
// overhang the image, surplus threads exit immediately.
//   rows : image height in pixels
//   cols : image width in pixels
//   dsi  : dpMax consecutive cost slices of rows*cols bytes each
//   out  : rows*cols bytes; receives the index of the cheapest slice per pixel
// Ties are resolved in favour of the smallest index.
__global__ void kernel_wta(int rows, int cols, uchar *dsi, uchar *out) {
   int x = threadIdx.x + blockIdx.x*blockDim.x;
   int y = threadIdx.y + blockIdx.y*blockDim.y;
   // Threads of the rounded-up grid that lie outside the image must not read
   // or write: doing so would run past the end of `dsi` and `out`.
   if (x >= cols || y >= rows) return;
   int offset = x + y * cols;
   int plane = rows * cols;
   int best = 0;
   uchar bestCost = dsi[offset];
   for (int i = 1; i < dpMax; i++) {
       uchar cost = dsi[i*plane + offset];
       if (cost < bestCost) {
           bestCost = cost;
           best = i;
       }
   }
   out[offset] = best;
}
// Box (mean) filter over a patSize x patSize window on a single-channel image.
// Expects a 2D launch in which each thread handles pixel (x, y); the grid may
// overhang the image, surplus threads exit immediately.
//   rows    : image height in pixels
//   cols    : image width in pixels
//   patSize : window edge length; values below 1 are treated as 1
//   img     : input image, rows*cols bytes
//   out     : output image, rows*cols bytes (must not alias img)
// Window positions that fall outside the image are clamped to the nearest
// edge pixel, so border pixels never read outside `img`.
__global__ void boxFilter(int rows, int cols, int patSize, uchar* img, uchar *out) {
   int x = threadIdx.x + blockIdx.x*blockDim.x;
   int y = threadIdx.y + blockIdx.y*blockDim.y;
   if (x >= cols || y >= rows) return;
   // Guards the division below against a zero-sized window.
   if (patSize < 1) patSize = 1;
   int k = patSize / 2;
   int sum = 0;
   for (int row = 0; row < patSize; row++) {
       int yy = y + row - k;
       if (yy < 0) yy = 0;
       if (yy > rows - 1) yy = rows - 1;
       for (int col = 0; col < patSize; col++) {
           int xx = x + col - k;
           if (xx < 0) xx = 0;
           if (xx > cols - 1) xx = cols - 1;
           // All window weights are 1, so no weight table is needed.
           sum += img[xx + yy * cols];
       }
   }
   out[x + y * cols] = sum / (patSize*patSize);
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char *what) {
   if (err == cudaSuccess) return true;
   cerr << "CUDA error (" << what << "): " << cudaGetErrorString(err) << endl;
   return false;
}

// Loads a stereo pair, builds a box-filtered absolute-difference cost slice
// for every disparity in [0, dpMax), selects the cheapest disparity per pixel
// and displays the result.
// Returns 0 on success, -1 if the images cannot be used or a CUDA call fails.
int main() {
   Mat left = imread("D:\\cones\\im0.ppm");
   Mat right = imread("D:\\cones\\im1.ppm");
   // Either image failing to load makes the run impossible.
   if (left.empty() || right.empty()) {
       cerr << "Failed to load the input images" << endl;
       return -1;
   }
   // The kernels index both images with the same rows/cols/channels and read
   // them as 8-bit samples.
   if (left.size() != right.size() || left.type() != right.type() || left.depth() != CV_8U) {
       cerr << "Input images must be 8-bit and share the same size and channel count" << endl;
       return -1;
   }
   // The device copies below assume tightly packed rows.
   if (!left.isContinuous()) left = left.clone();
   if (!right.isContinuous()) right = right.clone();
   cout << left.size() << endl;
   imshow("LEFT", left);
   imshow("RIGHT", right);
   int rows = left.rows;
   int cols = left.cols;
   int channels = left.channels();
   const size_t pixels = (size_t)rows * cols;
   const size_t imageBytes = pixels * channels;

   uchar *dev_left = NULL, *dev_right = NULL, *dev_out = NULL, *dev_dsi = NULL, *dev_tmp = NULL;
   // Short-circuit chain: the first failure stops all further CUDA work.
   bool ok = cudaOk(cudaMalloc(&dev_left, imageBytes), "cudaMalloc dev_left")
          && cudaOk(cudaMalloc(&dev_right, imageBytes), "cudaMalloc dev_right")
          && cudaOk(cudaMalloc(&dev_out, imageBytes), "cudaMalloc dev_out")
          && cudaOk(cudaMalloc(&dev_dsi, pixels * dpMax), "cudaMalloc dev_dsi")
          && cudaOk(cudaMalloc(&dev_tmp, imageBytes), "cudaMalloc dev_tmp")
          && cudaOk(cudaMemcpy(dev_left, left.data, imageBytes, cudaMemcpyHostToDevice), "upload left")
          && cudaOk(cudaMemcpy(dev_right, right.data, imageBytes, cudaMemcpyHostToDevice), "upload right");

   // One thread per pixel; the grid is rounded up to whole 32x32 blocks.
   dim3 grid((cols + 31) / 32, (rows + 31) / 32);
   dim3 block(32, 32);
   for (int i = 0; ok && i < dpMax; i++)
   {
       // Raw cost for disparity i goes to dev_tmp, the smoothed cost becomes
       // slice i of the cost volume.
       kernel_ad<<<grid, block>>>(i, rows, cols, channels, dev_left, dev_right, dev_tmp);
       boxFilter<<<grid, block>>>(rows, cols, 3, dev_tmp, dev_dsi + (size_t)i * pixels);
       ok = cudaOk(cudaGetLastError(), "cost kernel launch");
   }
   if (ok) {
       kernel_wta<<<grid, block>>>(rows, cols, dev_dsi, dev_out);
       ok = cudaOk(cudaGetLastError(), "kernel_wta launch");
   }

   Mat img(left.size(), CV_8U);
   // The blocking copy also surfaces errors raised while the kernels ran.
   if (ok) ok = cudaOk(cudaMemcpy(img.data, dev_out, pixels, cudaMemcpyDeviceToHost), "download disparity");

   // cudaFree accepts null pointers, so this is safe after a partial setup.
   cudaFree(dev_left);
   cudaFree(dev_right);
   cudaFree(dev_out);
   cudaFree(dev_dsi);
   cudaFree(dev_tmp);
   if (!ok) return -1;

   // Scale the disparity indices so they are visible as grey levels.
   imshow("DISPARITY", img * 15);
   waitKey(0);
   return 0;
}