#include "opencv2/opencv.hpp"
#include "cuda_runtime.h"
using namespace cv;
using namespace std;
// Number of disparity hypotheses: candidate shifts 0 .. dpMax-1 are evaluated.
// It sizes the cost volume allocated in main() (one rows*cols slice per
// disparity) and bounds the search loop in kernel_wta.
const int dpMax = 20;
/**
 * Absolute-difference (AD) matching cost between two pixels.
 *
 * @param channels number of bytes (colour channels) per pixel
 * @param left     pointer to the first channel of the left-image pixel
 * @param right    pointer to the first channel of the right-image pixel
 * @return the sum of per-channel absolute differences divided by `channels`
 *         (integer division)
 */
__device__ int ad(int channels, uchar *left, uchar *right) {
    int total = 0;
    int c = 0;
    while (c < channels) {
        // uchar operands are widened to int, so the difference may be negative.
        const int diff = static_cast<int>(left[c]) - static_cast<int>(right[c]);
        total += (diff < 0) ? -diff : diff;
        ++c;
    }
    return total / channels;
}
/**
 * Computes the per-pixel AD matching cost for one disparity hypothesis.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel; the grid may be
 * rounded up past the image size (excess threads exit immediately).
 *
 * @param d        disparity (horizontal shift in pixels) being tested;
 *                 expected to be non-negative
 * @param rows     image height in pixels
 * @param cols     image width in pixels
 * @param channels bytes per pixel in `left` / `right` (interleaved channels)
 * @param left     left image, rows*cols*channels bytes, rows tightly packed
 * @param right    right image, same layout as `left`
 * @param out      cost image, rows*cols bytes; out[x + y*cols] receives the
 *                 cost of matching left(x, y) against right(x - d, y)
 */
__global__ void kernel_ad(int d, int rows, int cols, int channels, uchar *left, uchar *right, uchar *out) {
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;

    // Valid coordinates are 0..cols-1 / 0..rows-1. The previous `>` test let
    // x == cols and y == rows through, which indexes past the row / image.
    if (x >= cols || y >= rows) return;

    int offset = x + y * cols;

    if (x < d) {
        // The matching pixel would lie left of the right image. Write the
        // largest possible cost so this disparity is never preferred here,
        // instead of leaving whatever an earlier pass stored in `out`.
        out[offset] = 255;
        return;
    }

    out[offset] = ad(channels, &left[offset * channels], &right[(offset - d) * channels]);
}
/**
 * Winner-takes-all disparity selection.
 *
 * For every pixel, scans the dpMax cost slices of `dsi` and stores the index
 * of the slice with the smallest cost (the lowest index wins ties).
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel; the grid may be
 * rounded up past the image size (excess threads exit immediately).
 *
 * @param rows image height in pixels
 * @param cols image width in pixels
 * @param dsi  cost volume: dpMax consecutive slices of rows*cols bytes each,
 *             slice i holding the cost for disparity i
 * @param out  disparity map, rows*cols bytes
 */
__global__ void kernel_wta(int rows, int cols, uchar *dsi, uchar *out) {
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;

    // The launch grid is rounded up to whole blocks; threads outside the
    // image must not touch memory (the original had no guard at all).
    if (x >= cols || y >= rows) return;

    int offset = x + y * cols;
    int sliceSize = rows * cols;

    int best = 0;
    uchar bestCost = dsi[offset];
    for (int i = 1; i < dpMax; i++) {
        uchar cost = dsi[i * sliceSize + offset];
        // Strict comparison keeps the smallest disparity on ties.
        if (cost < bestCost) {
            bestCost = cost;
            best = i;
        }
    }
    out[offset] = static_cast<uchar>(best);
}
/**
 * Mean (box) filter over a patSize x patSize window; used as the cost
 * aggregation step.
 *
 * The window covers offsets -patSize/2 .. patSize-1-patSize/2 in both
 * directions. Window coordinates that fall outside the image are clamped to
 * the nearest edge pixel, so border pixels are averaged over the same number
 * of samples as interior ones and no memory outside `img` is read.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel; the grid may be
 * rounded up past the image size (excess threads exit immediately).
 *
 * @param rows    image height in pixels
 * @param cols    image width in pixels
 * @param patSize window side length; values below 1 copy the pixel through
 * @param img     input image, rows*cols bytes (single channel)
 * @param out     output image, rows*cols bytes; must be a different buffer
 *                from `img` because neighbouring threads read `img` while
 *                this thread writes `out`
 */
__global__ void boxFilter(int rows, int cols, int patSize, uchar* img, uchar *out) {
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;

    // Threads of the rounded-up grid that lie outside the image do nothing.
    if (x >= cols || y >= rows) return;

    int offset = x + y * cols;

    // An empty window would divide by zero below.
    if (patSize < 1) {
        out[offset] = img[offset];
        return;
    }

    int k = patSize / 2;
    int sum = 0;
    for (int row = 0; row < patSize; row++) {
        int yy = y + row - k;
        yy = (yy < 0) ? 0 : ((yy >= rows) ? rows - 1 : yy);
        for (int col = 0; col < patSize; col++) {
            int xx = x + col - k;
            xx = (xx < 0) ? 0 : ((xx >= cols) ? cols - 1 : xx);
            // All window weights are 1, so no weight table is needed (the
            // former fixed int[9] table overflowed for patSize > 3).
            sum += img[xx + yy * cols];
        }
    }
    out[offset] = sum / (patSize * patSize);
}
// Reports a failed CUDA runtime call on stderr; returns true when `status` is cudaSuccess.
static bool cudaOk(cudaError_t status, const char *what) {
    if (status == cudaSuccess) return true;
    cerr << what << " failed: " << cudaGetErrorString(status) << endl;
    return false;
}

/**
 * Loads a stereo pair, builds an AD cost volume with box-filter aggregation
 * on the GPU, picks the best disparity per pixel (winner-takes-all) and
 * displays the result.
 *
 * @return 0 on success, -1 if an image cannot be loaded, the images do not
 *         match in size/type, or any CUDA call fails
 */
int main() {
    Mat left = imread("D:\\cones\\im0.ppm");
    Mat right = imread("D:\\cones\\im1.ppm");
    // Either image missing makes matching impossible; the previous `&&`
    // only bailed out when BOTH failed to load.
    if (left.empty() || right.empty()) return -1;
    // Both images are uploaded and indexed with the left image's geometry.
    if (left.size() != right.size() || left.type() != right.type()) return -1;

    cout << left.size() << endl;
    imshow("LEFT", left);
    imshow("RIGHT", right);

    const int rows = left.rows;
    const int cols = left.cols;
    const int channels = left.channels();
    const size_t sliceBytes = (size_t)rows * cols;          // one byte per pixel
    const size_t imageBytes = sliceBytes * channels;        // interleaved input image

    // Null-initialised so the cleanup below is safe on every path
    // (cudaFree on a null pointer is a no-op).
    uchar *dev_left = NULL, *dev_right = NULL, *dev_out = NULL, *dev_dsi = NULL, *dev_tmp = NULL;

    bool ok = cudaOk(cudaMalloc(&dev_left, imageBytes), "cudaMalloc(dev_left)")
        && cudaOk(cudaMalloc(&dev_right, imageBytes), "cudaMalloc(dev_right)")
        && cudaOk(cudaMalloc(&dev_out, imageBytes), "cudaMalloc(dev_out)")
        && cudaOk(cudaMalloc(&dev_dsi, sliceBytes * dpMax), "cudaMalloc(dev_dsi)")
        && cudaOk(cudaMalloc(&dev_tmp, imageBytes), "cudaMalloc(dev_tmp)")
        && cudaOk(cudaMemcpy(dev_left, left.data, imageBytes, cudaMemcpyHostToDevice), "cudaMemcpy(left)")
        && cudaOk(cudaMemcpy(dev_right, right.data, imageBytes, cudaMemcpyHostToDevice), "cudaMemcpy(right)");

    // One thread per pixel, grid rounded up to whole 32x32 blocks.
    dim3 block(32, 32);
    dim3 grid((cols + 31) / 32, (rows + 31) / 32);

    // Build the cost volume: raw AD cost into dev_tmp, then aggregate it
    // into slice i of dev_dsi. Launches on the default stream run in order.
    for (int i = 0; ok && i < dpMax; i++) {
        kernel_ad<<<grid, block>>>(i, rows, cols, channels, dev_left, dev_right, dev_tmp);
        boxFilter<<<grid, block>>>(rows, cols, 3, dev_tmp, dev_dsi + (size_t)i * sliceBytes);
        ok = cudaOk(cudaGetLastError(), "cost volume kernel launch");
    }

    if (ok) {
        kernel_wta<<<grid, block>>>(rows, cols, dev_dsi, dev_out);
        ok = cudaOk(cudaGetLastError(), "kernel_wta launch");
    }

    Mat img(left.size(), CV_8U);
    if (ok) {
        // Blocking copy: also surfaces errors raised while the kernels ran.
        ok = cudaOk(cudaMemcpy(img.data, dev_out, sliceBytes, cudaMemcpyDeviceToHost), "cudaMemcpy(disparity)");
    }

    cudaFree(dev_tmp);
    cudaFree(dev_dsi);
    cudaFree(dev_out);
    cudaFree(dev_right);
    cudaFree(dev_left);

    if (!ok) return -1;

    // Stretch disparities 0..dpMax-1 over the 8-bit range without saturating
    // (the former fixed factor 15 exceeded 255 for the largest disparities).
    const int displayScale = (dpMax > 1) ? 255 / (dpMax - 1) : 255;
    imshow("DISPARITY", img * displayScale);
    waitKey(0);
    return 0;
}
求助：怎么更换代价函数和聚合函数？这段代码用的好像是 AD（绝对差）代价函数和均值（盒式滤波）聚合。
这些是代价函数和代价聚合吗?
代价函数好像有:
代价聚合: