//swap.cu
#include <cstdio>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <opencv2/core/cuda_devptrs.hpp>
using namespace cv;
using namespace cv::gpu;
// Custom kernel: copies src into dst with the x and z components of every
// uchar3 pixel exchanged (the red/blue swap the name refers to).
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. The thread's
// global x index selects the column and its global y index selects the row.
// Threads that fall outside src.cols x src.rows do nothing, so the grid may
// overshoot the image.
//
// src : source image view; provides cols/rows for the bounds check.
// dst : destination image view; written at the same (row, col) as read.
__global__ void swap_rb_kernel(const PtrStepSz<uchar3> src,PtrStep<uchar3> dst)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard clause for the partial blocks on the right/bottom edges.
    if (col >= src.cols || row >= src.rows)
        return;

    const uchar3 pixel = src(row, col);
    dst(row, col) = make_uchar3(pixel.z, pixel.y, pixel.x);
}
// Host-side launcher for swap_rb_kernel.
//
// src    : source image view; its cols/rows determine the grid size.
// dst    : destination image view, passed through to the kernel unchanged.
// stream : CUDA stream to launch on. When it is the default stream (0) the
//          call blocks until the device has finished; otherwise it returns
//          right after the launch has been queued.
//
// Launch configuration: 32x8 threads per block, with the grid rounded up so
// that every pixel is covered by one thread.
//
// Errors from the launch or from the synchronisation are reported on stderr.
// The function has C linkage and returns void, so they are not propagated as
// exceptions.
extern "C" void swap_rb_caller(const PtrStepSz<uchar3>& src,PtrStep<uchar3> dst,cudaStream_t stream)
{
    // A grid with a zero dimension is an invalid launch configuration, so an
    // empty image is treated as a no-op instead of a failed launch.
    if (src.cols <= 0 || src.rows <= 0)
        return;

    const dim3 block(32, 8);
    const dim3 grid((src.cols + block.x - 1) / block.x,
                    (src.rows + block.y - 1) / block.y);

    swap_rb_kernel<<<grid, block, 0, stream>>>(src, dst);

    // Kernel launches do not return a status; configuration errors only show
    // up through cudaGetLastError().
    cudaError_t status = cudaGetLastError();

    // On the default stream, wait for completion so that execution errors are
    // observed here as well.
    if (status == cudaSuccess && stream == 0)
        status = cudaDeviceSynchronize();

    if (status != cudaSuccess)
        std::fprintf(stderr, "swap_rb_caller: CUDA error: %s\n", cudaGetErrorString(status));
}
//swap.cpp
#include <opencv2/gpu/gpu.hpp>
#include <opencv2/gpu/stream_accessor.hpp>
using namespace cv;
using namespace cv::gpu;
extern "C" void swap_rb_caller(const PtrStepSz<uchar3>& src,PtrStep<uchar3> dst,cudaStream_t stream);
extern "C" void swap_rb(const GpuMat& src,GpuMat& dst,Stream& stream = Stream::Null())
{
CV_Assert(src.type() == CV_8UC3);
dst.create(src.size(),src.type());
cudaStream_t s = StreamAccessor::getStream(stream);
swap_rb_caller(src,dst,s);
}
//main.cpp
#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/gpu/gpu.hpp>
#pragma comment(lib,"opencv_gpu2410d.lib")
#pragma comment(lib,"opencv_core2410d.lib")
#pragma comment(lib,"opencv_highgui2410d.lib")
using namespace cv;
using namespace cv::gpu;
extern "C" void swap_rb(const GpuMat& src,GpuMat& dst,Stream& stream = Stream::Null());
int main()
{
Mat image = imread("lena.jpg");
imshow("src",image);
GpuMat gpuMat,output;
gpuMat.upload(image);
swap_rb(gpuMat,output);
output.download(image);
imshow("gpu",image);
getchar();
waitKey(0);
return 0;
}