#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "device_functions.h"
#include "device_functions.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "stdlib.h"
#include <string>
#include <cassert>
#include <iostream>
using namespace std;
#include <opencv2\opencv.hpp>
#include <opencv2/core.hpp>
#include <opencv2/highgui/highgui.hpp>
using namespace cv;
// Image size: 500 x 500 pixels
#define W 500
#define H 500
#define TX 32
#define TY 32
// Clamp an integer into the value range of an unsigned char (0..255)
// Saturate an integer to the closed interval [0, 255] and return it as a byte.
//   n       : value to clamp (may be negative or above 255)
//   returns : 255 when n > 255, 0 when n < 0, otherwise n itself
__device__ unsigned char clip(int n)
{
    // Upper bound is tested first, then the lower bound.
    if (n > 255)
    {
        return 255;
    }
    if (n < 0)
    {
        return 0;
    }
    // n is now known to be within 0..255, so the narrowing cast is lossless.
    return static_cast<unsigned char>(n);
}
// Shade every pixel of a w-by-h image by its Euclidean distance to `pos`.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. The x dimension
// maps to the image column and the y dimension to the image row. Threads that
// fall outside the image return without writing, so the grid may be rounded
// up to a whole number of blocks.
//
//   d_out : output buffer of at least w*h uchar4 pixels, row-major
//   w, h  : image width and height in pixels
//   pos   : reference point (x = column, y = row) distances are measured from
//
// Each pixel receives intensity = clip(255 - distance) in its .x and .y
// components, 0 in .z and 0 in .w.
// NOTE(review): which color each component shows as depends on the consumer
// of the buffer; OpenCV normally treats 4-channel 8-bit data as BGRA, so .x
// would display as blue there, not red - confirm the intended channel order.
__global__ void distanceKernel(uchar4*d_out, int w, int h, int2 pos)
{
    const int c = blockIdx.x * blockDim.x + threadIdx.x; // column of this thread's pixel
    const int r = blockIdx.y * blockDim.y + threadIdx.y; // row of this thread's pixel
    if (c >= w || r >= h)
    {
        return; // padding thread beyond the image edge
    }

    // Flat row-major index, widened so r * w cannot overflow int on huge images.
    const size_t i = static_cast<size_t>(r) * static_cast<size_t>(w) + static_cast<size_t>(c);

    // Do the squaring in float: an int sum of squares overflows once a
    // coordinate delta exceeds ~46340. Every distance below 255 (the only
    // ones that give a non-zero intensity) is still computed exactly.
    const float dx = static_cast<float>(c - pos.x);
    const float dy = static_cast<float>(r - pos.y);
    const int d = static_cast<int>(sqrtf(dx * dx + dy * dy)); // truncates toward zero

    const unsigned char intensity = clip(255 - d);

    // One 4-byte store per pixel instead of four separate byte stores.
    d_out[i] = make_uchar4(intensity, intensity, 0, 0);
}
// Print a readable message and terminate when a CUDA runtime call fails.
//   status : return code of the CUDA call (or of cudaGetLastError())
//   what   : short description of the step, used in the error message
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess)
    {
        cerr << "CUDA error during " << what << ": "
             << cudaGetErrorString(status) << endl;
        exit(EXIT_FAILURE);
    }
}

// Render the distance image on the GPU, copy it back and show it in a window.
// Returns 0 on success; exits with EXIT_FAILURE if a host allocation or any
// CUDA call fails.
int main()
{
    const size_t numPixels = static_cast<size_t>(W) * static_cast<size_t>(H);
    const size_t numBytes = numPixels * sizeof(uchar4);

    // Host-side image buffer, zero-initialised.
    uchar4*out = (uchar4*)calloc(numPixels, sizeof(uchar4));
    if (out == 0)
    {
        cerr << "Host allocation of " << numBytes << " bytes failed" << endl;
        return EXIT_FAILURE;
    }

    // Device-side image buffer of the same size.
    uchar4*d_out = 0;
    checkCuda(cudaMalloc(&d_out, numBytes), "cudaMalloc");

    const int2 pos = { 0,0 }; // reference point: pixel (0, 0)

    // One thread per pixel; the grid is rounded up to cover the whole image.
    const dim3 blockSize(TX, TY);
    const int bx = (W + TX - 1) / TX;
    const int by = (H + TY - 1) / TY;
    const dim3 gridSize = dim3(bx, by);

    distanceKernel<<<gridSize, blockSize>>>(d_out, W, H, pos);
    // A launch returns no status itself; configuration errors show up here.
    checkCuda(cudaGetLastError(), "distanceKernel launch");

    // Blocking copy: waits for the kernel to finish and also reports any
    // error raised while the kernel was executing.
    checkCuda(cudaMemcpy(out, d_out, numBytes, cudaMemcpyDeviceToHost), "cudaMemcpy (device to host)");
    checkCuda(cudaFree(d_out), "cudaFree");

    // Wrap the host buffer without copying; `out` must outlive every use of the Mat.
    Mat imageShow(H, W, CV_8UC4, (void*)out);
    imshow("successed", imageShow);
    waitKey(0); // keep the window open until a key is pressed

    free(out);
    return 0;
}
// Result: