“编完了一段小程序也是一种幸福”
“小确幸”
很少时候能真切地感受“数学之美”,当你遇见了“碎形几何”之后,便不再怀疑“数学之美”。高度抽象的数学公式,用软件程序在屏幕上打印出一幅幅看似天马行空的图案,却要不失规则之美,让人震撼。废话不再说,先上图:
以下是完成这幅图像的源程序代码(OpenCV3.3.0 + CUDA8.0):
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include<opencv2/opencv.hpp>
#include <stdio.h>
using namespace cv;
using namespace cv::cuda;
#define DIM 400
struct cuComplex
{
float r;
float i;
__device__ cuComplex(float a,float b):r(a),i(b){}
__device__ float magnitude2(void)
{
return r*r+i*i;
}
__device__ cuComplex operator*(const cuComplex& a)
{
return cuComplex(r*a.r-i*a.i, i*a.r+r*a.i);
}
__device__ cuComplex operator+(const cuComplex& a)
{
return cuComplex(r+a.r,i+a.i);
}
};
__device__ int julia(int x,int y)
{
const float scale = 1.2;
float jx = scale * (float)(DIM/2 - x)/(DIM/2);
float jy = scale * (float)(DIM/2 - y)/(DIM/2);
cuComplex c(-0.8,0.156);
cuComplex a(jx,jy);
int i = 0;
for(i=0; i<200; i++)
{
a = a*a + c;
if(a.magnitude2()>1000)
{
return 0;
}
}
return 1;
}
__global__ void kernel(cuda::PtrStepSz<uchar4> juliaMat)
{
int x = blockIdx.x;
int y = blockIdx.y;
int juliaValue = julia(x,y);
juliaMat(x,y) = make_uchar4(255*juliaValue, 0, 0,255);
}
cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size);
__global__ void addKernel(int *c, const int *a, const int *b)
{
int i = threadIdx.x;
c[i] = a[i] + b[i];
}
int main()
{
cuda::GpuMat cu_juliaMat;
cu_juliaMat = cuda::GpuMat(DIM, DIM, CV_8UC4, Scalar(0, 0, 0,0));
Mat juliaMat = Mat(DIM,DIM, CV_8UC4, Scalar(0, 0,0,0));
dim3 grid(DIM,DIM);
kernel<<<grid,1>>>(cu_juliaMat);
cu_juliaMat.download(juliaMat);
imwrite("D:\\temp\\juliaMat.bmp",juliaMat);
imshow("Julia", juliaMat);
waitKey(0);
system("Pause");
return 0;
}
// Helper function for using CUDA to add vectors in parallel.
cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size)
{
int *dev_a = 0;
int *dev_b = 0;
int *dev_c = 0;
cudaError_t cudaStatus;
// Choose which GPU to run on, change this on a multi-GPU system.
cudaStatus = cudaSetDevice(0);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
goto Error;
}
// Allocate GPU buffers for three vectors (two input, one output) .
cudaStatus = cudaMalloc((void**)&dev_c, size * sizeof(int));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
cudaStatus = cudaMalloc((void**)&dev_a, size * sizeof(int));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
cudaStatus = cudaMalloc((void**)&dev_b, size * sizeof(int));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
// Copy input vectors from host memory to GPU buffers.
cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed!");
goto Error;
}
cudaStatus = cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed!");
goto Error;
}
// Launch a kernel on the GPU with one thread for each element.
addKernel<<<1, size>>>(dev_c, dev_a, dev_b);
// Check for any errors launching the kernel
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
goto Error;
}
// cudaDeviceSynchronize waits for the kernel to finish, and returns
// any errors encountered during the launch.
cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
goto Error;
}
// Copy output vector from GPU buffer to host memory.
cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed!");
goto Error;
}
Error:
cudaFree(dev_c);
cudaFree(dev_a);
cudaFree(dev_b);
return cudaStatus;
}