CUDA浅尝辄止-----一个有趣的例子（碎形几何Julia集）

本文链接：https://blog.csdn.net/zcm_bobo/article/details/81260559

“编完了一段小程序也是一种幸福”

“小确幸”

很少时候能真切地感受“数学之美”，当你遇见了“碎形几何”之后，便不再怀疑“数学之美”。高度抽象的数学公式，用软件程序在屏幕上打印出一幅幅看似天马行空的图案，却要不失规则之美，让人震撼。废话不再说，先上图：

以下是完成这幅图像的源程序代码（OpenCV3.3.0 + CUDA8.0）：
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include<opencv2/opencv.hpp>
#include <stdio.h>
using namespace cv;
using namespace cv::cuda;

#define DIM 400

struct cuComplex
{
float r;
float i;
__device__ cuComplex(float a,float b):r(a),i(b){}
__device__ float magnitude2(void)
{
  return r*r+i*i;
}
__device__ cuComplex operator*(const cuComplex& a)
{
  return cuComplex(r*a.r-i*a.i, i*a.r+r*a.i);
}
__device__ cuComplex operator+(const cuComplex& a)
{
  return cuComplex(r+a.r,i+a.i);
}
};

__device__ int julia(int x,int y)
{
const float scale = 1.2;
float jx = scale * (float)(DIM/2 - x)/(DIM/2);
float jy = scale * (float)(DIM/2 - y)/(DIM/2);

cuComplex c(-0.8,0.156);
cuComplex a(jx,jy);

int i = 0;
for(i=0; i<200; i++)
{
  a = a*a + c;
  if(a.magnitude2()>1000)
  {
   return 0;
  }
}

return 1;

}

__global__ void kernel(cuda::PtrStepSz<uchar4> juliaMat)
{
int x = blockIdx.x;
int y = blockIdx.y;
int juliaValue = julia(x,y);
juliaMat(x,y) = make_uchar4(255*juliaValue, 0, 0,255);
}

cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size);

__global__ void addKernel(int *c, const int *a, const int *b)
{
int i = threadIdx.x;
c[i] = a[i] + b[i];
}

int main()
{
cuda::GpuMat cu_juliaMat;
cu_juliaMat = cuda::GpuMat(DIM, DIM, CV_8UC4, Scalar(0, 0, 0,0));
Mat juliaMat = Mat(DIM,DIM, CV_8UC4, Scalar(0, 0,0,0));
dim3 grid(DIM,DIM);
kernel<<<grid,1>>>(cu_juliaMat);

cu_juliaMat.download(juliaMat);
imwrite("D:\\temp\\juliaMat.bmp",juliaMat);
imshow("Julia", juliaMat);
waitKey(0);
system("Pause");
return 0;
}

// Helper function for using CUDA to add vectors in parallel.
cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size)
{
    int *dev_a = 0;
    int *dev_b = 0;
    int *dev_c = 0;
    cudaError_t cudaStatus;

    // Choose which GPU to run on, change this on a multi-GPU system.
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
        goto Error;
    }

    // Allocate GPU buffers for three vectors (two input, one output)    .
    cudaStatus = cudaMalloc((void**)&dev_c, size * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    cudaStatus = cudaMalloc((void**)&dev_a, size * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    cudaStatus = cudaMalloc((void**)&dev_b, size * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    // Copy input vectors from host memory to GPU buffers.
    cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

    cudaStatus = cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

// Launch a kernel on the GPU with one thread for each element.
addKernel<<<1, size>>>(dev_c, dev_a, dev_b);

    // Check for any errors launching the kernel
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    // cudaDeviceSynchronize waits for the kernel to finish, and returns
    // any errors encountered during the launch.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
        goto Error;
    }

    // Copy output vector from GPU buffer to host memory.
    cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

Error:
    cudaFree(dev_c);
    cudaFree(dev_a);
    cudaFree(dev_b);

    return cudaStatus;
}