CUDA浅尝辄止-----一个有趣的例子(碎形几何Julia集)

“编完了一段小程序也是一种幸福”

                                         “小确幸”

很少时候能真切地感受“数学之美”,当你遇见了“碎形几何”之后,便不再怀疑“数学之美”。高度抽象的数学公式,用软件程序在屏幕上打印出一幅幅看似天马行空的图案,却要不失规则之美,让人震撼。废话不再说,先上图:

以下是完成这幅图像的源程序代码(OpenCV3.3.0 + CUDA8.0):
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include<opencv2/opencv.hpp>
#include <stdio.h>
using namespace cv;
using namespace cv::cuda;

#define DIM 400

struct cuComplex
{
 float r;
 float i;
 __device__ cuComplex(float a,float b):r(a),i(b){}
 __device__ float magnitude2(void)
 {
  return r*r+i*i;
 }
 __device__ cuComplex operator*(const cuComplex& a)
 {
  return cuComplex(r*a.r-i*a.i, i*a.r+r*a.i);
 }
 __device__ cuComplex operator+(const cuComplex& a)
 {
  return cuComplex(r+a.r,i+a.i);
 }
};

__device__ int julia(int x,int y)
{
 const float scale = 1.2;
 float jx = scale * (float)(DIM/2 - x)/(DIM/2);
 float jy = scale * (float)(DIM/2 - y)/(DIM/2);

 cuComplex c(-0.8,0.156);
 cuComplex a(jx,jy);

 int i = 0;
 for(i=0; i<200; i++)
 {
  a = a*a + c;
  if(a.magnitude2()>1000)
  {
   return 0;
  }
 }

 return 1;

}

__global__ void kernel(cuda::PtrStepSz<uchar4> juliaMat)
{
 int x = blockIdx.x;
 int y = blockIdx.y;
 int juliaValue = julia(x,y);
 juliaMat(x,y) = make_uchar4(255*juliaValue, 0, 0,255);
}

cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size);

__global__ void addKernel(int *c, const int *a, const int *b)
{
    int i = threadIdx.x;
    c[i] = a[i] + b[i];
}


int main()
{
 cuda::GpuMat cu_juliaMat;
 cu_juliaMat = cuda::GpuMat(DIM, DIM, CV_8UC4, Scalar(0, 0, 0,0));
 Mat juliaMat = Mat(DIM,DIM, CV_8UC4, Scalar(0, 0,0,0));
 dim3 grid(DIM,DIM);
 kernel<<<grid,1>>>(cu_juliaMat);

 cu_juliaMat.download(juliaMat); 
 imwrite("D:\\temp\\juliaMat.bmp",juliaMat);
 imshow("Julia", juliaMat);
 waitKey(0);
 system("Pause");
    return 0;
}

// Helper function for using CUDA to add vectors in parallel.
cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size)
{
    int *dev_a = 0;
    int *dev_b = 0;
    int *dev_c = 0;
    cudaError_t cudaStatus;

    // Choose which GPU to run on, change this on a multi-GPU system.
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
        goto Error;
    }

    // Allocate GPU buffers for three vectors (two input, one output)    .
    cudaStatus = cudaMalloc((void**)&dev_c, size * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    cudaStatus = cudaMalloc((void**)&dev_a, size * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    cudaStatus = cudaMalloc((void**)&dev_b, size * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    // Copy input vectors from host memory to GPU buffers.
    cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

    cudaStatus = cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

    // Launch a kernel on the GPU with one thread for each element.
    addKernel<<<1, size>>>(dev_c, dev_a, dev_b);

    // Check for any errors launching the kernel
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }
   
    // cudaDeviceSynchronize waits for the kernel to finish, and returns
    // any errors encountered during the launch.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
        goto Error;
    }

    // Copy output vector from GPU buffer to host memory.
    cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

Error:
    cudaFree(dev_c);
    cudaFree(dev_a);
    cudaFree(dev_b);
   
    return cudaStatus;
}

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值