#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <curand.h>
#include <curand_kernel.h>
#define MAX_THREAD 256
#define MAX_COUNT 2000000
/* 最大线程MAX_THREAD 256
每个线程生成随机点个数MAX_COUNT 2000000 */
/* GPU端圆周率函数piGenerator宣言 */
__global__ void piGenerator (float *res,int *count, int *time);
/* 主程序 */
int main (int argc, const char * argv[]){
/* CPU端圆周率h_pi、GPU端圆周率d_pi、最终圆周率(平均值)pi、偏差err */
float *h_pi, *d_pi, pi, err;
/* 最大线程数maxThread,随机点个数h_count/d_count,随机数初期值设定用时间h_time/d_time,for循环次数i */
int maxThread = MAX_THREAD, *h_count, *d_count, *h_time, *d_time, i;
/* CPU端h_pi、h_count、h_time内存确保 */
h_pi = (float *)malloc(sizeof(float) * maxThread);
h_count = (int *)malloc(sizeof(int) * 1);
h_time = (int *)malloc(sizeof(int) * 1);
/* GPU端d_pi、d_count、d_time内存确保 */
cudaMalloc((void **)&d_pi, sizeof(float) * maxThread);
cudaMalloc((void **)&d_count, sizeof(int) * 1);
cudaMalloc((void **)&d_time, sizeof(int) * 1);
/* CPU端随机点个数值h_count及时间值h_time带入 */
h_count[0] = MAX_COUNT;
h_time[0] = (int)time(NULL);
/* 将h_count及h_time的值带入GPU端d_count及d_time */
cudaMemcpy(d_count, h_count, sizeof(int) * 1, cudaMemcpyHostToDevice);
cudaMemcpy(d_time, h_time, sizeof(int) * 1, cudaMemcpyHostToDevice);
/* 运行时间测试 */
clock_t start, end;
/* 运行时间测试开始 */
start = clock();
/* GPU端函数piGenerator开始运行,1个block,maxThread =512个thread */
piGenerator<<<1, maxThread>>> (d_pi, d_count, d_time);
/* 将GPU生成的圆周率d_pi带入CPU端c_pi */
cudaMemcpy(h_pi, d_pi, sizeof(float) * maxThread,cudaMemcpyDeviceToHost);
/* 512个线程生成的圆周率求平均值*/
for (i = 0; i < maxThread; i++) {
pi += h_pi[i];
}
pi = pi / maxThread;
/* 运行时间测试停止 */
end = clock();
/* 求圆周率误差 */
err = pi - (float)M_PI;
if (err < 0) {
err = -err;
}
/* 结果输出 */
printf("Points: %d, Generated π: %f, Error: %.0fe-6\n",h_count[0] * maxThread, pi, err * 1000000);
printf("Timer: %f sec\n", (float)(end -start)/CLOCKS_PER_SEC);
/* CPU端内存释放 */
free(h_pi);
free(h_count);
free(h_time);
/* GPU端内存释放 */
cudaFree(d_pi);
cudaFree(d_count);
cudaFree(d_time);
/* 结束 */
return 0;
}
__global__ void piGenerator (float *res,int *count, int *time) {
/* 1/4圆内点个数a初期值=0,线程ID index_x=threadIdx.x,随机点个数n带入,时间值t带入,for循环次数i */
int a=0, index_x = threadIdx.x, n = *count, t = *time, i;
/* 随机点x、y宣言 */
float x, y;
/* 圆周率内存保存地址确认 */
res += index_x;
/* curand随机函数初期化 */
curandState s;
curand_init(t, index_x, 0, &s);
/* for循环次数=随机点生成个数 */
for (i = 1; i <= n; i++) {
/* 随机点x坐标、y坐标(0<x<1,0<y<1)的随机生成 */
x = curand_uniform(&s);
y = curand_uniform(&s);
/* 如果随机点在1/4圆圈以内,1/4圆内点个数a+1 */
if (pow(x, 2) + pow(y, 2) <= 1) {
a++;
}
/* 圆周率计算(1/4圆内点个数 / 随机点总数 * 4) */
*res = 4 * (float)a / (float)n;
/* 线程同步 */
__syncthreads();
}
}