#include<time.h>
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <iostream>
using namespace std;
#define dimGridx 10
#define dimGridy 10
#define dimGridz 10
#define ROWS 70*dimGridx*dimGridy*dimGridz //
#define COLS 7 //parameters
#define POINTS 1024 //points POINTS<=1024
#define CHECK(res) if(res!=cudaSuccess){exit(-1);}
///
struct minReturn
{
double param0;
double param1;
double param2;
double param3;
double param4;
double param5;
double param6;
double min_fit;
};
__device__ double dp_out_params[ROWS][COLS];
__device__ minReturn best_param_err[POINTS];
__device__ double fit_func(double *fit_params, double a)
{
double Hbo = *(fit_params + 0);
double Hb = *(fit_params + 1);
double Cmelanin = *(fit_params + 2);
double musp0 = *(fit_params + 3);
double SP = *(fit_params + 4);
double alpha = *(fit_params + 5);
double Hmelanin = *(fit_params + 6);
double func_result = sin(a*Hbo+Hb+Cmelanin+musp0+a*SP+a*alpha+a*Hmelanin) +a;//+ x*COLS + y;
return func_result;
}
//=================================================================================
// 求最小值函数
__device__ minReturn min(double a)
{
minReturn arr;
arr.min_fit = fit_func(dp_out_params[0],a);
for(int i=1;i<ROWS;i++)
{
if( fit_func(dp_out_params[i],a) < arr.min_fit)
{
arr.min_fit = fit_func(dp_out_params[i],a);
arr.param0 = *(dp_out_params[i]+0);
arr.param1 = *(dp_out_params[i]+1);
arr.param2 = *(dp_out_params[i]+2);
arr.param3 = *(dp_out_params[i]+3);
arr.param4 = *(dp_out_params[i]+4);
arr.param5 = *(dp_out_params[i]+5);
arr.param6 = *(dp_out_params[i]+6);
}
}
return arr;
}
///
// initial parameters
__global__ void init_params(unsigned int loop)
{
unsigned int blockId = gridDim.x*gridDim.y*blockIdx.z + gridDim.x*blockIdx.y + blockIdx.x;
unsigned int threadId = blockId*blockDim.x + threadIdx.x;
unsigned int row = threadId;
unsigned int col = threadIdx.y;
double min_Hbo = 0;
double max_Hbo = 1;
int stepNums_Hbo = 10;
double stepLen_Hbo = (max_Hbo-min_Hbo)/stepNums_Hbo;
double min_Hb = 0;
double max_Hb = 1;
int stepNums_Hb = 10;
double stepLen_Hb = (max_Hb-min_Hb)/stepNums_Hb;
double min_Cmelanin = 0;
double max_Cmelanin = 1;
int stepNums_Cmelanin = 10;
double stepLen_Cmelanin = (max_Cmelanin-min_Cmelanin)/stepNums_Cmelanin;
double min_musp0 = 0;
double max_musp0 = 1;
int stepNums_musp0 = 10;
double stepLen_musp0 = (max_musp0-min_musp0)/stepNums_musp0;
double min_SP = 0;
double max_SP = 1;
int stepNums_SP = 10;
double stepLen_SP = (max_SP-min_SP)/stepNums_SP;
double min_alpha = 0;
double max_alpha = 1;
int stepNums_alpha = 10;
double stepLen_alpha = (max_alpha-min_alpha)/stepNums_alpha;
double min_Hmelanin = 0;
double max_Hmelanin = 5;
int stepNums_Hmelanin = 10;
double stepLen_Hmelanin = (max_Hmelanin-min_Hmelanin)/stepNums_Hmelanin;
if (row < ROWS && col < COLS)
{
dp_out_params[row][col] = 0;
__syncthreads();
if( col== 6 )
{
int weight_Hmelanin = (row+ROWS*loop) % stepNums_Hmelanin;
dp_out_params[row][col] = min_Hmelanin + weight_Hmelanin * stepLen_Hmelanin;
}
__syncthreads();
if( col== 5 )
{
int weight_alpha = ( (row+ROWS*loop) / stepNums_Hmelanin ) % stepNums_alpha;
dp_out_params[row][col] = min_alpha + weight_alpha * stepLen_alpha;
}
__syncthreads();
if( col== 4 )
{
int weight_SP = ( (row+ROWS*loop) / stepNums_Hmelanin / stepNums_alpha ) % stepNums_SP;
dp_out_params[row][col] = min_SP + weight_SP * stepLen_SP;
}
__syncthreads();
if( col== 3 )
{
int weight_musp0 =( (row+ROWS*loop) / stepNums_Hmelanin / stepNums_alpha / stepNums_SP ) % stepNums_musp0;
dp_out_params[row][col] = min_musp0 + weight_musp0 * stepLen_musp0;
}
__syncthreads();
if( col== 2 )
{
int weight_Cmelanin =( (row+ROWS*loop) / stepNums_Hmelanin / stepNums_alpha / stepNums_SP / stepNums_musp0 ) % stepNums_Cmelanin;
dp_out_params[row][col] = min_Cmelanin + weight_Cmelanin * stepLen_Cmelanin;
}
__syncthreads();
if( col== 1 )
{
int weight_Hb =( (row+ROWS*loop) / stepNums_Hmelanin / stepNums_alpha / stepNums_SP / stepNums_musp0 / stepNums_Cmelanin ) % stepNums_Hb;
dp_out_params[row][col] = min_Hb + weight_Hb * stepLen_Hb;
}
__syncthreads();
if( col== 0 )
{
int weight_Hbo =( (row+ROWS*loop) / stepNums_Hmelanin / stepNums_alpha / stepNums_SP / stepNums_musp0 / stepNums_Cmelanin /stepNums_Hb ) % stepNums_Hbo;
dp_out_params[row][col] = min_Hbo + weight_Hbo * stepLen_Hbo;
}
__syncthreads();
}
}
__global__ void myPrintf()
{
printf("dp_out_param:\n");
for(int row=0; row<ROWS; row++)
{
for(int col=0; col<COLS; col++)
{
printf("%f ",dp_out_params[row][col]);
}
printf("\n");
}
}
///
__global__ void find()
{
double a= 0.01*threadIdx.x;
unsigned int point = threadIdx.x;
best_param_err[point] = min(a);
__syncthreads();
if(threadIdx.x>=1020 && threadIdx.x<=1023)
{
printf("BestParams=%f %f %f %f %f %f %f BestErr=%f\n",
best_param_err[point].param0,best_param_err[point].param1,best_param_err[point].param2,
best_param_err[point].param3,best_param_err[point].param4,best_param_err[point].param5,best_param_err[point].param6,
best_param_err[point].min_fit);
}
}
///
int main(int argc, char **argv)
{
time_t start,finish;
start = clock(); // 程序开始计时
dim3 dimBlock( 70, COLS, 1);
dim3 dimGrid( dimGridx, dimGridy, dimGridz);
for(unsigned int loop=0; loop<1; loop++)
{
init_params<<<dimGrid, dimBlock>>>(loop);
//myPrintf<<<1, 1>>>();
find<<<1, POINTS>>>();
cout<<"loop: "<<loop<<endl;
}
cudaDeviceSynchronize();
finish = clock(); // 程序计算结束
double duration = ((double)(finish-start))/CLOCKS_PER_SEC;
printf("程序计算耗时:%lf秒\n",duration);
printf("GOODBYE MAIN");
return 0;
}
结果:
loop: 0
BestParams=0.000000 0.000000 0.000000 0.100000 0.000000 0.300000 2.000000 BestErr=9.200002
BestParams=0.000000 0.000000 0.200000 0.900000 0.000000 0.200000 2.000000 BestErr=9.210000
BestParams=0.000000 0.000000 0.000000 0.200000 0.000000 0.400000 2.500000 BestErr=9.220025
BestParams=0.000000 0.000000 0.300000 0.900000 0.000000 0.300000 2.500000 BestErr=9.230001
程序计算耗时:1.080000秒
GOODBYE MAIN