void pair_gpu(const double *d_x, const double *d_y, const double *d_z,
unsigned long long int *d_g2, int numatm, int nconf,
const double xbox, const double ybox, const double zbox, int d_bin)
{
double r, cut, dx, dy, dz;
int ig2;
double box;
box = min(xbox, ybox);
box = min(box, zbox);
double del = box / (2.0 * d_bin);
cut = box * 0.5;
#pragma acc data copyin(d_x[:numatm*nconf], d_y[:numatm*nconf], d_z[:numatm*nconf]) copyout(d_g2[:d_bin])
{
#pragma acc parallel num_gangs( 100 ) num_workers( 32 ) vector_length( 32 )
#pragma acc loop gang private(dx,dy,dz,r,ig2)
for (int frame = 0; frame < nconf; frame++)
{
#pragma acc loop worker independent
for (int id1 = 0; id1 < numatm; id1++)
{
#pragma acc loop vector independent
for (int id2 = 0; id2 < numatm; id2++)
{
dx = d_x[frame * numatm + id1] - d_x[frame * numatm + id2];
dy = d_y[frame * numatm + id1] - d_y[frame * numatm + id2];
dz = d_z[frame * numatm + id1] - d_z[frame * numatm + id2];
dx = dx - xbox * (round(dx / xbox));
dy = dy - ybox * (round(dy / ybox));
dz = dz - zbox * (round(dz / zbox));
r = sqrtf(dx * dx + dy * dy + dz * dz);
if (r < cut)
{
ig2 = (int)(r / del);
#pragma acc atomic
d_g2[ig2] = d_g2[ig2] + 1;
}
}
}
}
}
}
Answer to the assessment for Fundamentals of Accelerated Computing with OpenACC
最新推荐文章于 2024-06-12 18:28:23 发布