串行代码
#include <iostream>
#include<math.h>
#include<ctime>
using namespace std;
double* vBallvx;
double* vBallvy;
double* vBallax;
double* vBallay;
double* vBallpx;
double* vBallpy;
int N = 256; // 这里N不设置为常数,并且vBall等数据动态分配是为了方便之后改为MPI
const double r = 0.01; // 质点半径
const double GM = 6.67E-7; // G*M 用于计算加速度
const double delta_t = 0.01; // 表示每次update时经过的时间 starttime,endtime;
clock_t starttime,endtime;
void init() {
vBallvx = new double[N];
vBallvy = new double[N];
vBallax = new double[N];
vBallay = new double[N];
vBallpx = new double[N];
vBallpy = new double[N];
static int size = sqrt(N);
for (int i = 0; i < N; i++)
{
vBallpx[i] = 0.01 * (i % size);
vBallpy[i] = 0.01 * (i / size);
vBallvx[i] = 0;
vBallvy[i] = 0;
vBallax[i] = 0;
vBallay[i] = 0;
}
}
void force(int index) {
vBallax[index] = 0;
vBallay[index] = 0;
for (int i = 0; i < N; i++) {
if (i == index) continue;
double dx = vBallpx[index] - vBallpx[i];
double dy = vBallpy[index] - vBallpy[i];
double d = dx * dx + dy * dy;
if (d < r * r) d = r * r; // ^(3/2)
d *= sqrt(d);
vBallax[index] += GM * (dx) / d;
vBallay[index] += GM * (dy) / d;
}
}
void velocity(int index) {
vBallvx[index] += vBallax[index] * delta_t;
vBallvy[index] += vBallay[index] * delta_t;
}
void position(int index) {
static int size = sqrt(N) - 1;
vBallpx[index] += vBallvx[index] * delta_t;
if (vBallpx[index] > size / 100.0) vBallpx[index] = size / 100.0;
if (vBallpx[index] < 0) vBallpx[index] = 0;
vBallpy[index] += vBallvy[index] * delta_t;
if (vBallpy[index] > size / 100.0) vBallpy[index] = size / 100.0;
if (vBallpy[index] < 0) vBallpy[index] = 0;
}
void update() {
for (int i = 0; i < N; i++)
force(i);
for (int i = 0; i < N; i++)
velocity(i);
for (int i = 0; i < N; i++)
position(i);
}
int main() {
init();
int i = 0;
starttime=clock();
for(i=0;i<256;i++){
update();
}
endtime=clock();
double time=(double)(endtime-starttime)/CLOCKS_PER_SEC;
cout<<"Total time:"<<time<<endl;
//while (1)
//{
//update();
//i++;
//cout << "迭代" << ++i << "次" << endl;
// if(i==256)
// break;
//endtime=clock();
// double time=(double)(endtime-starttime)/CLOCKS_PER_SEC;
// cout<<"Total time:"<<time<<endl;
return 0;
}
参考资料:并行计算导论 MPI + OpenMP 解决简单的NBody / 多体问题_n 体求解器简化算法-CSDN博客
MPI代码
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <mpi.h>
typedef struct ball
{
double px,py;
double vx,vy;
double ax,ay;
}ball;
ball ball_list[256];
const int N=256;
const double GM=6.67E-7;//G*M
int timestep=100;
double delta_t=0.0;
double r=0.01;//球的半径
int cycle_times=1000;//周期数
int size=0;//方阵宽
void compute_force(int index)
{
ball_list[index].ax=0;
ball_list[index].ay=0;
for(int i=0;i<N;i++)
{
if(i!=index)
{
double dx=ball_list[i].px-ball_list[index].px;
double dy=ball_list[i].py-ball_list[index].py;
double d=(dx*dx+dy*dy);
if(d<r*r)d=r*r;
d*=sqrt(d);//^(3/2)
ball_list[index].ax+=GM*(dx)/d;
ball_list[index].ay+=GM*(dy)/d;
//printf("%lf %lf ",dx,dy);
}
}
//printf("%d a: %lf %lf\n",index,ball_list[index].ax,ball_list[index].ay);
}
void compute_velocities(int index)
{
ball_list[index].vx+=ball_list[index].ax*delta_t;
ball_list[index].vy+=ball_list[index].ay*delta_t;
//printf("%d v: %lf %lf\n",index,ball_list[index].vx,ball_list[index].vy);
}
void compute_positions(int index)
{
ball_list[index].px+=ball_list[index].vx*delta_t;
if(ball_list[index].px>((size-1)/100.0))ball_list[index].px=(size-1)/100.0;
if(ball_list[index].px<0)ball_list[index].px=0;
ball_list[index].py+=ball_list[index].vy*delta_t;
if(ball_list[index].py>((size-1)/100.0))ball_list[index].py=(size-1)/100.0;
if(ball_list[index].py<0)ball_list[index].py=0;
//printf("%d p: %lf %lf\n",index,ball_list[index].px,ball_list[index].py);
}
void main(int argc,char *argv[])
{
//init
delta_t=1.0/timestep;
//printf("%lf\n",delta_t);
size=(int)sqrt(N);
//printf("%d\n",size);
for(int i=0;i<N;i++)
{
ball_list[i].px=0.01*(i%size);
ball_list[i].py=0.01*(i/size);
ball_list[i].vx=0;
ball_list[i].vy=0;
ball_list[i].ax=0;
ball_list[i].ay=0;
}
int myid, numprocs;
double local_starttime,local_endtime,local_elapsed,elapsed;
int namelen;
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&myid);
double* mpi_buffer=malloc(sizeof(double)*1000000);
MPI_Buffer_attach(mpi_buffer,sizeof(double)*1000000);
//模拟开始
local_starttime=MPI_Wtime();
for(int i=0;i<cycle_times;i++)
{
for(int j=0;j<numprocs;j++)
{
if(j!=myid)
MPI_Bsend((ball_list+(N/numprocs)*myid),sizeof(ball)*N/numprocs,MPI_BYTE,j,i*10+myid,MPI_COMM_WORLD);
}
for(int j=0;j<numprocs;j++)
{
if(j!=myid)
{
MPI_Status status;
MPI_Recv((ball_list+(N/numprocs)*j),sizeof(ball)*N/numprocs,MPI_BYTE,j,i*10+j,MPI_COMM_WORLD,&status);
}
}
for(int j=(N/numprocs)*myid;j<(N/numprocs)*(myid+1);j++)
{
compute_force(j);
}
MPI_Barrier(MPI_COMM_WORLD);
for(int j=(N/numprocs)*myid;j<(N/numprocs)*(myid+1);j++)
{
compute_velocities(j);
compute_positions(j);
}
//printf("\n");
MPI_Barrier(MPI_COMM_WORLD);
}
local_endtime=MPI_Wtime();
local_elapsed=local_endtime-local_starttime;
MPI_Reduce(&local_elapsed,&elapsed,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
//printf("id %d\n",myid);
if(myid!=0)
{
//printf("sid %d %d %d\n",myid,ball_list+(N/numprocs)*myid,sizeof(ball)*N/numprocs);
MPI_Send((ball_list+(N/numprocs)*myid),sizeof(ball)*N/numprocs,MPI_BYTE,0,myid,MPI_COMM_WORLD);
}
if(myid==0)
{
for(int i=1;i<numprocs;i++)
{
MPI_Status status;
//printf("rid %d %d %d\n",myid,ball_list+(N/numprocs)*myid,sizeof(ball)*N/numprocs);
MPI_Recv((ball_list+(N/numprocs)*i),sizeof(ball)*N/numprocs,MPI_BYTE,i,i,MPI_COMM_WORLD,&status);
}
printf("Elapsed time = %f seconds\n",elapsed);
}
MPI_Finalize();
}
参考资料:360doc登录
OpenMP代码
#include <iostream>
#include <cmath>
#include <omp.h>
using namespace std;
const int N = 256;
const double G = 6.674e-11;
const double delta_t = 0.01;
struct Body {
double px, py, vx, vy, ax, ay, mass;
};
Body bodies[N];
void init() {
#pragma omp parallel for
for (int i = 0; i < N; i++) {
bodies[i].px = 0.01 * (i % int(sqrt(N)));
bodies[i].py = 0.01 * (i / int(sqrt(N)));
bodies[i].vx = 0.0;
bodies[i].vy = 0.0;
bodies[i].ax = 0.0;
bodies[i].ay = 0.0;
bodies[i].mass = 1e15;
}
}
void compute_force(int i, int j) {
double dx = bodies[i].px - bodies[j].px;
double dy = bodies[i].py - bodies[j].py;
double distance = sqrt(dx*dx + dy*dy);
double force_magnitude = (G * bodies[i].mass * bodies[j].mass) / (distance * distance);
bodies[i].ax += force_magnitude * (dx / distance) / bodies[i].mass;
bodies[i].ay += force_magnitude * (dy / distance) / bodies[i].mass;
}
void update_positions(int i) {
bodies[i].vx += bodies[i].ax * delta_t;
bodies[i].vy += bodies[i].ay * delta_t;
bodies[i].px += bodies[i].vx * delta_t;
bodies[i].py += bodies[i].vy * delta_t;
}
void update() {
omp_set_num_threads(8);
#pragma omp parallel for
for (int i = 0; i < N; i++) {
bodies[i].ax = 0.0;
bodies[i].ay = 0.0;
#pragma omp parallel for
for (int j = 0; j < N; j++) {
if (i != j) {
compute_force(i, j);
}
}
update_positions(i);
}
}
int main() {
init();
int num_steps = 1000;
double start_time = omp_get_wtime();
for (int step = 0; step < num_steps; step++) {
update();
}
double end_time = omp_get_wtime();
// Print final positions
//for (int i = 0; i < N; i++) {
// cout << "Body " << i+1 << ": (" << bodies[i].px << ", " << bodies[i].py << ")\n";
//}
cout << "Execution time: " << end_time - start_time << " seconds" << endl;
return 0;
}