一、基于阻塞通信版本
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <unistd.h>
#define N 8
#define T 2
#define SIZE N / 4
void print_rows(int, float [][N]);
int main(int argc, char *argv[])
{
int rank, size;
float rows[SIZE + 2][N], temp[SIZE + 2][N], tmp[2][N], gather[N][N];
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
for (int i = 0; i < SIZE + 2; i++)
for (int j = 0; j < N; j++)
rows[i][j] = temp[i][j] = 0;
if (rank == 0)
{
for(int i = 0; i < N; i++)
rows[1][i] = 8.0;
}
if (rank == 3)
{
for (int i = 0; i < N; i++)
rows[2][i] = 8.0;
}
for (int i = 1; i < SIZE + 1; i++)
{
rows[i][0] = 8.0;
rows[i][N - 1] = 8.0;
}
for (int step = 0; step < T; step++)
{
if (rank < 3)
{
MPI_Send(&rows[SIZE][0], N, MPI_FLOAT, rank + 1, 0, MPI_COMM_WORLD);
}
if (rank > 0)
{
MPI_Recv(&rows[0][0], N, MPI_FLOAT, rank - 1, 0, MPI_COMM_WORLD, &status);
}
if (rank > 0)
{
MPI_Send(&rows[1][0], N, MPI_FLOAT, rank - 1, 1, MPI_COMM_WORLD);
}
if (rank < 3)
{
MPI_Recv(&rows[SIZE + 1][0], N, MPI_FLOAT, rank + 1, 1, MPI_COMM_WORLD, &status);
}
int begin_row = (0 == rank) ? 2 : 1;
int end_row = (3 == rank) ? 1 : 2;
for (int i = begin_row; i <= end_row; i++)
for (int j = 1; j < N - 1; j++)
temp[i][j] = 0.25 * (rows[i][j - 1] + rows[i][j + 1] + rows[i - 1][j] + rows[i + 1][j]);
for (int i = begin_row; i <= end_row; i++)
for (int j = 1; j < N - 1; j++)
rows[i][j] = temp[i][j];
sleep(rank);
print_rows(rank, rows);
if (rank == 3)
printf("\n");
}
for(int i = 1; i < SIZE + 1; i++)
for(int j = 0; j < N; j++)
tmp[i - 1][j] = rows[i][j];
MPI_Gather(tmp, 16, MPI_FLOAT, gather, 16, MPI_FLOAT, 0, MPI_COMM_WORLD);
sleep(1);
if (rank == 0)
{
printf("迭代后结果\n");
for(int i = 0; i < N; i++)
{
for(int j = 0; j < N; j++)
printf("%.3f\t", gather[i][j]);
printf("\n");
}
}
MPI_Finalize();
return 0;
}
void print_rows(int rank, float rows[][N])
{
printf("Result in process %d:\n", rank);
for (int i = 0; i < SIZE + 2; i++)
{
for (int j = 0; j < N; j++)
printf("%1.3f\t", rows[i][j]);
printf("\n");
}
MPI_Barrier(MPI_COMM_WORLD);
}
迭代两次后结果
优缺点:
1、代码比较繁琐。
2、如果通信比较多,并且次序安排的有问题的话,存在可能死锁的可能性。
二、基于捆绑消息发送接收版本
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <unistd.h>
#define N 8
#define T 2
#define SIZE N / 4
void print_rows(int, float [][N]);
int main(int argc, char *argv[])
{
int rank, size;
float rows[SIZE + 2][N], temp[SIZE + 2][N], tmp[2][N], gather[N][N];
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
for (int i = 0; i < SIZE + 2; i++)
for (int j = 0; j < N; j++)
rows[i][j] = temp[i][j] = 0;
if (rank == 0)
{
for (int i = 0; i < N; i++)
rows[1][i] = 8.0;
}
if (rank == 3)
{
for (int i = 0; i < N; i++)
rows[2][i] = 8.0;
}
for (int i = 1; i < SIZE + 1; i++)
{
rows[i][0] = 8.0;
rows[i][N - 1] = 8.0;
}
for (int step = 0; step < T; step++)
{
int up_proc_id = (rank == 0) ? MPI_PROC_NULL : (rank - 1);
int down_proc_id = (rank == 3) ? MPI_PROC_NULL : (rank + 1);
MPI_Sendrecv(&rows[SIZE][0], N, MPI_FLOAT, down_proc_id, 0, &rows[0][0], N, MPI_FLOAT, up_proc_id, 0, MPI_COMM_WORLD, &status);
MPI_Sendrecv(&rows[1][0], N, MPI_FLOAT, up_proc_id, 1, &rows[SIZE + 1][0], N, MPI_FLOAT, down_proc_id, 1, MPI_COMM_WORLD, &status);
int begin_row = (0 == rank) ? 2 : 1;
int end_row = (3 == rank) ? 1 : 2;
for (int i = begin_row; i <= end_row; i++)
for (int j = 1; j < N - 1; j++)
temp[i][j] = 0.25 * (rows[i][j - 1] + rows[i][j + 1] + rows[i - 1][j] + rows[i + 1][j]);
for (int i = begin_row; i <= end_row; i++)
for (int j = 1; j < N - 1; j++)
rows[i][j] = temp[i][j];
sleep(rank);
print_rows(rank, rows);
if (rank == 3)
printf("\n");
}
for(int i = 1; i < SIZE + 1; i++)
for(int j = 0; j < N; j++)
tmp[i - 1][j] = rows[i][j];
MPI_Gather(tmp, 16, MPI_FLOAT, gather, 16, MPI_FLOAT, 0, MPI_COMM_WORLD);
sleep(1);
if (rank == 0)
{
printf("迭代后结果\n");
for(int i = 0; i < N; i++)
{
for(int j = 0; j < N; j++)
printf("%.3f\t", gather[i][j]);
printf("\n");
}
}
MPI_Finalize();
return 0;
}
void print_rows(int rank, float rows[][N])
{
printf("Result in process %d:\n", rank);
for (int i = 0; i < SIZE + 2; i++)
{
for (int j = 0; j < N; j++)
printf("%1.3f\t", rows[i][j]);
printf("\n");
}
MPI_Barrier(MPI_COMM_WORLD);
}
迭代两次后结果
优缺点:
1、引入了虚拟进程,简化了代码。
2、通过MPI自行维护消息发送与接收的次序,避免死锁的可能性。
三、基于非阻塞消息通信版本
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <unistd.h>
#define N 16
#define SIZE N / 4
#define T 2
void print_rows(int, float [][N]);
int main(int argc, char *argv[])
{
int rank, size;
float rows[SIZE + 2][N], temp[SIZE + 2][N], tmp[4][N], gather[N][N];
MPI_Status status;
MPI_Request request[4];
MPI_Status txstatus[4];
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
for (int i = 0; i < SIZE + 2; i++)
for (int j = 0; j < N; j++)
rows[i][j] = temp[i][j] = 0;
if (rank == 0)
{
for (int i = 0; i < N; i++)
rows[1][i] = 8.0;
}
if (rank == 3)
{
for (int i = 0; i < N; i++)
rows[SIZE][i] = 8.0;
}
for (int i = 1; i < SIZE + 1; i++)
{
rows[i][0] = 8.0;
rows[i][N - 1] = 8.0;
}
int up_proc_id = (rank == 0) ? MPI_PROC_NULL : (rank - 1);
int down_proc_id = (rank == 3) ? MPI_PROC_NULL : (rank + 1);
MPI_Sendrecv(&rows[SIZE][0], N, MPI_FLOAT, down_proc_id, 0, &rows[0][0], N, MPI_FLOAT, up_proc_id, 0, MPI_COMM_WORLD, &status);
MPI_Sendrecv(&rows[1][0], N, MPI_FLOAT, up_proc_id, 1, &rows[SIZE + 1][0], N, MPI_FLOAT, down_proc_id, 1, MPI_COMM_WORLD, &status);
for (int step = 0; step < T; step++)
{
if (rank == 0)
{
for (int i = 1; i < N - 1; i++)
temp[SIZE][i] = 0.25 * (rows[SIZE][i - 1] + rows[SIZE][i + 1] + rows[SIZE - 1][i] + rows[SIZE + 1][i]);
}
else if (rank == 3)
{
for (int i = 1; i < N - 1; i++)
temp[1][i] = 0.25 * (rows[1][i - 1] + rows[1][i + 1] + rows[0][i] + rows[2][i]);
}
else
{
for (int i = 1; i < N - 1; i++)
{
temp[1][i] = 0.25 * (rows[1][i - 1] + rows[1][i + 1] + rows[0][i] + rows[2][i]);
temp[SIZE][i] = 0.25 * (rows[SIZE][i - 1] + rows[SIZE][i + 1] + rows[SIZE - 1][i] + rows[SIZE + 1][i]);
}
}
MPI_Isend(&temp[SIZE][0], N, MPI_FLOAT, down_proc_id, 0, MPI_COMM_WORLD, &request[0]);
MPI_Irecv(&rows[0][0], N, MPI_FLOAT, up_proc_id, 0, MPI_COMM_WORLD, &request[1]);
MPI_Isend(&temp[1][0], N, MPI_FLOAT, up_proc_id, 1, MPI_COMM_WORLD, &request[2]);
MPI_Irecv(&rows[SIZE + 1][0], N, MPI_FLOAT, down_proc_id, 1, MPI_COMM_WORLD, &request[3]);
int begin_row = 2;
int end_row = SIZE - 1;
for (int i = begin_row; i <= end_row; i++)
for (int j = 1; j < N - 1; j++)
temp[i][j] = 0.25 * (rows[i][j - 1] + rows[i][j + 1] + rows[i - 1][j] + rows[i + 1][j]);
int begin = (0 == rank) ? 2 : 1;
int end = (3 == rank) ? SIZE - 1: SIZE;
for (int i = begin; i <= end; i++)
for (int j = 1; j < N - 1; j++)
rows[i][j] = temp[i][j];
MPI_Waitall(4, &request[0], &txstatus[0]);
sleep(rank);
print_rows(rank, rows);
if (rank == 3)
printf("\n");
}
for (int i = 1; i < SIZE + 1; i++)
for (int j = 0; j < N; j++)
tmp[i - 1][j] = rows[i][j];
MPI_Gather(tmp, 64, MPI_FLOAT, gather, 64, MPI_FLOAT, 0, MPI_COMM_WORLD);
sleep(1);
if (rank == 0)
{
printf("迭代后结果\n");
for(int i = 0; i < N; i++)
{
for(int j = 0; j < N; j++)
printf("%.3f\t", gather[i][j]);
printf("\n");
}
}
MPI_Finalize();
return 0;
}
void print_rows(int rank, float rows[][N])
{
printf("Result in process %d:\n", rank);
for (int i = 0; i < SIZE + 2; i++)
{
for (int j = 0; j < N; j++)
printf("%1.3f\t", rows[i][j]);
printf("\n");
}
MPI_Barrier(MPI_COMM_WORLD);
}
迭代两次后结果
优缺点:
1、优化性能,实现计算与通信的重叠。
2、整个流程复杂度高。
四、基于重复非阻塞消息通信版本
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <unistd.h>
#define N 16
#define T 2
#define SIZE N / 4
void print_rows(int, float [][N]);
int main(int argc, char *argv[])
{
int rank, size;
float rows[SIZE + 2][N], temp[SIZE + 2][N], tmp[4][N], gather[N][N];
MPI_Status status;
MPI_Request request[4];
MPI_Status txstatus[4];
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
for (int i = 0; i < SIZE + 2; i++)
for (int j = 0; j < N; j++)
rows[i][j] = temp[i][j] = 0;
if (rank == 0)
{
for (int i = 0; i < N; i++)
rows[1][i] = 8.0;
}
if (rank == 3)
{
for (int i = 0; i < N; i++)
rows[SIZE][i] = 8.0;
}
for (int i = 1; i < SIZE + 1; i++)
{
rows[i][0] = 8.0;
rows[i][N - 1] = 8.0;
}
int up_proc_id = (0 == rank) ? MPI_PROC_NULL : (rank - 1);
int down_proc_id = (3 == rank)? MPI_PROC_NULL : (rank + 1);
MPI_Sendrecv(&rows[SIZE][0], N, MPI_FLOAT, down_proc_id, 0, &rows[0][0], N, MPI_FLOAT, up_proc_id, 0, MPI_COMM_WORLD, &status);
MPI_Sendrecv(&rows[1][0], N, MPI_FLOAT, up_proc_id, 1, &rows[SIZE + 1][0], N, MPI_FLOAT, down_proc_id, 1, MPI_COMM_WORLD, &status);
MPI_Send_init(&temp[SIZE][0], N, MPI_FLOAT, down_proc_id, 0, MPI_COMM_WORLD, &request[0]);
MPI_Recv_init(&rows[0][0], N, MPI_FLOAT, up_proc_id, 0, MPI_COMM_WORLD, &request[1]);
MPI_Send_init(&temp[1][0], N, MPI_FLOAT, up_proc_id, 1, MPI_COMM_WORLD, &request[2]);
MPI_Recv_init(&rows[SIZE + 1][0], N, MPI_FLOAT, down_proc_id, 1, MPI_COMM_WORLD, &request[3]);
for (int step = 0; step < T; step++)
{
if (0 == rank)
{
for (int i = 1; i < N - 1; i++)
temp[SIZE][i] = 0.25 * (rows[SIZE][i - 1] + rows[SIZE][i + 1] + rows[SIZE - 1][i] + rows[SIZE + 1][i]);
}
else if (3 == rank)
{
for (int i = 1; i < N - 1; i++)
temp[1][i] = 0.25 * (rows[1][i - 1] + rows[1][i + 1] + rows[0][i] + rows[2][i]);
}
else
{
for (int i = 1; i < N - 1; i++)
{
temp[1][i] = 0.25 * (rows[1][i - 1] + rows[1][i + 1] + rows[0][i] + rows[2][i]);
temp[SIZE][i] = 0.25 * (rows[SIZE][i - 1] + rows[SIZE][i + 1] + rows[SIZE - 1][i] + rows[SIZE + 1][i]);
}
}
MPI_Startall(4, &request[0]);
int begin_row = 2;
int end_row = SIZE - 1;
for (int i = begin_row; i <= end_row; i++)
for (int j = 1; j < N - 1; j++)
temp[i][j] = 0.25 * (rows[i][j - 1] + rows[i][j + 1] + rows[i - 1][j] + rows[i + 1][j]);
int begin = (0 == rank) ? 2 : 1;
int end = (3 == rank) ? SIZE - 1 : SIZE;
for (int i = begin; i <= end; i++)
for (int j = 1; j < N - 1; j++)
rows[i][j] = temp[i][j];
MPI_Waitall(4, &request[0], &txstatus[0]);
sleep(rank);
print_rows(rank, rows);
if (rank == 3)
printf("\n");
}
for (int i = 0 ; i < 4; i++) MPI_Request_free(&request[i]);
MPI_Barrier(MPI_COMM_WORLD);
MPI_Gather(&rows[1][0], 64, MPI_FLOAT, gather, 64, MPI_FLOAT, 0, MPI_COMM_WORLD);
sleep(1);
if (rank == 0)
{
printf("迭代后结果\n");
for(int i = 0; i < N; i++)
{
for(int j = 0; j < N; j++)
printf("%.3f\t", gather[i][j]);
printf("\n");
}
}
MPI_Finalize();
return 0;
}
void print_rows(int rank, float rows[][N])
{
printf("Result in process %d:\n", rank);
for (int i = 0; i < SIZE + 2; i++)
{
for (int j = 0; j < N; j++)
printf("%1.3f\t", rows[i][j]);
printf("\n");
}
MPI_Barrier(MPI_COMM_WORLD);
}
迭代两次后结果
优缺点:
1、优化通信,提前初始化通信。