一 . 运行环境VS2017 + MPI
二 . 算法原理可见博客https://www.cnblogs.com/chihaoyuIsnotHere/p/10553617.html
三. 使用到的MPI关键技术:虚拟笛卡尔进程拓扑
四. 源代码:头文件Matrix.h与第三篇文章的类似,只需加上+号重载
#include<iostream>
#include"Matrix.h"
#include<cmath>
#include<ctime>
#include"mpi.h"
using namespace std;
const int N = 600;
int myid, numprocs,upRank,downRank,leftRank,rightRank,part,num;
int masterNode;
int coord[2];
double start, finish;
MPI_Comm Cart_Comm_World;
int isqrt(int x)
{
int ipart = sqrt(x);
if (ipart*ipart != x)
++ipart;
return ipart;
}
void MatrixScatter(Matrix &A)
{
double *memoryPool = nullptr;
if (coord[0] == 0 && coord[1] == 0)
{
Matrix tmp(N);
tmp.ranCreate();
//cout << tmp << endl;
memoryPool = new double[N*N];
int count = 0;
for (int k = 0; k < part; ++k)
for (int h = 0; h < part; ++h)
for (int i = k * num; i <(k+1)*num; ++i)
for (int j = h * num; j < (h+1)*num; ++j)
memoryPool[count++] = tmp(i, j);
}
MPI_Scatter(memoryPool, num*num, MPI_DOUBLE, &A(0, 0), num*num, MPI_DOUBLE, masterNode, Cart_Comm_World);
if (memoryPool)
delete[]memoryPool;
}
//进行初始位移
void cannonInit(Matrix &A, Matrix &B)
{
int x = coord[0], y = coord[1];//矩阵块的行号与列号
MPI_Status status;
//获得当前矩阵块横坐标方向相聚x的左右邻居
MPI_Cart_shift(Cart_Comm_World, 1, x, &leftRank, &rightRank);
if (leftRank != myid)
MPI_Sendrecv_replace(&A(0, 0), num*num, MPI_DOUBLE, leftRank, 0, rightRank, 0, Cart_Comm_World, &status);
MPI_Cart_shift(Cart_Comm_World, 0, y, &upRank, &downRank);
if (upRank != myid)
MPI_Sendrecv_replace(&B(0, 0), num*num, MPI_DOUBLE, upRank, 0, downRank, 0, Cart_Comm_World, &status);
}
void cannonSolver(Matrix &A,Matrix &B)
{
MPI_Cart_shift(Cart_Comm_World, 0, 1, &upRank, &downRank);
MPI_Cart_shift(Cart_Comm_World, 1, 1, &leftRank, &rightRank);
Matrix C(num);
MPI_Status status;
for (int i = 0; i < part; ++i)
{
C = C + A * B;
MPI_Sendrecv_replace(&A(0, 0), num*num, MPI_DOUBLE, leftRank, 0, rightRank, 0, Cart_Comm_World, &status);
MPI_Sendrecv_replace(&B(0, 0), num*num, MPI_DOUBLE, upRank, 0, downRank, 0, Cart_Comm_World, &status);
}
double *memoryPool = nullptr;
if (myid == masterNode)
memoryPool = new double[N*N];
MPI_Gather(&C(0, 0), num*num, MPI_DOUBLE, memoryPool, num*num, MPI_DOUBLE, masterNode, Cart_Comm_World);
if (myid == masterNode)
{
Matrix result(N);
int count = 0;
for (int k = 0; k < part; ++k)
for (int h = 0; h < part; ++h)
for (int i = k * num; i < (k + 1)*num; ++i)
for (int j = h * num; j < (h + 1)*num; ++j)
result(i, j) = memoryPool[count++];
//cout << result << endl;
delete[]memoryPool;
}
}
int main(int argc, char* argv[])
{
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
if (numprocs == 1)
{
Matrix A(N), B(N);
A.ranCreate();
B.ranCreate();
start = MPI_Wtime();//随机数的生成特别耗费时间,不应该计入
Matrix result = A * B;
//cout << A * B << endl;
finish = MPI_Wtime();
cout << finish - start << endl;
MPI_Finalize();
return 0;
}
part = isqrt(numprocs);//一个维度的方向上,矩阵块数;isqrt是对整型的求根,保证不错
num = N / part;//分块矩阵的维度
//创建虚拟笛卡尔拓扑,方便通信
int dims[2] = { part,part };
int periods[2] = { 1,1 };
MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &Cart_Comm_World);
MPI_Comm_rank(Cart_Comm_World, &myid);
MPI_Cart_coords(Cart_Comm_World, myid, 2, coord);
int position[2] = { 0,0 };
MPI_Cart_rank(Cart_Comm_World, position, &masterNode);
Matrix A(num),B(num);
MatrixScatter(A);
MatrixScatter(B);
start = MPI_Wtime();
cannonInit(A, B);
cannonSolver(A,B);
finish = MPI_Wtime();
if(myid == 0)
cout << finish - start << endl;
MPI_Comm_free(&Cart_Comm_World);
MPI_Finalize();
return 0;
}
五.运行结果(dim = 600,cannon算法在问题规模较小的情况下仍能获得不错的加速比)
注:个人PC仅有8个计算核心,因而9核模拟的效果稍差