Cannon算法的原理及MPI C语言实现

该博客介绍了Cannon算法的MPI版本,适用于并行计算中的矩阵乘法。文章首先阐述了算法的前提条件,包括矩阵大小、处理器分配等,并提到对齐的重要性。接着,展示了C语言代码实现,包括矩阵初始化、对齐操作、计算过程以及恢复对齐状态。然而,代码中存在一些缺陷,如未实现数据输入与输出、结果收集和高效串行矩阵乘法。代码适用于处理器数目为平方数且矩阵大小可被处理器数量开根号整除的情况。
摘要由CSDN通过智能技术生成

前提(可以看了下面回来再看):

1. 矩阵A是一个n*n方阵

2. 有p个处理器(p必须是平方数),排成sqrt(p)×sqrt(p)的网格,每个处理器得到一个(n/sqrt(p))×(n/sqrt(p))的子块,即n²/p个元素。(注意:此处要求n能被sqrt(p)整除,因为cannon要求每个分块大小一样)

3. 一个高效的串行矩阵乘法算法(dgemm,sgemm),用于计算块与块之间相乘

看下面链接

关于对齐再补充一点方便理解:

…对齐的目的就是对齐,不对齐就没法算

实现见下,缺陷有:

1. 没有实现读入数据和数据的分发

2. 没有实现数据的收集和整合(或者好看的打印)

3. 使用的串行矩阵计算代码效率较低

注意:processor数目要求是平方数,n要求是可以被sqrt(processor数量)整除的数

本代码用课程ppt上的代码改的,需要的随便用

#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "mpi.h"

#define DEBUG 1

void MatrixMultiplyAgg(int n, double *a, double *b, double *c);

/*
 * Cannon's algorithm driver.
 *
 * Usage: mpiexec -n <p> ./cannon <n>
 *
 * <p> must be a perfect square and <n> must be a positive multiple of
 * sqrt(p).  Every process owns one nlocal x nlocal block (nlocal = n/sqrt(p))
 * of A, B and C.  A and B blocks are filled with the owner's rank, C with 0.
 *
 * Grid convention used throughout: mycoords[0] is treated as the block's
 * column index ("x") and mycoords[1] as its row index ("y").  A is skewed
 * along dimension 0 by the row index, B along dimension 1 by the column
 * index; each compute step then shifts A and B by one along the same
 * dimensions.
 *
 * Returns 0 on success, EXIT_FAILURE on bad arguments or a bad process count.
 */
int main(int argc, char *argv[])
{
    int i, j;
    int n, nlocal, count;
    double *a, *b, *c;
    int npes, dims[2], periods[2];
    int myrank, my2drank, mycoords[2];
    int shiftsource, shiftdest;
    int rightrank, leftrank, downrank, uprank;
    int rank;
    long parsed;
    char *end;
    MPI_Status status;
    MPI_Comm comm_2d;

    /* Enter the parallel region. */
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &npes);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    if (myrank == 0)
        printf("%d processors\n", npes);

    if (argc != 2) {
        if (myrank == 0)
            printf("Usage: %s <n>\n", argv[0]);
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    /*
     * Grid side = integer square root of the process count.  Computed with
     * integers so that no floating-point rounding is involved, and verified
     * by multiplying back: comparing dims[0] with npes / dims[0] would
     * wrongly accept e.g. 5 processes (2 == 5 / 2).
     */
    dims[0] = 0;
    while ((dims[0] + 1) * (dims[0] + 1) <= npes)
        dims[0]++;
    dims[1] = dims[0];

    if (dims[0] * dims[1] != npes) {
        if (myrank == 0)
            printf("The number of processes must be perfect square.\n");
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    /*
     * Parse and validate n.  Every rank sees the same argv, so all ranks take
     * the same branch here and nobody is left waiting in a collective.
     */
    parsed = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || parsed <= 0 || parsed > INT_MAX ||
        parsed % dims[0] != 0) {
        if (myrank == 0)
            printf("n must be a positive integer divisible by %d.\n", dims[0]);
        MPI_Finalize();
        return EXIT_FAILURE;
    }
    n = (int)parsed;        /* side length of the global matrices */
    nlocal = n / dims[0];   /* side length of one local block */

    /* The element count is passed to MPI as an int, so it must fit in one. */
    if (nlocal > INT_MAX / nlocal) {
        if (myrank == 0)
            printf("n is too large for %d processes.\n", npes);
        MPI_Finalize();
        return EXIT_FAILURE;
    }
    count = nlocal * nlocal;

    /* Both grid dimensions are periodic (wrap-around), as the shifts need. */
    periods[0] = periods[1] = 1;
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &comm_2d);
    MPI_Comm_rank(comm_2d, &my2drank);
    MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);

    a = malloc((size_t)count * sizeof *a);
    b = malloc((size_t)count * sizeof *b);
    c = malloc((size_t)count * sizeof *c);
    if (a == NULL || b == NULL || c == NULL) {
        /* Other ranks may already be communicating; tear the whole job down. */
        fprintf(stderr, "%d: out of memory\n", myrank);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    if (DEBUG)
        printf("%d: init matrix\n", myrank);

    /* Fill the local blocks: A and B with the owner's rank, C with zero. */
    for (i = 0; i < count; i++) {
        a[i] = myrank;
        b[i] = myrank;
        c[i] = 0.0;
    }

    if (DEBUG)
        printf("%d: done initing matrix\n", myrank);

    MPI_Barrier(MPI_COMM_WORLD);

    /* Initial alignment (skew): A by the row index, B by the column index. */
    if (DEBUG) {
        printf("%d: x:%d,y:%d\n", myrank, mycoords[0], mycoords[1]);
    }

    MPI_Cart_shift(comm_2d, 0, -mycoords[1], &shiftsource, &shiftdest);
    if (DEBUG) {
        printf("%d: dest:%d,source:%d\n", myrank, shiftdest, shiftsource);
    }
    MPI_Sendrecv_replace(a, count, MPI_DOUBLE, shiftdest, 1, shiftsource, 1,
                         comm_2d, &status);
    MPI_Barrier(comm_2d);

    MPI_Cart_shift(comm_2d, 1, -mycoords[0], &shiftsource, &shiftdest);
    if (DEBUG) {
        printf("%d: dest:%d,source:%d\n", myrank, shiftdest, shiftsource);
    }
    MPI_Sendrecv_replace(b, count, MPI_DOUBLE, shiftdest, 1, shiftsource, 1,
                         comm_2d, &status);

    if (DEBUG) {
        printf("%d: ready to start calculating\n", myrank);
    }
    MPI_Barrier(comm_2d);

    /* Neighbours for the single-step shifts of the compute phase. */
    MPI_Cart_shift(comm_2d, 0, -1, &rightrank, &leftrank);
    MPI_Cart_shift(comm_2d, 1, -1, &downrank, &uprank);

    if (DEBUG) {
        printf("%d: right:%d, left:%d, up:%d, down:%d\n",
               myrank, rightrank, leftrank, uprank, downrank);
    }

    /*
     * Compute phase: multiply the resident blocks, then pass A and B one
     * step on.  After dims[0] steps every block has travelled once around
     * its ring and is back in its aligned (skewed) position.
     */
    for (i = 0; i < dims[0]; i++) {
        MPI_Barrier(comm_2d);
        MatrixMultiplyAgg(nlocal, a, b, c);
        MPI_Sendrecv_replace(a, count, MPI_DOUBLE, leftrank, 1, rightrank, 1,
                             comm_2d, &status);
        MPI_Sendrecv_replace(b, count, MPI_DOUBLE, uprank, 1, downrank, 1,
                             comm_2d, &status);
    }
    MPI_Barrier(comm_2d);

    /*
     * Undo the initial alignment.  The inverse of a shift by -k is a shift
     * by +k; repeating the -k shift only happens to work on a 2x2 grid.
     */
    MPI_Cart_shift(comm_2d, 0, mycoords[1], &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(a, count, MPI_DOUBLE, shiftdest, 1, shiftsource, 1,
                         comm_2d, &status);
    MPI_Barrier(comm_2d);

    MPI_Cart_shift(comm_2d, 1, mycoords[0], &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(b, count, MPI_DOUBLE, shiftdest, 1, shiftsource, 1,
                         comm_2d, &status);
    MPI_Barrier(comm_2d);

    MPI_Comm_free(&comm_2d);

    if (DEBUG)
        printf("%d: finish calculating\n", myrank);

    MPI_Barrier(MPI_COMM_WORLD);

    /* Print the local blocks one rank at a time, in rank order. */
    for (rank = 0; rank < npes; rank++) {
        if (myrank == rank) {
            printf("my rank: %d\n", myrank);

            puts("Random Matrix A");
            for (i = 0; i < nlocal; i++) {
                for (j = 0; j < nlocal; j++)
                    printf("%6.3f ", a[i * nlocal + j]);
                printf("\n");
            }

            puts("Random Matrix B");
            for (i = 0; i < nlocal; i++) {
                for (j = 0; j < nlocal; j++)
                    printf("%6.3f ", b[i * nlocal + j]);
                printf("\n");
            }

            puts("Matrix C = A*B");
            for (i = 0; i < nlocal; i++) {
                for (j = 0; j < nlocal; j++)
                    printf("%6.3f ", c[i * nlocal + j]);
                printf("\n");
            }

            /* Push this rank's text out before the next rank starts. */
            fflush(stdout);
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }

    free(a);
    free(b);
    free(c);

    MPI_Finalize();
    return 0;
}

/*
 * Accumulating dense matrix product: c += a * b.
 *
 * a, b and c are n x n matrices stored row-major in flat arrays of n*n
 * doubles.  c is NOT cleared first, so repeated calls keep adding partial
 * products into it.  Nothing happens when n <= 0.
 *
 * The loops run in i-k-j order so that the innermost loop walks a row of b
 * and a row of c contiguously instead of striding through b by n.  Each
 * c[i][j] still receives its terms in increasing k, exactly as in the
 * textbook i-j-k order.
 */
void MatrixMultiplyAgg(int n, double *a, double *b, double *c)
{
    int i, j, k;

    for (i = 0; i < n; i++) {
        double *crow = c + i * n;

        for (k = 0; k < n; k++) {
            const double aik = a[i * n + k];
            const double *brow = b + k * n;

            for (j = 0; j < n; j++)
                crow[j] += aik * brow[j];
        }
    }
}

[email protected]:~/code/mpi$ mpiexec -n 4 ./cannon 4

4 processors

0: init matrix

1: init matrix

1: done initing matrix

2: init matrix

2: done initing matrix

3: init matrix

3: done initing matrix

0: done initing matrix

0: x:0,y:0

1: x:0,y:1

1: dest:3,source:3

2: x:1,y:0

2: dest:2,source:2

0: dest:0,source:0

3: x:1,y:1

3: dest:1,source:1

0: dest:0,source:0

1: dest:1,source:1

1: ready to start calculating

2: dest:3,source:3

0: ready to start calculating

2: ready to start calculating

3: dest:2,source:2

3: ready to start calculating

0: right:2, left:2, up:1, down:1

1: right:3, left:3, up:0, down:0

2: right:0, left:0, up:3, down:3

3: right:1, left:1, up:2, down:2

0: finish calculating

1: finish calculating

2: finish calculating

3: finish calculating

my rank: 0

Random Matrix A

0.000  0.000

0.000  0.000

Random Matrix B

0.000  0.000

0.000  0.000

Matrix C = A*B

4.000  4.000

4.000  4.000

my rank: 1

Random Matrix A

1.000  1.000

1.000  1.000

Random Matrix B

1.000  1.000

1.000  1.000

Matrix C = A*B

6.000  6.000

6.000  6.000

my rank: 2

Random Matrix A

2.000  2.000

2.000  2.000

Random Matrix B

2.000  2.000

2.000  2.000

Matrix C = A*B

12.000 12.000

12.000 12.000

my rank: 3

Random Matrix A

3.000  3.000

3.000  3.000

Random Matrix B

3.000  3.000

3.000  3.000

Matrix C = A*B

22.000 22.000

22.000 22.000

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值