cannon算法的mpi_cannon算法的原理及MPI C语言实现

最新推荐文章于 2022-06-01 16:14:50 发布

Gee Law

最新推荐文章于 2022-06-01 16:14:50 发布

阅读量739

点赞数

文章标签： cannon算法的mpi

本文链接：https://blog.csdn.net/weixin_42312133/article/details/112041830

版权

该博客介绍了Cannon算法的MPI版本，适用于并行计算中的矩阵乘法。文章首先阐述了算法的前提条件，包括矩阵大小、处理器分配等，并提到对齐的重要性。接着，展示了C语言代码实现，包括矩阵初始化、对齐操作、计算过程以及恢复对齐状态。然而，代码中存在一些缺陷，如未实现数据输入与输出、结果收集和高效串行矩阵乘法。代码适用于处理器数目为平方数且矩阵大小可被处理器数量开根号整除的情况。

摘要由CSDN通过智能技术生成

前提(可以看了下面回来再看)：

1. 矩阵A是一个n*n方阵

2. 有p个处理器，排成sqrt(p)*sqrt(p)的网格，每个处理器得到一个(n/sqrt(p))*(n/sqrt(p))的分块，(注意：此处要求p是一个平方数，并且n能被sqrt(p)整除。因为cannon要求每个分块大小一样)

3. 一个高效的串行矩阵乘法算法(dgemm，sgemm)，用于计算块与块之间相乘

看下面链接

关于对齐再补充一点方便理解：

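…对齐的目的是让每个处理器手里的A分块和B分块下标能配对：对齐之后，负责C(i,j)的处理器同时持有A(i,k)和B(k,j)(k相同)，本地相乘的结果才是C(i,j)的一项；之后每一步A左移一格、B上移一格，k跟着加一。不对齐就没法算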

实现见下，缺陷有：

1. 没有实现读入数据和数据的分发

2. 没有实现数据的收集和整合(或者好看的打印)

3. 使用的串行矩阵计算代码效率较低

注意：processor数目要求是平方数，n要求是可以被sqrt(processor数量)整除的数

本代码用课程ppt上的代码改的，需要的随便用

#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "mpi.h"

/* Non-zero enables the per-rank progress traces that main() prints with printf. */
#define DEBUG 1

/* Accumulates the product of the n-by-n row-major matrices a and b into c (c += a * b). */
void MatrixMultiplyAgg(int n, double *a, double *b, double *c);

/* Prints one nlocal x nlocal row-major block preceded by a title line. */
static void PrintBlock(const char *title, const double *block, int nlocal)
{
    int i, j;

    puts(title);
    for (i = 0; i < nlocal; i++) {
        for (j = 0; j < nlocal; j++)
            printf("%6.3f ", block[i * nlocal + j]);
        printf("\n");
    }
}

/*
 * Cannon's algorithm for C = A * B on a periodic q x q grid of MPI processes.
 *
 * Command line: one argument n, the order of the global matrices.
 * The number of processes must be a perfect square q*q and n must be a
 * positive multiple of q; each process then owns one nlocal x nlocal block
 * (nlocal = n / q) of A, B and C.
 *
 * The A and B blocks are filled with the owner's world rank, aligned,
 * multiplied in q shift-and-multiply steps, moved back to their original
 * owners, and finally printed rank by rank.
 *
 * Returns 0 on success and EXIT_FAILURE when the arguments or the process
 * count are unusable.  An allocation failure on any rank aborts the job.
 */
int main(int argc, char *argv[])
{
    int i;
    int n, nlocal, count;
    int q;
    int input_ok;
    long parsed;
    char *end;
    double *a, *b, *c;
    int npes, dims[2], periods[2];
    int myrank, my2drank, mycoords[2];
    int shiftsource, shiftdest;
    int rightrank, leftrank, downrank, uprank;
    int rank;
    MPI_Status status;
    MPI_Comm comm_2d;

    /* Enter the parallel section. */
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &npes);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    if (myrank == 0)
        printf("%d processors\n", npes);

    if (argc != 2) {
        if (myrank == 0)
            printf("Usage: %s <n>\n", argc > 0 ? argv[0] : "cannon");
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    /*
     * The grid side is sqrt(npes).  Verify with an integer product: comparing
     * sqrt(npes) against npes / sqrt(npes) would accept e.g. 5 or 10 processes.
     */
    q = (int)(sqrt((double)npes) + 0.5);
    if ((long long)q * q != npes) {
        if (myrank == 0)
            printf("The number of processes must be perfect square.\n");
        MPI_Finalize();
        return EXIT_FAILURE;
    }
    dims[0] = dims[1] = q;

    /*
     * n is the order of the global matrices, nlocal the order of one block.
     * Every rank parses the same argument, so all ranks take the same branch.
     */
    errno = 0;
    parsed = strtol(argv[1], &end, 10);
    input_ok = end != argv[1] && *end == '\0' && errno != ERANGE &&
               parsed > 0 && parsed <= INT_MAX && parsed % q == 0;
    /* MPI element counts are int, so nlocal * nlocal must fit in one. */
    if (input_ok && parsed / q > INT_MAX / (parsed / q))
        input_ok = 0;
    if (!input_ok) {
        if (myrank == 0)
            printf("n must be a positive integer divisible by %d.\n", q);
        MPI_Finalize();
        return EXIT_FAILURE;
    }
    n = (int)parsed;
    nlocal = n / q;
    count = nlocal * nlocal;

    /* Both grid dimensions wrap around, so every shift below is circular. */
    periods[0] = periods[1] = 1;
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &comm_2d);
    MPI_Comm_rank(comm_2d, &my2drank);
    MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);

    a = calloc((size_t)count, sizeof *a);
    b = calloc((size_t)count, sizeof *b);
    c = calloc((size_t)count, sizeof *c);
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "%d: out of memory\n", myrank);
        /* Other ranks are about to enter collectives; abort the whole job. */
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }

    if (DEBUG)
        printf("%d: init matrix\n", myrank);

    /* Fill the local blocks: A and B with the world rank, C with zeros. */
    for (i = 0; i < count; i++) {
        a[i] = myrank;
        b[i] = myrank;
        c[i] = 0.0;
    }

    if (DEBUG)
        printf("%d: done initing matrix\n", myrank);
    MPI_Barrier(MPI_COMM_WORLD);

    /* Initial alignment: shift A along dimension 0 by coords[1] ... */
    if (DEBUG)
        printf("%d: x:%d,y:%d\n", myrank, mycoords[0], mycoords[1]);
    MPI_Cart_shift(comm_2d, 0, -mycoords[1], &shiftsource, &shiftdest);
    if (DEBUG)
        printf("%d: dest:%d,source:%d\n", myrank, shiftdest, shiftsource);
    MPI_Sendrecv_replace(a, count, MPI_DOUBLE, shiftdest, 1, shiftsource, 1,
                         comm_2d, &status);
    MPI_Barrier(comm_2d);

    /* ... and B along dimension 1 by coords[0]. */
    MPI_Cart_shift(comm_2d, 1, -mycoords[0], &shiftsource, &shiftdest);
    if (DEBUG)
        printf("%d: dest:%d,source:%d\n", myrank, shiftdest, shiftsource);
    MPI_Sendrecv_replace(b, count, MPI_DOUBLE, shiftdest, 1, shiftsource, 1,
                         comm_2d, &status);
    if (DEBUG)
        printf("%d: ready to start calculating\n", myrank);
    MPI_Barrier(comm_2d);

    /*
     * Compute phase.  With displacement -1, MPI_Cart_shift returns the
     * neighbour we receive from first (source) and the one we send to second.
     */
    MPI_Cart_shift(comm_2d, 0, -1, &rightrank, &leftrank);
    MPI_Cart_shift(comm_2d, 1, -1, &downrank, &uprank);
    if (DEBUG)
        printf("%d: right:%d, left:%d, up:%d, down:%d\n",
               myrank, rightrank, leftrank, uprank, downrank);

    for (i = 0; i < q; i++) {
        MPI_Barrier(comm_2d);
        MatrixMultiplyAgg(nlocal, a, b, c);
        MPI_Sendrecv_replace(a, count, MPI_DOUBLE, leftrank, 1, rightrank, 1,
                             comm_2d, &status);
        MPI_Sendrecv_replace(b, count, MPI_DOUBLE, uprank, 1, downrank, 1,
                             comm_2d, &status);
    }
    MPI_Barrier(comm_2d);

    /*
     * Undo the initial alignment.  After q unit shifts the blocks are back in
     * the aligned layout, so the displacement must be the opposite (+) of the
     * one used for alignment; repeating the negative shift only happens to
     * work on a 2 x 2 grid.
     */
    MPI_Cart_shift(comm_2d, 0, +mycoords[1], &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(a, count, MPI_DOUBLE, shiftdest, 1, shiftsource, 1,
                         comm_2d, &status);
    MPI_Barrier(comm_2d);

    MPI_Cart_shift(comm_2d, 1, +mycoords[0], &shiftsource, &shiftdest);
    MPI_Sendrecv_replace(b, count, MPI_DOUBLE, shiftdest, 1, shiftsource, 1,
                         comm_2d, &status);
    MPI_Barrier(comm_2d);

    MPI_Comm_free(&comm_2d);

    if (DEBUG)
        printf("%d: finish calculating\n", myrank);
    MPI_Barrier(MPI_COMM_WORLD);

    /* Let the ranks print their blocks one at a time, in rank order. */
    for (rank = 0; rank < npes; rank++) {
        if (myrank == rank) {
            printf("my rank: %d\n", myrank);
            PrintBlock("Random Matrix A", a, nlocal);
            PrintBlock("Random Matrix B", b, nlocal);
            PrintBlock("Matrix C = A*B", c, nlocal);
            /* Push this rank's text out before the next rank starts printing. */
            fflush(stdout);
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }

    free(a);
    free(b);
    free(c);

    MPI_Finalize();
    return 0;
}

/*
 * Accumulates a matrix product into c: c += a * b.
 *
 * n        order of the square matrices
 * a, b     n-by-n input matrices, row-major
 * c        n-by-n accumulator, row-major; existing contents are added to,
 *          not overwritten
 *
 * Each element of c is updated in place once per inner index, in
 * ascending order of that index.
 */
void MatrixMultiplyAgg(int n, double *a, double *b, double *c)
{
    int row, col, inner;

    for (row = 0; row < n; row++) {
        double *a_row = a + row * n;
        double *c_row = c + row * n;

        for (col = 0; col < n; col++) {
            double *cell = c_row + col;

            for (inner = 0; inner < n; inner++) {
                *cell += a_row[inner] * b[inner * n + col];
            }
        }
    }
}

[email protected]:~/code/mpi$ mpiexec -n 4 ./cannon 4

4 processors

0: init matrix

1: init matrix

1: done initing matrix

2: init matrix

2: done initing matrix

3: init matrix

3: done initing matrix

0: done initing matrix

0: x:0,y:0

1: x:0,y:1

1: dest:3,source:3

2: x:1,y:0

2: dest:2,source:2

0: dest:0,source:0

3: x:1,y:1

3: dest:1,source:1

0: dest:0,source:0

1: dest:1,source:1

1: ready to start calculating

2: dest:3,source:3

0: ready to start calculating

2: ready to start calculating

3: dest:2,source:2

3: ready to start calculating

0: right:2, left:2, up:1, down:1

1: right:3, left:3, up:0, down:0

2: right:0, left:0, up:3, down:3

3: right:1, left:1, up:2, down:2

0: finish calculating

1: finish calculating

2: finish calculating

3: finish calculating

my rank: 0

Random Matrix A

0.000 0.000

Random Matrix B

0.000 0.000

Matrix C = A*B

4.000 4.000

my rank: 1

Random Matrix A

1.000 1.000

Random Matrix B

1.000 1.000

Matrix C = A*B

6.000 6.000

my rank: 2

Random Matrix A

2.000 2.000

Random Matrix B

2.000 2.000

Matrix C = A*B

12.000 12.000

my rank: 3

Random Matrix A

3.000 3.000

Random Matrix B

3.000 3.000

Matrix C = A*B

22.000 22.000

Gee Law

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫