MPI和OpenMP实现矩阵相乘
一、MPI实现矩阵相乘
-
首先要配置好MPI多节点集群环境,没有配置好的可以参考我的上一篇文章
-
原理:
-
$A \times B = C$,这里矩阵 $A,B,C$ 大小都为 $N \times N$,代码中 $N$ 默认为128
-
main节点将矩阵 $A$ 按行分成多段后,发给slave节点
-
main节点将矩阵 $B$ 整体发给slave节点
-
slave节点接收main节点传过来的 $A,B$,然后利用下面公式计算 $C_{ij}$,再传回给main节点
-
$$C_{ij} = \sum_{k=1}^{N} A_{ik} \times B_{kj}$$
3. 运行:
- 编写一个profile文件,里面是关于节点的配置,仅main节点需要,内容如下
node01:2
node02:2
-
编写源代码文件gemm.cpp,两个虚拟机都需要
-
编译gemm.cpp文件,两个虚拟机都需要编译
mpic++ gemm.cpp -o gemm
- mpi多节点运行,`-n` 参数的不同可能导致错误,多换几个数,推荐为2
mpiexec -f profile -n 2 ./gemm
- 结果展示
more ans.txt
6. 代码
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include "mpi.h"
using namespace std;
const int maxn = 128; // matrix dimension: A, B and C are all maxn x maxn
MPI_Status status;    // reused for every blocking MPI_Recv below
double A[maxn][maxn], B[maxn][maxn], C[maxn][maxn]; // C = A * B; globals are zero-initialized
int main(int argc, char **argv) {
int pCnt, pId, slaves, source, dest, rows, offset;
// 初始化MPI
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &pId);
MPI_Comm_size(MPI_COMM_WORLD, &pCnt);
// pId=0就是主节点
if (pId == 0) {
srand(time(NULL));
for (int i = 0; i < maxn; i++) {
for (int j = 0; j < maxn; j++) {
A[i][j] = rand() % 20;
B[i][j] = rand() % 20;
}
}
slaves = pCnt - 1;
rows = maxn / slaves;
offset = 0;
// 将数据传到slave节点
for (dest = 1; dest <= slaves; dest++) {
MPI_Send(&offset, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
MPI_Send(&A[offset][0], rows * maxn, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD);
MPI_Send(&B, maxn * maxn, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD);
offset = offset + rows;
}
// 从slave节点接受计算后的数据
for (int i = 1; i <= slaves; i++) {
source = i;
MPI_Recv(&offset, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status);
MPI_Recv(&C[offset][0], rows * maxn, MPI_DOUBLE, source, 2, MPI_COMM_WORLD, &status);
}
// 定向输出到ans.txt文件
ofstream cout("ans.txt");
for (int i = 0; i < maxn; i++) {
for (int j = 0; j < maxn; j++)
cout << C[i][j] << "\t";
cout << endl;
}
}
// pId>0就是slave节点
if (pId > 0) {
source = 0;
// 从main节点接收数据
MPI_Recv(&offset, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status);
MPI_Recv(&A, rows * maxn, MPI_DOUBLE, source, 1, MPI_COMM_WORLD, &status);
MPI_Recv(&B, maxn * maxn, MPI_DOUBLE, source, 1, MPI_COMM_WORLD, &status);
// 矩阵相乘的计算
for (int k = 0; k < maxn; k++)
for (int i = 0; i < rows; i++) {
C[i][k] = 0.0;
for (int j = 0; j < maxn; j++)
C[i][k] = C[i][k] + A[i][j] * B[j][k];
}
// 将计算的结果发送回主节点
MPI_Send(&offset, 1, MPI_INT, 0, 2, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, 0, 2, MPI_COMM_WORLD);
MPI_Send(&C, rows * maxn, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD);
}
// 结束MPI
MPI_Finalize();
}
二、OpenMP实现矩阵相乘
-
不需要MPI集群环境,只需要一个节点就行
-
原理
- $A \times B = C$,这里矩阵 $A,B,C$ 大小都为 $N \times N$,代码中 $N$ 默认为4
- 利用下面公式,其中线程间 $i,j,k$ 私有,$A,B,C$ 共享
$$C_{ij} = \sum_{k=1}^{N} A_{ik} \times B_{kj}$$
3. 编译运行
g++ ommp.cpp -fopenmp -o ommp
./ommp
4.结果展示
5.代码
#include <iostream>
#include <pthread.h>
#include <omp.h>
#include <sys/time.h>
#include <ctime>
using namespace std;
const int maxn = 4; // matrix dimension: A, B and C are all maxn x maxn
int A[maxn][maxn], B[maxn][maxn], C[maxn][maxn]; // C = A * B; globals are zero-initialized
// Multiply two random maxn x maxn matrices with an OpenMP-parallel outer
// loop and print the product C to stdout.
int main() {
    // One OpenMP thread per available processor core.
    omp_set_num_threads(omp_get_num_procs());
    // Fill A and B with random values in [0, 10).
    srand(time(NULL));
    for (int r = 0; r < maxn; ++r)
        for (int c = 0; c < maxn; ++c) {
            A[r][c] = rand() % 10;
            B[r][c] = rand() % 10;
        }
    int i, j, k;
    // Rows of C are independent, so the outer loop parallelizes safely;
    // loop indices are thread-private, the matrices are shared.
#pragma omp parallel for private(i, j, k) shared(A, B, C)
    for (i = 0; i < maxn; ++i)
        for (j = 0; j < maxn; ++j) {
            int acc = 0; // C starts zeroed, so a local accumulator is equivalent
            for (k = 0; k < maxn; ++k)
                acc += A[i][k] * B[k][j];
            C[i][j] += acc;
        }
    // Print the result, one tab-separated row per line.
    for (int r = 0; r < maxn; ++r) {
        for (int c = 0; c < maxn; ++c)
            cout << C[r][c] << "\t";
        cout << endl;
    }
}