To complete an assignment for my high-performance computing course, I learned MPI, Pthreads, and OpenMP, none of which I had touched before. Below is a record of implementing matrix multiplication with MPI.
Environment: Windows 10; CPU: AMD E2-3800 APU with Radeon(TM) HD Graphics, 1.30 GHz, 4 cores / 4 threads (a desktop bought ten years ago, so please forgive the weak hardware).
Language: C++
Tool: Visual Studio 2019
1. Configuring Visual Studio 2019
For a detailed walkthrough please search online; I will only sketch the process here: download and install Visual Studio and MPI, then configure Visual Studio: Project Properties → Configuration Properties → VC++ Directories, and add the include and lib folders from the MPI installation directory to Include Directories and Library Directories, respectively.
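For reference (the exact paths depend on where MS-MPI is installed; the ones below are the usual SDK defaults), the directories to add are typically:
Include Directories: C:\Program Files (x86)\Microsoft SDKs\MPI\Include
Library Directories: C:\Program Files (x86)\Microsoft SDKs\MPI\Lib\x64 (or \x86 for a 32-bit build)
In addition, msmpi.lib usually has to be added under Linker → Input → Additional Dependencies, otherwise the MPI calls will fail to link.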
2. Building and running
Run results:
Matrix: A (500×100) × B (100×1)
Notice that when the number of processes is increased to 10, the run time actually goes up. The reason is that when the matrix is small, the send/receive loops that hand the partitioned rows of A out to the workers and collect the results dominate the run time: each extra process adds startup and messaging overhead while getting only a tiny slice of computation, so the time spent distributing the work exceeds the time the extra processes save on the computation itself. So the matrix size is increased.
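A rough back-of-the-envelope estimate (the latency figure below is an assumption, not a measurement) makes this concrete: in this program the master sends one message per row of A and receives one message per result, so a 500-row matrix means roughly a thousand point-to-point messages, while the useful work per row is only cols = 100 integer multiply-adds. If a small MPI message costs on the order of microseconds and 100 multiply-adds cost well under a microsecond on this CPU, communication dominates by a wide margin, and the extra processes cannot pay for themselves on such a small problem.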
Matrix: A (1000×100) × B (100×1)
Matrix: A (2000×100) × B (100×1)
Sure enough, once the number of rows grows to 2000, the run with 10 processes takes less time than the run with 5 processes. The comparison shows that, for a large enough matrix, using more processes is indeed faster than using only a few.
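The timings above can be reproduced by launching the same executable with different process counts through mpiexec, which ships with the MS-MPI runtime. Assuming the built executable is called MatrixMul.exe (a placeholder name), the two runs being compared look like:
mpiexec -n 5 MatrixMul.exe
mpiexec -n 10 MatrixMul.exe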
3. The program
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include "random"
#include <fstream>
#include <iostream>
using namespace std;
const int rows = 500; //the rows of matrix
const int cols = 100; //the cols of matrix
int main(int argc, char* argv[])
{
    int i, j, k, myid, numprocs, anstag;
    // note: A, B and C live on the stack; for much larger values of rows
    // (e.g. 2000) the stack may need to be enlarged or the arrays moved to the heap
    int A[rows][cols], B[cols], C[rows];
    int masterpro, buf[cols], ans, cnt;
    double starttime, endtime;
    double tmp, totaltime;
    MPI_Status status;
    masterpro = 0;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    // the master does no computation itself, so at least two processes are required
    if (numprocs < 2) {
        printf("Error: too few processes!\n");
        MPI_Abort(MPI_COMM_WORLD, 99);
    }
    starttime = MPI_Wtime();
    if (myid == masterpro) {
        // fill A and B with random integers; note that the original
        // uniform_real_distribution<double>(0.0, 1.0) would truncate to 0
        // when stored in an int matrix, so an integer distribution is used here
        default_random_engine engine;
        uniform_int_distribution<int> u(0, 9);
        for (int i = 0; i < rows; ++i) {
            for (int j = 0; j < cols; ++j) {
                A[i][j] = u(engine);
            }
        }
        for (int i = 0; i < cols; ++i) {
            B[i] = u(engine);
        }
        // broadcast the B vector to all worker processes
        MPI_Bcast(B, cols, MPI_INT, masterpro, MPI_COMM_WORLD);
        // distribute the rows of A to the worker processes in round-robin order
        for (i = 1; i < numprocs; i++)
        {
            for (k = i - 1; k < rows; k += numprocs - 1)
            {
                for (j = 0; j < cols; j++)
                {
                    buf[j] = A[k][j];
                }
                // the tag k records which row this buffer holds
                MPI_Send(buf, cols, MPI_INT, i, k, MPI_COMM_WORLD);
            }
        }
    }
    else {
        //starttime = MPI_Wtime();
        // the workers take part in the broadcast of B
        MPI_Bcast(B, cols, MPI_INT, masterpro, MPI_COMM_WORLD);
        // each worker receives its share of the rows of A and multiplies them with B
        for (i = myid - 1; i < rows; i += numprocs - 1) {
            MPI_Recv(buf, cols, MPI_INT, masterpro, i, MPI_COMM_WORLD, &status);
            ans = 0;
            for (j = 0; j < cols; j++)
            {
                ans += buf[j] * B[j];
            }
            // send the dot product back, tagged with the row index
            MPI_Send(&ans, 1, MPI_INT, masterpro, i, MPI_COMM_WORLD);
        }
        //endtime = MPI_Wtime();
        //tmp = endtime - starttime;
    }
    if (myid == masterpro) {
        // collect the results from all workers; the tag tells us which row each answer belongs to
        for (i = 0; i < rows; i++)
        {
            MPI_Recv(&ans, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
            //sender = status.MPI_SOURCE;
            anstag = status.MPI_TAG;
            C[anstag] = ans;
        }
    }
    endtime = MPI_Wtime();
    totaltime = endtime - starttime;
    if (myid == masterpro)
        printf("total time:%f s.\n", totaltime);
    MPI_Finalize();
    return 0;
}
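As a quick sanity check on the parallel result, a serial verification can be dropped into the master branch right after the result-collection loop, before MPI_Finalize (a minimal sketch; it reuses the A, B and C arrays already declared in the program, which are only valid on the master rank):

    // optional correctness check (sketch): recompute each row serially
    // on the master and compare against the gathered parallel result
    if (myid == masterpro) {
        int errors = 0;
        for (i = 0; i < rows; i++) {
            int ref = 0;
            for (j = 0; j < cols; j++)
                ref += A[i][j] * B[j];
            if (ref != C[i])
                errors++;
        }
        printf("verification: %d mismatching rows\n", errors);
    }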