/*
 * Compute A * B = C, where A and B are square matrices, using a
 * master/slave MPI programming pattern.  The user enters the matrix
 * size N from the keyboard, and the matrices are filled with random
 * numbers.
 *
 * Parallelization strategy: matrix A is partitioned by rows.  The
 * master process scatters the row bands of A to the slave processes,
 * and each slave sends its computed result band back to the master,
 * which assembles the final product.
 *
 * All matrices are simulated with one-dimensional (flattened) arrays.
 * Feedback and criticism welcome!  (Author's note: "Now that I've
 * graduated, I realize how much I love parallel computing.")
 */
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
/* Flattened row-major index of element (row, col) in an N-column matrix.
   BUG FIX: arguments are now fully parenthesized so that expressions such
   as idx(i + 1, j, N) expand correctly; the old expansion ((row*N) + col)
   turned idx(i + 1, j, N) into (i + 1*N) + j. */
#define idx(row, col, N) (((row) * (N)) + (col))
/* Matrices of size up to PRN x PRN are printed in full. */
#define PRN 40
/* Fill an N x N matrix (flattened, row-major) with pseudo-random digits 0-9. */
void generate_data (int array[], int N) {
    int k;
    int total = N * N;
    for (k = 0; k < total; ++k)
        array[k] = rand() % 10;
}
/*
 * Compute the row partition of a `row`-row matrix over the numprocs-1
 * slave ranks.  On return, slave i (1 <= i < numprocs) owns rows
 * dp[i-1] .. dp[i]-1, and count[i] = dp[i] - dp[i-1] is its row count;
 * the remainder rows go one each to the lowest-ranked slaves.
 * dp[] and count[] must hold at least numprocs elements.
 * NOTE(review): assumes numprocs >= 2 (guarded in main); numprocs == 1
 * would divide by zero.
 */
void part (int dp[], int count[], int row) {
    int i, numprocs;
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    int delta = row / (numprocs - 1);   /* base rows per slave */
    int res = row % (numprocs - 1);     /* leftover rows to spread around */
    /* BUG FIX: the original did memset(dp, 0, sizeof(dp)) — but an array
       parameter decays to a pointer, so sizeof(dp) is sizeof(int *) and
       only the first 4-8 bytes were zeroed (CERT ARR01-C).  Only element 0
       actually needs initializing; assign it directly.  Same fix for count. */
    dp[0] = 0;
    for (i = 1; i < numprocs; ++i) {
        dp[i] = dp[i-1] + delta;
        if (i <= res) dp[i] += 1;       /* first `res` slaves get one extra row */
    }
    dp[numprocs - 1] = row;             /* guarantee the last boundary covers all rows */
    count[0] = 0;                       /* rank 0 (master) computes no band itself */
    for (i = 1; i < numprocs; ++i) {
        count[i] = dp[i] - dp[i-1];
    }
}
/* Print a row x col matrix stored row-major in a flat array,
   one matrix row per output line, followed by a blank line. */
void print_matrix(int array[], int row, int col) {
    int r, c;
    for (r = 0; r < row; ++r) {
        for (c = 0; c < col; ++c)
            printf("%d ", array[r * col + c]);
        printf("\n");
    }
    printf("\n");
}
/*
 * C = A * B, where A is row x col, B is col x col, and C is row x col
 * (all flattened row-major).
 * BUG FIX: the original accumulated with "+=" directly into c[], which
 * silently required the caller to pre-zero C (and one call site zeroed
 * the wrong number of bytes).  Accumulating into a local sum and storing
 * it removes that hidden precondition; C no longer needs initialization.
 */
void matrix_mul (int a[], int b[], int c[], int row, int col) {
    int i, j, k;
    for (i = 0; i < row; ++i) {
        for (j = 0; j < col; ++j) {
            int sum = 0;
            for (k = 0; k < col; ++k) {
                sum += a[i * col + k] * b[k * col + j];
            }
            c[i * col + j] = sum;
        }
    }
}
/*
 * Master process (rank 0): reads N from stdin, generates random N x N
 * matrices A and B, broadcasts N and B to the slaves, times a serial
 * reference multiplication, then scatters row bands of A (tag = slave
 * rank) and gathers the corresponding bands of C.  Returns 0 on success;
 * aborts the whole communicator on input or allocation failure.
 */
int master () {
    int i;
    int myid, numprocs;
    int N;
    int dp[100], count[100];   /* row-partition tables, indexed by slave rank */
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    if (numprocs > 100) {      /* dp[]/count[] have a fixed capacity of 100 */
        printf("Too many processes (at most 100 supported)!\n");
        MPI_Abort(MPI_COMM_WORLD, 97);
    }
    /* Read the size of the square matrix from the keyboard. */
    printf("Please input the size of square matrix: ");
    if (scanf("%d", &N) != 1 || N <= 0) {  /* BUG FIX: scanf result was ignored */
        printf("Invalid matrix size!\n");
        MPI_Abort(MPI_COMM_WORLD, 96);
    }
    MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Status *status = (MPI_Status *) malloc(sizeof(MPI_Status) * (numprocs - 1));
    MPI_Request *req = (MPI_Request *) malloc(sizeof(MPI_Request) * (numprocs - 1));
    if (NULL == status || NULL == req) {   /* BUG FIX: these mallocs were unchecked */
        printf("Not enough memory for status/request arrays!\n");
        MPI_Abort(MPI_COMM_WORLD, 98);
    }
    part(dp, count, N);
    /* Matrix A */
    int *a = (int *) malloc(sizeof(int) * N * N);
    if (NULL == a) {
        printf("Not enough memory for matrix A!\n");
        MPI_Abort(MPI_COMM_WORLD, 98);
    }
    generate_data(a, N);
    if (N <= PRN) {
        printf("Process Master generated matrix A:\n");
        print_matrix(a, N, N);
    }
    /* Matrix B — broadcast to every slave right after generation. */
    int *b = (int *) malloc(sizeof(int) * N * N);
    if (NULL == b) {
        printf("Not enough memory for matrix B!\n");
        MPI_Abort(MPI_COMM_WORLD, 98);
    }
    generate_data(b, N);
    MPI_Bcast(b, N*N, MPI_INT, 0, MPI_COMM_WORLD);
    if (N <= PRN) {
        printf("Process Master generated matrix B:\n");
        print_matrix(b, N, N);
    }
    int *c = (int *) malloc(sizeof(int) * N * N);
    if (NULL == c) {
        printf("Not enough memory for matrix C!\n");
        MPI_Abort(MPI_COMM_WORLD, 98);
    }
    /* BUG FIX: the original did memset(c, 0, N*N), which zeroes N*N BYTES,
       leaving 3/4 of the int array uninitialized before matrix_mul's
       accumulation — the serial result was computed over garbage. */
    memset(c, 0, sizeof(int) * N * N);
    /* Serial reference run. */
    double start_time = MPI_Wtime();
    matrix_mul(a, b, c, N, N);
    double end_time = MPI_Wtime();
    printf("Processing time of serial algorithm: %lf s\n", end_time - start_time);
    if (N <= PRN) {
        printf("serial---A * B = :\n");
        print_matrix(c, N, N);
    }
    memset(c, 0, sizeof(int) * N * N);  /* BUG FIX: same byte-count error as above */
    /* Parallel run: scatter row bands of A, then gather bands of C. */
    start_time = MPI_Wtime();
    for (i = 1; i < numprocs; ++i) {
        MPI_Isend(a + dp[i-1]*N, count[i]*N, MPI_INT, i, i, MPI_COMM_WORLD, &req[i-1]);
    }
    MPI_Waitall(numprocs - 1, req, status);
    for (i = 1; i < numprocs; ++i) {
        MPI_Irecv(c + dp[i-1]*N, count[i]*N, MPI_INT, i, i, MPI_COMM_WORLD, &req[i-1]);
    }
    MPI_Waitall(numprocs - 1, req, status);
    end_time = MPI_Wtime();
    printf("Processing time of parallel algorithm: %lf s\n", end_time - start_time);
    if (N <= PRN) {
        printf("parallel---A * B = :\n");
        print_matrix(c, N, N);
    }
    free(status);
    free(req);
    free(a);
    free(b);
    free(c);
    return 0;
}
/*
 * Slave process: receives N and matrix B via broadcast, computes its own
 * row partition, receives its band of A from the master (tag = own rank),
 * multiplies, and sends the resulting band of C back.  Returns 0.
 */
int slave () {
    int rank, nprocs;
    int size;                      /* matrix dimension N, broadcast by master */
    int offsets[100], counts[100]; /* row-partition tables */
    MPI_Status st;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Bcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD);
    part(offsets, counts, size);
    int rows = counts[rank];       /* this slave's share of A's rows */
    /* Band of A (rows x size), zero-initialized before the receive. */
    int *band_a = (int *) calloc((size_t)rows * size, sizeof(int));
    if (NULL == band_a) {
        printf("Not enough memory for matrix A!\n");
        MPI_Abort(MPI_COMM_WORLD, 98);
    }
    /* Band of C (rows x size); must start at zero because matrix_mul
       accumulates into it. */
    int *band_c = (int *) calloc((size_t)rows * size, sizeof(int));
    if (NULL == band_c) {
        printf("Not enough memory for matrix C!\n");
        MPI_Abort(MPI_COMM_WORLD, 98);
    }
    /* Full matrix B, filled by the master's broadcast. */
    int *full_b = (int *) malloc(sizeof(int) * size * size);
    if (NULL == full_b) {
        printf("Not enough memory for matrix B!\n");
        MPI_Abort(MPI_COMM_WORLD, 98);
    }
    MPI_Bcast(full_b, size * size, MPI_INT, 0, MPI_COMM_WORLD);
    /* Blocking receive of this slave's rows of A. */
    MPI_Recv(band_a, rows * size, MPI_INT, 0, rank, MPI_COMM_WORLD, &st);
    matrix_mul(band_a, full_b, band_c, rows, size);
    /* Return the finished band of C to the master. */
    MPI_Send(band_c, rows * size, MPI_INT, 0, rank, MPI_COMM_WORLD);
    free(band_a);
    free(full_b);
    free(band_c);
    return 0;
}
/*
 * Entry point.  Requires at least two MPI processes: rank 0 acts as the
 * master, every other rank acts as a computation slave.
 */
int main(int argc, char **argv)
{
    int rank, nprocs;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    if (nprocs < 2) {
        printf("Processes must >= 2! \n");
        MPI_Abort(MPI_COMM_WORLD, 99);
    }
    if (rank != 0) {
        slave();           /* worker ranks compute row bands */
    } else {
        master();          /* rank 0 coordinates and verifies */
    }
    MPI_Finalize();
    return 0;
}