/*
 * Compute A * B = C, where A and B are square matrices, using a
 * master/slave MPI programming pattern.  The user enters the matrix
 * size N from the keyboard, and the matrices are filled with random
 * numbers.
 *
 * Parallelization strategy: matrix A is partitioned by rows.  The
 * master process scatters the row bands of A to the slave processes,
 * and each slave sends its computed result band back to the master,
 * which assembles the final product.
 *
 * All matrices are simulated with one-dimensional (flattened) arrays.
 * Feedback and criticism welcome!  (Author's note: "Now that I've
 * graduated, I realize how much I love parallel computing.")
 */
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
/* Flattened row-major index of element (row, col) in an N-column matrix.
   BUG FIX: arguments are now fully parenthesized so that expressions such
   as idx(i + 1, j, N) expand correctly; the old expansion ((row*N) + col)
   turned idx(i + 1, j, N) into (i + 1*N) + j. */
#define idx(row, col, N) (((row) * (N)) + (col))
/* Matrices of size up to PRN x PRN are printed in full. */
#define PRN 40
/* Fill an N x N matrix (flattened, row-major) with pseudo-random digits 0-9. */
void generate_data (int array[], int N) {
    int k;
    int total = N * N;
    for (k = 0; k < total; ++k)
        array[k] = rand() % 10;
}
/*
 * Compute the row partition of a `row`-row matrix over the numprocs-1
 * slave ranks.  On return, slave i (1 <= i < numprocs) owns rows
 * dp[i-1] .. dp[i]-1, and count[i] = dp[i] - dp[i-1] is its row count;
 * the remainder rows go one each to the lowest-ranked slaves.
 * dp[] and count[] must hold at least numprocs elements.
 * NOTE(review): assumes numprocs >= 2 (guarded in main); numprocs == 1
 * would divide by zero.
 */
void part (int dp[], int count[], int row) {
    int i, numprocs;
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    int delta = row / (numprocs - 1);   /* base rows per slave */
    int res = row % (numprocs - 1);     /* leftover rows to spread around */
    /* BUG FIX: the original did memset(dp, 0, sizeof(dp)) — but an array
       parameter decays to a pointer, so sizeof(dp) is sizeof(int *) and
       only the first 4-8 bytes were zeroed (CERT ARR01-C).  Only element 0
       actually needs initializing; assign it directly.  Same fix for count. */
    dp[0] = 0;
    for (i = 1; i < numprocs; ++i) {
        dp[i] = dp[i-1] + delta;
        if (i <= res) dp[i] += 1;       /* first `res` slaves get one extra row */
    }
    dp[numprocs - 1] = row;             /* guarantee the last boundary covers all rows */
    count[0] = 0;                       /* rank 0 (master) computes no band itself */
    for (i = 1; i < numprocs; ++i) {
        count[i] = dp[i] - dp[i-1];
    }
}
/* Print a row x col matrix stored row-major in a flat array,
   one matrix row per output line, followed by a blank line. */
void print_matrix(int array[], int row, int col) {
    int r, c;
    for (r = 0; r < row; ++r) {
        for (c = 0; c < col; ++c)
            printf("%d ", array[r * col + c]);
        printf("\n");
    }
    printf("\n");
}
/*
 * C = A * B, where A is row x col, B is col x col, and C is row x col
 * (all flattened row-major).
 * BUG FIX: the original accumulated with "+=" directly into c[], which
 * silently required the caller to pre-zero C (and one call site zeroed
 * the wrong number of bytes).  Accumulating into a local sum and storing
 * it removes that hidden precondition; C no longer needs initialization.
 */
void matrix_mul (int a[], int b[], int c[], int row, int col) {
    int i, j, k;
    for (i = 0; i < row; ++i) {
        for (j = 0; j < col; ++j) {
            int sum = 0;
            for (k = 0; k < col; ++k) {
                sum += a[i * col + k] * b[k * col + j];
            }
            c[i * col + j] = sum;
        }
    }
}
/*
 * Master process (rank 0): reads N from stdin, generates random N x N
 * matrices A and B, broadcasts N and B to the slaves, times a serial
 * reference multiplication, then scatters row bands of A (tag = slave
 * rank) and gathers the corresponding bands of C.  Returns 0 on success;
 * aborts the whole communicator on input or allocation failure.
 */
int master () {
    int i;
    int myid, numprocs;
    int N;
    int dp[100], count[100];   /* row-partition tables, indexed by slave rank */
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    if (numprocs > 100) {      /* dp[]/count[] have a fixed capacity of 100 */
        printf("Too many processes (at most 100 supported)!\n");
        MPI_Abort(MPI_COMM_WORLD, 97);
    }
    /* Read the size of the square matrix from the keyboard. */
    printf("Please input the size of square matrix: ");
    if (scanf("%d", &N) != 1 || N <= 0) {  /* BUG FIX: scanf result was ignored */
        printf("Invalid matrix size!\n");
        MPI_Abort(MPI_COMM_WORLD, 96);
    }
    MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Status *status = (MPI_Status *) malloc(sizeof(MPI_Status) * (numprocs - 1));
    MPI_Request *req = (MPI_Request *) malloc(sizeof(MPI_Request) * (numprocs - 1));
    if (NULL == status || NULL == req) {   /* BUG FIX: these mallocs were unchecked */
        printf("Not enough memory for status/request arrays!\n");
        MPI_Abort(MPI_COMM_WORLD, 98);
    }
    part(dp, count, N);
    /* Matrix A */
    int *a = (int *) malloc(sizeof(int) * N * N);
    if (NULL == a) {
        printf("Not enough memory for matrix A!\n");
        MPI_Abort(MPI_COMM_WORLD, 98);
    }
    generate_data(a, N);
    if (N <= PRN) {
        printf("Process Master generated matrix A:\n");
        print_matrix(a, N, N);
    }
    /* Matrix B — broadcast to every slave right after generation. */
    int *b = (int *) malloc(sizeof(int) * N * N);
    if (NULL == b) {
        printf("Not enough memory for matrix B!\n");
        MPI_Abort(MPI_COMM_WORLD, 98);
    }
    generate_data(b, N);
    MPI_Bcast(b, N*N, MPI_INT, 0, MPI_COMM_WORLD);
    if (N <= PRN) {
        printf("Process Master generated matrix B:\n");
        print_matrix(b, N, N);
    }
    int *c = (int *) malloc(sizeof(int) * N * N);
    if (NULL == c) {
        printf("Not enough memory for matrix C!\n");
        MPI_Abort(MPI_COMM_WORLD, 98);
    }
    /* BUG FIX: the original did memset(c, 0, N*N), which zeroes N*N BYTES,
       leaving 3/4 of the int array uninitialized before matrix_mul's
       accumulation — the serial result was computed over garbage. */
    memset(c, 0, sizeof(int) * N * N);
    /* Serial reference run. */
    double start_time = MPI_Wtime();
    matrix_mul(a, b, c, N, N);
    double end_time = MPI_Wtime();
    printf("Processing time of serial algorithm: %lf s\n", end_time - start_time);
    if (N <= PRN) {
        printf("serial---A * B = :\n");
        print_matrix(c, N, N);
    }
    memset(c, 0, sizeof(int) * N * N);  /* BUG FIX: same byte-count error as above */
    /* Parallel run: scatter row bands of A, then gather bands of C. */
    start_time = MPI_Wtime();
    for (i = 1; i < numprocs; ++i) {
        MPI_Isend(a + dp[i-1]*N, count[i]*N, MPI_INT, i, i, MPI_COMM_WORLD, &req[i-1]);
    }
    MPI_Waitall(numprocs - 1, req, status);
    for (i = 1; i < numprocs; ++i) {
        MPI_Irecv(c + dp[i-1]*N, count[i]*N, MPI_INT, i, i, MPI_COMM_WORLD, &req[i-1]);
    }
    MPI_Waitall(numprocs - 1, req, status);
    end_time = MPI_Wtime();
    printf("Processing time of parallel algorithm: %lf s\n", end_time - start_time);
    if (N <= PRN) {
        printf("parallel---A * B = :\n");
        print_matrix(c, N, N);
    }
    free(status);
    free(req);
    free(a);
    free(b);
    free(c);
    return 0;
}
/*
 * Slave process: receives N and matrix B via broadcast, computes its own
 * row partition, receives its band of A from the master (tag = own rank),
 * multiplies, and sends the resulting band of C back.  Returns 0.
 */
int slave () {
    int rank, nprocs;
    int size;                      /* matrix dimension N, broadcast by master */
    int offsets[100], counts[100]; /* row-partition tables */
    MPI_Status st;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Bcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD);
    part(offsets, counts, size);
    int rows = counts[rank];       /* this slave's share of A's rows */
    /* Band of A (rows x size), zero-initialized before the receive. */
    int *band_a = (int *) calloc((size_t)rows * size, sizeof(int));
    if (NULL == band_a) {
        printf("Not enough memory for matrix A!\n");
        MPI_Abort(MPI_COMM_WORLD, 98);
    }
    /* Band of C (rows x size); must start at zero because matrix_mul
       accumulates into it. */
    int *band_c = (int *) calloc((size_t)rows * size, sizeof(int));
    if (NULL == band_c) {
        printf("Not enough memory for matrix C!\n");
        MPI_Abort(MPI_COMM_WORLD, 98);
    }
    /* Full matrix B, filled by the master's broadcast. */
    int *full_b = (int *) malloc(sizeof(int) * size * size);
    if (NULL == full_b) {
        printf("Not enough memory for matrix B!\n");
        MPI_Abort(MPI_COMM_WORLD, 98);
    }
    MPI_Bcast(full_b, size * size, MPI_INT, 0, MPI_COMM_WORLD);
    /* Blocking receive of this slave's rows of A. */
    MPI_Recv(band_a, rows * size, MPI_INT, 0, rank, MPI_COMM_WORLD, &st);
    matrix_mul(band_a, full_b, band_c, rows, size);
    /* Return the finished band of C to the master. */
    MPI_Send(band_c, rows * size, MPI_INT, 0, rank, MPI_COMM_WORLD);
    free(band_a);
    free(full_b);
    free(band_c);
    return 0;
}
/*
 * Entry point.  Requires at least two MPI processes: rank 0 acts as the
 * master, every other rank acts as a computation slave.
 */
int main(int argc, char **argv)
{
    int rank, nprocs;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    if (nprocs < 2) {
        printf("Processes must >= 2! \n");
        MPI_Abort(MPI_COMM_WORLD, 99);
    }
    if (rank != 0) {
        slave();           /* worker ranks compute row bands */
    } else {
        master();          /* rank 0 coordinates and verifies */
    }
    MPI_Finalize();
    return 0;
}