#define _CRT_SECURE_NO_WARNINGS
#include<stdio.h>
#include<string.h>
#include <mpi.h>
#include <malloc.h>
#include <math.h>
#include <time.h>
#define N 4
// Improvement 2: Block Multiplication, based on Original
// Main process: process 0, data distribution & collect calculate results, no calculation
// Others: calculate for a block of A * B
void matGene(int *A, int size, int actual_size) {
// actual size: the matrix we use may have a larger dimension than n * n
for (int i = 0; i < actual_size; i++) {
for (int j = 0; j < actual_size; j++) {
A[i * actual_size + j] = (int)(rand() % 10 + 1);
}
}
}
void printMatrix(int *array, int row, int col, int n)
{
int *p = array;
for (int y = 0; y < row; y++)
{
for (int x = 0; x < col; x++)
{
printf("%5d", p[x]);
}
p = p + n;
printf("\n");
}
}
void multiplicateMatrix(int *array_A, int *array_B, int *array_C, int MM, int KK, int NN,int extend)
{
/*
printf("矩阵乘法中extend=%d,kk=%d", extend,KK);
*/
for (int i = 0; i < MM; i++)
{
for (int j = 0; j < NN; j++)
{
int sum = 0;
for (int k = 0; k < KK; k++)
{
/*进行矩阵乘法测试 重点在 extend,正常应该是kk,即有多少列*/
/*
printf("i=%d,extend=%d\n", i, extend);
printf("%d + array_A[%d] * array_B[%d] =%d+ %d* %d\n",sum, i*extend + k, k*NN + j,sum, array_A[i*extend + k], array_B[k*NN + j]);
*/
sum =sum + array_A[i*extend + k] * array_B[k*NN + j];
}
array_C[i*NN + j] = sum; /*结果矩阵和*/
// printf("array_C[%d]=%d\n", i*NN + j, array_C[i*NN + j]);
}
}
}
int factor(int n) {
// return the largest factor that is not larger than sqrt(n)
double sqr_root = sqrt(n);
for (int i = sqr_root; i >= 1; i--) {
if (n % i == 0) return i;
}
}
int main(int argc, char *argv[]) {
// Only Deal With Square Matrixs
// Calculate Parameters Definition
int n = N; // matrix dimension
// int beginRow, endRow; // the range of rows calculating in certain process
double beginTime, endTime; // time record
srand(time(NULL));
double start, finish, time;
// MPI Common Head
int my_rank = 0, comm_sz = 0;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &comm_sz);
MPI_Status status;
int *A, *B, *C;
if (comm_sz == 1) { // no parallel
A = (int*)malloc(n * n * sizeof(int));
B = (int*)malloc(n * n * sizeof(int));
C = (int*)malloc(n * n * sizeof(int));
int saveN = n;
matGene(A, saveN, n);
matGene(B, saveN, n);
printf("\n");
printf(" Matrix_A: (%d×%d)\n", saveN, saveN);
// printMatrix(A, saveN, saveN,n);
printf(" Matrix_B: (%d×%d)\n", saveN, saveN);
// printMatrix(B, saveN, saveN,n);
printf("\n");
printf(" ------------------------------------------------------------------------------------\n");
printf(" Computing matrix product using multiplicateMatrix \n");
printf(" ------------------------------------------------------------------------------------\n\n");
start = clock();
multiplicateMatrix(A, B, C, n, n, n, n);
finish = clock();
time = (double)(finish - start) / CLOCKS_PER_SEC;
printf(" Matrix_C: (%d×%d) CPU运行时间为:%lfs\n\n\n", saveN, saveN, time);
// printMatrix(C, saveN, saveN,n);
free(A);
free(B);
free(C);
}
else { // parallel: main process collect the result and also involve in calculation
int saveN = n;
// must equal scatter: actual n is bigger than input
if (n % (comm_sz - 1) != 0) {
n -= n % (comm_sz - 1);
n += (comm_sz - 1);
}
int addnum = n - saveN;
int a = (comm_sz - 1) / factor(comm_sz - 1);
int b = factor(comm_sz - 1);
int each_row = n / a;
int each_column = n / b;
if (my_rank == 0)
{
printf("a=%d ", a);
printf("b=%d ", b);
printf("each_row=%d ", each_row);
printf("each_column=%d ", each_column);
printf("extend n=%d ", n);
printf("addnum=%d\n", addnum);
printf("comm_sz=%d \n", comm_sz);
}
if (my_rank == 0) {
A = (int*)malloc(n * n * sizeof(int));
B = (int*)malloc(n * n * sizeof(int));
C = (int*)malloc(n * n * sizeof(int));
matGene(A, saveN, n);
matGene(B, saveN, n);
printf("\n");
printf(" Matrix_A: (%d×%d)\n", saveN, saveN);
// printMatrix(A, saveN, saveN, n);
printf(" Matrix_B: (%d×%d)\n", saveN, saveN);
// printMatrix(B, saveN, saveN, n);
printf("\n");
printf(" ------------------------------------------------------------------------------------\n");
printf(" Computing matrix product using multiplicateMatrix_block \n");
printf(" ------------------------------------------------------------------------------------\n\n");
// Prepare data
beginTime = MPI_Wtime();
// include begin not end
// beginRow = ((my_rank - 1) / b) * each_row, endRow = beginRow + each_row;
// beginColumn = ((my_rank - 1) % b) * each_column, endColumn = beginColumn + each_column;
// Send: A[beginRow:endRow, :], B[:, beginColumn:endColumn]
for (int i = 1; i < comm_sz; i++)
{
int beginRow = ((i - 1) / b) * each_row;
int beginColumn = ((i - 1) % b) * each_column;
// A: beginRow ~ endRow
MPI_Send(&A[beginRow * n], each_row * n, MPI_INT, i, i, MPI_COMM_WORLD);
// 首地址 字节数 类型 目标 消息标识符 通信域
// B: n times, once a row
for (int j = 0; j < n; j++)
{
MPI_Send(&B[j * n + beginColumn], each_column, MPI_INT, i, i * n + j + comm_sz, MPI_COMM_WORLD);
}
}
// Recv: C[beginRow:endRow, beginColumn:endColumn]
for (int i = 1; i < comm_sz; i++) {
int beginRow = ((i - 1) / b) * each_row;
int endRow = beginRow + each_row;
int beginColumn = ((i - 1) % b) * each_column;
for (int j = beginRow; j < endRow; j++)
{
MPI_Recv(&C[j * n + beginColumn], each_column, MPI_INT, i, each_row * i + (j - beginRow), MPI_COMM_WORLD, &status);
}
}
endTime = MPI_Wtime();
printf(" Matrix_C: (%d×%d) CPU运行时间为:%lfs\n\n\n", saveN, saveN, endTime - beginTime);
// printf("Matrix_C: (%d×%d)\n", saveN, saveN);
// printMatrix(C, saveN, saveN, n);
free(A);
free(B);
free(C);
}
else {
int *partA, *partB, *partC;
partA = (int*)malloc(each_row * n * sizeof(int));
partB = (int*)malloc(n * each_column * sizeof(int));
partC = (int*)malloc(each_row * each_column * sizeof(int));
// include begin not end
// beginRow = ((my_rank - 1) / b) * each_row, endRow = beginRow + each_row;
// beginColumn = ((my_rank - 1) % b) * each_column, endColumn = beginColumn + each_column;
// Recv: partA, partB
MPI_Recv(&partA[0 * n + 0], each_row * n, MPI_INT, 0, my_rank, MPI_COMM_WORLD, &status);
/* 收到A的 2行值
printf("\n--partA----进程号:%d-----\n", my_rank);
for (int i = 0; i < each_row; i++)
{
for (int j = 0; j < n; j++)
{
printf("%d ", partA[i*n+j]);
}
printf("\n");
}
*/
for (int j = 0; j < n; j++)
{
MPI_Recv(&partB[j * each_column], each_column, MPI_INT, 0, my_rank * n + j + comm_sz, MPI_COMM_WORLD, &status);
}
/*
// 收到B的值
printf("\n--partB----进程号:%d-----\n", my_rank);
for (int i = 0; i < n; i++)
{
for (int j = 0; j < each_column; j++)
{
printf("%d ", partB[i * each_column + j]);
}
printf("\n");
}
*/
multiplicateMatrix(partA, partB, partC, each_row, n-addnum, each_column,n);
/*
printf("\n--partc----进程号:%d-----\n", my_rank);
for (int i = 0; i < each_row; i++)
{
for (int j = 0; j < each_column; j++)
{
printf("%d ", partC[i * each_column + j]);
}
printf("\n");
}
*/
// Send: partC
for (int j = 0; j < each_row; j++) {
MPI_Send(&partC[j * each_column], each_column, MPI_INT, 0, each_row * my_rank + j, MPI_COMM_WORLD);
}
free(partA);
free(partB);
free(partC);
}
}
MPI_Finalize();
return 0;
}
矩阵乘法-MPI分块
最新推荐文章于 2022-05-02 21:59:02 发布