矩阵乘法-MPI分块

最新推荐文章于 2022-05-02 21:59:02 发布

Rocky_myx

最新推荐文章于 2022-05-02 21:59:02 发布

阅读量1.2k

点赞数 1

分类专栏：并行计算（MPI） # 矩阵乘法文章标签： c语言并行计算

本文链接：https://blog.csdn.net/qq_33562636/article/details/106356902

版权

并行计算（MPI）同时被 2 个专栏收录

9 篇文章 1 订阅

订阅专栏

矩阵乘法

7 篇文章 0 订阅

订阅专栏

#define _CRT_SECURE_NO_WARNINGS
#include<stdio.h>
#include<string.h>
#include <mpi.h>
#include <malloc.h>
#include <math.h>
#include <time.h>

#define N 4



// Improvement 2: Block Multiplication, based on Original
// Main process: process 0, data distribution & collect calculate results, no calculation
// Others: calculate for a block of A * B


void matGene(int *A, int size, int actual_size) {
	// actual size: the matrix we use may have a larger dimension than n * n
	for (int i = 0; i < actual_size; i++) {
		for (int j = 0; j < actual_size; j++) {

			A[i * actual_size + j] = (int)(rand() % 10 + 1);

		}
	}
}



void printMatrix(int *array, int row, int col, int n)
{
	int *p = array;
	for (int y = 0; y < row; y++)
	{
		for (int x = 0; x < col; x++)
		{
			printf("%5d", p[x]);
		}
		p = p + n;
		printf("\n");
	}

}





void  multiplicateMatrix(int *array_A, int *array_B, int *array_C, int MM, int KK, int NN,int extend)
{
	/*
	printf("矩阵乘法中extend=%d,kk=%d", extend,KK);
	*/
	for (int i = 0; i < MM; i++)
	{
		for (int j = 0; j < NN; j++)
		{
			int sum = 0;
			for (int k = 0; k < KK; k++)
			{
				/*进行矩阵乘法测试 重点在 extend,正常应该是kk,即有多少列*/

				/*
				printf("i=%d,extend=%d\n", i, extend);
				printf("%d + array_A[%d] * array_B[%d] =%d+ %d* %d\n",sum, i*extend + k, k*NN + j,sum, array_A[i*extend + k], array_B[k*NN + j]);
				*/

				sum =sum + array_A[i*extend + k] * array_B[k*NN + j];
			}
			array_C[i*NN + j] = sum;              /*结果矩阵和*/
		//	printf("array_C[%d]=%d\n", i*NN + j, array_C[i*NN + j]);
		}
	}
}

int factor(int n) {
	// return the largest factor that is not larger than sqrt(n)
	double sqr_root = sqrt(n);
	for (int i = sqr_root; i >= 1; i--) {
		if (n % i == 0) return i;
	}
}

int main(int argc, char *argv[]) {
	// Only Deal With Square Matrixs

	// Calculate Parameters Definition
	int n = N; // matrix dimension
	// int beginRow, endRow; // the range of rows calculating in certain process
	double beginTime, endTime; // time record
	srand(time(NULL));
	double start, finish, time;
	// MPI Common Head
	int my_rank = 0, comm_sz = 0;
	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
	MPI_Comm_size(MPI_COMM_WORLD, &comm_sz);
	MPI_Status status;



	int *A, *B, *C;




	if (comm_sz == 1) { // no parallel

		A = (int*)malloc(n * n * sizeof(int));
		B = (int*)malloc(n * n * sizeof(int));
		C = (int*)malloc(n * n * sizeof(int));

		int saveN = n;

		matGene(A, saveN, n);
		matGene(B, saveN, n);
		printf("\n");
		printf("   Matrix_A: (%d×%d)\n", saveN, saveN);
		//		printMatrix(A, saveN, saveN,n);
		printf("   Matrix_B: (%d×%d)\n", saveN, saveN);
		//		printMatrix(B, saveN, saveN,n);
	

		printf("\n");
		printf("   ------------------------------------------------------------------------------------\n");
		printf("   Computing matrix product using multiplicateMatrix \n");
		printf("   ------------------------------------------------------------------------------------\n\n");



		start = clock();
		multiplicateMatrix(A, B, C, n, n, n, n);
		finish = clock();
		time = (double)(finish - start) / CLOCKS_PER_SEC;
		printf("   Matrix_C: (%d×%d)  CPU运行时间为：%lfs\n\n\n", saveN, saveN, time);
		

//		printMatrix(C, saveN, saveN,n);


		free(A);
		free(B);
		free(C);
	}

	else { // parallel: main process collect the result and also involve in calculation
		

		int saveN = n;



		// must equal scatter: actual n is bigger than input
		
		if (n % (comm_sz - 1) != 0) {
			n -= n % (comm_sz - 1);
			n += (comm_sz - 1);
		}

		int addnum = n - saveN;
		int a = (comm_sz - 1) / factor(comm_sz - 1);
		int b = factor(comm_sz - 1);
		int each_row = n / a;
		int each_column = n / b;


		if (my_rank == 0)
		{
			printf("a=%d  ", a);
			printf("b=%d  ", b);
			printf("each_row=%d  ", each_row);
			printf("each_column=%d  ", each_column);
			printf("extend n=%d  ", n);
			printf("addnum=%d\n", addnum);
			printf("comm_sz=%d  \n", comm_sz);
		}

		if (my_rank == 0) {


			A = (int*)malloc(n * n * sizeof(int));
			B = (int*)malloc(n * n * sizeof(int));
			C = (int*)malloc(n * n * sizeof(int));

			matGene(A, saveN, n);
			matGene(B, saveN, n);


			printf("\n");
			printf("   Matrix_A: (%d×%d)\n", saveN, saveN);
			//			printMatrix(A, saveN, saveN, n);
			printf("   Matrix_B: (%d×%d)\n", saveN, saveN);
			//			printMatrix(B, saveN, saveN, n);



			printf("\n");
			printf("   ------------------------------------------------------------------------------------\n");
			printf("   Computing matrix product using multiplicateMatrix_block \n");
			printf("   ------------------------------------------------------------------------------------\n\n");

			// Prepare data
		
			beginTime = MPI_Wtime();

			// include begin not end
			// beginRow = ((my_rank - 1) / b) * each_row, endRow = beginRow + each_row;
			// beginColumn = ((my_rank - 1) % b) * each_column, endColumn = beginColumn + each_column;

			// Send: A[beginRow:endRow, :], B[:, beginColumn:endColumn]
			

			for (int i = 1; i < comm_sz; i++)
			{
				int beginRow = ((i - 1) / b) * each_row;
				int beginColumn = ((i - 1) % b) * each_column;

				// A: beginRow ~ endRow
				MPI_Send(&A[beginRow * n], each_row * n, MPI_INT, i, i, MPI_COMM_WORLD);
				//  首地址                  字节数           类型    目标  消息标识符  通信域
				// B: n times, once a row
				for (int j = 0; j < n; j++)
				{
					MPI_Send(&B[j * n + beginColumn], each_column, MPI_INT, i, i * n + j + comm_sz, MPI_COMM_WORLD);
				}
			}




			// Recv: C[beginRow:endRow, beginColumn:endColumn]
			for (int i = 1; i < comm_sz; i++) {
				int beginRow = ((i - 1) / b) * each_row;
				int endRow = beginRow + each_row;
				int beginColumn = ((i - 1) % b) * each_column;


				for (int j = beginRow; j < endRow; j++)
				{
					MPI_Recv(&C[j * n + beginColumn], each_column, MPI_INT, i, each_row * i + (j - beginRow), MPI_COMM_WORLD, &status);
				}


			}

			endTime = MPI_Wtime();
		
			printf("   Matrix_C: (%d×%d)  CPU运行时间为：%lfs\n\n\n", saveN, saveN, endTime - beginTime);
		


//			printf("Matrix_C: (%d×%d)\n", saveN, saveN);
//			printMatrix(C, saveN, saveN, n);
	

			free(A);
			free(B);
			free(C);


		}

		else {
			int *partA, *partB, *partC;


			partA = (int*)malloc(each_row * n * sizeof(int));
			partB = (int*)malloc(n * each_column * sizeof(int));
			partC = (int*)malloc(each_row * each_column * sizeof(int));

			// include begin not end
			// beginRow = ((my_rank - 1) / b) * each_row, endRow = beginRow + each_row;
			// beginColumn = ((my_rank - 1) % b) * each_column, endColumn = beginColumn + each_column;

			// Recv: partA, partB
			MPI_Recv(&partA[0 * n + 0], each_row * n, MPI_INT, 0, my_rank, MPI_COMM_WORLD, &status);
			
			/*   收到A的 2行值
			printf("\n--partA----进程号：%d-----\n", my_rank);
			for (int i = 0; i < each_row; i++)
			{
				for (int j = 0; j < n; j++)
				{
					printf("%d  ", partA[i*n+j]);
				}
				printf("\n");
			}
			*/


			
			for (int j = 0; j < n; j++)
			{	
				MPI_Recv(&partB[j * each_column], each_column, MPI_INT, 0, my_rank * n + j + comm_sz, MPI_COMM_WORLD, &status);
			}

			/*
			//  收到B的值
			printf("\n--partB----进程号：%d-----\n", my_rank);
		
			for (int i = 0; i < n; i++)
			{
				for (int j = 0; j < each_column; j++)
				{
					printf("%d  ", partB[i * each_column + j]);
				}
				printf("\n");
			}
			*/


			multiplicateMatrix(partA, partB, partC, each_row, n-addnum, each_column,n);
			

		/*
			printf("\n--partc----进程号：%d-----\n", my_rank);
			
			for (int i = 0; i < each_row; i++)
			{
				for (int j = 0; j < each_column; j++)
				{
					printf("%d  ", partC[i * each_column + j]);
				}
				printf("\n");
			}
			*/
			

			





			// Send: partC
			for (int j = 0; j < each_row; j++) {
				MPI_Send(&partC[j * each_column], each_column, MPI_INT, 0, each_row * my_rank + j, MPI_COMM_WORLD);
			}

			free(partA);
			free(partB);
			free(partC);
		}

	}

	MPI_Finalize();
	return 0;
}

在这里插入图片描述

Rocky_myx

关注

1
点赞
踩
9

收藏

觉得还不错? 一键收藏
0
评论
矩阵乘法-MPI分块

#define _CRT_SECURE_NO_WARNINGS#include<stdio.h>#include<string.h>#include <mpi.h>#include <malloc.h>#include <math.h>#include <time.h>#define N 4// Improvement 2: Block Multiplication, based on Original// M
复制链接

扫一扫