MPI Matrix-Vector Multiplication

Platform: Yinhe-2 supercomputer
CPU: Intel Xeon E5-2692 12C, 2.200 GHz
Interconnect: TH Express-2 high-speed interconnect
MPI version: MPI/Intel/MPICH/3.2-icc14-dyn

The program computes y = A * x with a block-row decomposition: each of the comm_size ranks holds local_m = m / comm_size rows of A and local_n = n / comm_size entries of x, reassembles the full x with MPI_Allgather, and multiplies its own rows.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mpi.h"
#include <math.h>

/* 4096 x 4096 matrix (2^12 x 2^12). Plain integer constants replace the
 * original pow(2,12), which returns double and is not an integer constant
 * expression in C. */
#define MATRIX_ROW ((size_t)1 << 12)
#define MATRIX_COL ((size_t)1 << 12)

/* Distributed y = A * x: each rank owns local_m rows of A (local_A) and
 * local_n entries of x (local_x). The full x is reassembled on every rank
 * with MPI_Allgather, then each rank multiplies its own block of rows. */
void Mat_vect_mul(
	double local_A[],  /* in:  local_m x n block of rows of A */
	double local_x[],  /* in:  this rank's local_n entries of x */
	double local_y[],  /* out: this rank's local_m entries of y */
	size_t local_m,    /* in */
	size_t n,          /* in */
	size_t local_n,    /* in */
	MPI_Comm comm      /* in */
)
{
	double* x = (double*)malloc(n * sizeof(double));
	size_t local_i, j;
	if (!x) { perror("malloc arr"); exit(EXIT_FAILURE); }

	/* Gather every rank's slice of x so each rank holds the full vector. */
	MPI_Allgather(local_x, (int)local_n, MPI_DOUBLE,
	              x, (int)local_n, MPI_DOUBLE, comm);

	for (local_i = 0; local_i < local_m; local_i++) {
		local_y[local_i] = 0.0;
		for (j = 0; j < n; j++)
			local_y[local_i] += local_A[local_i * n + j] * x[j];
	}
	free(x);
}
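
To sanity-check the distributed kernel, the gathered result can be compared against a plain serial multiply on one rank. The helper below is a minimal sketch, not part of the original program (the name serial_mat_vect_mul is mine):

/* Serial reference y = A * x, run on a single rank, for verifying the
 * distributed result. */
void serial_mat_vect_mul(double A[], double x[], double y[],
	size_t m, size_t n)
{
	size_t i, j;
	for (i = 0; i < m; i++) {
		y[i] = 0.0;
		for (j = 0; j < n; j++)
			y[i] += A[i * n + j] * x[j];
	}
}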

/* Fill size entries with 1.0, 1.1, 1.2, ... (row-major matrix data). */
void generateA(double* global_A, size_t size)
{
	double temp = 1.0;
	size_t i = 0;
	for (i = 0; i < size; i++)
	{
		global_A[i] = temp;
		temp += 0.1;
	}
}

/* Each rank generates its local_A block directly, without a global matrix.
 * Note that my_rank is unused, so every rank fills identical values; the
 * commented-out generateLocalA path in main instead slices a real global A. */
void generateLocalA2(double* local_A, int my_rank, size_t size)
{
	(void)my_rank; /* deliberately unused */
	double temp = 1.0;
	size_t i = 0;
	for (i = 0; i < size; i++)
	{
		local_A[i] = temp;
		temp += 0.1;
	}
}

/* Fill the vector x with all ones. */
void generateX(double* global_x, size_t size)
{
	size_t i = 0;
	for (i = 0; i < size; i++)
		global_x[i] = 1.0;
}

/* Copy this rank's block of local_m rows out of the full matrix:
 * rank r owns rows [r * local_m, (r + 1) * local_m). */
void generateLocalA(double* local_A, double* global_A, size_t my_rank, size_t n, size_t local_m) {
	size_t j = 0;
	size_t i = 0;
	for (i = my_rank * local_m * n; i < (my_rank + 1) * local_m * n; i++)
	{
		local_A[j] = global_A[i];
		j++;
	}
}

/* Copy this rank's local_n entries out of the full vector x. */
void generateLocalX(double* local_x, double* global_x, size_t my_rank, size_t local_n) {
	size_t j = 0;
	size_t i = 0;
	for (i = my_rank * local_n; i < (my_rank + 1) * local_n; i++)
	{
		local_x[j] = global_x[i];

		j++;
	}
}
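
Since every rank builds the full global_x and then copies out its own slice, the same distribution could be expressed with a single collective instead. A minimal sketch, assuming global_x is fully initialized on rank 0 and n is divisible by comm_size:

	/* Alternative to generateLocalX: rank 0 scatters equal slices of x. */
	MPI_Scatter(global_x, (int)local_n, MPI_DOUBLE,
	            local_x, (int)local_n, MPI_DOUBLE,
	            0, MPI_COMM_WORLD);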

int main(void) {
	int comm_size;
	int my_rank;

	MPI_Init(NULL, NULL);
	MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

	// global values
	size_t m = MATRIX_ROW;
	size_t n = MATRIX_COL;
	// double* global_A = (double*)malloc(m * n * sizeof(double));
	// if (!global_A) { perror("malloc arr"); exit(EXIT_FAILURE); };
	double* global_x = (double*)malloc(n * sizeof(double));
	if (!global_x) { perror("malloc arr"); exit(EXIT_FAILURE); };
	double* global_y = (double*)malloc(m * sizeof(double));
	if (!global_y) { perror("malloc arr"); exit(EXIT_FAILURE); };

	// generateA(global_A, m * n);
	generateX(global_x, n);


	// local values (assumes comm_size evenly divides m and n)
	size_t local_m = m / comm_size;
	size_t local_n = n / comm_size;
	double* local_A = (double*)malloc(local_m * n * sizeof(double));
	if (!local_A) { perror("malloc arr"); exit(EXIT_FAILURE); };
	double* local_x = (double*)malloc(local_n * sizeof(double));
	if (!local_x) { perror("malloc arr"); exit(EXIT_FAILURE); };
	double* local_y = (double*)malloc(local_m * sizeof(double));
	if (!local_y) { perror("malloc arr"); exit(EXIT_FAILURE); };

	// generateLocalA(local_A, global_A, my_rank, n, local_m);
	generateLocalA2(local_A, my_rank, local_m * n);
	generateLocalX(local_x, global_x, my_rank, local_n);

	double start = 0.0, end;
	// Synchronize before timing so rank 0's measurement is not skewed
	// by ranks entering the kernel at different times
	MPI_Barrier(MPI_COMM_WORLD);
	if (my_rank == 0) {
		// Time begin
		start = MPI_Wtime();
	}
	Mat_vect_mul(local_A, local_x, local_y, local_m, n, local_n, MPI_COMM_WORLD);

	double* y = (double*)malloc(m * sizeof(double));
	if (!y) { perror("malloc arr"); exit(EXIT_FAILURE); };

	// Gather every rank's slice of y so all ranks hold the full result
	MPI_Allgather(local_y, (int)local_m, MPI_DOUBLE, y, (int)local_m, MPI_DOUBLE, MPI_COMM_WORLD);

	if (my_rank == 0) {
		end = MPI_Wtime();
		double time = end - start;
		printf("Time cost is --%f-- seconds.\n", time);
	}


	// free(global_A);
	free(global_x);
	free(global_y);
	free(local_A);
	free(local_x);
	free(local_y);
	free(y);

	MPI_Finalize();

	return 0;
}
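
If correctness needs to be confirmed, rank 0 can compare the gathered y against the serial reference sketched earlier. The snippet below is hypothetical: it assumes the commented-out global_A allocation, generateA, and generateLocalA calls in main are re-enabled (so the local blocks really are slices of global_A, which generateLocalA2 does not guarantee), and it would go just before the free calls:

	if (my_rank == 0) {
		serial_mat_vect_mul(global_A, global_x, global_y, m, n);
		size_t i;
		for (i = 0; i < m; i++)
			if (fabs(global_y[i] - y[i]) > 1e-9 * fabs(global_y[i])) {
				printf("Mismatch at row %zu\n", i);
				break;
			}
	}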

Run commands (the module must be loaded before mpicc is available; the TH Express-2 job launcher is lowercase yhrun):
module load MPI/Intel/MPICH/3.2-icc14-dyn
mpicc -o multi_4 multi_4.c
yhrun -p paratera -N 3 -n 64 -t 20 multi_4
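
The program assumes the process count divides the 4096-element dimensions evenly; with -n 64 each rank handles 4096 / 64 = 64 rows. For a scaling comparison, the same binary can be launched with other counts. The variants below are illustrative only, reusing the partition and time limit from the command above:

yhrun -p paratera -N 1 -n 16 -t 20 multi_4
yhrun -p paratera -N 2 -n 32 -t 20 multi_4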
