Experience
The biggest pain point in MPI programming is debugging: a bug that occurs before an MPI call often surfaces as an MPI error, steering you toward the wrong diagnosis. Errors that come up frequently in practice:
(1) Wrong argument passing: some MPI parameters expect an address, others do not;
(2) A process calls an MPI function it should not, e.g. only the processes in communicator A need a broadcast, but other processes call it as well;
(3) MPI send counts have type int, so watch for int overflow when sending large data (see the chunked-send sketch right after this list);
(4) The send buffer pointer is null, the allocation failed, or the allocated size does not match the count passed to MPI.
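For pitfall (3), the usual workaround is to split a large buffer into chunks so that each count passed to MPI stays within int range. A minimal sketch, assuming a contiguous double buffer (bigbuf, total and CHUNK are illustrative names, not from the notes above):
#include <mpi.h>
#include <algorithm>
// send a large double buffer in pieces so every count fits in an int
void sendLarge(const double *bigbuf, long long total, int dest, MPI_Comm comm)
{
    const long long CHUNK = 1 << 28; // elements per message, well below INT_MAX
    for (long long off = 0; off < total; off += CHUNK)
    {
        int count = (int)std::min(CHUNK, total - off);
        MPI_Send(bigbuf + off, count, MPI_DOUBLE, dest, 0, comm);
    }
}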
Basic Functions (C++)
*MPI bindings differ slightly between languages
1. Initialization
MPI_Init(&argc, &argv)
2. Unique rank of the current process within the MPI_COMM_WORLD communicator
MPI_Comm_rank(MPI_COMM_WORLD, &myid)
3. Total number of processes in the communicator
MPI_Comm_size(MPI_COMM_WORLD, &numprocs)
4. Name of the machine the current process is running on
MPI_Get_processor_name(processorName, &resultLength);
5. Broadcast: process 0 sends one MPI_INT value n to every other process in the communicator
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD)
6. Point-to-point send: this process sends one MPI_DOUBLE value res to process 0; 99 is the message tag, and the last three arguments together determine the receiving process
MPI_Send(&res, 1, MPI_DOUBLE, 0, 99, MPI_COMM_WORLD)
7. Point-to-point receive: this process receives one MPI_DOUBLE value from process i into res; 99 is the message tag, status is the returned status (*note: status does not matter here, but it does real work in the second Practice)
MPI_Recv(&res, 1, MPI_DOUBLE, i, 99, MPI_COMM_WORLD, &status)
8. End of the parallel section
MPI_Finalize()
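Putting items 1-8 together, a minimal sketch of a complete program (the payload myid * n is just a placeholder computation):
#include <mpi.h>
#include <cstdio>
int main(int argc, char *argv[])
{
    int myid, numprocs, n = 0;
    MPI_Status status;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    if (myid == 0) n = 100;
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD); // every rank now has n
    double res = (double)(myid * n);
    if (myid != 0) // workers send, rank 0 receives
        MPI_Send(&res, 1, MPI_DOUBLE, 0, 99, MPI_COMM_WORLD);
    else
        for (int i = 1; i < numprocs; i++)
        {
            MPI_Recv(&res, 1, MPI_DOUBLE, i, 99, MPI_COMM_WORLD, &status);
            printf("from rank %d: %f\n", i, res);
        }
    MPI_Finalize();
    return 0;
}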
9. Process synchronization
MPI_Barrier(MPI_COMM_WORLD)
10. Split processes into communicators according to color and key
MPI_Comm SplitWorld;
MPI_Comm_split(MPI_COMM_WORLD, color, key, &SplitWorld);
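For example, splitting by rank parity yields two sub-communicators; within each one, ranks are reordered by key (here the old rank). A minimal sketch:
int color = myid % 2; // 0 or 1 picks which sub-communicator this process joins
MPI_Comm SplitWorld;
MPI_Comm_split(MPI_COMM_WORLD, color, myid, &SplitWorld);
int newid;
MPI_Comm_rank(SplitWorld, &newid); // rank within the sub-communicator
MPI_Comm_free(&SplitWorld);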
11. Extract from MPI_COMM_WORLD a group group_world, which by itself has no communication capability
MPI_Group group_world;
MPI_Comm_group(MPI_COMM_WORLD, &group_world);
12. Build mpigroup from group_world by selecting specific ranks for it
const int ranks[7] = {0, 10, 20, 30, 40, 50, 60};
MPI_Group mpigroup;
MPI_Group_incl(group_world, 7, ranks, &mpigroup);
13. Give mpigroup communication capability by creating a communicator from it (note the result is an MPI_Comm, not an MPI_Group; see the guard sketch below)
MPI_Comm group_comm;
MPI_Comm_create_group(MPI_COMM_WORLD, mpigroup, 0, &group_comm);
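Unlike MPI_Comm_create, MPI_Comm_create_group must be called only by the members of mpigroup, so it is usually wrapped in a membership test. A sketch (group_comm as in item 13):
int grank;
MPI_Group_rank(mpigroup, &grank); // MPI_UNDEFINED if this process is not a member
if (grank != MPI_UNDEFINED)
{
    MPI_Comm group_comm;
    MPI_Comm_create_group(MPI_COMM_WORLD, mpigroup, 0, &group_comm);
    // ... communicate within group_comm ...
    MPI_Comm_free(&group_comm);
}
MPI_Group_free(&mpigroup);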
14. Process 0 scatters the MPI_INT values in workload to all processes (one value per process), received into mywork
int workload[6] = {5, 10, 15, 20, 25, 30};
int mywork;
MPI_Scatter(workload, 1, MPI_INT, &mywork, 1, MPI_INT, 0, MPI_COMM_WORLD);
15. Process 0 gathers each process's mywork into workload (for uneven workloads, see the MPI_Scatterv sketch below)
int mywork = myid;
int workload[6]; // needs room for one value per process
MPI_Gather(&mywork, 1, MPI_INT, workload, 1, MPI_INT, 0, MPI_COMM_WORLD);
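When the data does not divide evenly across processes, MPI_Scatterv takes per-process counts and displacements instead of a single count. A minimal sketch, assuming workload holds total ints on rank 0 (counts, displs and mine are illustrative names):
#include <vector>
int total = 6;
std::vector<int> counts(numprocs), displs(numprocs);
for (int i = 0, off = 0; i < numprocs; i++)
{
    counts[i] = total / numprocs + (i < total % numprocs ? 1 : 0);
    displs[i] = off;
    off += counts[i];
}
std::vector<int> mine(counts[myid]);
MPI_Scatterv(workload, counts.data(), displs.data(), MPI_INT,
             mine.data(), counts[myid], MPI_INT, 0, MPI_COMM_WORLD);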
16. Non-blocking (asynchronous) send of the 10 doubles pointed to by sendfld to process 2, tag 99, in communicator MPI_COMM_WORLD
MPI_Request reqSend;
MPI_Isend(sendfld, 10, MPI_DOUBLE, 2, 99, MPI_COMM_WORLD, &reqSend);
17. Wait for the non-blocking send to complete (receives are waited on the same way)
MPI_Status status;
MPI_Wait(&reqSend, &status);
18. Non-blocking (asynchronous) receive of 10 doubles from process 0 into the memory pointed to by recvfld, tag 99, in communicator MPI_COMM_WORLD
MPI_Request reqRecv;
MPI_Irecv(recvfld, 10, MPI_DOUBLE, 0, 99, MPI_COMM_WORLD, &reqRecv);
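Items 16-18 combine into the usual overlap pattern: post the receive first, start the send, do independent work, then wait on both requests. A sketch, with neighbor standing in for the partner rank:
MPI_Request reqs[2];
MPI_Status stats[2];
double sendbuf[10], recvbuf[10];
MPI_Irecv(recvbuf, 10, MPI_DOUBLE, neighbor, 99, MPI_COMM_WORLD, &reqs[0]);
MPI_Isend(sendbuf, 10, MPI_DOUBLE, neighbor, 99, MPI_COMM_WORLD, &reqs[1]);
// ... computation that touches neither sendbuf nor recvbuf ...
MPI_Waitall(2, reqs, stats);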
19. Using a user-defined struct in MPI communication
// user-defined struct
struct STUDENT
{
    int stu_id;
    char stu_name[8];
    double stu_source;
};
STUDENT wbj;
int blocklen[3];
MPI_Aint disp[3], addresses[3], adrs_start;
MPI_Datatype types[3], MPI_STUDENT;
// get the address of each struct member
MPI_Get_address(&wbj.stu_id, &addresses[0]);
MPI_Get_address(&wbj.stu_name, &addresses[1]);
MPI_Get_address(&wbj.stu_source, &addresses[2]);
MPI_Get_address(&wbj, &adrs_start);
// compute each member's offset relative to the start of the struct
for (int i = 0; i < 3; i++)
{
    disp[i] = addresses[i] - adrs_start;
}
// set the length of each member
for (int i = 0; i < 3; i++)
{
    blocklen[i] = 1;
}
blocklen[1] = 8; // stu_name is an array of 8 chars
// set the type of each member
types[0] = MPI_INT;
types[1] = MPI_CHAR;
types[2] = MPI_DOUBLE;
// create the MPI struct type used for communication
MPI_Type_create_struct(3, blocklen, disp, types, &MPI_STUDENT);
MPI_Type_commit(&MPI_STUDENT);
MPI_Bcast(&wbj, 1, MPI_STUDENT, 0, MPI_COMM_WORLD);
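Once the committed type is no longer needed it should be released (and note that, strictly speaking, user code should avoid the reserved MPI_ prefix used by the name MPI_STUDENT here):
MPI_Type_free(&MPI_STUDENT);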
Practice: computing π
// integrand used to compute PI
double func(double xi)
{
    return (4.0 / (1.0 + xi * xi));
}
// compute PI with multiple MPI processes
void computePI(int argc, char *argv[])
{
    int n, myid, numprocs;
    double pi, h, xi, res, startTime, endTime;
    pi = 0.0;
    res = 0.0;
    MPI_Status status;
    MPI_Init(&argc, &argv);                     // initialize the parallel environment
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);       // get the rank of the current process
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);   // get the number of processes in the communicator
    if (myid != 0)
    {
        MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
        h = 1.0 / (double)n;
        // workers 1..numprocs-1 each take every (numprocs-1)-th midpoint
        for (int i = myid; i <= n; i += (numprocs - 1))
        {
            xi = h * ((double)i - 0.5);
            res += func(xi);
        }
        res = h * res;
        MPI_Send(&res, 1, MPI_DOUBLE, 0, 99, MPI_COMM_WORLD);
    }
    else // process 0 accumulates the partial sums
    {
        printf("Enter N : ");
        scanf("%d", &n);
        startTime = MPI_Wtime();
        MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
        h = 1.0 / (double)n;
        for (int i = 1; i < numprocs; i++)
        {
            MPI_Recv(&res, 1, MPI_DOUBLE, i, 99, MPI_COMM_WORLD, &status);
            pi = pi + res;
        }
        endTime = MPI_Wtime();
        printf("\nPI is %f\nTime is : %f\n", pi, endTime - startTime);
    }
    MPI_Finalize(); // shut down the parallel environment
}
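The same computation is more commonly written with a collective reduction instead of explicit Send/Recv, which also lets rank 0 take a share of the work. A sketch of the core, reusing func, n, myid and numprocs from above:
double h = 1.0 / (double)n, local = 0.0, pi = 0.0;
for (int i = myid + 1; i <= n; i += numprocs) // every rank takes every numprocs-th midpoint
{
    double xi = h * ((double)i - 0.5);
    local += func(xi);
}
local *= h;
MPI_Reduce(&local, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
if (myid == 0)
    printf("PI is %f\n", pi);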
Practice: matrix multiplication
// MPI multi-process matrix-vector multiplication (master/worker)
void matrixMultiplication(int argc, char *argv[])
{
    const int MAX_ROWS = 3200, MAX_COLS = 4500;
    const int row = 3200, col = 4500;
    //a = (double*)malloc(row * sizeof(double)); -- faster -- not done
    double **a = new double *[MAX_ROWS]; // dynamically allocate the large array to avoid stack overflow - new & delete
    for (int i = 0; i < MAX_ROWS; i++)
    {
        a[i] = new double[MAX_COLS];
    }
    double *b = new double[MAX_COLS];
    double *c = new double[MAX_ROWS];
    double ans;
    int myid, master, numprocs, numsent, sender, tag;
    MPI_Status status;
    MPI_Init(&argc, &argv);                     // initialize
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);       // process rank
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);   // number of processes
    master = 0;                                 // master rank
    double stime, etime;                        // timing
    stime = MPI_Wtime();
    if (myid == master)
    {
        cout << "this is master" << endl;
        // initialize test data (1-based indexing; row/column 0 are unused)
        for (int i = 1; i < col; i++)
        {
            b[i] = i;
            for (int j = 1; j < row; j++)
            {
                a[j][i] = j;
                //cout << a[j][i] << " ";
            }
            //cout << endl;
        }
        //for (int i = 1; i < col; i++)
        //{
        //    cout << b[i] << " ";
        //}
        MPI_Bcast(b, col, MPI_DOUBLE, master, MPI_COMM_WORLD); // broadcast vector b
        numsent = 0; // number of tasks handed out so far
        // hand one row to each worker (assumes numprocs - 1 <= row - 1,
        // otherwise the extra workers never receive a task or a stop flag)
        for (int i = 1; i <= min(numprocs - 1, row - 1); i++)
        {
            MPI_Send(a[i], col, MPI_DOUBLE, i, i, MPI_COMM_WORLD);
            numsent += 1;
        }
        for (int i = 1; i < row; i++) // collect the results
        {
            MPI_Recv(&ans, 1, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
            sender = status.MPI_SOURCE; // which process the result came from
            tag = status.MPI_TAG;       // the tag identifies which task it was
            c[tag] = ans;
            if (numsent < row - 1) // keep feeding idle workers
            {
                MPI_Send(a[numsent + 1], col, MPI_DOUBLE, sender, numsent + 1, MPI_COMM_WORLD);
                numsent += 1;
            }
            else // all tasks done: send the stop flag, tag 0 (payload is ignored)
            {
                MPI_Send(&ans, 1, MPI_DOUBLE, sender, 0, MPI_COMM_WORLD);
            }
        }
        // print the result
        cout << "C: " << endl;
        for (int i = 1; i < row; i++)
        {
            cout << c[i] << " ";
        }
        cout << endl;
        etime = MPI_Wtime();
        cout << "Elapsed time is " << etime - stime << " seconds" << endl;
        cout << "Timer resolution is " << MPI_Wtick() << " seconds" << endl;
    }
    else // worker logic
    {
        MPI_Bcast(b, col, MPI_DOUBLE, master, MPI_COMM_WORLD); // receive vector b
        cout << "this is slave" << endl;
        // verify the broadcast was received
        //for (int i = 0; i < col; i++)
        //{
        //    cout << b[i] << " ";
        //}
        while (true)
        {
            double buff[col];
            ans = 0;
            MPI_Recv(buff, col, MPI_DOUBLE, master, MPI_ANY_TAG, MPI_COMM_WORLD, &status); // receive a task
            tag = status.MPI_TAG; // the tag identifies which task this is
            if (tag == 0) // stop flag
            {
                break;
            }
            for (int i = 1; i < col; i++) // dot product over the columns
            {
                ans += buff[i] * b[i];
            }
            MPI_Send(&ans, 1, MPI_DOUBLE, master, tag, MPI_COMM_WORLD); // send the result back
        }
    }
    MPI_Finalize(); // end of the parallel section
    for (int i = 0; i < MAX_ROWS; i++) // free memory
    {
        delete[] a[i];
    }
    delete[] a;
    delete[] b;
    delete[] c;
}