Experience
The biggest pain point in MPI programming is debugging: a bug that occurs before an MPI call often surfaces as an MPI error, steering you toward the wrong diagnosis. Errors that come up frequently in practice:
(1) Wrong argument passing: some MPI parameters expect an address, others do not;
(2) A process calls an MPI function it should not, e.g. only the processes in communicator A need a broadcast, but other processes call it as well;
(3) MPI send counts have type int, so watch for int overflow when sending large data (see the chunked-send sketch right after this list);
(4) The send buffer pointer is null, the allocation failed, or the allocated size does not match the count passed to MPI.
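For pitfall (3), the usual workaround is to split a large buffer into chunks so that each count passed to MPI stays within int range. A minimal sketch, assuming a contiguous double buffer (bigbuf, total and CHUNK are illustrative names, not from the notes above):
#include <mpi.h>
#include <algorithm>
// send a large double buffer in pieces so every count fits in an int
void sendLarge(const double *bigbuf, long long total, int dest, MPI_Comm comm)
{
    const long long CHUNK = 1 << 28; // elements per message, well below INT_MAX
    for (long long off = 0; off < total; off += CHUNK)
    {
        int count = (int)std::min(CHUNK, total - off);
        MPI_Send(bigbuf + off, count, MPI_DOUBLE, dest, 0, comm);
    }
}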
Basic Functions (C++)
*MPI bindings differ slightly between languages
1. Initialization
MPI_Init(&argc, &argv)
2. Unique rank of the current process within the MPI_COMM_WORLD communicator
MPI_Comm_rank(MPI_COMM_WORLD, &myid)
3. Total number of processes in the communicator
MPI_Comm_size(MPI_COMM_WORLD, &numprocs)
4. Name of the machine the current process is running on
MPI_Get_processor_name(processorName, &resultLength);
5. Broadcast: process 0 sends one MPI_INT value n to every other process in the communicator
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD)
6. Point-to-point send: this process sends one MPI_DOUBLE value res to process 0; 99 is the message tag, and the last three arguments together determine the receiving process
MPI_Send(&res, 1, MPI_DOUBLE, 0, 99, MPI_COMM_WORLD)
7. Point-to-point receive: this process receives one MPI_DOUBLE value from process i into res; 99 is the message tag, status is the returned status (*note: status does not matter here, but it does real work in the second Practice)
MPI_Recv(&res, 1, MPI_DOUBLE, i, 99, MPI_COMM_WORLD, &status)
8. End of the parallel section
MPI_Finalize()
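Putting items 1-8 together, a minimal sketch of a complete program (the payload myid * n is just a placeholder computation):
#include <mpi.h>
#include <cstdio>
int main(int argc, char *argv[])
{
    int myid, numprocs, n = 0;
    MPI_Status status;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    if (myid == 0) n = 100;
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD); // every rank now has n
    double res = (double)(myid * n);
    if (myid != 0) // workers send, rank 0 receives
        MPI_Send(&res, 1, MPI_DOUBLE, 0, 99, MPI_COMM_WORLD);
    else
        for (int i = 1; i < numprocs; i++)
        {
            MPI_Recv(&res, 1, MPI_DOUBLE, i, 99, MPI_COMM_WORLD, &status);
            printf("from rank %d: %f\n", i, res);
        }
    MPI_Finalize();
    return 0;
}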
9. Process synchronization
MPI_Barrier(MPI_COMM_WORLD)
10. Split processes into communicators according to color and key
MPI_Comm SplitWorld;
MPI_Comm_split(MPI_COMM_WORLD, color, key, &SplitWorld);
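For example, splitting by rank parity yields two sub-communicators; within each one, ranks are reordered by key (here the old rank). A minimal sketch:
int color = myid % 2; // 0 or 1 picks which sub-communicator this process joins
MPI_Comm SplitWorld;
MPI_Comm_split(MPI_COMM_WORLD, color, myid, &SplitWorld);
int newid;
MPI_Comm_rank(SplitWorld, &newid); // rank within the sub-communicator
MPI_Comm_free(&SplitWorld);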
11. Extract from MPI_COMM_WORLD a group group_world, which by itself has no communication capability
MPI_Group group_world;
MPI_Comm_group(MPI_COMM_WORLD, &group_world);
12. Build mpigroup from group_world by selecting specific ranks for it
const int ranks[7] = {0, 10, 20, 30, 40, 50, 60};
MPI_Group mpigroup;
MPI_Group_incl(group_world, 7, ranks, &mpigroup);
13. Give mpigroup communication capability by creating a communicator from it (note the result is an MPI_Comm, not an MPI_Group; see the guard sketch below)
MPI_Comm group_comm;
MPI_Comm_create_group(MPI_COMM_WORLD, mpigroup, 0, &group_comm);
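Unlike MPI_Comm_create, MPI_Comm_create_group must be called only by the members of mpigroup, so it is usually wrapped in a membership test. A sketch (group_comm as in item 13):
int grank;
MPI_Group_rank(mpigroup, &grank); // MPI_UNDEFINED if this process is not a member
if (grank != MPI_UNDEFINED)
{
    MPI_Comm group_comm;
    MPI_Comm_create_group(MPI_COMM_WORLD, mpigroup, 0, &group_comm);
    // ... communicate within group_comm ...
    MPI_Comm_free(&group_comm);
}
MPI_Group_free(&mpigroup);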
14. Process 0 scatters the MPI_INT values in workload to all processes (one value per process), received into mywork
int workload[6] = {5, 10, 15, 20, 25, 30};
int mywork;
MPI_Scatter(workload, 1, MPI_INT, &mywork, 1, MPI_INT, 0, MPI_COMM_WORLD);
15. Process 0 gathers each process's mywork into workload (for uneven workloads, see the MPI_Scatterv sketch below)
int mywork = myid;
int workload[6]; // needs room for one value per process
MPI_Gather(&mywork, 1, MPI_INT, workload, 1, MPI_INT, 0, MPI_COMM_WORLD);
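When the data does not divide evenly across processes, MPI_Scatterv takes per-process counts and displacements instead of a single count. A minimal sketch, assuming workload holds total ints on rank 0 (counts, displs and mine are illustrative names):
#include <vector>
int total = 6;
std::vector<int> counts(numprocs), displs(numprocs);
for (int i = 0, off = 0; i < numprocs; i++)
{
    counts[i] = total / numprocs + (i < total % numprocs ? 1 : 0);
    displs[i] = off;
    off += counts[i];
}
std::vector<int> mine(counts[myid]);
MPI_Scatterv(workload, counts.data(), displs.data(), MPI_INT,
             mine.data(), counts[myid], MPI_INT, 0, MPI_COMM_WORLD);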
16. Non-blocking (asynchronous) send of the 10 doubles pointed to by sendfld to process 2, tag 99, in communicator MPI_COMM_WORLD
MPI_Request reqSend;
MPI_Isend(sendfld, 10, MPI_DOUBLE, 2, 99, MPI_COMM_WORLD, &reqSend);
17. Wait for the non-blocking send to complete (receives are waited on the same way)
MPI_Status status;
MPI_Wait(&reqSend, &status);
18. Non-blocking (asynchronous) receive of 10 doubles from process 0 into the memory pointed to by recvfld, tag 99, in communicator MPI_COMM_WORLD
MPI_Request reqRecv;
MPI_Irecv(recvfld, 10, MPI_DOUBLE, 0, 99, MPI_COMM_WORLD, &reqRecv);
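Items 16-18 combine into the usual overlap pattern: post the receive first, start the send, do independent work, then wait on both requests. A sketch, with neighbor standing in for the partner rank:
MPI_Request reqs[2];
MPI_Status stats[2];
double sendbuf[10], recvbuf[10];
MPI_Irecv(recvbuf, 10, MPI_DOUBLE, neighbor, 99, MPI_COMM_WORLD, &reqs[0]);
MPI_Isend(sendbuf, 10, MPI_DOUBLE, neighbor, 99, MPI_COMM_WORLD, &reqs[1]);
// ... computation that touches neither sendbuf nor recvbuf ...
MPI_Waitall(2, reqs, stats);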
19. Using a user-defined struct in MPI communication
// user-defined struct
struct STUDENT
{
    int stu_id;
    char stu_name[8];
    double stu_source;
};
STUDENT wbj;
int blocklen[3];
MPI_Aint disp[3], addresses[3], adrs_start;
MPI_Datatype types[3], MPI_STUDENT;
// get the address of each struct member
MPI_Get_address(&wbj.stu_id, &addresses[0]);
MPI_Get_address(&wbj.stu_name, &addresses[1]);
MPI_Get_address(&wbj.stu_source, &addresses[2]);
MPI_Get_address(&wbj, &adrs_start);
// compute each member's offset relative to the start of the struct
for (int i = 0; i < 3; i++)
{
    disp[i] = addresses[i] - adrs_start;
}
// set the length of each member
for (int i = 0; i < 3; i++)
{
    blocklen[i] = 1;
}
blocklen[1] = 8; // stu_name is an array of 8 chars
// set the type of each member
types[0] = MPI_INT;
types[1] = MPI_CHAR;
types[2] = MPI_DOUBLE;
// create the MPI struct type used for communication
MPI_Type_create_struct(3, blocklen, disp, types, &MPI_STUDENT);
MPI_Type_commit(&MPI_STUDENT);
MPI_Bcast(&wbj, 1, MPI_STUDENT, 0, MPI_COMM_WORLD);
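Once the committed type is no longer needed it should be released (and note that, strictly speaking, user code should avoid the reserved MPI_ prefix used by the name MPI_STUDENT here):
MPI_Type_free(&MPI_STUDENT);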
Practice: computing π
// integrand used to compute PI
double func(double xi)
{
    return (4.0 / (1.0 + xi * xi));
}
// compute PI with multiple MPI processes
void computePI(int argc, char *argv[])
{
    int n, myid, numprocs;
    double pi, h, xi, res, startTime, endTime;
    pi = 0.0;
    res = 0.0;
    MPI_Status status;
    MPI_Init(&argc, &argv);                     // initialize the parallel environment
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);       // get the rank of the current process
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);   // get the number of processes in the communicator
    if (myid != 0)
    {
        MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
        h = 1.0 / (double)n;
        // workers 1..numprocs-1 each take every (numprocs-1)-th midpoint
        for (int i = myid; i <= n; i += (numprocs - 1))
        {
            xi = h * ((double)i - 0.5);
            res += func(xi);
        }
        res = h * res;
        MPI_Send(&res, 1, MPI_DOUBLE, 0, 99, MPI_COMM_WORLD);
    }
    else // process 0 accumulates the partial sums
    {
        printf("Enter N : ");
        scanf("%d", &n);
        startTime = MPI_Wtime();
        MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
        h = 1.0 / (double)n;
        for (int i = 1; i < numprocs; i++)
        {
            MPI_Recv(&res, 1, MPI_DOUBLE, i, 99, MPI_COMM_WORLD, &status);
            pi = pi + res;
        }
        endTime = MPI_Wtime();
        printf("\nPI is %f\nTime is : %f\n", pi, endTime - startTime);
    }
    MPI_Finalize(); // shut down the parallel environment
}
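The same computation is more commonly written with a collective reduction instead of explicit Send/Recv, which also lets rank 0 take a share of the work. A sketch of the core, reusing func, n, myid and numprocs from above:
double h = 1.0 / (double)n, local = 0.0, pi = 0.0;
for (int i = myid + 1; i <= n; i += numprocs) // every rank takes every numprocs-th midpoint
{
    double xi = h * ((double)i - 0.5);
    local += func(xi);
}
local *= h;
MPI_Reduce(&local, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
if (myid == 0)
    printf("PI is %f\n", pi);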
Practice: matrix multiplication
// MPI multi-process matrix-vector multiplication (master/worker)
void matrixMultiplication(int argc, char *argv[])
{
    const int MAX_ROWS = 3200, MAX_COLS = 4500;
    const int row = 3200, col = 4500;
    //a = (double*)malloc(row * sizeof(double)); -- faster -- not done
    double **a = new double *[MAX_ROWS]; // dynamically allocate the large array to avoid stack overflow - new & delete
    for (int i = 0; i < MAX_ROWS; i++)
    {
        a[i] = new double[MAX_COLS];
    }
    double *b = new double[MAX_COLS];
    double *c = new double[MAX_ROWS];
    double ans;
    int myid, master, numprocs, numsent, sender, tag;
    MPI_Status status;
    MPI_Init(&argc, &argv);                     // initialize
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);       // process rank
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);   // number of processes
    master = 0;                                 // master rank
    double stime, etime;                        // timing
    stime = MPI_Wtime();
    if (myid == master)
    {
        cout << "this is master" << endl;
        // initialize test data (1-based indexing; row/column 0 are unused)
        for (int i = 1; i < col; i++)
        {
            b[i] = i;
            for (int j = 1; j < row; j++)
            {
                a[j][i] = j;
                //cout << a[j][i] << " ";
            }
            //cout << endl;
        }
        //for (int i = 1; i < col; i++)
        //{
        //    cout << b[i] << " ";
        //}
        MPI_Bcast(b, col, MPI_DOUBLE, master, MPI_COMM_WORLD); // broadcast vector b
        numsent = 0; // number of tasks handed out so far
        // hand one row to each worker (assumes numprocs - 1 <= row - 1,
        // otherwise the extra workers never receive a task or a stop flag)
        for (int i = 1; i <= min(numprocs - 1, row - 1); i++)
        {
            MPI_Send(a[i], col, MPI_DOUBLE, i, i, MPI_COMM_WORLD);
            numsent += 1;
        }
        for (int i = 1; i < row; i++) // collect the results
        {
            MPI_Recv(&ans, 1, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
            sender = status.MPI_SOURCE; // which process the result came from
            tag = status.MPI_TAG;       // the tag identifies which task it was
            c[tag] = ans;
            if (numsent < row - 1) // keep feeding idle workers
            {
                MPI_Send(a[numsent + 1], col, MPI_DOUBLE, sender, numsent + 1, MPI_COMM_WORLD);
                numsent += 1;
            }
            else // all tasks done: send the stop flag, tag 0 (payload is ignored)
            {
                MPI_Send(&ans, 1, MPI_DOUBLE, sender, 0, MPI_COMM_WORLD);
            }
        }
        // print the result
        cout << "C: " << endl;
        for (int i = 1; i < row; i++)
        {
            cout << c[i] << " ";
        }
        cout << endl;
        etime = MPI_Wtime();
        cout << "Elapsed time is " << etime - stime << " seconds" << endl;
        cout << "Timer resolution is " << MPI_Wtick() << " seconds" << endl;
    }
    else // worker logic
    {
        MPI_Bcast(b, col, MPI_DOUBLE, master, MPI_COMM_WORLD); // receive vector b
        cout << "this is slave" << endl;
        // verify the broadcast was received
        //for (int i = 0; i < col; i++)
        //{
        //    cout << b[i] << " ";
        //}
        while (true)
        {
            double buff[col];
            ans = 0;
            MPI_Recv(buff, col, MPI_DOUBLE, master, MPI_ANY_TAG, MPI_COMM_WORLD, &status); // receive a task
            tag = status.MPI_TAG; // the tag identifies which task this is
            if (tag == 0) // stop flag
            {
                break;
            }
            for (int i = 1; i < col; i++) // dot product over the columns
            {
                ans += buff[i] * b[i];
            }
            MPI_Send(&ans, 1, MPI_DOUBLE, master, tag, MPI_COMM_WORLD); // send the result back
        }
    }
    MPI_Finalize(); // end of the parallel section
    for (int i = 0; i < MAX_ROWS; i++) // free memory
    {
        delete[] a[i];
    }
    delete[] a;
    delete[] b;
    delete[] c;
}