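/*
 * For background on binomial trees see
 * http://hi.baidu.com/isswangqing/item/8e0908273e773d54c38d59bc
 * A binomial tree is a very good fit for the bcast operation: it adds no extra
 * communication overhead, and the collective gains communication parallelism.
 * I came up with this idea earlier; after finishing the implementation I found
 * that MPI uses it as well, along with other methods. I will analyze the MPI
 * source code in a couple of days.
 */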
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
/* Number of processes in MPI_COMM_WORLD; filled in by main via MPI_Comm_size. */
int mpiSize;
/* Rank of this process in MPI_COMM_WORLD; filled in by main via MPI_Comm_rank. */
int mpiRank;
void bcast (void *data, int size)
{
void * p = data;
int start = 0;
int end = mpiSize - 1;
while (1)
{
if (start == end)
{
break;
}
int mid = (start + end + 1) / 2;
if (mpiRank >= start && mpiRank <= mid - 1)// front half
{
if (start == mpiRank)
{
MPI_Send (data, size, MPI_BYTE, mid, 0, MPI_COMM_WORLD);//data start to mid size mid - start
}
end = mid - 1;
}
else if (mpiRank >= mid && mpiRank <= end)
{
if (mpiRank == mid)
{
MPI_Status s;
MPI_Recv(data, size, MPI_BYTE, start, 0, MPI_COMM_WORLD, &s);//receive from start
}
start = mid;
// p = p + mid - start;
// printf ("%c \n",((char*) p)[0]);
}
}
}
/*
 * Demo driver: rank 0 fills a buffer with one letter per process, the buffer
 * is distributed with bcast, and every rank prints what it received.
 *
 * Returns 0 on success; aborts the MPI job if the buffer cannot be allocated.
 */
int main (int argc, char **argv)
{
    MPI_Init (&argc, &argv);
    MPI_Comm_size (MPI_COMM_WORLD, &mpiSize);
    MPI_Comm_rank (MPI_COMM_WORLD, &mpiRank);

    /*
     * One extra zeroed byte: only mpiSize bytes are broadcast, so each rank
     * keeps its own NUL terminator and the "%s" below stays inside the buffer.
     */
    char *data = calloc ((size_t) mpiSize + 1, 1);
    if (data == NULL)
    {
        fprintf (stderr, "%d : out of memory\n", mpiRank);
        MPI_Abort (MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }

    if (0 == mpiRank)
    {
        for (int i = 0; i < mpiSize; ++i)
        {
            /* Wrap around the alphabet so runs with more than 26 ranks stay printable. */
            data[i] = (char) ('a' + i % 26);
        }
    }

    bcast (data, mpiSize);
    printf ("%d : %s \n", mpiRank, data);

    free (data);
    MPI_Finalize ();
    return 0;
}