Implementing PSRS Sort with OpenMP + MPI

The program is built on an MPI framework, and the local sorts along the way use an OpenMP implementation of PSRS. Each MPI rank first sorts its own block with the OpenMP PSRS class below; the ranks then run the usual PSRS phases among themselves: regular sampling, pivot selection on rank 0, partitioning each block by the pivots, an all-to-all exchange, and a final local sort of the received chunk.

#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include <mpi.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include "unistd.h"
#include <algorithm>
#include <time.h>
#include <string.h>
#define BLOCK 1024*1024
#define ERO if(!myRank)			/* execute the following statement on rank 0 only */
class OmpPsrs
{
public:
	OmpPsrs(){
		vdata = NULL;
		sampleList = NULL;
		primarySamples = NULL;
		sortPoint = NULL;
		primaryDividePoint = NULL;
	}

	~OmpPsrs() {}
	//Run the PSRS sort implemented with OpenMP; frees dataSet and returns a newly allocated sorted array
	double* run(double* dataSet, int dataSize) {
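		//PSRS within one process: (1) each thread sorts its block and samples it,
		//(2) pivots are chosen from the sorted samples, (3) each block is
		//partitioned by the pivots, (4) matching segments are regrouped into
		//per-thread chunks, which are sorted independently.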
		double* swapData = NULL;
		thread_count = 32;				/* fixed thread count; assumes dataSize >= thread_count*thread_count */
		todo = dataSize / thread_count;
		n = dataSize;					/* true size; the last thread absorbs the dataSize % thread_count remainder */

		vdata = dataSet;

		sampleList = (double*)malloc(sizeof(double)*thread_count*thread_count);
	#	pragma omp parallel num_threads(thread_count)
		selectSamples();


		//sort the gathered samples and take every thread_count-th one as a pivot
		std::sort(sampleList, sampleList + thread_count*thread_count);
		primarySamples = (double*)malloc(sizeof(double)*(thread_count - 1));
		for (int i = 0;i < thread_count - 1;++i) primarySamples[i] = sampleList[(i + 1)*thread_count];
		primaryDividePoint = (double**)malloc(sizeof(double*)*(thread_count*thread_count + 1));

	#	pragma omp parallel num_threads(thread_count)
		primaryDivide();

		//global swap (within-process regroup); close the partition table with a sentinel
		primaryDividePoint[thread_count*thread_count] = vdata + dataSize;

		int pointer = 0;
		sortPoint = (int*)malloc(sizeof(int)*(thread_count + 1));
		swapData = (double*)malloc(sizeof(double)*dataSize);
		sortPoint[0] = 0;
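		//Regroup: for destination chunk p, concatenate segment p from every
		//thread's partitioned block; sortPoint[] records the chunk boundaries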
		for (int processer = 0; processer < thread_count; ++processer) {
			for (int segment = 0; segment < thread_count; ++segment)
			{
				double* head = primaryDividePoint[segment*thread_count + processer];
				double* tail = primaryDividePoint[segment*thread_count + processer + 1];
				for (double* i = head;i < tail;++i) {
					swapData[pointer++] = *i;
				}
			}
			sortPoint[processer + 1] = pointer;
		}
		free(vdata);

	#	pragma omp parallel num_threads(thread_count)
		lastSort(swapData);
		//printf("time to OmpPsrs sort :%fs\n", (double)(stop - start)/CLOCKS_PER_SEC);
		//printf("%d\n", dataSet[todo-1]);
		//printf("%d\n", dataSet[0]);

		/*bool check = true;
		for (int i = 0;i < dataSize - 1;++i) if (swapData[i]>swapData[i + 1]) check = false;
		if (check) printf("pass check\n\n");*/

		return swapData;
	}
private:
	int 			todo; 				/* elements per thread (dataSize / thread_count)	*/
	int 			thread_count;  		/* number of OpenMP threads							*/
	int 			n;					/* total element count								*/
	double* 		vdata;				/* shared data array								*/
	double* 		sampleList; 		/* gathered samples									*/
	double* 		primarySamples;		/* pivots											*/
	int* 			sortPoint;			/* chunk boundaries for the final sort				*/
	double** 		primaryDividePoint;	/* partition points									*/
	
	void selectSamples() {
		//Phase 1: sort this thread's block, then take thread_count evenly spaced samples
		int my_rank = omp_get_thread_num();
		int start = my_rank * todo;
		int end = (my_rank == thread_count - 1) ? n : start + todo;	/* last thread takes the remainder */

		std::sort(vdata + start, vdata + end);

		int sampleStride = todo / thread_count;
		for (int i = 0;i < thread_count;++i) sampleList[my_rank*thread_count + i] = vdata[my_rank*todo + sampleStride*i];
	}
	void primaryDivide() {
		//Phase 3: partition this thread's sorted block by the pivots.
		//std::upper_bound is equivalent to the original lower_bound-plus-skip-equals
		//loop, but cannot dereference past the end of the block.
		int my_rank = omp_get_thread_num();
		int start = my_rank * todo;
		int end = (my_rank == thread_count - 1) ? n : start + todo;

		primaryDividePoint[my_rank*thread_count] = vdata + start;
		for (int i = 0;i < thread_count - 1;++i) {
			primaryDividePoint[my_rank*thread_count + 1 + i] = std::upper_bound(vdata + start, vdata + end, primarySamples[i]);
		}
	}

	void lastSort(double* swapData) {
		//Phase 4: sort the chunk assigned to this thread
		int my_rank = omp_get_thread_num();
		std::sort(swapData + sortPoint[my_rank], swapData + sortPoint[my_rank + 1]);
	}
};
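For reference, here is a minimal single-process self-test of the class above. This is a sketch added for illustration (the function name ompPsrsSelfTest and the test size are my own, not part of the original program); it assumes at least thread_count*thread_count = 1024 elements so each thread can draw its samples.

//Illustrative self-test for OmpPsrs (hypothetical helper, not in the original program)
static void ompPsrsSelfTest(int n) {
	double* a = (double*)malloc(sizeof(double) * n);
	srand(42);
	for (int i = 0; i < n; ++i) a[i] = (double)rand() / RAND_MAX;

	double* sorted = OmpPsrs().run(a, n);		//run() frees a and returns a new array

	for (int i = 0; i + 1 < n; ++i)
		if (sorted[i] > sorted[i + 1]) { printf("self-test: order violated at %d\n", i); break; }
	free(sorted);
}

Calling ompPsrsSelfTest(1 << 20) from a single rank exercises the OpenMP sorter in isolation from the MPI machinery.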

int         commSize;             		/* number of processes					*/
int         myRank;              		/* my process rank						*/
double*		data = NULL;		  		/* gathered result (rank 0)				*/
double*	    subData = NULL;		 		/* this process's data					*/
double*		newData = NULL;		  		/* data after the global exchange		*/
int		    todo;						/* elements initially per process		*/
long		totalSize;					/* total number of elements				*/
double*		samples = NULL;				/* gathered samples						*/
double*		primarySamples = NULL;		/* pivots								*/
double**	primaryDividePoint = NULL;	/* pivot partition points				*/
int*		subSegmentLength = NULL;	/* segment lengths this rank sends		*/
int*		segmentLength = NULL;		/* commSize*commSize length matrix		*/
int			newSize;					/* element count after the exchange		*/
OmpPsrs 	ompPsrs = OmpPsrs();		/* OpenMP sorter						*/

void selectSamples() {
	//Each rank sorts its block with the OpenMP PSRS sorter, then picks commSize evenly spaced samples
	subData = ompPsrs.run(subData, todo);
	//std::sort(subData, subData + todo);	//plain-sort alternative
	int sampleStride = todo / commSize;
	double* selectedSample = (double*)malloc(sizeof(double)*commSize);
	for (int i = 0;i < commSize;++i) {
		selectedSample[i] = subData[i*sampleStride];
		//printf("%d\n", selectedSample[i]);
	}
	MPI_Gather(selectedSample, commSize, MPI_DOUBLE, samples, commSize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	free(selectedSample);
}

void selectPrimarySamples() {
	//Rank 0 sorts the commSize*commSize gathered samples and broadcasts commSize-1 pivots
	primarySamples = (double*)malloc(sizeof(double)*(commSize - 1));
	ERO {
		samples = ompPsrs.run(samples, commSize*commSize);
		//std::sort(samples, samples + commSize*commSize);
		//for (int i = 0;i < commSize*commSize;i++) printf("%d  ", samples[i]);
		for (int i = 0;i < commSize - 1;++i) {
			primarySamples[i] = samples[(i + 1)*commSize];
			//printf("%d   ", primarySamples[i]);
		}
	}
	MPI_Bcast(primarySamples, commSize - 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
}

void primaryDivide() {
	subSegmentLength = (int*)malloc(sizeof(int) * commSize);
	segmentLength = (int*)malloc(sizeof(int)*commSize*commSize);
	primaryDividePoint = (double**)malloc(sizeof(double*) * (commSize + 1));
	primaryDividePoint[0] = subData;

	for (int i = 0;i < commSize - 1;++i) {
		//upper_bound is equivalent to the original lower_bound-plus-skip-equals loop
		//and cannot dereference past subData + todo
		primaryDividePoint[i + 1] = std::upper_bound(subData, subData + todo, primarySamples[i]);
	}

	primaryDividePoint[commSize] = subData + todo;
	for (int i = 0;i < commSize;++i) subSegmentLength[i] = primaryDividePoint[i + 1] - primaryDividePoint[i];

	//Share the full length matrix: gather each rank's row on rank 0, then broadcast it
	MPI_Gather(subSegmentLength, commSize, MPI_INT, segmentLength, commSize, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Bcast(segmentLength, commSize*commSize, MPI_INT, 0, MPI_COMM_WORLD);

	//if (myRank==1) for (int i = 0;i < commSize*commSize;i++) printf("%d  ", segmentLength[i]);
}

void globalSwap() {
	int* sendNum = (int*)malloc(sizeof(int)*commSize);
	int* recvNum = (int*)malloc(sizeof(int)*commSize);
	int* sendStride = (int*)malloc(sizeof(int)*commSize);
	int* recvStride = (int*)malloc(sizeof(int)*commSize);
	newSize = 0;
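	//segmentLength is a commSize x commSize matrix: row i holds the lengths rank i
	//produced, so my row is what I send and my column is what I receive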
	for (int i = 0;i < commSize;++i) {
		sendNum[i] = segmentLength[myRank*commSize + i];
		recvNum[i] = segmentLength[i*commSize + myRank];
		newSize += recvNum[i];
	}
	sendStride[0] = 0;
	recvStride[0] = 0;
	for (int i = 1;i < commSize;++i) {
		sendStride[i] = sendStride[i - 1] + sendNum[i - 1];
		recvStride[i] = recvStride[i - 1] + recvNum[i - 1];
	}

	newData = (double*)malloc(sizeof(double)*newSize);

	MPI_Alltoallv(subData, sendNum, sendStride, MPI_DOUBLE, newData, recvNum, recvStride, MPI_DOUBLE, MPI_COMM_WORLD);
	MPI_Barrier(MPI_COMM_WORLD);
	free(subData);

	/*if (myRank == 0) {
		for (int i = 0;i < newSize;i++) printf("%d\n", newData[i]);
	}*/
}

bool subSortCheck(){
	for (int i = 0;i < newSize - 1;++i) if (newData[i] > newData[i + 1]) return false;
	return true;
}

void finalSort() {
	//Sort the chunk received in the global exchange (newSize need not be a
	//multiple of the OpenMP thread count; the class handles the remainder)
	newData = ompPsrs.run(newData, newSize);
	//std::sort(newData, newData + newSize);
	/*
	ERO{
		data = (double*)malloc(4* totalSize);
		double* ff = (double*)malloc(4* totalSize);
		//double* ff = (double*)malloc(4* totalSize);
		if (data == NULL || ff == NULL) printf("final data malloc failed\n");
	}
	int* sendNum = (int*)malloc(sizeof(int)*(commSize));
	int* sendStride = (int*)malloc(sizeof(int)*(commSize));

	MPI_Gather(&newSize, 1, MPI_INT, sendNum, 1, MPI_INT, 0, MPI_COMM_WORLD);
	sendStride[0] = 0;
	for (int i = 1;i < commSize;++i) sendStride[i] = sendStride[i - 1] + sendNum[i - 1];

	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Gatherv(newData, newSize, MPI_DOUBLE, data, sendNum, sendStride, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	MPI_Barrier(MPI_COMM_WORLD);*/
	
}

bool sortCheck() {
	for (int i = 0;i < totalSize - 1;++i) if (data[i] > data[i + 1]) return false;
	return true;
}

int readSubData() {
	//Rank r reads its block of raw binary doubles from ../../test4/<r+1>.txt
	char filename[64];
	snprintf(filename, sizeof(filename), "../../test4/%d.txt", myRank + 1);

	subData = (double*)malloc(todo*sizeof(double));
	if (subData == NULL) printf("malloc failed\n");

	int handle = open(filename, O_RDONLY);
	if (handle == -1)
	{
		printf("Cannot open file %s\n", filename);
		return -1;
	}

	//read() may return short counts on requests this large, so loop until done
	ssize_t want = (ssize_t)todo * sizeof(double);
	ssize_t done = 0;
	while (done < want) {
		ssize_t got = read(handle, (char*)subData + done, want - done);
		if (got <= 0) { close(handle); return 0; }
		done += got;
	}
	close(handle);
	return 1;
}

int main(int argc, char* argv[]) {
	time_t sm, sps, pd, gs, fs;		/* per-phase timestamps */
	time_t ts, te;					/* overall start / end  */
	/* Start up MPI */
	MPI_Init(&argc, &argv);

	/* Get the number of processes */
	MPI_Comm_size(MPI_COMM_WORLD, &commSize);

	/* Get my rank among all the processes */
	MPI_Comm_rank(MPI_COMM_WORLD, &myRank);

	totalSize = 1024ll*1024*1024*10;	/* 10 Gi doubles in total */
	todo = 167772160;					/* totalSize / 64 ranks = 160 Mi doubles per rank */

	ERO{
		samples = (double*)malloc(sizeof(double)*commSize*commSize);
	}

	//makeData();
	readSubData();				//read this rank's block of input data

	ERO printf("load complete\n");
	ERO ts = time(NULL);
	time_t subdataTime = time(NULL);

	selectSamples();			//local sort and sampling
	ERO{
		sm = time(NULL);
		printf("selectSamples time: %lds\n", (sm - subdataTime));
		sm = time(NULL);
	}
	MPI_Barrier(MPI_COMM_WORLD);

	selectPrimarySamples();		//pivot selection
	ERO{
		sps = time(NULL);
		printf("selectPrimarySamples time: %lds\n", (sps - sm));
		sps = time(NULL);
	}

	primaryDivide();			//partition by pivots
	ERO{
		pd = time(NULL);
		printf("primaryDivide time: %lds\n", (pd - sps));
		pd = time(NULL);
	}

	globalSwap();				//global exchange
	ERO{
		gs = time(NULL);
		printf("globalSwap time: %lds\n", (gs - pd));
		gs = time(NULL);
	}

	finalSort();				//final sort of the received chunk
	ERO{
		fs = time(NULL);
		printf("finalSort time: %lds\n", (fs - gs));
	}

	MPI_Barrier(MPI_COMM_WORLD);
	ERO te = time(NULL);
	//total elapsed time
	ERO printf("time : %lds\n", te - ts);
	if (!subSortCheck()) printf("rank %d: subSort failed\n", myRank);
	free(newData);
	ERO{
		//free(data);
	}

	MPI_Finalize();

	return 0;
}
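The program expects one input file of raw binary doubles per rank (../../test4/1.txt, ../../test4/2.txt, ...), and totalSize / todo implies 64 ranks. Assuming an MPI C++ compiler wrapper with OpenMP support and the usual launcher (exact names depend on the MPI installation), something like mpicxx -O2 -fopenmp psrs.cpp -o psrs followed by mpirun -np 64 ./psrs should build and run it; the source file name here is arbitrary.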