使用MPI 实现奇偶排序

最新推荐文章于 2024-06-26 21:53:02 发布

神里綾華

最新推荐文章于 2024-06-26 21:53:02 发布

阅读量766

点赞数 36

分类专栏： # 并行程序设计文章标签：算法 c++ 笔记

本文链接：https://blog.csdn.net/qq_42426268/article/details/139791657

版权

并行程序设计专栏收录该内容

4 篇文章 0 订阅

订阅专栏

使用MPI 实现奇偶排序

0号进程获得待排序序列并输出排序好的序列

使用文件进行输入输出

进行性能测试与对比

代码

奇偶排序

头文件引入

#include <iostream>
#include <algorithm>
#include <mpi.h>
#include <fstream>
#include <chrono>

定义规模

#define N 100000000
int I[N];

计算partner

int Compute_partner(int phase, int my_rank, int comm_sz) {
	int partner;
	if (phase % 2 == 0) {  // 偶数阶段
		if (my_rank % 2 != 0) {
			partner = my_rank - 1;
		}
		else {
			partner = my_rank + 1;
		}
	}
	else {  // 奇数阶段
		if (my_rank % 2 != 0) {
			partner = my_rank + 1;
		}
		else {
			partner = my_rank - 1;
		}
	}
	if (partner < 0 || partner >= comm_sz) {
		partner = -1;  // No valid partner
	}
	return partner;
}

保留小一半数据

void Keep_smaller_keys(int* A, int* B, int n) {
	int* tmp = new int[n];
	int a = 0, b = 0, t = 0;
	while (t < n) {
		if (A[a] < B[b]) {
			tmp[t] = A[a];
			t++;
			a++;
		}
		else {
			tmp[t] = B[b];
			t++;
			b++;
		}
	}
	for (int i = 0; i < n; i++) {
		A[i] = tmp[i];
	}
}

保留大一半数据

void Keep_larger_keys(int* A, int* B, int n) {
	int* tmp = new int[n];
	int a = n - 1, b = n - 1, t = n - 1;
	while (t >= 0) {
		if (A[a] < B[b]) {
			tmp[t] = B[b];
			t--;
			b--;
		}
		else {
			tmp[t] = A[a];
			t--;
			a--;
		}
	}
	for (int i = 0; i < n; i++) {
		A[i] = tmp[i];
	}
}

主函数

int main() {
	int* A, * B;
	int comm_sz, my_rank;
	int n;
	MPI_Init(NULL, NULL);
	auto start = std::chrono::high_resolution_clock::now();	
	auto finish = std::chrono::high_resolution_clock::now();
	MPI_Comm_size(MPI_COMM_WORLD, &comm_sz);
	MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
	n = N / comm_sz;
	A = new int[n];
	B = new int[n];
	
  //文件读入
	if (my_rank == 0) {
		ifstream input("input.txt");
		for (int i = 0; i < N && input >> I[i]; i++);
		input.close();
		start = std::chrono::high_resolution_clock::now();
	}

	MPI_Scatter(I, n, MPI_INT, A, n, MPI_INT, 0, MPI_COMM_WORLD);

	sort(A, A + n);
	for (int phase = 0; phase < comm_sz; phase++) {
		int partner = Compute_partner(phase, my_rank, comm_sz);

		if (partner != -1) {
			MPI_Sendrecv(A, n, MPI_INT, partner, 0, B, n, MPI_INT, partner, 0, MPI_COMM_WORLD, MPI_STATUSES_IGNORE);
			if (my_rank < partner) {
				Keep_smaller_keys(A, B, n);
			}
			else {
				Keep_larger_keys(A, B, n);
			}
		}
	}

  MPI_Gather(A, n, MPI_INT, I, n, MPI_INT, 0, MPI_COMM_WORLD);
  
  //文件输出
	if (my_rank == 0) {
		finish = std::chrono::high_resolution_clock::now();
		std::chrono::duration<double> elapsed = finish - start;
		std::cout << "Elapsed time: " << elapsed.count() << " s\n" << std::endl;
		ofstream output("output.txt");
		for (int elem : I) {
			output << elem << " ";
		}
		output << endl;
		output.close();
	}

	MPI_Finalize();
	return 0;
}

随机数序列生成

使用python生成一个包含100000000个数字的文本文件

import random

def generate_random_numbers(count):
    # Generate random numbers and write to a file
    with open(f'input{count}.txt', 'w') as file:
        for _ in range(count):
            number = random.randint(1, 10*count)  # Random numbers between 1 and 100
            file.write(f"{number}\n")

if __name__ == "__main__":
		generate_random_numbers(pow(10,8))

测试代码

#define _CRT_SECURE_NO_WARNINGS

#include<iostream>
#include<fstream>
#include<algorithm>
#include<Windows.h>
#include<chrono>

using namespace std;

#define N 100000000
int I1[N], I2[N];

int main() {
    ifstream input1("C:\\Users\\Xavier\\Documents\\并行\\Project2\\Project2\\input.txt");
    for (int i = 0; i < N && input1 >> I1[i]; i++);
    input1.close();
    ifstream input2("C:\\Users\\Xavier\\Documents\\并行\\Project2\\Project2\\output.txt");
    for (int i = 0; i < N && input2 >> I2[i]; i++);
    input2.close();

    auto start = std::chrono::high_resolution_clock::now();
    sort(I1, I1 + N);
    auto finish = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed = finish - start;
    std::cout << "Elapsed time: " << elapsed.count() << " s\n" << std::endl;
    Sleep(5000);

    bool is_success = true;
    for (int i = 0; i < N; i++) {
        if (I1[i] == I2[i]) {
            if (i % (N/10) == 0) {
                // Update progress bar in the first line
                HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
                COORD pos = { 0, 0 }; // Go back to the start of the first line
                SetConsoleCursorPosition(hConsole, pos);
                printf("测试中[%.2lf%%]: ", i * 100.0 / N);
                int show_num = (i + 1) * 20 / N;
                for (int j = 0; j < show_num; j++) {
                    cout << "=";
                }
                for (int j = show_num; j < 20; j++) {
                    cout << " ";
                }
                cout << "|" << endl;

                cout << i + 1 << "st Number Pass" << endl;
                Sleep(100); // Adjust speed of the update
            }
        }
        else {
            is_success = false;
            cout << "No " << i + 1 << " failed" << endl;
            cout << I1[i] << " " << I2[i] << endl;
            break;
        }
    }

    if (is_success) {
        cout << "Success" << endl;
    }
    else {
        cout << "Test failed." << endl;
    }
    return 0;
}

可视化代码

利用matplot进行可视化

import matplotlib.pyplot as plt
import json

with open('test.json') as f:
    data = json.load(f)

# Extracting data for plotting
powers = [item['pow'] for item in data]
oddeven_values = [item['oddeven'] * 1000 for item in data]  # Convert to milliseconds
std_values = [item['std'] * 1000 for item in data]         # Convert to milliseconds

# Number of groups
n_groups = len(powers)

# Create plot
fig, ax = plt.subplots()
index = range(n_groups)
bar_width = 0.35
opacity = 0.8

rects1 = ax.bar(index, oddeven_values, bar_width, alpha=opacity, color='b', label='OddEven (ms)')

rects2 = ax.bar([p + bar_width for p in index], std_values, bar_width, alpha=opacity, color='r', label='Std (ms)')

ax.set_xlabel('Log(Number of elements)')
ax.set_ylabel('Time (ms)')
ax.set_title('Execution time comparison')
ax.set_yscale('log')  # Set logarithmic scale
ax.set_xticks([p + bar_width / 2 for p in index])
ax.set_xticklabels(powers)
ax.legend()

plt.tight_layout()
plt.show()

结果

test

在这里插入图片描述

因为我的电脑的CPU是I7-12700H，只有20个物理核心，所以在执行MPI并行程序时将线程设置为16。将16线程奇偶排序与串行Std::Sort在不同数据规模下用相同数据集进行测试，得到的结果如下（具体的数值记录在test.json文件中）：
在这里插入图片描述

对于较小的数据集（小于等于 $10^4$ 个数字），并行奇偶排序的执行效率不如Std::Sort，但是随着数据规模增加，并行奇偶排序的优势开始体现，对于超过 $10^5$ 个数字的数据集，并行奇偶排序的效率明显更高，在数据规模来到 $10^8$ 时，16线程并行奇偶排序的用时是5.43495s，而串行Std::Sort的用时是42.4069s。随着规模增大，并行奇偶排序的优势将不断增加，并行特性使其能够更高效地处理较大的数据集。

遇到的问题及解决

C2131表达式的计算结果不是常数

vs2022禁止如下语句：

int a[n];	//n为变量

我们可以使用动态内存分配解决：

int *a=new int[n];

使用普通计时器精度过低

在数据规模较小时使用普通计时器无法记录程序运行时间，采用高精度计时器：

#include <chrono>
auto start = std::chrono::high_resolution_clock::now();
auto finish = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed = finish - start;
std::cout << "Elapsed time: " << elapsed.count() << " s\n"<<std::endl;

有未经处理的异常: 0xC00000FD: Stack overflow

爆栈了，可以在vs2022中手动设置堆栈保留大小或者使用动态内存分配

C2148: 数组的总大小不得超过0x7fffffff字节

在测试 $10^9$ 数据时报错，因此最终仅对 $10^8$ 规模的数据做测试

神里綾華

关注

36
点赞
踩
17

收藏

觉得还不错? 一键收藏
0
评论
使用MPI 实现奇偶排序

定义规模计算partner保留小一半数据保留大一半数据主函数随机数序列生成使用python生成一个包含100000000个数字的文本文件测试代码可视化代码利用matplot进行可视化结果因为我的电脑的CPU是I7-12700H，只有20个物理核心，所以在执行MPI并行程序时将线程设置为16。将16线程奇偶排序与串行Std::Sort在不同数据规模下用相同数据集进行测试，得到的结果如下（具体的数值记录在test.json文件中）：对于较小的数据集（小于等于10410^410
复制链接

扫一扫