禁止不加改动,不理解原理用于课设。
原理:
// parallel_prefix.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//
#include "pch.h"

#include <chrono>
#include <climits>
#include <cstddef>
#include <iostream>
#include <random>
#include <string>
#include <thread>
#include <vector>

#include "windows.h"
/**
 * Inclusive prefix sums of a 1-based sequence, computed with the two-phase
 * tree algorithm: a pairwise up-sweep builds block sums, then a down-sweep
 * turns them into prefix sums.
 *
 * Conventions and requirements:
 *  - Slot 0 of the input vector is a placeholder and is ignored; the real
 *    elements are p[1] .. p[p.size() - 1].
 *  - The object stores a REFERENCE to the caller's vector. The vector must
 *    outlive this object and must not be resized after construction.
 *  - T must be default-constructible, copy-assignable and provide operator+.
 *    Operands are always combined left-to-right, so operator+ only has to be
 *    associative (e.g. std::string concatenation works).
 *  - Worker threads write distinct elements of std::vector<T>; do not use
 *    T = bool (std::vector<bool> packs several elements into one word).
 *
 * Storage: level l keeps (n >> l) + 1 slots, so both tables together need
 * roughly 4n elements instead of a full-size row per level.
 */
template <typename T>
class parallel_prefix
{
private:
    int heigh;                // index of the topmost tree level: ceil(log2(a.size()))
    const std::vector<T> &a;  // caller-owned input, slot 0 unused
    // b[l][j]: sum of the j-th block of 2^l consecutive inputs (up-sweep result).
    // c[l][j]: sum of the first j blocks of level l (down-sweep result);
    //          c[0][j] is therefore the j-th prefix sum.
    std::vector<std::vector<T>> b, c;

    // Number of valid (1-based) entries stored on tree level `level`.
    std::size_t level_length(int level) const;

    // Runs fn(first, last) over the inclusive index range [1, count], split
    // into `thread_num` contiguous chunks, and returns only when all chunks
    // are finished.
    template <typename Fn>
    static void run_partitioned(std::size_t count, int thread_num, Fn fn);

    void serial_up_deduction();
    void serial_down_deduction();
    void parallel_up_deduction(const int thread_num);
    void parallel_down_deduction(const int thread_num);

public:
    parallel_prefix(const std::vector<T> &p);
    void serial_calculate();
    void parallel_calculate(const int thread_num);
    // NOTE: the misspelled name is kept on purpose; callers use it.
    void print_prifex_sum() const;
    ~parallel_prefix() = default;
};

/**
 * Binds the input and allocates the two tree tables.
 *
 * @param p input sequence; p[0] is ignored. An empty vector is accepted and
 *          treated as a sequence with no elements.
 */
template <typename T>
parallel_prefix<T>::parallel_prefix(const std::vector<T> &p) : heigh(0), a(p)
{
    // Smallest h with 2^h >= p.size(), i.e. ceil(log2(p.size())), computed in
    // integers so that sizes 0 and 1 are well defined and no <cmath> is needed.
    for (std::size_t span = 1; span < p.size(); span *= 2)
        ++heigh;

    const std::size_t element_count = p.empty() ? 0 : p.size() - 1;
    b.resize(heigh + 1); // levels 0 .. heigh
    c.resize(heigh + 1);
    for (int level = 0; level <= heigh; ++level)
    {
        // Level `level` holds one entry per complete block of 2^level inputs,
        // plus the unused slot 0 that keeps the indexing 1-based.
        const std::size_t slots = (element_count >> level) + 1;
        b[level].resize(slots);
        c[level].resize(slots);
    }
}

template <typename T>
std::size_t parallel_prefix<T>::level_length(int level) const
{
    return b[level].size() - 1; // every level owns at least slot 0
}

/**
 * Splits [1, count] into contiguous chunks and processes them concurrently.
 *
 * The first thread_num - 1 chunks hold count / thread_num indices each and
 * run on freshly spawned threads; the last chunk (which also takes the
 * remainder) runs on the calling thread. No thread is spawned when
 * thread_num <= 1 or when there are fewer indices than threads.
 * All spawned threads are joined before this function returns or throws,
 * which gives the caller a barrier between consecutive tree levels.
 */
template <typename T>
template <typename Fn>
void parallel_prefix<T>::run_partitioned(std::size_t count, int thread_num, Fn fn)
{
    if (count == 0)
        return;

    // Joins every started thread on scope exit, including exceptional exit,
    // so a joinable std::thread is never destroyed.
    struct joiner
    {
        std::vector<std::thread> &pool;
        explicit joiner(std::vector<std::thread> &p) : pool(p) {}
        ~joiner()
        {
            for (auto &worker : pool)
                if (worker.joinable())
                    worker.join();
        }
    };

    // Non-positive thread counts are treated as "run on the caller only".
    const std::size_t workers = thread_num > 1 ? static_cast<std::size_t>(thread_num) : 1;
    const std::size_t chunk = count / workers;

    std::vector<std::thread> pool;
    joiner guard(pool);

    std::size_t first = 1;
    if (chunk > 0)
    {
        pool.reserve(workers - 1);
        for (std::size_t w = 1; w < workers; ++w)
        {
            pool.emplace_back(fn, first, first + chunk - 1);
            first += chunk;
        }
    }
    fn(first, count); // last chunk plus remainder, on the calling thread
}

// Single-threaded computation of all prefix sums.
template <typename T>
void parallel_prefix<T>::serial_calculate()
{
    serial_up_deduction();
    serial_down_deduction();
}

// Up-sweep: copy the input into level 0, then build each level from the
// pairwise sums of the level below.
template <typename T>
void parallel_prefix<T>::serial_up_deduction()
{
    const std::size_t leaves = level_length(0);
    for (std::size_t j = 1; j <= leaves; ++j)
        b[0][j] = a[j];

    for (int level = 1; level <= heigh; ++level)
    {
        const std::size_t length = level_length(level);
        for (std::size_t j = 1; j <= length; ++j)
            b[level][j] = b[level - 1][2 * j - 1] + b[level - 1][2 * j]; // left + right
    }
}

// Down-sweep: derive the prefix sums of each level from the level above.
template <typename T>
void parallel_prefix<T>::serial_down_deduction()
{
    for (int level = heigh; level >= 0; --level)
    {
        // The topmost level is always empty (2^heigh exceeds the element
        // count), so c[level + 1] is only read where it exists.
        const std::size_t length = level_length(level);
        for (std::size_t j = 1; j <= length; ++j)
        {
            if (j == 1)
                c[level][j] = b[level][j];
            else if (j % 2 == 0)
                c[level][j] = c[level + 1][j / 2]; // ends exactly on a parent block
            else
                c[level][j] = c[level + 1][j / 2] + b[level][j];
        }
    }
}

/**
 * Multi-threaded computation of all prefix sums.
 *
 * @param thread_num number of threads working concurrently on each tree
 *                   level (the calling thread counts as one of them);
 *                   values <= 1 run everything on the calling thread.
 */
template <typename T>
void parallel_prefix<T>::parallel_calculate(const int thread_num)
{
    parallel_up_deduction(thread_num);
    parallel_down_deduction(thread_num);
}

// Parallel up-sweep. Each level is finished (all threads joined) before the
// next one starts, because level l reads what level l - 1 wrote.
template <typename T>
void parallel_prefix<T>::parallel_up_deduction(const int thread_num)
{
    run_partitioned(level_length(0), thread_num,
                    [this](std::size_t first, std::size_t last) {
                        for (std::size_t j = first; j <= last; ++j)
                            b[0][j] = a[j];
                    });

    for (int level = 1; level <= heigh; ++level)
    {
        run_partitioned(level_length(level), thread_num,
                        [this, level](std::size_t first, std::size_t last) {
                            for (std::size_t k = first; k <= last; ++k)
                                b[level][k] = b[level - 1][2 * k - 1] + b[level - 1][2 * k];
                        });
    }
}

// Parallel down-sweep. Each level is finished before the one below starts,
// because level l reads c[l + 1].
template <typename T>
void parallel_prefix<T>::parallel_down_deduction(const int thread_num)
{
    for (int level = heigh; level >= 0; --level)
    {
        run_partitioned(level_length(level), thread_num,
                        [this, level](std::size_t first, std::size_t last) {
                            for (std::size_t k = first; k <= last; ++k)
                            {
                                if (k == 1)
                                    c[level][k] = b[level][k];
                                else if (k % 2 == 0)
                                    c[level][k] = c[level + 1][k / 2];
                                else
                                    c[level][k] = c[level + 1][k / 2] + b[level][k];
                            }
                        });
    }
}

// Prints the prefix sums (slots 1 .. n of level 0), each followed by a
// space, then a newline. Prints only the newline for an empty sequence.
template <typename T>
void parallel_prefix<T>::print_prifex_sum() const
{
    const std::vector<T> &sums = c[0];
    for (std::size_t j = 1; j < sums.size(); ++j)
        std::cout << sums[j] << ' ';
    std::cout << std::endl;
}
/**
 * Fills a vector with uniformly distributed random doubles using the
 * 1-based layout expected by parallel_prefix.
 *
 * @param p       vector to fill; resized to p_size + 1. Slot 0 is not
 *                written by this function.
 * @param p_size  number of random values to generate (slots 1 .. p_size).
 */
void random_vector(std::vector<double> &p, const int p_size)
{
    p.resize(p_size + 1);

    // Mersenne Twister seeded once from the system's random device.
    std::random_device entropy;
    std::mt19937 engine(entropy());
    // Default-constructed distribution draws from [0, 1).
    std::uniform_real_distribution<double> unit_interval;

    int slot = 0;
    while (++slot <= p_size)
    {
        p[slot] = unit_interval(engine);
    }
}
/**
 * Demo and benchmark driver.
 *
 * 1. Prints two small integer data sets and their prefix sums computed
 *    serially, with 8 threads and with 1 thread.
 * 2. Times the serial pass, the parallel pass for every thread count from 1
 *    up to the number of hardware threads, and a plain O(n) running sum on
 *    c_size random doubles, reporting the fastest tree-based configuration.
 *
 * All timings are wall-clock milliseconds from std::chrono::steady_clock.
 */
int main()
{
    using wall_clock = std::chrono::steady_clock;
    // Whole milliseconds elapsed since `since`.
    const auto elapsed_ms = [](wall_clock::time_point since) -> long long {
        return std::chrono::duration_cast<std::chrono::milliseconds>(wall_clock::now() - since).count();
    };

    // Fastest configuration seen so far.
    struct
    {
        std::string excute_type;
        int thread_num;
        long long spend_time;
    } mini;
    mini.spend_time = LLONG_MAX;
    mini.thread_num = 0;

    const int c_size = 20000000;
    std::vector<double> c;
    std::vector<double> c_sum;
    const std::vector<int> a = {0, 7, 3, 15, 10, 13, 18, 6, 4}; // slot 0 is ignored
    const std::vector<int> b = {0, 7, 3, 15, 10, 13, 18};       // shorter prefix of the same data

    std::cout << " 所测数据:" << std::endl;
    std::cout << " 2的整数倍:" << std::endl;
    for (auto it = a.cbegin() + 1; it != a.cend(); ++it)
        std::cout << *it << ' ';
    std::cout << std::endl;
    std::cout << "非2的整数倍 :" << std::endl;
    for (auto it = b.cbegin() + 1; it != b.cend(); ++it) // list b here, not a second copy of a
        std::cout << *it << ' ';
    std::cout << std::endl;

    // Every strategy gets fresh objects, so a parallel run cannot simply
    // print results left behind by an earlier serial run.
    std::cout << "串行 :" << std::endl;
    {
        parallel_prefix<int> A(a);
        parallel_prefix<int> B(b);
        A.serial_calculate();
        A.print_prifex_sum();
        B.serial_calculate();
        B.print_prifex_sum();
    }
    std::cout << "并行(8线程) :" << std::endl;
    {
        parallel_prefix<int> A(a);
        parallel_prefix<int> B(b);
        A.parallel_calculate(8);
        A.print_prifex_sum();
        B.parallel_calculate(8);
        B.print_prifex_sum();
    }
    std::cout << "并行(1线程) :" << std::endl;
    {
        parallel_prefix<int> A(a);
        parallel_prefix<int> B(b);
        A.parallel_calculate(1);
        A.print_prifex_sum();
        B.parallel_calculate(1);
        B.print_prifex_sum();
    }

    random_vector(c, c_size);
    // c has c_size + 1 slots (slot 0 unused); the baseline below writes one
    // result per slot, so c_sum must be exactly as long as c.
    c_sum.resize(c.size());
    std::cout << " 测试模块:" << std::endl;
    std::cout << "所测数据(double)大小:" << c_size << std::endl;
    parallel_prefix<double> C(c);

    std::cout << "串行用时(ms):";
    wall_clock::time_point started = wall_clock::now();
    C.serial_calculate();
    long long spent = elapsed_ms(started);
    std::cout << spent << std::endl;
    if (spent < mini.spend_time)
    {
        mini.excute_type = "串行";
        mini.spend_time = spent;
        mini.thread_num = 0;
    }

    std::cout << "并行用时(ms):" << std::endl;
    // hardware_concurrency() may return 0 when the value is not computable.
    int max_thread_num = static_cast<int>(std::thread::hardware_concurrency());
    if (max_thread_num < 1)
        max_thread_num = 1;
    for (int i = 1; i <= max_thread_num; ++i)
    {
        std::cout << "线程数为:" << i << "用时为:";
        started = wall_clock::now();
        C.parallel_calculate(i);
        spent = elapsed_ms(started);
        std::cout << spent << std::endl;
        if (spent < mini.spend_time)
        {
            mini.excute_type = "并行";
            mini.spend_time = spent;
            mini.thread_num = i;
        }
    }
    std::cout << "最少用时(ms):" << mini.spend_time << std::endl;
    std::cout << "此时线程数为:" << mini.thread_num << std::endl;
    std::cout << "此时采用计算策略为:" << mini.excute_type << std::endl;

    // Baseline: straightforward sequential running sum.
    std::cout << "O(n)算法用时(ms):";
    started = wall_clock::now();
    if (!c.empty())
    {
        c_sum[0] = c[0];
        for (std::size_t k = 1; k < c.size(); ++k)
            c_sum[k] = c_sum[k - 1] + c[k];
    }
    spent = elapsed_ms(started);
    std::cout << spent << std::endl;
}
// 运行程序: Ctrl + F5 或调试 >“开始执行(不调试)”菜单
// 调试程序: F5 或调试 >“开始调试”菜单
// 入门提示:
// 1. 使用解决方案资源管理器窗口添加/管理文件
// 2. 使用团队资源管理器窗口连接到源代码管理
// 3. 使用输出窗口查看生成输出和其他消息
// 4. 使用错误列表窗口查看错误
// 5. 转到“项目”>“添加新项”以创建新的代码文件,或转到“项目”>“添加现有项”以将现有代码文件添加到项目
// 6. 将来,若要再次打开此项目,请转到“文件”>“打开”>“项目”并选择 .sln 文件