1、获取硬件支持的并发线程数(通常等于CPU逻辑核个数;无法获取时返回0)
// Concurrent-thread count reported by the implementation; the demo below treats 0 as "unknown".
const unsigned long hardware_threads(std::thread::hardware_concurrency());
2、demo示例
通过多线程并行处理,对一个数组中的元素进行累加计算
#include <algorithm>
#include <exception>
#include <functional>
#include <iostream>
#include <iterator>
#include <numeric>
#include <thread>
#include <vector>
/// Worker functor: folds the half-open range [first, last) into `result`.
///
/// The value already stored in `result` is used as the initial value for
/// std::accumulate, and the sum is written back to `result`.
/// The call operator is const because the functor carries no state.
template<typename Iterator, typename T>
struct accumulate_block {
    void operator()(Iterator first, Iterator last, T &result) const {
        result = std::accumulate(first, last, result);
    }
};

/// Sums the range [first, last) on several threads and adds `init`.
///
/// The range is split into `num_threads` contiguous blocks. The first
/// `num_threads - 1` blocks run on freshly started std::thread objects and the
/// final block (which also absorbs the division remainder) runs on the calling
/// thread. Each partial sum starts from a value-initialized `T`.
///
/// Side effect: prints "cpu num: <n>" to std::cout.
///
/// @param first  start of the range
/// @param last   one past the end of the range
/// @param init   value added to the combined partial sums
/// @return `init` for an empty range, otherwise `init` plus every partial sum
/// @throws whatever the element addition throws; an exception raised on a
///         worker thread is captured and rethrown on the calling thread after
///         all workers have been joined.
template<typename Iterator, typename T>
T parallel_accumulate(Iterator first, Iterator last, T init) {
    // Number of elements between first and last.
    unsigned long const length = static_cast<unsigned long>(std::distance(first, last));
    if (!length) {
        return init;
    }
    // Every thread should handle at least 25 elements.
    unsigned long const min_per_thread = 25;
    // Upper bound on the thread count when the hardware is ignored.
    unsigned long const max_threads = (length + min_per_thread - 1) / min_per_thread;
    // Concurrency reported by the implementation (0 when it cannot be determined).
    unsigned long const hardware_threads = std::thread::hardware_concurrency();
    std::cout << "cpu num: " << hardware_threads << std::endl;
    // Take the smaller of the two limits; fall back to 2 when the hardware value is unknown.
    unsigned long const num_threads =
        std::min(hardware_threads != 0 ? hardware_threads : 2UL, max_threads);
    // Elements per worker block; the last block also takes the remainder.
    unsigned long const block_size = length / num_threads;

    // One partial result per block and one exception slot per worker thread.
    // Both are sized up front so the references handed to the workers stay valid.
    std::vector<T> results(num_threads);
    std::vector<std::exception_ptr> errors(num_threads - 1);
    std::vector<std::thread> threads;
    threads.reserve(num_threads - 1);

    // Joins every started thread on scope exit. Destroying a joinable
    // std::thread calls std::terminate, so this must also run when thread
    // creation or the calling thread's own block throws.
    struct join_guard {
        std::vector<std::thread> &pool;
        ~join_guard() {
            for (std::thread &worker : pool) {
                if (worker.joinable()) {
                    worker.join();
                }
            }
        }
    };

    {
        join_guard guard{threads};
        Iterator block_start = first;
        for (unsigned long i = 0; i < (num_threads - 1); ++i) {
            Iterator block_end = block_start;
            std::advance(block_end, block_size);
            threads.emplace_back(
                [block_start, block_end](T &result, std::exception_ptr &error) {
                    // An exception leaving a thread function terminates the
                    // program, so store it for the caller instead.
                    try {
                        accumulate_block<Iterator, T>()(block_start, block_end, result);
                    } catch (...) {
                        error = std::current_exception();
                    }
                },
                std::ref(results[i]), std::ref(errors[i]));
            block_start = block_end;
        }
        // The calling thread processes the final block itself.
        accumulate_block<Iterator, T>()(block_start, last, results[num_threads - 1]);
    }  // guard joins all workers here

    // Surface the first worker failure, if any, now that no thread is running.
    for (std::exception_ptr const &error : errors) {
        if (error) {
            std::rethrow_exception(error);
        }
    }
    // Combine the partial sums with the caller-supplied initial value.
    return std::accumulate(results.begin(), results.end(), init);
}
// Demo entry point: sums a small fixed vector with parallel_accumulate and prints the total.
int main() {
    std::vector<int> numbers{10, 29, 10, 10, 30, 20};
    auto const total = parallel_accumulate(numbers.begin(), numbers.end(), 0);
    std::cout << total << std::endl;
    return 0;
}
运行结果:
cpu num: 8
109