One feature of the C++ Standard Library that helps here is std::thread::hardware_concurrency(). This function returns an indication of the number of threads that can
truly run concurrently for a given execution of a program. On a multicore system it might be the number of CPU cores, for example. This is only a hint, and the function might return 0 if this information is not available, but it can be a useful guide for splitting a task among threads.
std::thread::hardware_concurrency() 返回程序在本次运行中能够真正并发执行的线程数量;在多核系统上,它可能就是 CPU 的核数。这个返回值只是一个提示:当该信息不可用时函数可能返回 0,但它仍然可以作为在线程之间划分任务的参考。
摘抄一段原书中的代码,稍做修改:
// Callable that folds a range into an accumulator passed by reference.
template<typename Iterator, typename T>
struct accumulate_block
{
    // Adds every element of [first, last) onto the value currently held in
    // `result` and writes the total back through the reference. An empty
    // range leaves `result` holding its original value.
    void operator()(Iterator first, Iterator last, T& result)
    {
        T const seed = result;
        result = std::accumulate(first, last, seed);
    }
};
// Sums the elements of [first, last) onto `init`, splitting the range into
// contiguous blocks that are accumulated concurrently.
//
// Thread count: the smaller of std::thread::hardware_concurrency() (2 is used
// when it reports 0) and ceil(length / 25). One block is summed on the calling
// thread; the others run on std::thread workers. Both `hardware_threads` and
// `num_threads` are printed to std::cout as diagnostics.
//
// Requirements that follow from the code below: T must be default-constructible
// and T() must behave as the identity for accumulation, because every block is
// summed starting from a value-initialized slot of `results` and `init` is only
// folded in at the very end. The blocks are combined in a different grouping
// than a sequential std::accumulate would use.
//
// Returns `init` unchanged for an empty range.
//
// Exception safety: if starting a worker throws, or the block summed on the
// calling thread throws, the already-running workers are joined before the
// exception leaves this function. Without that, the joinable std::thread
// objects would be destroyed and std::terminate would be called.
template<typename Iterator, typename T>
T parallel_accumulate(Iterator first, Iterator last, T init)
{
    // Joins every still-joinable worker when the scope is left, on both the
    // normal and the exceptional path.
    struct join_guard
    {
        std::vector<std::thread>& workers;
        ~join_guard()
        {
            for (std::thread& worker : workers)
                if (worker.joinable())
                    worker.join();
        }
    };

    unsigned long const length = static_cast<unsigned long>(std::distance(first, last));
    if (!length)
        return init;

    unsigned long const min_per_thread = 25;
    unsigned long const max_threads = (length + min_per_thread - 1) / min_per_thread;
    unsigned long const hardware_threads = std::thread::hardware_concurrency();
    std::cout << "hardware_threads:" << hardware_threads << std::endl;

    // hardware_concurrency() may report 0; fall back to two threads then.
    unsigned long const fallback_threads = 2;
    unsigned long const num_threads = std::min(hardware_threads != 0 ? hardware_threads : fallback_threads, max_threads);
    std::cout << "num_threads:" << num_threads << std::endl;

    unsigned long const block_size = length / num_threads;

    // Declaration order matters: `joiner` is destroyed first (joining the
    // workers), then `threads`, then `results`, so no worker can outlive the
    // result slot it writes to.
    std::vector<T> results(num_threads);
    std::vector<std::thread> threads(num_threads - 1);
    join_guard const joiner{threads};

    Iterator block_start = first;
    for (unsigned long i = 0; i < (num_threads - 1); ++i)
    {
        Iterator block_end = block_start;
        std::advance(block_end, block_size);
        threads[i] = std::thread(accumulate_block<Iterator, T>(), block_start, block_end, std::ref(results[i]));
        block_start = block_end;
    }

    // The calling thread handles the final block, which also absorbs the
    // remainder of length / num_threads.
    accumulate_block<Iterator, T>()(block_start, last, results[num_threads - 1]);

    // All partial sums must be complete before they are read.
    for (std::thread& worker : threads)
        worker.join();

    return std::accumulate(results.begin(), results.end(), init);
}
// Demo driver: sums 51 ones with parallel_accumulate, prints the total, then
// runs the shell command "pause" so the console stays open.
void call_by_main()
{
    std::vector<int> ones(51, 1);
    int const total = parallel_accumulate(ones.begin(), ones.end(), 0);
    std::cout << total << std::endl;
    system("pause");
}
在我本机上执行的结果是:
hardware_threads:4
num_threads:3
51
请按任意键继续. . .
上面这段代码的功能是:使用多个线程,求迭代器指定范围内所有元素的和。