面试官：请写出一个函数，用于对 vector<int> 求和——一道题看出你的 C++ 功底

GTO灬征服_

已于 2023-03-28 22:57:36 修改

阅读量336

点赞数 1

文章标签： c++ 算法数据结构

于 2023-03-28 22:55:18 首次发布

本文链接：https://blog.csdn.net/a2476100824/article/details/129827235

版权

面试官：请写出一个函数，用于对 vector<int> 求和。

我：这不是有手就行？代码如下

// Version 1
/// Sums every element of `v` using a plain index loop.
///
/// @param v  Values to add up. Taken by const reference: no copy is made, and
///           const vectors and temporaries are accepted as well.
/// @return   The sum of all elements, or 0 when `v` is empty. The total is
///           accumulated in an `int`; the caller must make sure it fits.
int accumulate(const std::vector<int>& v)
{
	int sum = 0;
	// The index has the same unsigned type as v.size(), which avoids the
	// signed/unsigned comparison and an `int` index overflowing on huge inputs.
	for (std::size_t i = 0; i < v.size(); i++)
		sum += v[i];
	return sum;
}

// Version 2
// Same job as the index-loop version, written with the C++11 range-based for.
/// Sums every element of `v`.
///
/// @param v  Values to add up. Taken by const reference: no copy is made, and
///           const vectors and temporaries are accepted as well.
/// @return   The sum of all elements, or 0 when `v` is empty. The total is
///           accumulated in an `int`; the caller must make sure it fits.
int accumulate(const std::vector<int>& v)
{
	int sum = 0;
	for (const int value : v)
		sum += value;
	return sum;
}

面试官：如果vector中数据量很大，调用一次函数要使用的时间很长，要怎么处理呢？

我：咦？有点东西。使用多线程，将vector中的数据拆分成多块，分别计算求和结果，然后在主线程中汇总各个线程的计算结果。

// Version 3
/// Sums `v` by splitting it into contiguous blocks that are added up
/// concurrently, then combining the per-block totals.
///
/// @param v  Values to add up. Taken by const reference; it is only read, and
///           it must not be modified by anyone else while this call runs.
/// @return   The sum of all elements, or 0 when `v` is empty. Totals are kept
///           in `int`; the caller must make sure the sum fits.
int accumulate_3(const std::vector<int>& v)
{
	const std::size_t size = v.size();
	if (size == 0)
		return 0;

	// Adds the elements in [start, end) and stores the total in `result`.
	// The sum is built in a local and written once, so different threads do
	// not keep rewriting neighbouring slots of `results` inside the hot loop.
	auto accumulate_block = [&v](std::size_t start, std::size_t end, int& result) {
		int local_sum = 0;
		for (std::size_t i = start; i < end; i++)
			local_sum += v[i];
		result = local_sum;
	};

	// hardware_concurrency() is only a hint and may return 0 when the value
	// cannot be determined; fall back to 2 in that case.
	std::size_t hardware_threads = std::thread::hardware_concurrency();
	if (hardware_threads == 0)
		hardware_threads = 2;

	// Minimum number of elements that justifies giving a block its own thread.
	const std::size_t min_work_per_thread = 25;

	// Ceiling division: 49 elements -> 2 blocks, whereas plain "size / 25"
	// would round down to 1. Everything stays in size_t, so the cap can never
	// be truncated to 0 (which would make the division below divide by zero).
	const std::size_t max_threads =
		(size + min_work_per_thread - 1) / min_work_per_thread;
	const std::size_t num_threads = std::min(hardware_threads, max_threads);

	// Elements per block; the last block also takes the remainder.
	const std::size_t work_per_thread = size / num_threads;

	std::vector<int> results(num_threads, 0); // one partial sum per block
	std::vector<std::thread> threads;         // workers for all blocks but the last
	threads.reserve(num_threads - 1);

	for (std::size_t i = 0; i + 1 < num_threads; i++)
		threads.emplace_back(accumulate_block, i * work_per_thread,
			(i + 1) * work_per_thread, std::ref(results[i]));

	// The calling thread would otherwise just wait, so it handles the final
	// block (up to `size`, i.e. including the remainder) itself.
	accumulate_block((num_threads - 1) * work_per_thread, size,
		results[num_threads - 1]);

	// Wait for every worker before reading the partial sums.
	for (std::thread& worker : threads)
		worker.join();

	// Combine the per-block totals.
	int total = 0;
	for (const int partial : results)
		total += partial;
	return total;
}

面试官：怎么修改这个求和函数，使它支持泛型？

我：上面的都能写出来，这个改一改就行了。

// Version 4
/// Generic parallel sum: adds every element of `v` onto `result`.
///
/// The input is split into contiguous blocks that are folded concurrently;
/// the per-block totals are then added to `result` in block order, so the
/// left-to-right order of the elements is preserved (this matters for types
/// such as std::string whose `+=` is not commutative).
///
/// Requirements on T: copy-constructible and `a += b` valid for a `const T` b.
/// No default constructor is needed, because each partial sum is seeded with a
/// copy of the first element of its block. `+=` must be safe to run on
/// distinct objects from several threads at once.
///
/// @param v       Values to add up. Only read; must not be modified by anyone
///                else while this call runs.
/// @param result  In/out accumulator: supplies the starting value and receives
///                the final total. Left untouched when `v` is empty.
/// @return        A copy of `result` after the update. The return type is T
///                rather than `int`, so non-integer totals are not truncated
///                and types that do not convert to `int` compile.
template<typename T>
T accumulate_4(const std::vector<T>& v, T& result)
{
	const std::size_t size = v.size();
	if (size == 0)
		return result;

	// Adds the elements in [start, end) onto `partial`.
	auto accumulate_block = [&v](std::size_t start, std::size_t end, T& partial) {
		for (std::size_t i = start; i < end; i++)
			partial += v[i];
	};

	// hardware_concurrency() is only a hint and may return 0 when the value
	// cannot be determined; fall back to 2 in that case.
	std::size_t hardware_threads = std::thread::hardware_concurrency();
	if (hardware_threads == 0)
		hardware_threads = 2;

	// Minimum number of elements that justifies giving a block its own thread.
	const std::size_t min_work_per_thread = 25;

	// Ceiling division: 49 elements -> 2 blocks, whereas plain "size / 25"
	// would round down to 1. Everything stays in size_t, so the cap can never
	// be truncated to 0 (which would make the division below divide by zero).
	const std::size_t max_threads =
		(size + min_work_per_thread - 1) / min_work_per_thread;
	const std::size_t num_threads = std::min(hardware_threads, max_threads);

	// Elements per block (always >= 1 here); the last block also takes the
	// remainder.
	const std::size_t work_per_thread = size / num_threads;

	// Seed every partial sum with the first element of its block. All slots
	// are created before any thread starts, so the references handed to the
	// workers are never invalidated by a reallocation.
	std::vector<T> results;
	results.reserve(num_threads);
	for (std::size_t i = 0; i < num_threads; i++)
		results.push_back(v[i * work_per_thread]);

	std::vector<std::thread> threads; // workers for all blocks but the last
	threads.reserve(num_threads - 1);

	// Each block starts at "+ 1" because its first element is already in the
	// seed.
	for (std::size_t i = 0; i + 1 < num_threads; i++)
		threads.emplace_back(accumulate_block, i * work_per_thread + 1,
			(i + 1) * work_per_thread, std::ref(results[i]));

	// The calling thread would otherwise just wait, so it handles the final
	// block (up to `size`, i.e. including the remainder) itself.
	accumulate_block((num_threads - 1) * work_per_thread + 1, size,
		results[num_threads - 1]);

	// Wait for every worker before reading the partial sums.
	for (std::thread& worker : threads)
		worker.join();

	// Fold the partial sums onto the caller's accumulator, in block order.
	for (const T& partial : results) // by reference: no copies
		result += partial;
	return result;
}

最后献上《C++并发编程实战》（第二版）书中的实现方式

/// Function object that folds one block of a range onto a running total.
///
/// Calling it adds every element of [first, last) onto the value currently
/// held in `result` (left to right, via `total + element`) and stores the
/// outcome back into `result`. An empty range leaves the value unchanged.
template<typename Iterator, typename T>
struct accumulate_block
{
	void operator()(Iterator first, Iterator last, T& result)
	{
		// Work on a copy and publish it with a single assignment at the end.
		T running = result;
		while (first != last)
		{
			running = running + *first;
			++first;
		}
		result = running;
	}
};
/// Parallel counterpart of std::accumulate.
///
/// Splits [first, last) into consecutive blocks, folds each block with
/// accumulate_block (all but the last on their own std::thread, the last on
/// the calling thread), joins the workers and finally folds the per-block
/// totals onto `init`.
///
/// @param first  Start of the range.
/// @param last   End of the range (one past the final element).
/// @param init   Starting value; returned unchanged for an empty range.
/// @return       `init` plus the per-block totals, combined with
///               std::accumulate.
template<typename Iterator, typename T>
T parallel_accumulate(Iterator first, Iterator last, T init)
{
	unsigned long const total = std::distance(first, last);
	if (total == 0)
		return init; // empty range: nothing to add

	// Upper bound on the number of blocks so that each one holds roughly
	// `min_chunk` elements; the division rounds up.
	unsigned long const min_chunk = 25;
	unsigned long const cap_by_size = (total + min_chunk - 1) / min_chunk;

	// hardware_concurrency() may report 0; use 2 in that case.
	unsigned long hw = std::thread::hardware_concurrency();
	if (hw == 0)
		hw = 2;

	unsigned long const workers = std::min(hw, cap_by_size);
	unsigned long const chunk = total / workers; // elements per block

	// One partial total per block, but one thread fewer: the calling thread
	// processes the final block itself.
	std::vector<T> partials(workers);
	std::vector<std::thread> pool(workers - 1);

	Iterator cursor = first;
	for (unsigned long k = 0; k < pool.size(); ++k)
	{
		Iterator stop = cursor;
		std::advance(stop, chunk);
		pool[k] = std::thread(accumulate_block<Iterator, T>(),
			cursor, stop, std::ref(partials[k]));
		cursor = stop; // the next block begins where this one ended
	}

	// Final block: everything that is left, including the remainder of the
	// division above.
	accumulate_block<Iterator, T>()(cursor, last, partials[workers - 1]);

	// Every worker must have finished before its partial total is read.
	for (std::thread& worker : pool)
		worker.join();

	return std::accumulate(partials.begin(), partials.end(), init);
}