一 std::async
// Declaration of the std::async overload that takes no explicit launch policy.
// The returned std::future carries the result type of invoking the decayed
// callable `f` with the decayed arguments `args`.
template< class Function, class... Args>
std::future<std::result_of_t<std::decay_t<Function>(std::decay_t<Args>...)>>
async( Function&& f, Args&&... args );
std::async返回一个future对象,调用其get方法可获取异步执行的结果(结果尚未就绪时get会阻塞等待),如下所示:
#include <iostream>
#include <string>
#include <future>
// Builds the greeting "Hello C++11 from <s>." for the given source name `s`.
std::string helloFunction(const std::string& s) {
    std::string greeting("Hello C++11 from ");
    greeting += s;
    greeting += ".";
    return greeting;
}
// Stateless callable object whose call operator formats a greeting.
class HelloFunctionObject {
public:
    // Returns "Hello C++11 from <s>." for the given source name `s`.
    std::string operator()(const std::string& s) const {
        std::string greeting("Hello C++11 from ");
        greeting.append(s).append(".");
        return greeting;
    }
};
// Demonstrates std::async with three kinds of callables (free function,
// function object, lambda) and prints each result obtained through get().
int main() {
    // Future backed by a free function.
    std::future<std::string> futureFunction = std::async(helloFunction, "function");

    // Future backed by a function object.
    HelloFunctionObject helloFunctionObject;
    std::future<std::string> futureFunctionObject =
        std::async(helloFunctionObject, "function object");

    // Future backed by a lambda.
    const auto greetLambda = [](const std::string& s) {
        return "Hello C++11 from " + s + ".";
    };
    std::future<std::string> futureLambda = std::async(greetLambda, "lambda function");

    // get() waits for each task's result; print them in launch order.
    const std::string fromFunction = futureFunction.get();
    std::cout << fromFunction << std::endl;
    const std::string fromFunctionObject = futureFunctionObject.get();
    std::cout << fromFunctionObject << std::endl;
    const std::string fromLambda = futureLambda.get();
    std::cout << fromLambda << std::endl;
}
二 std::async实现并行计算
1.主程序:
#include <iostream>
#include <vector>
#include <future>
#include <random>
#include <numeric>
#include <cassert>
#include "time_elapse.hpp"
// Element count (100 million) used for each of the benchmark vectors.
static const int NUM = 100000000;
// Computes the inner product of `v` and `w`, splitting large inputs into four
// contiguous quarters that are each reduced by their own std::async task.
//
// Parameters:
//   v, w - input vectors; only read, never modified.
// Returns:
//   The sum of v[i] * w[i] as long long, or 0 if the vectors differ in length.
// Notes:
//   - Inputs with fewer than 100 elements are reduced on the calling thread.
//   - Each product v[i] * w[i] is evaluated in int before it is added to the
//     long long accumulator, so every individual product must fit in int.
//   - Tasks are launched with std::launch::async so they really run
//     concurrently; with that policy std::async may throw std::system_error
//     if a new thread cannot be started.
long long parallel_inner_product(const std::vector<int>& v, const std::vector<int>& w) {
    using size_type = std::vector<int>::size_type;

    if (v.size() != w.size()) {
        return 0;
    }
    const size_type n = v.size();
    if (n < 100) {
        return std::inner_product(v.begin(), v.end(), w.begin(), 0LL);
    }

    // Quarter boundaries, computed once. q1 + q2 never exceeds n.
    const size_type q1 = n / 4;
    const size_type q2 = n / 2;
    const size_type q3 = q1 + q2;

    // Work on raw pointers: data() + n is a valid one-past-the-end pointer,
    // whereas &v[n] would index outside the vector.
    const int* const lhs = v.data();
    const int* const rhs = w.data();

    // Captures two pointers by value; the vectors outlive every task because
    // all futures are consumed before this function returns.
    const auto partial = [lhs, rhs](size_type lo, size_type hi) {
        return std::inner_product(lhs + lo, lhs + hi, rhs + lo, 0LL);
    };

    auto part1 = std::async(std::launch::async, partial, size_type{0}, q1);
    auto part2 = std::async(std::launch::async, partial, q1, q2);
    auto part3 = std::async(std::launch::async, partial, q2, q3);
    auto part4 = std::async(std::launch::async, partial, q3, n);

    return part1.get() + part2.get() + part3.get() + part4.get();
}
// Computes the inner product of `v` and `w` on the calling thread.
//
// Parameters:
//   v, w - input vectors; only read, never modified (taken by const reference
//          so const vectors and temporaries are accepted).
// Returns:
//   The sum of v[i] * w[i] as long long, or 0 if the vectors differ in length.
// Note:
//   Each product v[i] * w[i] is evaluated in int before it is added to the
//   long long accumulator, so every individual product must fit in int.
long long serial_inner_product(const std::vector<int>& v, const std::vector<int>& w) {
    if (v.size() != w.size()) {
        return 0;
    }
    return std::inner_product(v.begin(), v.end(), w.begin(), 0LL);
}
int main() {
std::random_device seed;
std::mt19937 engine(seed());
std::uniform_int_distribution<int> dist(0, 100);
std::vector<int> v, w;
v.reserve(NUM);
w.reserve(NUM);
time_elapse te;
te.start();
for (int i = 0; i < NUM; ++i) {
v.push_back(dist(engine));
w.push_back(dist(engine));
}
std::cout << "load vector elapse = " << te.end() << "s" << std::endl;
te.start();
auto res1 = parallel_inner_product(v, w);
std::cout << "parallel inner_product elapse = " << te.end() << "s" << std::endl;
te.start();
auto res2 = serial_inner_product(v, w);
std::cout << "serial inner_product elapse = " << te.end() << "s" << std::endl;
assert(res1 == res2);
return 0;
}
2.time_elapse.hpp
#include <chrono>
// Minimal stopwatch built on std::chrono::steady_clock.
struct time_elapse {
    // Resets the reference point to the current time.
    void start() {
        start_ = std::chrono::steady_clock::now();
    }

    // Returns the seconds elapsed since the reference point, as a double.
    double end() {
        const auto elapsed = std::chrono::steady_clock::now() - start_;
        return std::chrono::duration<double>(elapsed).count();
    }

private:
    // Reference point; initialised to the time of construction.
    std::chrono::steady_clock::time_point start_ = std::chrono::steady_clock::now();
};
3.编译脚本make.sh
# Compile test.cpp as C++11 with debug info into the executable "Test",
# enabling pthread support.
g++ -std=c++11 -g -o Test test.cpp -pthread
三 程序性能测试
1.测试环境:
(1)国产环境(实体机)
CPU:
OS:
(2)Intel环境(虚拟机)
CPU:
OS:
2.测试结果对比:
(1)国产环境测试结果(执行两次)
(2)Intel环境测试结果(执行两次)
3.测试结论
(1)使用std::async优化后的并行计算比串行更节省时间;
(2)国产环境并没有明显优势,调用C++相关库反而更慢。
https://github.com/wangzhicheng2013/async_parallel