编译环境:gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04)
例子按4.4节所写:"指向 Member Function之指针"的效率(Pointer-to-Member Efficiency),未包括多继承和虚拟继承
// Global accumulators that every cross-product variant in this file adds into.
// NOTE: these are single-precision running sums. When the same small increment
// is added ~2^30 times, the sum stops growing once the increment drops below
// half an ULP of the total, so the printed totals are not exact sums.
float g_cx = 0.f;
float g_cy = 0.f;
float g_cz = 0.f;
/// 3-D point with public float coordinates. It declares a virtual member, so
/// it also serves as the polymorphic base of pt3d_derive.
class pt3d
{
public:
    float x;
    float y;
    float z;

public:
    /// Builds a point from its coordinates; each coordinate defaults to zero.
    pt3d(float xx = 0.0f, float yy = 0.0f, float zz = 0.0f)
        : x(xx), y(yy), z(zz)
    {
    }

    // Polymorphic base class: a public virtual destructor makes destroying a
    // derived object through a pt3d* well-defined.
    virtual ~pt3d() = default;

    /// Adds the cross product (pA x *this) to the global accumulators
    /// g_cx, g_cy and g_cz. Neither operand is modified.
    virtual inline void cross_product(const pt3d &pA)
    {
        g_cx += pA.y * z - pA.z * y;
        g_cy += pA.z * x - pA.x * z;
        g_cz += pA.x * y - pA.y * x;
    }
};
struct pt3d_derive : public pt3d
{
pt3d_derive(float xx = 0.0, float yy = 0.0, float zz = 0.0)
: pt3d(xx, yy, zz)
{
}
virtual inline void cross_product(const pt3d &pA) override
{
g_cx += pA.y * z - pA.z * y;
g_cy += pA.z * x - pA.x * z;
g_cz += pA.x * y - pA.y * x;
}
};
/// Free-function variant (not declared inline): adds the cross product
/// (pA x pB) to the global accumulators g_cx, g_cy and g_cz.
void cross_product(const pt3d &pA, const pt3d &pB)
{
    const float dx = pA.y * pB.z - pA.z * pB.y;
    const float dy = pA.z * pB.x - pA.x * pB.z;
    const float dz = pA.x * pB.y - pA.y * pB.x;

    g_cx += dx;
    g_cy += dy;
    g_cz += dz;
}
/// Same arithmetic as cross_product, but declared `inline`: adds the cross
/// product (pA x pB) to the global accumulators g_cx, g_cy and g_cz.
inline void cross_product_inline(const pt3d &pA, const pt3d &pB)
{
    const float dx = pA.y * pB.z - pA.z * pB.y;
    const float dy = pA.z * pB.x - pA.x * pB.z;
    const float dz = pA.x * pB.y - pA.y * pB.x;

    g_cx += dx;
    g_cy += dy;
    g_cz += dz;
}
// Number of calls made inside every timed loop: 2^30 (1024 * 1024 * 1024).
constexpr unsigned long long ITERATION_COUNT_FUN_PERFOM = 1ULL << 30;
/// Benchmark: calls the free function cross_product ITERATION_COUNT_FUN_PERFOM
/// times between START_TIMING/END_TIMING, then prints the accumulated sums.
/// NOTE(review): START_TIMING/END_TIMING are defined outside this view;
/// presumably they measure and report the elapsed time - confirm.
void test_common_function()
{
    pt3d pA{1.725f, 0.875f, 0.478f};
    pt3d pB{0.315f, 0.317f, 0.838f};

    // Every benchmark starts from zeroed accumulators.
    g_cx = g_cy = g_cz = 0.f;

    START_TIMING(0);
    for (unsigned long long done = 0; done != ITERATION_COUNT_FUN_PERFOM; ++done)
    {
        cross_product(pA, pB);
    }
    END_TIMING(0);

    cout << "common:" << g_cx << ", " << g_cy << " ," << g_cz << endl;
}
/// Benchmark: calls the inline free function cross_product_inline
/// ITERATION_COUNT_FUN_PERFOM times between START_TIMING/END_TIMING, then
/// prints the accumulated sums.
/// NOTE(review): START_TIMING/END_TIMING are defined outside this view;
/// presumably they measure and report the elapsed time - confirm.
void test_inline_common_function()
{
    pt3d pA{1.725f, 0.875f, 0.478f};
    pt3d pB{0.315f, 0.317f, 0.838f};

    // Every benchmark starts from zeroed accumulators.
    g_cx = g_cy = g_cz = 0.f;

    START_TIMING(0);
    for (unsigned long long done = 0; done != ITERATION_COUNT_FUN_PERFOM; ++done)
    {
        cross_product_inline(pA, pB);
    }
    END_TIMING(0);

    cout << "inline common:" << g_cx << ", " << g_cy << " ," << g_cz << endl;
}
void test_virtual_member_function()
{
pt3d *pA = new pt3d_derive(1.725f, 0.875f, 0.478f);
pt3d *pB = new pt3d_derive(0.315f, 0.317f, 0.838f);
g_cx = 0.f;
g_cy = 0.f;
g_cz = 0.f;
START_TIMING(0);
for (unsigned long long iters = 0; iters < ITERATION_COUNT_FUN_PERFOM; iters++)
{
pB->cross_product(*pA);
}
END_TIMING(0);
cout << "virtual:" << g_cx << ", " << g_cy << " ," << g_cz << endl;
}
/// Benchmark: calls pt3d::cross_product on a stack object (not through a
/// pointer or reference) ITERATION_COUNT_FUN_PERFOM times between
/// START_TIMING/END_TIMING, then prints the accumulated sums.
/// NOTE(review): START_TIMING/END_TIMING are defined outside this view;
/// presumably they measure and report the elapsed time - confirm.
void test_inline_member_function()
{
    pt3d pA{1.725f, 0.875f, 0.478f};
    pt3d pB{0.315f, 0.317f, 0.838f};

    // Every benchmark starts from zeroed accumulators.
    g_cx = g_cy = g_cz = 0.f;

    START_TIMING(0);
    for (unsigned long long done = 0; done != ITERATION_COUNT_FUN_PERFOM; ++done)
    {
        pB.cross_product(pA);
    }
    END_TIMING(0);

    cout << "inline member:" << g_cx << ", " << g_cy << " ," << g_cz << endl;
}
/// Benchmark: calls pt3d_derive::cross_product on a stack object (not through
/// a pointer or reference) ITERATION_COUNT_FUN_PERFOM times between
/// START_TIMING/END_TIMING, then prints the accumulated sums.
/// NOTE(review): START_TIMING/END_TIMING are defined outside this view;
/// presumably they measure and report the elapsed time - confirm.
void test_inline_member_function_derive()
{
    pt3d_derive pA{1.725f, 0.875f, 0.478f};
    pt3d_derive pB{0.315f, 0.317f, 0.838f};

    // Every benchmark starts from zeroed accumulators.
    g_cx = g_cy = g_cz = 0.f;

    START_TIMING(0);
    for (unsigned long long done = 0; done != ITERATION_COUNT_FUN_PERFOM; ++done)
    {
        pB.cross_product(pA);
    }
    END_TIMING(0);

    cout << "inline member derive:" << g_cx << ", " << g_cy << " ," << g_cz << endl;
}
/// Runs every call-overhead benchmark once, in a fixed order, after one extra
/// warm-up pass of the inline free-function benchmark.
void test_function_call_performance()
{
    using benchmark_fn = void (*)();

    const benchmark_fn schedule[] = {
        test_inline_common_function, // warm up
        test_inline_common_function,
        test_common_function,
        test_inline_member_function,
        test_inline_member_function_derive,
        test_virtual_member_function,
    };

    for (benchmark_fn run : schedule)
    {
        run();
    }
}
-O2优化编译下的结果:
1.879560 inline common:1.67772e+07, -3.35544e+07 ,8.38861e+06
1.784226 inline common:1.67772e+07, -3.35544e+07 ,8.38861e+06
5.440695 common:1.67772e+07, -3.35544e+07 ,8.38861e+06
1.791872 inline member:1.67772e+07, -3.35544e+07 ,8.38861e+06
1.767321 inline member derive:1.67772e+07, -3.35544e+07 ,8.38861e+06
5.453181 virtual:1.67772e+07, -3.35544e+07 ,8.38861e+06
-O0编译下的结果:
13.819541 inline common:1.67772e+07, -3.35544e+07 ,8.38861e+06
13.965824 inline common:1.67772e+07, -3.35544e+07 ,8.38861e+06
13.135956 common:1.67772e+07, -3.35544e+07 ,8.38861e+06
13.531857 inline member:1.67772e+07, -3.35544e+07 ,8.38861e+06
13.435564 inline member derive:1.67772e+07, -3.35544e+07 ,8.38861e+06
15.631393 virtual:1.67772e+07, -3.35544e+07 ,8.38861e+06
结论:
- -O2优化情况下,普通函数与虚函数的性能基本一致,耗时均约为inline函数的三倍
- -O0关闭优化下,编译器不做内联展开,因此除虚函数调用略慢以外,其它函数调用的性能基本一致