转载请注明原创出处:http://aigo.iteye.com/blog/1908084
C++11(原称C++0x)标准提供的std::mutex和std::thread两个接口,用来开发多线程同步的应用非常方便,而且可以跨平台。自己做了一下测试,发现这个跨平台的代价还是很大的。我分别用std::mutex与Windows的CRITICAL_SECTION、std::thread和Windows的CreateThread接口做了对比,测试代码如下:
#include "stdafx.h"
#include <mutex>
#include <atomic>
#include <iostream>
#include <time.h>
#include <thread>
#include <list>
#include <atomic>
#include <Windows.h>
using namespace std;
// Number of worker threads each benchmark starts.
#define MAX_THREADS 16
// Global result data.
// total1: counter incremented by test_mutex() while holding m_lock.
long total1 = 0;
// total2: counter incremented by test_critical() while holding m_Lock2.
long total2 = 0;
// Declared here but not referenced by any code visible in this listing.
std::atomic<long> total;
// Protects total1 in test_mutex().
std::mutex m_lock;
// Protects total2 in test_critical(); initialised in use_win_critical().
CRITICAL_SECTION m_Lock2;
// Benchmark drivers, defined after main().
void use_std_mutex();
void use_win_critical();
void use_win_thread();
// Worker body for the std::mutex benchmark: performs one million
// lock / increment / unlock rounds on the shared counter total1.
void test_mutex()
{
    int remaining = 1000000;
    while (remaining-- > 0)
    {
        // Explicit lock()/unlock() so the measured work is exactly one
        // acquire and one release per increment.
        m_lock.lock();
        ++total1;
        m_lock.unlock();
    }
}
// Worker body for the CRITICAL_SECTION benchmark: performs one million
// enter / increment / leave rounds on the shared counter total2.
void test_critical()
{
    const int kRounds = 1000000;
    for (int round = 0; round != kRounds; ++round)
    {
        EnterCriticalSection(&m_Lock2);
        total2 = total2 + 1;
        LeaveCriticalSection(&m_Lock2);
    }
}
// Entry point: runs the three benchmarks one after another, in a fixed
// order. The command-line arguments are not used.
int main(int argc, char* argv[])
{
    void (*const benchmarks[])() = {
        use_std_mutex,     // std::thread + std::mutex
        use_win_critical,  // std::thread + CRITICAL_SECTION
        use_win_thread     // CreateThread + CRITICAL_SECTION
    };
    for (auto run_benchmark : benchmarks)
    {
        run_benchmark();
    }
    return 0;
}
// Benchmark: start MAX_THREADS std::thread workers running test_mutex(),
// wait for all of them, then print the final value of total1 and the
// elapsed time in milliseconds.
void use_std_mutex()
{
    // Threads are held by value, so no manual new/delete is needed.
    std::list<std::thread> workers;

    // Test std::mutex.
    printf("testing mutex...\n");
    const clock_t start = clock();
    for (int i = 0; i < MAX_THREADS; ++i)
    {
        workers.emplace_back(&test_mutex);
    }
    for (std::thread& worker : workers)
    {
        worker.join();
    }
    const clock_t finish = clock();

    // clock() returns ticks; scale by CLOCKS_PER_SEC so the printed value
    // really is milliseconds regardless of the tick rate.
    const long elapsed_ms = static_cast<long>(
        static_cast<double>(finish - start) * 1000.0 / CLOCKS_PER_SEC);

    // total1 and elapsed_ms are long, so the matching specifier is %ld.
    printf("result:%ld\n", total1);
    printf("cost:%ldms\n", elapsed_ms);
}
void use_win_critical()
{
//测试Critical
InitializeCriticalSection(&m_Lock2);
std::list<std::thread*> threadlist;
printf("testing critical...\n");
clock_t start = clock();
for(int i=0; i<MAX_THREADS; ++i)
{
std::thread *t1 = new std::thread((&test_critical));
threadlist.push_back(t1);
}
for(std::list<std::thread*>::const_iterator i = threadlist.begin(); i != threadlist.end(); i++ )
{
(*i)->join();
}
clock_t finish = clock();
printf("result:%d\n", total2);
printf("cost:%dms\n", finish - start);
for(std::list<std::thread*>::const_iterator i = threadlist.begin(); i != threadlist.end(); i++ )
{
delete(*i);
}
}
// Not referenced by any code visible in this listing.
#define BUF_SIZE 255
// total3: counter incremented by MyThreadFunction() while holding m_Lock3.
long total3 = 0;
// Protects total3; initialised in use_win_thread().
CRITICAL_SECTION m_Lock3;
// Thread entry point passed to CreateThread() in use_win_thread().
DWORD WINAPI MyThreadFunction( LPVOID lpParam );
//使用Windows线程测试
void use_win_thread()
{
DWORD dwThreadIdArray[MAX_THREADS];
HANDLE hThreadArray[MAX_THREADS];
InitializeCriticalSection(&m_Lock3);
printf("testing use_win_thread...\n");
clock_t start = clock();
for( int i=0; i<MAX_THREADS; i++ )
{
hThreadArray[i] = CreateThread(
NULL,
0,
MyThreadFunction,
NULL,
0,
&dwThreadIdArray[i]);
}
WaitForMultipleObjects(MAX_THREADS, hThreadArray, TRUE, INFINITE);
clock_t finish = clock();
printf("result:%d\n", total3);
printf("cost:%dms\n", finish - start);
for(int i=0; i<MAX_THREADS; i++)
{
CloseHandle(hThreadArray[i]);
}
}
// Thread entry point for the CreateThread benchmark: performs one million
// enter / increment / leave rounds on the shared counter total3.
// lpParam is not used. Always returns 0.
DWORD WINAPI MyThreadFunction( LPVOID lpParam )
{
    const int kIncrements = 1000000;
    int done = 0;
    while (done < kIncrements)
    {
        EnterCriticalSection(&m_Lock3);
        ++total3;
        LeaveCriticalSection(&m_Lock3);
        ++done;
    }
    return 0;
}
测试环境:
硬件:i7 2630qm 4核
系统:Windows7 64bit 旗舰版 SP1
程序:VS2012 Release win32
测试结果:
2线程抢占:
testing mutex...
result:2000000
cost:628ms
testing critical...
result:2000000
cost:132ms
testing use_win_thread...
result:2000000
cost:98ms
4线程抢占:
testing mutex...
result:4000000
cost:1150ms
testing critical...
result:4000000
cost:266ms
testing use_win_thread...
result:4000000
cost:216ms
8线程抢占:
testing mutex...
result:8000000
cost:2855ms
testing critical...
result:8000000
cost:582ms
testing use_win_thread...
result:8000000
cost:461ms
16线程抢占:
testing mutex...
result:16000000
cost:138052ms
testing critical...
result:16000000
cost:1448ms
testing use_win_thread...
result:16000000
cost:1169ms
结论:如果想追求高性能,C++11的std::mutex不要用,std::thread性能损耗不大,要用随喜。
【2016-04-14更新】
刚刚用VS2015又测试了下,发现和三年前测试的结果天壤之别!!!
testing mutex...
result:16000000
cost:885ms
testing critical...
result:16000000
cost:5339ms
testing use_win_thread...
result:16000000
cost:6101ms
这次测试的硬件还是当年的硬件,只是操作系统换成了windows 10 pro x64,vs换成2015旗舰版。
vs2012是当年第一个较完善支持c++11的版本(vs2010连std::thread都没提供),莫非当时没来得及优化编译器?
std::mutex完爆critical_section,但是critical相对三年前的结果,为啥性能降低了这么多?感觉像是bug一样,微软自家独占的critical性能居然比std::mutex差这么多。