通过比较以下几种同步方式,测试各方法效率,分为以下6种情况做对比:
- 不加任何同步措施;
- Windows原子操作;
- c++11 mutex;
- 自定义的自旋锁CLCS;
- Windows临界区;
- Windows互斥对象;
硬件平台:AMD 8核16线程,内存16GB
系统/软件平台:windows10,vs2019com(vc++);
方法:定义3个变量,在多线程中自加(16线程x10万次),记录时间并考察结果正确性;
结果如下:
结论 :
- 不加锁,速度极快,但结果是不可预测的错误值,没有应用价值。
- windows原子操作 及 c++11中的mutex效率非常高;
【让人不可思议的是,c++的mutex锁的实现效率非常高,接近了原子操作的效率,这一点非常让人意外,一直以为c++锁会由于跨平台要求和底层调用封装,而会慢于windows临界区,看来这是不对的。通过查看mutex在windows vc上面的底层实现,发现原来在windows7及以上平台使用的是SRWLock锁(读写锁),而在windows vista及以前的windows平台仍然使用临界区;进一步剖析SRWLock的底层实现,仍然是用的原子自旋锁,这也解释了为什么c++ mutex在win7及以上的平台效率和原子锁效率相当】
- 自定义的自旋锁,速度可以实现得很精简,效率略高于Windows临界区的效率,毕竟摆脱了一些多余的调用过程;
- Windows临界区效率尚可,但仍然是原子操作和c++锁的所需时间的10倍以上;
- Windows互斥对象,由于实现跨进程同步,需要进入系统内核进行调度,所以比较慢,耗时是临界区的10倍以上;
以下是测试代码:
【 并行化支持】
int main() {
const size_t times = 100000;
size_t a = 0, b = 0, c = 0;
CLAtomic<size_t> aa = 0, ba = 0, ca = 0; //原子对象
CLCS cs;
CLCSLock cs2;
TaskPoolStatic volatile as;
CLTick tk; //高精度定时器
tk.timingStart();
parallel_proc([&](int ci, int n) { //并行化
for (size_t i = 0; i < times; i++)
{
++a; ++b; ++c;
}
});
auto s0_ = tk.getSpendTime();
cout << "\nno lock: " << a << " , " << b << " , " << c << " , time = " << s0_ << " , error ...";
tk.timingStart();
parallel_proc([&](int ci, int n) {
for (size_t i = 0; i < times; i++)
{
++aa;++ba;++ca;
}
});
auto s0 = tk.getSpendTime();
cout << "\nAtomic: " << aa() << " , " << ba() << " , " << ca() << " , time = " << s0;
a = 0, b = 0, c = 0;
mutex cs3;
tk.timingStart();
parallel_proc([&](int ci, int n) {
for (size_t i = 0; i < times; i++)
{
cs3.lock(); ++a; cs3.unlock();
cs3.lock(); ++b; cs3.unlock();
cs3.lock(); ++c; cs3.unlock();
}
});
auto s4 = tk.getSpendTime();
cout << "\nc++11 mutex: " << a << " , " << b << " , " << c << " , time = " << s4;
a = 0, b = 0, c = 0;
tk.timingStart();
parallel_proc([&](int ci, int n) {
for (size_t i = 0; i < times; i++)
{
cs.lock(); ++a; cs.unlock();
cs.lock(); ++b; cs.unlock();
cs.lock(); ++c; cs.unlock();
}
});
auto s1 = tk.getSpendTime();
cout << "\nclcs: " << a << " , " << b << " , " << c << " , time = " << s1;
a = 0, b = 0, c = 0;
tk.timingStart();
parallel_proc([&](int ci, int n) {
for (size_t i = 0; i < times; i++)
{
cs2.lock(); ++a; cs2.unlock();
cs2.lock(); ++b; cs2.unlock();
cs2.lock(); ++c; cs2.unlock();
}
});
auto s2 = tk.getSpendTime();
cout << "\nWindows cs: " << a << " , " << b << " , " << c << " , time = " << s2;
a = 0, b = 0, c = 0;
HANDLE mt = CreateMutex(0, 0, 0);
tk.timingStart();
parallel_proc([&](int ci, int n) {
for (size_t i = 0; i < times; i++)
{
if (WaitForSingleObject(mt, INFINITE) == WAIT_OBJECT_0) { ++a; ReleaseMutex(mt); }
if (WaitForSingleObject(mt, INFINITE) == WAIT_OBJECT_0) { ++b; ReleaseMutex(mt); }
if (WaitForSingleObject(mt, INFINITE) == WAIT_OBJECT_0) { ++c; ReleaseMutex(mt); }
}
});
auto s3 = tk.getSpendTime();
CloseHandle(mt);
cout << "\nWindows Mutex: " << a << " , " << b << " , " << c << " , time = " << s3;
return 1;
}
自旋锁定义:
#ifndef _CL_CS_H_
#define _CL_CS_H_
//自旋锁
typedef struct _cl_cs {
LONG bLock = FALSE;
DWORD ownerThreadId = 0;
LONG lockCounts = 0;
void enter() {
auto cid = GetCurrentThreadId();
if (ownerThreadId != cid) {
while (InterlockedExchange(&bLock, TRUE) == TRUE);
//Sleep(0);
ownerThreadId = cid;
}
++lockCounts;
}
void leave() {
auto cid = GetCurrentThreadId();
if (cid != ownerThreadId)
return;
--lockCounts;
if (lockCounts == 0) {
ownerThreadId = 0;
bLock = FALSE;
}
}
inline void lock() { enter(); }
inline void unlock() { leave(); }
}CLCS, * PCLCS;
#endif
原子模板类定义:
//原子操作类
#ifndef _CL_ATOMIC_H_
#define _CL_ATOMIC_H_
//默认状态下CLAtomic类对象的原子操作模式是否启动,原子操作在保证线程安全情况下将降低代码效率;
#define CLAtomic_bUseAtomc_def true
// Atomic value wrapper (template).
//
// Holds a value of type T and implements ++, --, +=, -=, *=, /= and %= as
// compare-and-swap retry loops on top of the Win32 InterlockedCompareExchange
// family. The atomic mode can be turned off per object with
// setUseAtomic(false), in which case these operators fall back to plain,
// non-atomic arithmetic.
//
// Plain assignment, reads and the binary operators (+, -, *, /, %) access the
// value directly without any interlocked operation.
template<class T>
class CLAtomic {
protected:
    T Target;           // the wrapped value
    bool bUseAtomc;     // true: modifying operators use CAS loops; false: plain arithmetic

    // --- atomicCAS overloads -------------------------------------------------
    // Each overload stores newvalue into *dest only if *dest still equals
    // oldvalue and returns true when the store happened.

    // 8-bit CAS emulated with a 16-bit interlocked operation: the low byte is
    // the value, the high byte is whatever follows *dest in memory and is
    // written back unchanged. The value bytes are masked to 8 bits so that a
    // negative CHAR is not sign-extended into the preserved high byte.
    // NOTE(review): this reads and writes the byte after *dest; inside this
    // class that is presumably bUseAtomc when T is one byte wide - confirm the
    // layout before using this overload on other storage.
    inline static bool atomicCAS(CHAR volatile* dest, CHAR newvalue, CHAR oldvalue)
    {
        SHORT volatile* wide = reinterpret_cast<SHORT volatile*>(dest);
        const SHORT cov = SHORT(*wide & 0xff00);            // neighbouring byte, kept as is
        const SHORT new1 = SHORT(cov | UCHAR(newvalue));
        const SHORT old = SHORT(cov | UCHAR(oldvalue));
        return InterlockedCompareExchange16(wide, new1, old) == old;
    }
    inline static bool atomicCAS(UCHAR volatile* dest, UCHAR newvalue, UCHAR oldvalue)
    {
        return atomicCAS(reinterpret_cast<CHAR volatile*>(dest), CHAR(newvalue), CHAR(oldvalue));
    }
    inline static bool atomicCAS(SHORT volatile* dest, SHORT newvalue, SHORT oldvalue)
    {
        return InterlockedCompareExchange16(dest, newvalue, oldvalue) == oldvalue;
    }
    inline static bool atomicCAS(USHORT volatile* dest, USHORT newvalue, USHORT oldvalue)
    {
        return atomicCAS(reinterpret_cast<SHORT volatile*>(dest), SHORT(newvalue), SHORT(oldvalue));
    }
    inline static bool atomicCAS(INT volatile* dest, INT newvalue, INT oldvalue)
    {
        return InterlockedCompareExchange((LONG volatile*)dest, (LONG)newvalue, (LONG)oldvalue) == (LONG)oldvalue;
    }
    inline static bool atomicCAS(UINT volatile* dest, UINT newvalue, UINT oldvalue)
    {
        return atomicCAS(reinterpret_cast<INT volatile*>(dest), INT(newvalue), INT(oldvalue));
    }
    inline static bool atomicCAS(LONG volatile* dest, LONG newvalue, LONG oldvalue)
    {
        return InterlockedCompareExchange(dest, newvalue, oldvalue) == oldvalue;
    }
    inline static bool atomicCAS(ULONG volatile* dest, ULONG newvalue, ULONG oldvalue)
    {
        return atomicCAS(reinterpret_cast<LONG volatile*>(dest), LONG(newvalue), LONG(oldvalue));
    }
    inline static bool atomicCAS(LONG64 volatile* dest, LONG64 newvalue, LONG64 oldvalue)
    {
        return InterlockedCompareExchange64(dest, newvalue, oldvalue) == oldvalue;
    }
    inline static bool atomicCAS(ULONG64 volatile* dest, ULONG64 newvalue, ULONG64 oldvalue)
    {
        return atomicCAS(reinterpret_cast<LONG64 volatile*>(dest), LONG64(newvalue), LONG64(oldvalue));
    }
    // Floating-point overloads compare and swap the raw bit patterns.
    inline static bool atomicCAS(FLOAT volatile* dest, FLOAT newvalue, FLOAT oldvalue)
    {
        auto old1 = *(LONG*)(void*)&oldvalue;
        return InterlockedCompareExchange(
            reinterpret_cast<LONG volatile*>(dest),
            *(LONG*)(void*)&newvalue,
            old1) == old1;
    }
    inline static bool atomicCAS(DOUBLE volatile* dest, DOUBLE newvalue, DOUBLE oldvalue)
    {
        auto old1 = *(LONG64*)(void*)&oldvalue;
        return InterlockedCompareExchange64(
            reinterpret_cast<LONG64 volatile*>(dest),
            *(LONG64*)(void*)&newvalue,
            old1) == old1;
    }

    // --- CAS retry loops -----------------------------------------------------
    // Each helper re-reads Target and retries until its CAS succeeds, then
    // returns the value Target held before the update.

    // Atomically adds 1.
    inline T increment()
    {
        T old;
        do {
            old = Target;
        } while (!CLAtomic::atomicCAS(&Target, old + T(1), old));
        return old;
    }
    // Atomically subtracts 1.
    inline T decrement()
    {
        T old;
        do {
            old = Target;
        } while (!CLAtomic::atomicCAS(&Target, old - T(1), old));
        return old;
    }
    // Atomically adds v.
    inline T add(T v)
    {
        T old;
        do {
            old = Target;
        } while (!CLAtomic::atomicCAS(&Target, old + v, old));
        return old;
    }
    // Atomically multiplies by v.
    inline T mul(T v)
    {
        T old;
        do {
            old = Target;
        } while (!CLAtomic::atomicCAS(&Target, old * v, old));
        return old;
    }
    // Atomically divides by v.
    inline T div(T v)
    {
        T old;
        do {
            old = Target;
        } while (!CLAtomic::atomicCAS(&Target, old / v, old));
        return old;
    }
public:
    // Value-initializes the wrapped value (zero for arithmetic types) so a
    // default-constructed object never exposes an indeterminate value.
    inline CLAtomic() :Target(), bUseAtomc(CLAtomic_bUseAtomc_def) {
    }
    // Wraps v; _bUseAtomc selects atomic (true) or plain (false) operators.
    inline CLAtomic(T v, bool _bUseAtomc = CLAtomic_bUseAtomc_def)
        :Target(v), bUseAtomc(_bUseAtomc) {
    }
    // Copies both the value and the atomic-mode flag.
    inline CLAtomic(const CLAtomic& v)
        :Target(v.Target), bUseAtomc(v.bUseAtomc) {
    }
    // Converting copy from a CLAtomic of another value type.
    template<class T2>
    inline CLAtomic(const CLAtomic<T2>& v)
        :Target(v()), bUseAtomc(v.isUseAtomic()) {
    }
    // Turns the atomic mode on or off; returns *this for chaining.
    CLAtomic& setUseAtomic(bool _bUseAtomc = CLAtomic_bUseAtomc_def) {
        bUseAtomc = _bUseAtomc;
        return *this;
    }
    // Reports whether the modifying operators currently use CAS loops.
    bool isUseAtomic() const {
        return bUseAtomc;
    }

    // Assignments are plain stores; they copy the value only and leave the
    // atomic-mode flag of this object unchanged.
    template<class T2>
    inline CLAtomic& operator=(const T2 v2) {
        Target = v2;
        return *this;
    }
    template<class T2>
    inline CLAtomic& operator=(const CLAtomic<T2>& v2) {
        Target = v2();
        return *this;
    }
    inline CLAtomic& operator=(const CLAtomic& v2) {
        Target = v2.Target;
        return *this;
    }
    inline CLAtomic& operator=(const T& v2) {
        Target = v2;
        return *this;
    }

    // Pre-increment: atomic by default, plain after setUseAtomic(false).
    inline CLAtomic& operator++() {
        if (bUseAtomc) increment(); else ++Target;
        return *this;
    }
    // Pre-decrement: atomic by default, plain after setUseAtomic(false).
    inline CLAtomic& operator--() {
        if (bUseAtomc) decrement(); else --Target;
        return *this;
    }
    // Post-increment: returns the previous value; atomic by default, plain
    // after setUseAtomic(false).
    inline T operator++(int) {
        if (bUseAtomc) {
            return increment();
        }
        else {
            T old = Target;
            ++Target;
            return old;
        }
    }
    // Post-decrement: returns the previous value; atomic by default, plain
    // after setUseAtomic(false).
    inline T operator--(int) {
        if (bUseAtomc) {
            return decrement();
        }
        else {
            T old = Target;
            --Target;
            return old;
        }
    }
    // +=: atomic by default, plain after setUseAtomic(false).
    inline CLAtomic& operator+=(T v) {
        if (bUseAtomc) add(v); else Target += v;
        return *this;
    }
    // -=: atomic by default (implemented as adding the negated operand),
    // plain after setUseAtomic(false).
    inline CLAtomic& operator-=(T v) {
        if (bUseAtomc) add(0 - v); else Target -= v;
        return *this;
    }
    // *=: atomic by default, plain after setUseAtomic(false).
    inline CLAtomic& operator*=(T v) {
        if (bUseAtomc) mul(v); else Target *= v;
        return *this;
    }
    // /=: atomic by default, plain after setUseAtomic(false).
    inline CLAtomic& operator/=(T v) {
        if (bUseAtomc) div(v); else Target /= v;
        return *this;
    }
    // %=: atomic by default, plain after setUseAtomic(false). The value is
    // converted to int for the remainder and the result converted back to T.
    inline CLAtomic& operator%=(int v) {
        if (!bUseAtomc) {
            Target = T(int(Target) % v);
            return *this;
        }
        T old;
        do {
            old = Target;
        } while (!CLAtomic::atomicCAS(&Target, T(int(old) % v), old));
        return *this;
    }

    // Read access: conversion to T, call syntax, and explicit-cast conversion
    // to any other type T2.
    inline operator T() { return Target; }
    inline T operator()() const { return Target; }
    template<class T2> inline operator T2() const { return (T2)Target; }

    // Binary arithmetic with a plain right-hand operand; the result is a
    // plain T and this object is not modified.
    template<class T2> inline T operator+(const T2 v) const { return Target + v; }
    template<class T2> inline T operator-(const T2 v) const { return Target - v; }
    template<class T2> inline T operator*(const T2 v) const { return Target * v; }
    template<class T2> inline T operator/(const T2 v) const { return Target / v; }
    template<class T2> inline T operator%(const T2 v) const { return Target % (int)v; }
};
// Adds the current value of a CLAtomic to a plain left-hand operand.
// The result is converted to the left operand's type T.
template<class T, class T2>
inline T operator+(const T v, const CLAtomic<T2>& tag)
{
    const auto rhs = tag();
    return v + rhs;
}
// Subtracts the current value of a CLAtomic from a plain left-hand operand.
// The result is converted to the left operand's type T.
template<class T, class T2>
inline T operator-(const T v, const CLAtomic<T2>& tag)
{
    const auto rhs = tag();
    return v - rhs;
}
// Multiplies a plain left-hand operand by the current value of a CLAtomic.
// The result is converted to the left operand's type T.
template<class T, class T2>
inline T operator*(const T v, const CLAtomic<T2>& tag)
{
    const auto rhs = tag();
    return v * rhs;
}
// Divides a plain left-hand operand by the current value of a CLAtomic.
// The result is converted to the left operand's type T.
template<class T, class T2>
inline T operator/(const T v, const CLAtomic<T2>& tag)
{
    const auto rhs = tag();
    return v / rhs;
}
// Remainder of a plain left-hand operand divided by the current value of a
// CLAtomic; the CLAtomic's value is converted to int first.
// The result is converted to the left operand's type T.
template<class T, class T2>
inline T operator%(const T v, const CLAtomic<T2>& tag)
{
    const int rhs = (int)tag();
    return v % rhs;
}
#endif
计时器定义:
#ifndef __CL_TICK_DEF__
#define __CL_TICK_DEF__
// High-resolution timer based on the Win32 performance counter.
class CLTick {
protected:
    LARGE_INTEGER lis;      // counter value captured when timing started
    LARGE_INTEGER lie;      // counter value captured by the latest getSpendTime()
    LARGE_INTEGER Freg;     // performance-counter frequency (counts per second)
public:
    // Starts timing immediately on construction.
    CLTick()
    {
        QueryPerformanceFrequency(&Freg);
        QueryPerformanceCounter(&lis);
    }

    // Starts (or restarts) timing: re-reads the counter frequency and records
    // the current counter value as the start point. Returns *this.
    CLTick& timingStart()
    {
        QueryPerformanceFrequency(&Freg);
        QueryPerformanceCounter(&lis);
        return *this;
    }

    // Returns the time elapsed since the start point, in seconds (elapsed
    // counts divided by the counter frequency).
    // saveToStart: when true, the current counter value becomes the new start
    // point, so the next call measures from this moment.
    double getSpendTime(bool saveToStart = false)
    {
        QueryPerformanceCounter(&lie);
        const auto elapsedCounts = lie.QuadPart - lis.QuadPart;
        const double seconds = double(elapsedCounts) / double(Freg.QuadPart);
        if (saveToStart) {
            lis = lie;
        }
        return seconds;
    }
};
#endif