Windows在Vista之后提供了一组称为slim读写锁的API函数,网上传闻效率很高。关于这组读写锁的功能和用法在MSDN和《windows核心编程第五版》上都有详细介绍。
SRW lock function | Description |
AcquireSRWLockExclusive | Acquires an SRW lock in exclusive mode. |
AcquireSRWLockShared | Acquires an SRW lock in shared mode. |
InitializeSRWLock | Initialize an SRW lock. |
ReleaseSRWLockExclusive | Releases an SRW lock that was opened in exclusive mode. |
ReleaseSRWLockShared | Releases an SRW lock that was opened in shared mode. |
所以,在这里更关心的是这几个函数里面到底做了什么,为什么效率高。通过学习这些API的实现,希望能够使自己有所提高。这几个函数主要用到了以下几点技术:
(1)等待获取锁的线程必然会卡在AcquireSRWLockExclusive或AcquireSRWLockShared这两个函数其中之一上。这就意味着,如果线程没有获取到锁,那么这两个函数的局部变量就是有效的。这一点就使得将所有等待锁的线程串到一个链表中成为了可能。事实上,微软也就是这么做的。这两个函数中分别都存在一个结构体局部变量,大概的形式是下面这个样子:
// Wait-list node. Each thread that has to wait for the lock keeps one of these
// as a local variable and links it into the list of waiters.
struct _SyncItem
{
_SyncItem* back; // the node that was the list head when this waiter enqueued (an earlier waiter)
_SyncItem* notify; // cached pointer to the waiter to wake next; the first waiter points it at itself
_SyncItem* next; // link toward newer waiters, filled in while the list is walked through `back`
size_t shareCount; // number of shared holders recorded by the first waiter (lock word >> 4)
size_t flag; // bit 0: exclusive waiter; bit 1: still waiting (cleared by whoever wakes the node)
};
每当线程调用上述两个函数的时候,就会成为链表中的一个节点,通过_SyncItem串到链表中。串起来的目的嘛,自然是为了能够在可以获取资源的时候方便通知。
(2)函数局部变量以特定方式对齐到指定栈地址边界上。实现的手段就是使用VC的align关键字。举个例子,AcquireSRWLockExclusive和AcquireSRWLockShared中应该是使用了类似下面的一行代码:
__declspec( align( 16 ) ) _SyncItem syn = {0};
上面这行代码就是让变量syn对齐到16字节的内存地址上。也就是说变量syn地址的低4位必然是0。这样做的意义在于,微软要把低4位当做记录当前锁状态的标志量。看样子是出于效率的考虑,才借用了地址的低4位,很多事情可以放到一个原子操作中完成。
(3)使用KeyedEvent相关函数。这组函数具有如下形式:
NTSTATUS NTAPI NtCreateKeyedEvent(OUT PHANDLE handle, IN ACCESS_MASK access, IN POBJECT_ATTRIBUTES attr, IN ULONG flags);
NTSTATUS NTAPI NtWaitForKeyedEvent(IN HANDLE handle, IN PVOID key,
IN BOOLEAN alertable, IN PLARGE_INTEGER mstimeout);
NTSTATUS NTAPI NtReleaseKeyedEvent(IN HANDLE handle, IN PVOID key,
IN BOOLEAN alertable, IN PLARGE_INTEGER mstimeout);
这组KeyedEvent相关的函数并未正式公开,它们由ntdll.dll导出,从vista以后的windows版本开始提供。KeyedEvent的特点就是等在同一个事件句柄上的线程可以指定不同的关键字(key)。要唤醒等待者,不光需要这个事件句柄还需要掌握和该线程相同的关键字(key)。这个机制据说和linux下的futex机制相同,实际使用中发现还是有很大区别。关于KeyedEvent其他知识,可以问问强大的搜索引擎。
(4)使用interlockedxxx系列的函数。这一点相当关键,但是也最没有必要花文字讲解,大家都清楚。
好了,这些函数具体是怎样工作的,还是看下面的代码直接明了!代码里面的注释是在反复分析asm的时候逐步添加上的,相当随意,见谅!
rwlock.h:
#ifndef __RW_LOCK_H__
#define __RW_LOCK_H__
#include "KeyedEvent.h"
// Upper bound on the spin iterations a waiter performs before blocking on the keyed event.
#define SRWLockSpinCount 1024
// The low four bits of the lock word are state flags; the remaining bits hold
// either the shared-holder count or the address of the newest waiter node.
#define Busy_Lock 1 // someone has already acquired the lock
#define Wait_Lock 2 // someone is waiting for the lock
#define Release_Lock 4 // someone has already released the lock once (set while the wait list is being woken/optimized)
#define Mixed_Lock 8 // shared locks and exclusive locks coexist
// Strips the four flag bits from a lock word, leaving the address / count part.
#define EXTRACT_ADDR(s) ((s) & (~0xf))
// Hand-written reader/writer lock modelled on the Windows slim reader/writer (SRW) lock.
// The whole lock state lives in m_SRWLock; threads that must block wait on a keyed event,
// using the address of their stack-resident _SyncItem as the key.
// NOTE(review): the class closes m_hKeyedEvent in its destructor but does not disable
// copying, so a copied object would close the same handle twice - confirm it is never copied.
class CRWLock
{
// Wait-list node; one instance lives on the stack of every thread blocked in an acquire call.
struct _SyncItem
{
_SyncItem* back; // the node that was the list head when this waiter enqueued (an earlier waiter)
_SyncItem* notify; // cached pointer to the waiter to wake next; the first waiter points it at itself
_SyncItem* next; // link toward newer waiters, filled in while the list is walked through `back`
size_t shareCount; // number of shared holders recorded by the first waiter (lock word >> 4)
size_t flag; // bit 0: exclusive waiter; bit 1: still waiting (cleared by whoever wakes the node)
};
public:
// Creates the keyed event and initializes the lock word to the unlocked state.
CRWLock();
// Closes the keyed-event handle.
~CRWLock();
// Acquire the lock in exclusive (writer) mode; blocks until obtained.
void ExclusiveLock();
// Acquire the lock in shared (reader) mode; blocks until obtained.
void SharedLock();
// Release a lock previously taken with ExclusiveLock().
void ReleaseExclusiveLock();
// Release a lock previously taken with SharedLock().
void ReleaseSharedLock();
private:
// Internal implementations, named after the ntdll routines they were reconstructed from.
void RtlInitializeSRWLock(SRWLOCK* pSRWLock);
void RtlAcquireSRWLockExclusive(SRWLOCK* pSRWLock);
void RtlAcquireSRWLockShared(SRWLOCK* pSRWLock);
void __stdcall RtlReleaseSRWLockExclusive(SRWLOCK* pSRWLock);
void __stdcall RtlReleaseSRWLockShared(SRWLOCK *pSRWLock);
// Wakes the next waiter(s); st is the lock word the caller just installed.
void __stdcall RtlpWakeSRWLock(SRWLOCK* pSRWLock, size_t st);
// Busy-waits for a randomized number of pause iterations after a failed CAS.
void __stdcall RtlBackoff(unsigned int *pCount);
// Fills in the forward links of the wait list and caches the node to notify in the head.
void __stdcall RtlpOptimizeSRWLockList(SRWLOCK* pSRWLock, size_t st);
private:
SRWLOCK m_SRWLock; // lock word: flag bits plus share count or waiter address
HANDLE m_hKeyedEvent; // keyed event that blocked threads wait on
};
#endif
rwlock.cpp:
#include <Windows.h>
#include "rwlock.h"
#include <intrin.h>
// Breaks into the debugger when f evaluates to false. There is no build-configuration
// guard around this definition, so the check is evaluated wherever it is used.
#define ASSERT(f) ((f) || (__debugbreak(),0))
// True when the status value, converted to NTSTATUS, is non-negative.
#define NT_SUCCESS(Status) (((NTSTATUS)(Status)) >= 0)
// Creates the keyed event that blocked threads wait on and puts the lock word
// into the unlocked state.
CRWLock::CRWLock()
{
    // Give the handle a defined value first: if creation fails (and execution
    // continues past the assertion) the destructor must not close an
    // indeterminate handle value.
    m_hKeyedEvent = NULL;

    NTSTATUS ret = MyNtCreateKeyedEvent(&m_hKeyedEvent, EVENT_ALL_ACCESS, NULL, 0);
    ASSERT(NT_SUCCESS(ret));

    RtlInitializeSRWLock(&m_SRWLock);
}
// Releases the keyed-event handle held by this lock object.
CRWLock::~CRWLock()
{
    HANDLE keyedEvent = m_hKeyedEvent;
    ::CloseHandle(keyedEvent);
}
// Resets the lock word to zero: not held, no waiters.
void CRWLock::RtlInitializeSRWLock(SRWLOCK* pSRWLock)
{
    SRWLOCK& lockWord = *pSRWLock;
    lockWord.Ptr = 0;
}
// Acquires the lock in exclusive mode; returns only once the lock is owned.
// Fast path: atomically set bit 0 (Busy_Lock); if the bit was clear, the function returns at once.
// Slow path: while the lock word shows Busy_Lock, publish the stack-resident `syn` node as the
// new list head with a compare-exchange, spin for at most SRWLockSpinCount iterations waiting
// for a releaser to clear bit 1 of syn.flag, then block on the keyed event keyed by &syn.
// When the lock word is not busy, try to take it with a compare-exchange of lastStatus + 1.
// NOTE(review): every interlocked operation goes through a (volatile LONG *) cast; LONG is
// 32 bits wide on Windows, so this looks written for a 32-bit build - confirm before using on x64.
void CRWLock::RtlAcquireSRWLockExclusive(SRWLOCK *pSRWLock)
{
// 16-byte alignment keeps the low four bits of &syn zero so they can carry the flag bits.
__declspec( align( 16 ) ) _SyncItem syn = {0};
size_t newStatus;
size_t curStatus;
size_t lastStatus;
unsigned int nBackOff = 0;
char bOptimize;
if ( _interlockedbittestandset((volatile LONG *)pSRWLock, 0) )
{
lastStatus = (size_t)pSRWLock->Ptr;
while (1)
{
if ( lastStatus & Busy_Lock )// locked
{
// if ( RtlpWaitCouldDeadlock() )
// ZwTerminateProcess((HANDLE)0xFFFFFFFF, -1073741749);
syn.flag = 3;
syn.next = 0;
bOptimize = 0;
if ( lastStatus & Wait_Lock )// someone is waiting the lock earlier than me.
{
syn.notify = NULL;
syn.shareCount = 0;
syn.back = (_SyncItem *)(EXTRACT_ADDR(lastStatus));
newStatus = (size_t)&syn | lastStatus & 8 | 7;// (8==1000b: inherit the mixed-wait flag) (7==0111b)
if ( !(lastStatus & Release_Lock) )// v15 & 0100b, lock is not released now
bOptimize = 1;
}
else// i am the first one to wait the lock. (also: when only share-locks are held, nobody is waiting either)
{
syn.notify = &syn;// i must be the next one who want to wait the lock
syn.shareCount = (size_t)lastStatus >> 4;
if ( syn.shareCount > 1 )
{// share locked by other thread
newStatus = (size_t)&syn | 0xB;
}
else
{// i am the first one want owner-lock
newStatus = (size_t)&syn | 3;
}
}
//if value in lock has not been changed by other thread,
// change it with my value!
curStatus = _InterlockedCompareExchange((volatile LONG *)pSRWLock, newStatus, lastStatus);
if ( curStatus != lastStatus )// not changed by me
{
RtlBackoff(&nBackOff);
lastStatus = (size_t)pSRWLock->Ptr;
continue;
}
if ( bOptimize )
RtlpOptimizeSRWLockList(pSRWLock, newStatus);
for ( int i = SRWLockSpinCount; i>0; --i )
{
// flag(bit1) can be reset by release-lock operation in other thread
if ( !(syn.flag & 2) )
break;
_mm_pause();
}
// if flag(bit1) reset by Release operation,
// no need to wait event anymore
if ( _interlockedbittestandreset((volatile LONG *)&syn.flag, 1u) )
MyNtWaitForKeyedEvent(m_hKeyedEvent, &syn, 0, 0);
// curStatus == lastStatus here (the pre-enqueue snapshot); the next pass retries from
// it and reloads the lock word if its compare-exchange fails.
lastStatus = curStatus;
}
else
{
// try to get lock
if ( _InterlockedCompareExchange((volatile LONG *)pSRWLock, lastStatus + 1, lastStatus) == lastStatus )
return;// get lock successfully.
// status of the lock was changed by other thread
// get lock failed
RtlBackoff(&nBackOff);
lastStatus = (size_t)pSRWLock->Ptr;
}
}
}
return;
}
// Acquires the lock in shared mode; returns only once the lock is owned.
// Fast path: compare-exchange the lock word from 0 to 17 (share count 1 in the upper bits, Busy_Lock set).
// Slow path: if the lock is busy and either has waiters or carries no share count, enqueue the
// stack-resident `syn` node, spin for at most SRWLockSpinCount iterations, then block on the
// keyed event; otherwise try to join the current holders with a compare-exchange.
// NOTE(review): every interlocked operation goes through a (volatile LONG *) cast; LONG is
// 32 bits wide on Windows, so this looks written for a 32-bit build - confirm before using on x64.
void CRWLock::RtlAcquireSRWLockShared(SRWLOCK *pSRWLock)
{
// 16-byte alignment keeps the low four bits of &syn zero so they can carry the flag bits.
__declspec( align( 16 ) ) _SyncItem syn = {0};
size_t newStatus;
size_t curStatus;
size_t lastStatus;
unsigned int nBackOff = 0;
char bOptimize;
lastStatus = _InterlockedCompareExchange((volatile LONG *)pSRWLock, 17, 0);
if ( lastStatus )// someone is looking at the lock
{
while ( 1 )
{
// a shared acquire only has to wait when someone holds the lock exclusively
// x & 1: someone has acquired the lock
// x & Wait_Lock != 0: someone is waiting for the lock to be released (an exclusive lock must be involved)
// (x & 0xFFFFFFF0) == 0: the lock is held exclusively, but possibly nobody is waiting yet
if ( lastStatus & Busy_Lock && ((lastStatus & Wait_Lock) != 0 || !(EXTRACT_ADDR(lastStatus))) )
{
// if ( RtlpWaitCouldDeadlock() )
// ZwTerminateProcess((HANDLE)0xFFFFFFFF, -1073741749);
syn.flag = 2;
syn.shareCount = 0;
bOptimize = 0;
syn.next = 0;
if ( lastStatus & Wait_Lock )// someone is waiting the lock earlier than me.
{
syn.back = (_SyncItem *)(EXTRACT_ADDR(lastStatus));
newStatus = (size_t)&syn | lastStatus & 9 | 6;// 9==1001 , 6==0110 (bit0 of lastStatus is always 1 here, so this equals (x & 8) | 7)
syn.notify = NULL;
if ( !(lastStatus & Release_Lock) )//(bit2 not set) lock is not released now.
bOptimize = 1;
}
else // i am the first one to wait the lock.
{
syn.notify = &syn;
newStatus = (size_t)&syn | 3;// 3==0011b
}
curStatus = _InterlockedCompareExchange((volatile LONG *)pSRWLock, newStatus, lastStatus);
if ( curStatus == lastStatus )
{
if ( bOptimize )
{
RtlpOptimizeSRWLockList(pSRWLock, newStatus);
}
for(int i = SRWLockSpinCount; i > 0; --i)
{
if ( !(syn.flag & 2) )// flag(bit1) can be reset by release-lock operation
break;
_mm_pause();
}
// if flag(bit1) is reset by release-lock operation
// no need to wait event anymore
if ( _interlockedbittestandreset((volatile LONG *)&syn.flag, 1u) )
MyNtWaitForKeyedEvent(m_hKeyedEvent, &syn, 0, 0);
}
else
{
RtlBackoff(&nBackOff);
curStatus = (size_t)pSRWLock->Ptr;
}
}
else
{
if ( lastStatus & Wait_Lock )// 2 == 0010b: there are waiters, but the branch above was not taken, so bit0 has been cleared
newStatus = lastStatus + 1;// (the lock is in a transitional state; take it directly, whatever kind of waiter is queued)
else// currently share-locked; nobody holds or is waiting for the exclusive lock
newStatus = (lastStatus + 16) | 1;
// try to get lock
if ( lastStatus == _InterlockedCompareExchange((volatile LONG *)pSRWLock, newStatus, lastStatus))
return;// get lock successfully
// status of the lock was changed by other thread
// get lock failed
RtlBackoff(&nBackOff);
curStatus = (size_t)pSRWLock->Ptr;
}
lastStatus = curStatus;
}
}
return;
}
// Releases a lock held in exclusive mode.
// Atomically subtracts 1 from the lock word (which clears Busy_Lock when it was set) and, if
// waiters are queued and Release_Lock is not already set, sets Release_Lock with a
// compare-exchange and hands the wake-up to RtlpWakeSRWLock.
// Asserts if Busy_Lock was not set, i.e. the lock was not owned.
void __stdcall CRWLock::RtlReleaseSRWLockExclusive(SRWLOCK* pSRWLock)
{
size_t newStatus;
size_t curStatus;
size_t lastWaiter; // lock word as it was before the subtraction
lastWaiter = InterlockedExchangeAdd((volatile LONG *)pSRWLock, -1); // reset lock flag
if ( !(lastWaiter & Busy_Lock) ) // bit0 != 1
{
ASSERT("STATUS_RESOURCE_NOT_OWNED" && 0);
}
if ( lastWaiter & Wait_Lock && // some one is waiting (0010b)
!(lastWaiter & Release_Lock) ) // lock is not released, bit2==0(0100b)
{
newStatus = lastWaiter - 1; // reset lock flag
curStatus = InterlockedCompareExchange((volatile LONG *)pSRWLock, newStatus | Release_Lock, newStatus); // set released flag, set bit2 (0100b)
// lock is not changed by others, and now it is changed with my new value
if ( curStatus == newStatus )
RtlpWakeSRWLock(pSRWLock, (newStatus | Release_Lock));
}
return;
}
// Releases a lock held in shared mode.
// Fast path: compare-exchange the lock word from 17 (single sharer, no waiters) to 0.
// Otherwise: with no waiters, decrement the share count stored in the lock word (or reset the
// word to 0); with waiters, decrement the share count kept in the notify node when Mixed_Lock
// is set, and once this is the last sharer clear Busy_Lock/Mixed_Lock and wake the waiters
// unless Release_Lock is already set.
// Asserts if Busy_Lock was not set, i.e. the lock was not owned.
void __stdcall CRWLock::RtlReleaseSRWLockShared(SRWLOCK *pSRWLock)
{
size_t newStatus;
size_t curStatus;
size_t lastStatus;
_SyncItem* pLastNode;
lastStatus = InterlockedCompareExchange((volatile LONG *)pSRWLock, 0, 17);
if ( lastStatus != 17 ) // not single share lock, release lock failed.
{
if ( !(lastStatus & Busy_Lock) )
{
ASSERT("STATUS_RESOURCE_NOT_OWNED" && 0);
}
while ( 1 )
{
if ( lastStatus & Wait_Lock )
{
if ( lastStatus & Mixed_Lock ) // both kinds of lock are involved (sharers holding, others waiting)
{
pLastNode = (_SyncItem*)(EXTRACT_ADDR(lastStatus));
while (!pLastNode->notify)
{
pLastNode = pLastNode->back;
}
// a shared lock is being released, so someone must be holding it in shared mode;
// if it is held in shared mode, nobody can be holding it exclusively,
// so the share count only has to be decremented by 1:
// take the address of the notify node's share counter and decrement it atomically
if ( InterlockedDecrement((volatile LONG *)&(pLastNode->notify->shareCount)) > 0 )
{
return;
}
}
while ( 1 )
{
newStatus = lastStatus & (~0x9); //0xFFFFFFF6;// reset bit0 and bit3 (0110b)
if ( lastStatus & Release_Lock )// test bit2 is set
{
curStatus = InterlockedCompareExchange((volatile LONG *)pSRWLock, newStatus, lastStatus);// reset bit0 and bit3
if ( curStatus == lastStatus )
return ;
}
else
{
curStatus = InterlockedCompareExchange((volatile LONG *)pSRWLock, newStatus | Release_Lock, lastStatus);// set bit2(0100b)
if ( curStatus == lastStatus )
return RtlpWakeSRWLock(pSRWLock, (newStatus | Release_Lock));// set bit2(0100b)
}
lastStatus = curStatus;
}
break;
}
else
{ // only share locks exist
newStatus = (EXTRACT_ADDR(lastStatus)) <= 0x10 ? // share lock count == 0
0 : // set to not locked
lastStatus - 16; // share lock count -1
curStatus = InterlockedCompareExchange((volatile LONG *)pSRWLock, newStatus, lastStatus);
if ( curStatus == lastStatus )
break;
lastStatus = curStatus;
}
}
}
return;
}
// Wakes queued waiters. st is the lock word the caller has just installed.
// If the lock is not busy: locate the node to notify (walking `back` links from the head and
// filling in `next` links on the way). When that node is an exclusive waiter with newer
// waiters behind it, wake only that node and clear Release_Lock; otherwise reset the lock
// word to 0 and wake every node from the notify node onward.
// If the lock is busy: clear Release_Lock (status - 4) and return without waking anyone.
// A node is woken by clearing bit 1 of its flag; the keyed event is released only when the
// bit was already clear.
void __stdcall CRWLock::RtlpWakeSRWLock(SRWLOCK *pSRWLock, size_t st)
{
size_t status;
size_t curStatus;
_SyncItem* notify;
_SyncItem* tmp1;
_SyncItem* tmp2;
status = st;
while (1)
{
if(!(status & Busy_Lock))
{
// find the node that has to be notified
for(tmp1 = (_SyncItem*)(EXTRACT_ADDR(status)),notify = tmp1->notify; !notify; notify = tmp1->notify)
{
tmp2 = tmp1;
tmp1 = tmp1->back;
tmp1->next = tmp2;
}
((_SyncItem*)(EXTRACT_ADDR(status)))->notify = notify;
// notify->next != 0 means notify (v6 in the disassembly) is not the only waiter
// test flag bit0, notify is waiting owner-lock
// so only notify is woken
if ( notify->next && notify->flag & 1 )//flag
{
((_SyncItem *)(EXTRACT_ADDR(status)))->notify = notify->next;//notify
notify->next = 0;//next
_InterlockedAnd((volatile LONG *)pSRWLock, -5);
if ( !_interlockedbittestandreset((volatile LONG *)&(notify->flag), 1u) ) //flag
MyNtReleaseKeyedEvent(m_hKeyedEvent, (PVOID)notify, 0, 0);
return;
}
// notify is the only waiter,
// or notify is waiting for the shared lock:
// wake notify and every node after it (if any)
curStatus = _InterlockedCompareExchange((volatile LONG *)pSRWLock, 0, status);
if ( curStatus == status )
{// change status successfully.
tmp2 = notify;
do
{
notify = notify->next;// once a node is woken its storage may be released by its owning thread, so next must be saved first
if ( !_interlockedbittestandreset((volatile LONG *)&(tmp2->flag), 1u) ) //flag
MyNtReleaseKeyedEvent(m_hKeyedEvent, (PVOID)tmp2, 0, 0);
tmp2 = notify;
}while ( notify );
return;
}
// status was changed by other thread earlier than me
// change status failed
((_SyncItem*)(EXTRACT_ADDR(status)))->notify = notify; //notify
status = curStatus;
}
else
{
curStatus = _InterlockedCompareExchange((volatile LONG *)pSRWLock, status - 4, status);
if ( curStatus == status )
return;
status = curStatus;
}
}
}
// Randomized exponential back-off used after a failed compare-exchange on the lock word.
//
// pCount points to the caller's back-off state. 0 means "first failure": the budget starts at
// 64 pause iterations (or the function returns at once on a single-processor machine, where
// spinning cannot help). Each later call doubles the budget while it is below 0x1FFF.
// The updated budget is stored back through pCount so that the growth carries over to the
// caller's next call; without that store every call would start again from 64.
// The number of pause iterations actually executed is the budget plus a pseudo-random
// amount below the budget, taken from the low bits of the time-stamp counter.
void __stdcall CRWLock::RtlBackoff(unsigned int *pCount)
{
    unsigned int spinBudget = *pCount;

    if ( spinBudget )
    {
        if ( spinBudget < 0x1FFF )
            spinBudget *= 2;
    }
    else
    {
        // __readfsdword(24) --> teb
        // teb+48 --> peb
        // peb+100 --> NumberOfProcessors
        if ( *(DWORD *)(*(DWORD *)(__readfsdword(24) + 48) + 100) == 1 ) // number of processors (cores)
            return;
        // ================for ia64================
        // NtCurrentTeb()
        // teb+48h --> tid(64bits)
        // teb+60h --> peb(64bits)
        // peb+b8h --> NumberOfProcessors(32bits)
        spinBudget = 64;
    }

    // Persist the grown budget before the random component is mixed in.
    *pCount = spinBudget;

    // The budget is a power of two, so (budget - 1) masks the time-stamp counter
    // down to a value in [0, budget).
    spinBudget = ((spinBudget - 1) & __rdtsc()) + spinBudget;
    for ( unsigned int i = 0; i < spinBudget; i++ )
    {
        _mm_pause();
    }
    return;
}
// Called by a waiter that has just enqueued itself behind existing waiters.
// While the lock is busy: walk the list from the head through `back` links, filling in the
// `next` links, until a node with a non-null notify pointer is reached; cache that pointer in
// the head node, then clear Release_Lock (status - 4) with a compare-exchange, retrying with
// the fresh lock word on failure.
// If the lock is found not busy, the wake-up is performed via RtlpWakeSRWLock instead.
void __stdcall CRWLock::RtlpOptimizeSRWLockList(SRWLOCK* pSRWLock, size_t st)
{
size_t status;
_SyncItem* tmp1;
_SyncItem* tmp2;
size_t curStatus;
status = st;
while ( 1 )
{
if ( status & Busy_Lock )
{
tmp1 = (_SyncItem*)(EXTRACT_ADDR(status));
if ( tmp1 )
{
while ( !tmp1->notify )
{
tmp2 = tmp1;
tmp1 = (_SyncItem *)tmp1->back; // *v3 ->back pointer of list-entry
tmp1->next = tmp2; // *v3+8 ->next pointer of list-entry
}
((_SyncItem*)(EXTRACT_ADDR(status)))->notify = tmp1->notify;
}
curStatus = InterlockedCompareExchange((volatile LONG *)pSRWLock, status - 4, status); // v2-4, set v2 not released
if ( curStatus == status )
break;
status = curStatus;
}
else
{
RtlpWakeSRWLock(pSRWLock, status);
break;
}
}
return;
}
void CRWLock::ExclusiveLock()
{
RtlAcquireSRWLockExclusive(&m_SRWLock);
}
void CRWLock::SharedLock()
{
RtlAcquireSRWLockShared(&m_SRWLock);
}
void CRWLock::ReleaseExclusiveLock()
{
RtlReleaseSRWLockExclusive(&m_SRWLock);
}
void CRWLock::ReleaseSharedLock()
{
RtlReleaseSRWLockShared(&m_SRWLock);
}
KeyedEvent.h
#ifndef __KEYED_EVENT_H__
#define __KEYED_EVENT_H__
#include <windows.h>
#include <tchar.h>
// Local declaration of a counted wide-character string, needed only so that
// OBJECT_ATTRIBUTES below can be declared without extra headers.
typedef struct _UNICODE_STRING {
USHORT Length; // bytes of Buffer currently in use (per the annotation on Buffer)
USHORT MaximumLength; // capacity of Buffer in bytes (per the annotation on Buffer)
__field_bcount_part(MaximumLength, Length) PWCH Buffer;
} UNICODE_STRING;
typedef UNICODE_STRING *PUNICODE_STRING;
// Local declaration of the object-attributes structure accepted by NtCreateKeyedEvent.
// Within this file the parameter is only ever passed as NULL.
typedef struct _OBJECT_ATTRIBUTES {
ULONG Length;
HANDLE RootDirectory;
PUNICODE_STRING ObjectName;
ULONG Attributes;
PVOID SecurityDescriptor; // Points to type SECURITY_DESCRIPTOR
PVOID SecurityQualityOfService; // Points to type SECURITY_QUALITY_OF_SERVICE
} OBJECT_ATTRIBUTES;
typedef OBJECT_ATTRIBUTES *POBJECT_ATTRIBUTES;
// Status returned by the My* wrappers when ntdll.dll or the target export cannot be found.
#define STATUS_UNSUCCESSFUL ((NTSTATUS)0xC0000001L)
// Function-pointer types for the keyed-event routines that are looked up in ntdll.dll at run time.
typedef NTSTATUS (NTAPI *ft_NtCreateKeyedEvent)(OUT PHANDLE handle, IN ACCESS_MASK access/*EVENT_ALL_ACCESS*/,
IN POBJECT_ATTRIBUTES attr, IN ULONG flags);
typedef NTSTATUS (NTAPI *ft_NtWaitForKeyedEvent)(IN HANDLE handle, IN PVOID key,
IN BOOLEAN alertable, IN PLARGE_INTEGER mstimeout);
typedef NTSTATUS (NTAPI *ft_NtReleaseKeyedEvent)(IN HANDLE handle, IN PVOID key,
IN BOOLEAN alertable, IN PLARGE_INTEGER mstimeout);
// Wrappers that resolve the corresponding ntdll export and forward all arguments to it.
// Each returns the export's NTSTATUS, or STATUS_UNSUCCESSFUL when the export is unavailable.
NTSTATUS NTAPI MyNtCreateKeyedEvent(OUT PHANDLE handle, IN ACCESS_MASK access/*EVENT_ALL_ACCESS*/,
IN POBJECT_ATTRIBUTES attr, IN ULONG flags);
NTSTATUS NTAPI MyNtWaitForKeyedEvent(IN HANDLE handle, IN PVOID key,
IN BOOLEAN alertable, IN PLARGE_INTEGER mstimeout);
NTSTATUS NTAPI MyNtReleaseKeyedEvent(IN HANDLE handle, IN PVOID key,
IN BOOLEAN alertable, IN PLARGE_INTEGER mstimeout);
#endif
KeyedEvent.cpp
#include "KeyedEvent.h"
// Looks up NtCreateKeyedEvent in the already-loaded ntdll.dll and forwards the call to it.
// Returns the export's status, or STATUS_UNSUCCESSFUL when the module or export is not found.
NTSTATUS NTAPI MyNtCreateKeyedEvent( OUT PHANDLE handle, IN ACCESS_MASK access/*EVENT_ALL_ACCESS*/, IN POBJECT_ATTRIBUTES attr, IN ULONG flags )
{
    HMODULE ntdllModule = GetModuleHandle(_T("ntdll.dll"));
    if (!ntdllModule)
    {
        return STATUS_UNSUCCESSFUL;
    }

    ft_NtCreateKeyedEvent pfnCreate = (ft_NtCreateKeyedEvent)GetProcAddress(ntdllModule, "NtCreateKeyedEvent");
    if (!pfnCreate)
    {
        return STATUS_UNSUCCESSFUL;
    }

    return pfnCreate(handle, access, attr, flags);
}
// Looks up NtWaitForKeyedEvent in the already-loaded ntdll.dll and forwards the call to it.
// Returns the export's status, or STATUS_UNSUCCESSFUL when the module or export is not found.
NTSTATUS NTAPI MyNtWaitForKeyedEvent( IN HANDLE handle, IN PVOID key, IN BOOLEAN alertable, IN PLARGE_INTEGER mstimeout )
{
    HMODULE ntdllModule = GetModuleHandle(_T("ntdll.dll"));
    if (!ntdllModule)
    {
        return STATUS_UNSUCCESSFUL;
    }

    ft_NtWaitForKeyedEvent pfnWait = (ft_NtWaitForKeyedEvent)GetProcAddress(ntdllModule, ("NtWaitForKeyedEvent"));
    if (!pfnWait)
    {
        return STATUS_UNSUCCESSFUL;
    }

    return pfnWait(handle, key, alertable, mstimeout);
}
// Looks up NtReleaseKeyedEvent in the already-loaded ntdll.dll and forwards the call to it.
// Returns the export's status, or STATUS_UNSUCCESSFUL when the module or export is not found.
NTSTATUS NTAPI MyNtReleaseKeyedEvent( IN HANDLE handle, IN PVOID key, IN BOOLEAN alertable, IN PLARGE_INTEGER mstimeout )
{
    HMODULE ntdllModule = GetModuleHandle(_T("ntdll.dll"));
    if (!ntdllModule)
    {
        return STATUS_UNSUCCESSFUL;
    }

    ft_NtReleaseKeyedEvent pfnRelease = (ft_NtReleaseKeyedEvent)GetProcAddress(ntdllModule, ("NtReleaseKeyedEvent"));
    if (!pfnRelease)
    {
        return STATUS_UNSUCCESSFUL;
    }

    return pfnRelease(handle, key, alertable, mstimeout);
}