在上一篇文章中介绍了SQLite怎么用Linux中的记录锁来实现每一种类型的事务锁。但这种方式只适合多进程间的互斥,不适合多线程:在Linux中,记录锁是以进程为单位持有的,对同一个文件,一个进程里的多个线程共用同一把锁。这样就会出现以下问题:一个线程拥有共享锁时,另一个线程再获取独占锁并不会被排斥,仅仅是把当前进程的锁改为独占锁;一个线程占有的锁还可能被另一个线程释放,等等。这就破坏了数据库中多事务的隔离性。所以SQLite在进程内部自己维护着每个事务对文件锁的拥有情况。
1.inode
为了管理进程内部的锁,首先需要对每个文件做唯一标识,SQLite使用文件的inode(设备号加inode号)作为唯一标识。要注意不能用文件名或文件描述符,因为Linux中一个文件可能是另一个文件的硬链接或软链接(软链接可以理解成类似快捷方式的东西),它们虽然文件名不同,打开得到的文件描述符也不同,但实际上对应着同一个文件。
每打开一个事务连接,SQLite会通过fstat()函数寻找对应的文件的inode,并关联到连接句柄中。如果是第一次打开文件,则把对应的inode插到全局链表inodeList中。
/*
** Look up the unixInodeInfo entry that describes the file open on pFile,
** creating and linking a new entry at the head of inodeList when none
** exists yet.  The lookup key is built from the device number plus either
** the inode number or, on VxWorks, pFile->pId.
**
** On success a reference is added to the entry (nRef is set to 1 for a new
** entry, incremented for an existing one), the entry is stored in *ppInode
** and SQLITE_OK is returned.  If fstat fails, errno is recorded on pFile
** and SQLITE_IOERR is returned.  If the allocation fails,
** SQLITE_NOMEM_BKPT is returned.  *ppInode is left untouched on failure.
**
** The caller must hold the unix mutex (asserted below).
*/
static int findInodeInfo(
  unixFile *pFile,               /* Unix file with file desc used in the key */
  unixInodeInfo **ppInode        /* Return the unixInodeInfo object here */
){
  struct stat sStat;             /* Result of fstat() on the file descriptor */
  struct unixFileId key;         /* Search key for the inode list */
  unixInodeInfo *pEntry;         /* Entry found in, or added to, inodeList */

  assert( unixMutexHeld() );

  /* Ask the OS for the low-level identity of the open file. */
  if( osFstat(pFile->h, &sStat)!=0 ){
    storeLastErrno(pFile, errno);
    return SQLITE_IOERR;
  }

  /* Zero the key first so that padding bytes compare equal in memcmp(). */
  memset(&key, 0, sizeof(key));
  key.dev = sStat.st_dev;
#if OS_VXWORKS
  key.pId = pFile->pId;
#else
  key.ino = (u64)sStat.st_ino;
#endif

  /* Linear scan of the list for an entry with a matching key. */
  for(pEntry=inodeList; pEntry; pEntry=pEntry->pNext){
    if( memcmp(&key, &pEntry->fileId, sizeof(key))==0 ) break;
  }

  if( pEntry ){
    /* Already known: just take another reference. */
    pEntry->nRef++;
  }else{
    /* First open of this file: allocate an entry and push it on the front
    ** of the doubly-linked list. */
    pEntry = sqlite3_malloc64( sizeof(*pEntry) );
    if( pEntry==0 ){
      return SQLITE_NOMEM_BKPT;
    }
    memset(pEntry, 0, sizeof(*pEntry));
    memcpy(&pEntry->fileId, &key, sizeof(key));
    pEntry->nRef = 1;
    pEntry->pPrev = 0;
    pEntry->pNext = inodeList;
    if( inodeList ){
      inodeList->pPrev = pEntry;
    }
    inodeList = pEntry;
  }

  *ppInode = pEntry;
  return SQLITE_OK;
}
在SQLite中用unixInodeInfo对象来保存每个inode相关信息,每个文件只能有一个inode,这里记录了该文件在进程内锁的分配情况,每一个事务连接都有一个pInode指针来指向unixInodeInfo对象。
下图描述了一个进程内的3个事务打开同一个数据库文件,其中有2个线程,其中一个线程包含2个事务。
在unixFile对象里,h为每个连接打开的文件描述符,eFileLock为该连接(事务)当前持有的锁的类型。在unixInodeInfo对象里,nRef为引用该文件的连接数,eFileLock为所有连接中等级最高的锁,nShared为当前持有共享锁的连接个数。
2.锁的实现
在sqlite中事务锁的接口为:
int sqlite3OsLock(sqlite3_file *id, int lockType)
int sqlite3OsUnlock(sqlite3_file *id, int lockType)
在Linux中对应的接口是:
static int unixLock(sqlite3_file *id, int eFileLock)
static int unixUnlock(sqlite3_file *id, int eFileLock)
下面就来讲解SQLite是如何在进程内部管理锁的分配,每个锁的对应状态的宏定义如下,数字越大等级越高
/* Lock levels used by unixLock()/unixUnlock(). A larger value means a
** higher (stronger) lock level. */
#define NO_LOCK 0
#define SHARED_LOCK 1
#define RESERVED_LOCK 2
/* PENDING_LOCK is never requested explicitly by callers of unixLock(). */
#define PENDING_LOCK 3
#define EXCLUSIVE_LOCK 4
加锁时传入的参数为连接句柄和锁的类型,unixLock()实现了具体的流程,pFile->eFileLock表示当前连接的锁,eFileLock为当前连接需要获取的锁,pInode->eFileLock为文件在当前进程中的锁,以下是这几个变量的关系
if (pFile->eFileLock>=eFileLock)
return SQLITE_OK
else if(pFile->eFileLock != pInode->eFileLock && pInode->eFileLock >= PENDING_LOCK)
此时必定有其他连接申请了独占锁
return SQLITE_BUSY
else
switch (eFileLock)
case NO_LOCK:
此时必定有pFile->eFileLock>=eFileLock,所以不
可能出现这种情况。
case SHARED_LOCK:
此时pFile->eFileLock == NO_LOCK,pInode->
eFileLock <= RESERVED_LOCK,可以正常申请锁
case RESERVED_LOCK:
此时pFile->eFileLock == SHARED_LOCK,如果
pInode->eFileLock==RESERVED_LOCK将返回busy
case EXCLUSIVE_LOCK:
如果pInode->eFileLock==RESERVED_LOCK而pFile->eFileLock
!=RESERVED_LOCK(即Reserved锁由其他连接持有)将返回
busy;如果pInode->nShared>1(其他连接还持有共享锁)
也会返回busy
源代码如下:
/*
** Raise the lock held through file handle id to level eFileLock
** (SHARED_LOCK, RESERVED_LOCK or EXCLUSIVE_LOCK).
**
** Return values:
**   SQLITE_OK    The handle already holds eFileLock or higher, or the lock
**                was obtained.
**   SQLITE_BUSY  The process-wide state recorded in pFile->pInode, or a
**                failed unixFileLock() call mapped by
**                sqliteErrorFromPosixError(), prevents the lock.
**   other        Whatever sqliteErrorFromPosixError() returns for the failed
**                call, or SQLITE_IOERR_UNLOCK if dropping the temporary
**                PENDING lock fails. For non-BUSY errors the errno value is
**                saved with storeLastErrno().
**
** When an EXCLUSIVE_LOCK request gets past the PENDING step but then fails,
** both pFile->eFileLock and pInode->eFileLock are left at PENDING_LOCK.
**
** All access to pInode happens between unixEnterMutex() and
** unixLeaveMutex().
*/
static int unixLock(sqlite3_file *id, int eFileLock){
int rc = SQLITE_OK;
unixFile *pFile = (unixFile*)id;
unixInodeInfo *pInode;
struct flock lock;
int tErrno = 0;
assert( pFile );
/* Check 1: nothing to do unless the requested level is higher than the
** level this handle already holds. */
if( pFile->eFileLock>=eFileLock ){
return SQLITE_OK;
}
/* Make sure the locking sequence is correct.
** (1) We never move from unlocked to anything higher than shared lock.
** (2) SQLite never explicitly requests a pending lock.
** (3) A shared lock is always held when a reserve lock is requested.
*/
/* Assertion 1: from NO_LOCK the only legal request is SHARED_LOCK. */
assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK );
/* Assertion 2: PENDING_LOCK is internal and is never requested directly. */
assert( eFileLock!=PENDING_LOCK );
/* Assertion 3: RESERVED_LOCK may only be requested on top of SHARED_LOCK. */
assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK );
/* This mutex is needed because pFile->pInode is shared across threads
*/
unixEnterMutex();
/* Fetch the inode record associated with this handle. */
pInode = pFile->pInode;
/* If some thread using this PID has a lock via a different unixFile*
** handle that precludes the requested lock, return BUSY.
**
** Concretely: BUSY is returned when this handle's level differs from the
** level recorded on the inode and either the inode is already at
** PENDING_LOCK or above, or the request is for more than SHARED_LOCK.
** NOTE(review): this appears to rely on pInode->eFileLock never being lower
** than pFile->eFileLock - confirm against unixUnlock().
*/
if( (pFile->eFileLock!=pInode->eFileLock &&
(pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK))
){
rc = SQLITE_BUSY;
goto end_lock;
}
/* If a SHARED lock is requested, and some thread using this PID already
** has a SHARED or RESERVED lock, then increment reference counts and
** return SQLITE_OK.
**
** No unixFileLock() call is made on this path; only the bookkeeping in
** pFile and pInode is updated.
*/
if( eFileLock==SHARED_LOCK &&
(pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){
assert( eFileLock==SHARED_LOCK );
assert( pFile->eFileLock==0 );
assert( pInode->nShared>0 );
pFile->eFileLock = SHARED_LOCK;
pInode->nShared++;
pInode->nLock++;
goto end_lock;
}
/* A PENDING lock is needed before acquiring a SHARED lock and before
** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
** be released.
**
** The PENDING byte is read-locked for a SHARED request and write-locked for
** an EXCLUSIVE request. The step is skipped for an EXCLUSIVE request when
** this handle is already at PENDING_LOCK.
*/
lock.l_len = 1L;
lock.l_whence = SEEK_SET;
if( eFileLock==SHARED_LOCK
|| (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK)
){
lock.l_type = (eFileLock==SHARED_LOCK?F_RDLCK:F_WRLCK);
lock.l_start = PENDING_BYTE;
/* On failure bail out without touching the recorded lock state.
** Presumably a failure here means another process holds a conflicting
** lock on the PENDING byte - TODO confirm. */
if( unixFileLock(pFile, &lock) ){
tErrno = errno;
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
if( rc!=SQLITE_BUSY ){
storeLastErrno(pFile, tErrno);
}
goto end_lock;
}
}
/* If control gets to this point, then actually go ahead and make
** operating system calls for the specified lock.
*/
if( eFileLock==SHARED_LOCK ){
assert( pInode->nShared==0 );
assert( pInode->eFileLock==0 );
assert( rc==SQLITE_OK );
/* Now get the read-lock */
lock.l_start = SHARED_FIRST;
lock.l_len = SHARED_SIZE;
/* lock.l_type is still F_RDLCK from the PENDING step above. A failure is
** remembered in rc but the PENDING lock is still dropped below. */
if( unixFileLock(pFile, &lock) ){
tErrno = errno;
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
}
/* Drop the temporary PENDING lock */
lock.l_start = PENDING_BYTE;
lock.l_len = 1L;
lock.l_type = F_UNLCK;
/* An unlock failure is only reported when the read-lock itself succeeded,
** so an earlier error is not overwritten. */
if( unixFileLock(pFile, &lock) && rc==SQLITE_OK ){
/* This could happen with a network mount */
tErrno = errno;
rc = SQLITE_IOERR_UNLOCK;
}
if( rc ){
if( rc!=SQLITE_BUSY ){
storeLastErrno(pFile, tErrno);
}
goto end_lock;
}else{
/* Record the first SHARED lock taken by this process on the file. */
pFile->eFileLock = SHARED_LOCK;
pInode->nLock++;
pInode->nShared = 1;
}
}else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){
/* We are trying for an exclusive lock but another thread in this
** same process is still holding a shared lock. */
rc = SQLITE_BUSY;
}else{
/* The request was for a RESERVED or EXCLUSIVE lock. It is
** assumed that there is a SHARED or greater lock on the file
** already.
*/
assert( 0!=pFile->eFileLock );
lock.l_type = F_WRLCK;
assert( eFileLock==RESERVED_LOCK || eFileLock==EXCLUSIVE_LOCK );
if( eFileLock==RESERVED_LOCK ){
lock.l_start = RESERVED_BYTE;
lock.l_len = 1L;
}else{
lock.l_start = SHARED_FIRST;
lock.l_len = SHARED_SIZE;
}
/* Write-lock the chosen range. Presumably this fails when another process
** holds a conflicting lock on that range - TODO confirm. */
if( unixFileLock(pFile, &lock) ){
tErrno = errno;
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
if( rc!=SQLITE_BUSY ){
storeLastErrno(pFile, tErrno);
}
}
}
/* Record the outcome. On success both the handle and the inode move to the
** requested level. If an EXCLUSIVE request failed at this stage, both are
** recorded as PENDING_LOCK instead. */
if( rc==SQLITE_OK ){
pFile->eFileLock = eFileLock;
pInode->eFileLock = eFileLock;
}else if( eFileLock==EXCLUSIVE_LOCK ){
pFile->eFileLock = PENDING_LOCK;
pInode->eFileLock = PENDING_LOCK;
}
end_lock:
unixLeaveMutex();
return rc;
}
unixUnlock的实现就比较简单,在此简单说明一下:
if (pFile->eFileLock<=eFileLock)
此时持有的锁等级已经不高于目标等级,无需任何操作
return SQLITE_OK
if(pFile->eFileLock > SHARED_LOCK)
此时根据锁的协议,不可能存在由独占锁降到Reserved锁
的情况,所以eFileLock为NO_LOCK或共享锁
if(eFileLock == SHARED_LOCK)
把锁页中的Shared byte区域设为读锁
清除Pending byte和Reserved byte区域的写锁
if(eFileLock == NO_LOCK)
pInode->nShared--
if(pInode->nShared == 0)
清除这个文件的所有锁