内核版本:linux-3.10
一、内核定义
tasklist_lock为例:
初始化如下,初始化的时候会将raw_lock置为:0x0000000100000000(即RW_LOCK_BIAS = 1 << 32,此时write = 1,read = 0)
__cacheline_aligned DEFINE_RWLOCK(tasklist_lock)
#define __RW_LOCK_UNLOCKED(lockname) \
(rwlock_t) { .raw_lock = __ARCH_RW_LOCK_UNLOCKED, \
RW_DEP_MAP_INIT(lockname) }
#endif
读写锁定义:其中低32位是unsigned,高32位是signed
/*
 * Architecture-level rwlock word.
 *
 * The union lets the same 8 bytes be accessed either as one signed
 * 64-bit value (lock) or as two 32-bit halves (read / write).
 */
typedef union {
/* Whole lock word, signed so that a negative value can be tested. */
s64 lock;
struct {
/* Unsigned half; declared first, so it is the low 32 bits on little-endian x86. */
u32 read;
/* Signed half; the high 32 bits of lock. */
s32 write;
};
} arch_rwlock_t;
/*
 * Lock-word constants and instruction-selection helpers for the 64-bit
 * rwlock layout.
 */
/* Unlocked value: 1 shifted into bit 32 (presumably _AC(1,L) yields 1L -- confirm). */
#define RW_LOCK_BIAS (_AC(1,L) << 32)
/* Reader-side instructions get the 'q' suffix appended (e.g. dec -> decq). */
#define READ_LOCK_SIZE(insn) __ASM_FORM(insn##q)
/* Reader-side C helpers resolve to the atomic64_* family. */
#define READ_LOCK_ATOMIC(n) atomic64_##n
/* Writer-side add/sub always expand to incl/decl; the argument n is ignored. */
#define WRITE_LOCK_ADD(n) __ASM_FORM(incl)
#define WRITE_LOCK_SUB(n) __ASM_FORM(decl)
/* Value the write field must hold for a writer to be able to take the lock. */
#define WRITE_LOCK_CMP 1
/* Static initializer for an unlocked arch_rwlock_t. */
#define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS }
比如:
crash> p tasklist_lock -x
tasklist_lock = $2 = {
raw_lock = {
lock = 0xffffffe500000000,
{
read = 0x0,
write = 0xffffffe5
}
}
}
二、原理解读
1、write try lock
判断是否write unlock,即是否等于WRITE_LOCK_CMP
/*
 * Report whether the lock currently looks available to a writer.
 *
 * @lock: the rwlock to inspect.
 *
 * Returns non-zero when the write field equals WRITE_LOCK_CMP.
 * This is a plain read and does not modify the lock.
 */
static inline int arch_write_can_lock(arch_rwlock_t *lock)
{
return lock->write == WRITE_LOCK_CMP;
}
判断write减去WRITE_LOCK_CMP后是否是0,若是0表示无人持锁
/*
 * Make a single attempt to take the lock for writing.
 *
 * @lock: the rwlock to acquire.
 *
 * Returns 1 if the write lock was taken, 0 if the attempt failed
 * (in which case the write field is restored).
 */
static inline int arch_write_trylock(arch_rwlock_t *lock)
{
/* Operate on the 32-bit write field only, viewed as an atomic_t. */
atomic_t *count = (atomic_t *)&lock->write;
/* Subtract WRITE_LOCK_CMP; success is reported when the result is zero. */
if (atomic_sub_and_test(WRITE_LOCK_CMP, count))
return 1;
/* Result was non-zero: add the value back to undo the attempt. */
atomic_add(WRITE_LOCK_CMP, count);
return 0;
}
2、read try lock
判断lock(按有符号64位整体)是否大于0,若是表示无人持写锁,此时可以获取读锁
/**
 * read_can_lock - would read_trylock() succeed?
 * @lock: the rwlock in question.
 *
 * Returns non-zero when the whole lock word, read as a signed 64-bit
 * value, is strictly positive. This is a plain read and does not
 * modify the lock.
 */
static inline int arch_read_can_lock(arch_rwlock_t *lock)
{
return lock->lock > 0;
}
将整个64位lock减去1后判断是否大于等于0,若是表示没有写者持锁(可能已有其他读者持锁),拿读锁成功;否则把1加回去并返回失败
/*
 * Make a single attempt to take the lock for reading.
 *
 * @lock: the rwlock to acquire.
 *
 * Returns 1 if the read lock was taken, 0 if the attempt failed
 * (in which case the decrement is undone).
 */
static inline int arch_read_trylock(arch_rwlock_t *lock)
{
/* View the whole lock word through the atomic64 type selected by READ_LOCK_ATOMIC. */
READ_LOCK_ATOMIC(t) *count = (READ_LOCK_ATOMIC(t) *)lock;
/* Decrement the full word; a non-negative result means the read lock is held. */
if (READ_LOCK_ATOMIC(dec_return)(count) >= 0)
return 1;
/* Result went negative: increment again to undo the attempt. */
READ_LOCK_ATOMIC(inc)(count);
return 0;
}
3、write lock
raw_lock的高s32位 减1之后,若等于0,跳转到1:拿写锁成功,并raw_lock.write = 0。
/*
 * Take the lock for writing, waiting in __write_lock_failed if needed.
 *
 * @rw: the rwlock to acquire.
 *
 * Fast path: a locked WRITE_LOCK_SUB (decl) on the write field. If the
 * result is zero (jz) the lock is held and the slow path is skipped;
 * otherwise __write_lock_failed is called with the address of the
 * write field in the LOCK_PTR_REG register.
 *
 * Operand %1 (RW_LOCK_BIAS) is passed to WRITE_LOCK_SUB, whose
 * definition ignores its argument.
 */
static inline void arch_write_lock(arch_rwlock_t *rw)
{
asm volatile(LOCK_PREFIX WRITE_LOCK_SUB(%1) "(%0)\n\t"
"jz 1f\n"
"call __write_lock_failed\n\t"
"1:\n"
::LOCK_PTR_REG (&rw->write), "i" (RW_LOCK_BIAS)
: "memory");
}
如果不为0,拿锁失败,就会调用(call)__write_lock_failed。该函数先把刚才减掉的1加回去,然后循环判断(%rdi)(从_raw_write_lock_irq反汇编可以看到,此时rdi已经指向raw_lock的write)是否等于WRITE_LOCK_CMP,如果不等于就说明锁还被写者或读者持有,然后跳转到1:继续自旋;相等之后再次尝试减1,若结果仍不为0则回到0:重新开始:
/* Register that carries the address of the lock field into the slow paths. */
# define __lock_ptr rdi
/*
 * Slow path for arch_write_lock(): entered after the fast-path
 * decrement of the write field produced a non-zero result.
 * Returns only once a decrement of that field has produced zero.
 */
ENTRY(__write_lock_failed)
CFI_STARTPROC
FRAME
/* 0: undo the failed decrement with a locked WRITE_LOCK_ADD (incl). */
0: LOCK_PREFIX
WRITE_LOCK_ADD($RW_LOCK_BIAS) (%__lock_ptr)
/* 1: spin; "rep; nop" is the encoding of the x86 PAUSE hint. */
1: rep; nop
/* Keep spinning until the field reads WRITE_LOCK_CMP again. */
cmpl $WRITE_LOCK_CMP, (%__lock_ptr)
jne 1b
/* Retry the locked decrement (decl). */
LOCK_PREFIX
WRITE_LOCK_SUB($RW_LOCK_BIAS) (%__lock_ptr)
/* Non-zero result: the retry failed, so go back, undo and wait again. */
jnz 0b
ENDFRAME
ret
CFI_ENDPROC
END(__write_lock_failed)
crash> dis _raw_write_lock_irq
0xffffffff81642400 <_raw_write_lock_irq>: nopl 0x0(%rax,%rax,1) [FTRACE NOP]
0xffffffff81642405 <_raw_write_lock_irq+5>: push %rbp
0xffffffff81642406 <_raw_write_lock_irq+6>: mov %rsp,%rbp
0xffffffff81642409 <_raw_write_lock_irq+9>: cli
0xffffffff8164240a <_raw_write_lock_irq+10>: nopw 0x0(%rax,%rax,1)
0xffffffff81642410 <_raw_write_lock_irq+16>: add $0x4,%rdi
0xffffffff81642414 <_raw_write_lock_irq+20>: lock decl (%rdi)
0xffffffff81642417 <_raw_write_lock_irq+23>: je 0xffffffff8164241e <_raw_write_lock_irq+30>
0xffffffff81642419 <_raw_write_lock_irq+25>: callq 0xffffffff81307ff0 <__write_lock_failed>
0xffffffff8164241e <_raw_write_lock_irq+30>: pop %rbp
0xffffffff8164241f <_raw_write_lock_irq+31>: retq
write释放锁的时候会将write加1:因为持锁的时候write是0,所以释放之后raw_lock.write = WRITE_LOCK_CMP(即1),在没有其他竞争者的情况下整个raw_lock.lock等于RW_LOCK_BIAS
/*
 * Release a lock held for writing.
 *
 * @rw: the rwlock to release.
 *
 * Performs a locked WRITE_LOCK_ADD (incl) on the write field.
 * Operand %1 (RW_LOCK_BIAS) is passed to WRITE_LOCK_ADD, whose
 * definition ignores its argument.
 */
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0"
: "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory");
}
4、read lock
raw_lock整体(64位)减一之后,若大于等于0(SF=0,结果非负),即跳转到1:拿读锁成功;若小于0说明是写锁被拿了,调用__read_lock_failed
/*
 * Take the lock for reading, waiting in __read_lock_failed if needed.
 *
 * @rw: the rwlock to acquire.
 *
 * Fast path: a locked READ_LOCK_SIZE(dec) (decq) on the whole lock
 * word. If the result is not negative (jns) the read lock is held and
 * the slow path is skipped; otherwise __read_lock_failed is called
 * with the lock address in the LOCK_PTR_REG register.
 */
static inline void arch_read_lock(arch_rwlock_t *rw)
{
asm volatile(LOCK_PREFIX READ_LOCK_SIZE(dec) " (%0)\n\t"
"jns 1f\n"
"call __read_lock_failed\n\t"
"1:\n"
::LOCK_PTR_REG (rw) : "memory");
}
类似的,__read_lock_failed先把减掉的1加回去,然后循环比较(%rdi)和1(即计算(%rdi)-1):如果(%rdi)为0,说明现在有人持有写锁,相减结果为负,SF=1,就会跳转到1:继续循环等待;直到(%rdi)大于等于1才再次尝试减1拿读锁
/*
 * Slow path for arch_read_lock(): entered after the fast-path
 * decrement of the lock word produced a negative result.
 * Returns only once a decrement of the word has produced a
 * non-negative result.
 */
ENTRY(__read_lock_failed)
CFI_STARTPROC
FRAME
/* 0: undo the failed decrement with a locked READ_LOCK_SIZE(inc). */
0: LOCK_PREFIX
READ_LOCK_SIZE(inc) (%__lock_ptr)
/* 1: spin; "rep; nop" is the encoding of the x86 PAUSE hint. */
1: rep; nop
/* Compare the word with 1; the sign flag is set while (word - 1) is negative. */
READ_LOCK_SIZE(cmp) $1, (%__lock_ptr)
js 1b
/* The word is at least 1: retry the locked decrement. */
LOCK_PREFIX
READ_LOCK_SIZE(dec) (%__lock_ptr)
/* Negative result: the retry failed, so go back, undo and wait again. */
js 0b
ENDFRAME
ret
CFI_ENDPROC
END(__read_lock_failed)