本文参考了《奔跑吧 Linux 内核》,并加上了自己的理解。
linux 读写信号量, 奔跑吧linux 内核的作者使用 十进制来解释 count 数值的变化,发现这样非常好理解。
rw_semaphore 我之前一直没有从代码层面理解它是如何实现的,只知道理论上可以有多个读者同时去读,如果有写者就不能去读。最近看了这块代码,实现上是这样的:如果等待队列中有写者,就把排在写者之前的读者唤醒,写者仍然留在等待队列中;如果等待队列中写者之前没有读者,那就只唤醒写者,写者后面的任务继续在等待队列中等待。
/*
 * Bias values that make up the rwsem count word.
 *
 * The bits covered by RWSEM_ACTIVE_MASK count the active lockers; the
 * (negative) waiting bias lives in the bits above the mask.  The mask is
 * 16 bits wide on 32-bit kernels and 32 bits wide on 64-bit kernels.
 */
#if defined(CONFIG_64BIT)
# define RWSEM_ACTIVE_MASK		0xffffffffL
#else
# define RWSEM_ACTIVE_MASK		0x0000ffffL
#endif

/* Count value of a semaphore nobody holds or waits for. */
#define RWSEM_UNLOCKED_VALUE		0L
/* Contribution of one active locker. */
#define RWSEM_ACTIVE_BIAS		1L
/* Negative bias: -65536 with the 16-bit mask, -0x100000000 with the 32-bit one. */
#define RWSEM_WAITING_BIAS		(-RWSEM_ACTIVE_MASK - 1)
/* A reader adds exactly one active locker. */
#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
/* A writer adds the waiting bias plus one active locker. */
#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
这些宏定义用十六进制表示,之前我也觉得太难理解,看到《奔跑吧 Linux 内核》用十进制描述后感觉比较好理解。用32位理解了原理,换成64位就只是数字的变化了。
RWSEM_ACTIVE_MASK 在32位系统和64位系统中的定义不相同。64位的 0xffffffff 转换成十进制是一个非常大的数字(4294967295)。
RWSEM_WAITING_BIAS 在64位是 0xFFFFFFFF00000000
RWSEM_ACTIVE_READ_BIAS 64位还是1
RWSEM_ACTIVE_WRITE_BIAS 是0xFFFFFFFF00000001
读者在获取锁的时候会调用 __down_read 函数
/*
 * Take the semaphore for reading (fast path).
 *
 * Atomically adds one to sem->count with acquire ordering.  If the new
 * count is zero or negative, a negative bias is present in the count and
 * the read lock cannot be granted here, so the slow path
 * rwsem_down_read_failed() is taken instead.
 */
static inline void __down_read(struct rw_semaphore *sem)
{
	long newcount = atomic_long_inc_return_acquire(&sem->count);

	if (unlikely(newcount <= 0))
		rwsem_down_read_failed(sem);
}
函数会对 count 进行原子加1操作。在读写信号量中,每当有读者拿锁,count 就会加1;如果是写者,则会加上 RWSEM_ACTIVE_WRITE_BIAS(这是一个负值,32位系统上是 -65535)。因此如果加1之后 count 小于等于0,说明当前有写者持有锁,或者等待队列中已经有任务在等待,读者获取锁失败,只能进入 rwsem_down_read_failed 函数。
/*
 * Wait for the read lock to be granted.
 *
 * Queues the current task as a read waiter on sem->wait_list, removes the
 * reader's own active count from sem->count, wakes the head of the queue
 * when that is possible, and then sleeps until a waker clears waiter.task.
 * Presumably reached through rwsem_down_read_failed() after the fast path
 * in __down_read() failed (the wrapper is not shown here).
 *
 * @sem:   the semaphore being acquired
 * @state: task state used while sleeping; also passed to
 *         signal_pending_state() to decide whether a signal aborts the wait
 *
 * Returns @sem once the lock has been granted, or ERR_PTR(-EINTR) when a
 * signal is pending and the waiter is still queued.
 */
static inline struct rw_semaphore __sched *
__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
{
	/* RWSEM_ACTIVE_READ_BIAS is 1: start by undoing the fast-path +1 */
	long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
	struct rwsem_waiter waiter;
	DEFINE_WAKE_Q(wake_q);

	waiter.task = current;	/* the task that is going to wait */
	waiter.type = RWSEM_WAITING_FOR_READ;

	raw_spin_lock_irq(&sem->wait_lock);
	if (list_empty(&sem->wait_list))
		/*
		 * We are the first waiter, so also add RWSEM_WAITING_BIAS to
		 * mark the queue as non-empty: -65536 with the 16-bit mask,
		 * 0xFFFFFFFF00000000 with the 32-bit mask.
		 */
		adjustment += RWSEM_WAITING_BIAS;
	list_add_tail(&waiter.list, &sem->wait_list);	/* queue at the tail */

	/*
	 * we're now waiting on the lock, but no longer actively locking:
	 * this takes back the +1 that __down_read() added
	 */
	count = atomic_long_add_return(adjustment, &sem->count);

	/*
	 * If there are no active locks, wake the front queued process(es).
	 *
	 * If there are no writers and we are first in the queue,
	 * wake our own waiter to join the existing active readers !
	 *
	 * Worked example for the first condition (16-bit mask, queue empty
	 * on entry): a writer holds the lock, count = -65535; our fast path
	 * made it -65534; the adjustment (RWSEM_WAITING_BIAS - 1) makes it
	 * -131071; once the writer releases (+65535) the count is exactly
	 * RWSEM_WAITING_BIAS.  With the 32-bit mask the same steps apply
	 * with 0xFFFFFFFF00000001 as the write bias and 0xFFFFFFFF00000000
	 * as the waiting bias.
	 *
	 * In the second condition, adjustment != -RWSEM_ACTIVE_READ_BIAS
	 * holds only when the queue was empty and we added the waiting bias
	 * ourselves; count > RWSEM_WAITING_BIAS then means the active part
	 * of the count is non-zero on top of that single waiting bias.
	 */
	if (count == RWSEM_WAITING_BIAS ||
	    (count > RWSEM_WAITING_BIAS &&
	     adjustment != -RWSEM_ACTIVE_READ_BIAS))
		/* collect the waiter(s) to wake into wake_q */
		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);

	raw_spin_unlock_irq(&sem->wait_lock);
	wake_up_q(&wake_q);	/* wake the readers or the writer collected above */

	/* wait to be given the lock */
	while (true) {
		set_current_state(state);
		if (!waiter.task)
			break;
		if (signal_pending_state(state, current)) {
			raw_spin_lock_irq(&sem->wait_lock);
			/* still queued: give up; wait_lock stays held for out_nolock */
			if (waiter.task)
				goto out_nolock;
			/* the lock was granted in the meantime */
			raw_spin_unlock_irq(&sem->wait_lock);
			break;
		}
		/*
		 * Sleep until woken.  The waker clears waiter.task when it
		 * grants the lock (see __rwsem_mark_wake()), which ends this
		 * loop.
		 */
		schedule();
	}

	__set_current_state(TASK_RUNNING);
	return sem;
out_nolock:
	list_del(&waiter.list);
	/* we were the last waiter: drop the waiting bias again */
	if (list_empty(&sem->wait_list))
		atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
	raw_spin_unlock_irq(&sem->wait_lock);
	__set_current_state(TASK_RUNNING);
	return ERR_PTR(-EINTR);
}
在调用__rwsem_mark_wake 函数的时候传递了参数 RWSEM_WAKE_ANY 这是一个枚举值,
/* Selects which waiters __rwsem_mark_wake() is allowed to wake. */
enum rwsem_wake_type {
	/* Wake whatever's at head of wait list */
	RWSEM_WAKE_ANY,
	/* Wake readers only */
	RWSEM_WAKE_READERS,
	/* Waker thread holds the read lock */
	RWSEM_WAKE_READ_OWNED,
};
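前两个很好理解,第三个一开始没理解其含义。从下面 __rwsem_mark_wake 的代码看,RWSEM_WAKE_READ_OWNED 表示发起唤醒的线程自己已经持有读锁,因此会跳过"先给 count 加1、试探是否有写者偷锁"的那一步,直接去统计并唤醒队列头部的读者。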
/*
 * Pick the waiter(s) at the head of sem->wait_list that may run now and
 * add them to @wake_q; the caller wakes them later with wake_up_q().
 *
 * - If the head waiter is a writer, it is queued for wakeup only when
 *   @wake_type is RWSEM_WAKE_ANY, and nothing else is done.
 * - Otherwise every reader in front of the first queued writer is
 *   granted the lock: sem->count is adjusted for all of them, they are
 *   removed from the wait list, their waiter->task is cleared and they
 *   are added to @wake_q.
 *
 * @sem:       the semaphore whose wait list is examined
 * @wake_type: which waiters may be woken (see enum rwsem_wake_type)
 * @wake_q:    wake queue that collects the tasks to wake
 */
static void __rwsem_mark_wake(struct rw_semaphore *sem,
			      enum rwsem_wake_type wake_type,
			      struct wake_q_head *wake_q)
{
	struct rwsem_waiter *waiter, *tmp;
	long oldcount, woken = 0, adjustment = 0;
	struct list_head wlist;

	/*
	 * Take a peek at the queue head waiter such that we can determine
	 * the wakeup(s) to perform.
	 */
	waiter = list_first_entry(&sem->wait_list, struct rwsem_waiter, list);

	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
		if (wake_type == RWSEM_WAKE_ANY) {
			/*
			 * Mark writer at the front of the queue for wakeup.
			 * Until the task is actually later awoken later by
			 * the caller, other writers are able to steal it.
			 * Readers, on the other hand, will block as they
			 * will notice the queued writer.
			 *
			 * Only the writer at the head is queued; readers
			 * behind it are left alone.  The task is merely put
			 * on wake_q here, the caller wakes it later with
			 * wake_up_q().
			 */
			wake_q_add(wake_q, waiter->task);
		}

		return;
	}

	/*
	 * Writers might steal the lock before we grant it to the next reader.
	 * We prefer to do the first reader grant before counting readers
	 * so we can bail out early if a writer stole the lock.
	 */
	if (wake_type != RWSEM_WAKE_READ_OWNED) {
		adjustment = RWSEM_ACTIVE_READ_BIAS;
 try_reader_grant:
		/*
		 * Lock stealing: to find out whether a writer already took
		 * the lock, first act as one reader.  fetch_add returns the
		 * old count; the stored count has already been increased.
		 */
		oldcount = atomic_long_fetch_add(adjustment, &sem->count);
		/*
		 * The wait list is not empty here, so the count already
		 * contains one RWSEM_WAITING_BIAS.  RWSEM_ACTIVE_WRITE_BIAS
		 * is negative as well, so with a writer holding the lock the
		 * old count is below RWSEM_WAITING_BIAS.
		 */
		if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
			/*
			 * If the count is still less than RWSEM_WAITING_BIAS
			 * after removing the adjustment, it is assumed that
			 * a writer has stolen the lock. We have to undo our
			 * reader grant.
			 *
			 * No reader may be woken in that case: take the
			 * trial +1 back and return.
			 */
			if (atomic_long_add_return(-adjustment, &sem->count) <
			    RWSEM_WAITING_BIAS)
				return;

			/* Last active locker left. Retry waking readers. */
			goto try_reader_grant;
		}
		/*
		 * It is not really necessary to set it to reader-owned here,
		 * but it gives the spinners an early indication that the
		 * readers now have the lock.
		 *
		 * No writer stole the lock, so the readers queued in front
		 * of the first writer are woken below.
		 *
		 * NOTE(review): the original article states that a
		 * reader-owned rwsem stores a marker value in owner instead
		 * of a task pointer, so the holding reader cannot be found
		 * from owner when debugging a deadlock; confirm against
		 * rwsem_set_reader_owned(), which is not shown here.
		 */
		rwsem_set_reader_owned(sem);
	}

	/*
	 * Grant an infinite number of read locks to the readers at the front
	 * of the queue. We know that woken will be at least 1 as we accounted
	 * for above. Note we increment the 'active part' of the count by the
	 * number of readers before waking any processes up.
	 *
	 * We have to do wakeup in 2 passes to prevent the possibility that
	 * the reader count may be decremented before it is incremented. It
	 * is because the to-be-woken waiter may not have slept yet. So it
	 * may see waiter->task got cleared, finish its critical section and
	 * do an unlock before the reader count increment.
	 *
	 * 1) Collect the read-waiters in a separate list, count them and
	 *    fully increment the reader count in rwsem.
	 * 2) For each waiters in the new list, clear waiter->task and
	 *    put them into wake_q to be woken up later.
	 */
	/* 1st pass: count the readers queued in front of the first writer */
	list_for_each_entry(waiter, &sem->wait_list, list) {
		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
			break;

		woken++;
	}
	/* move those readers off the wait list onto the private list wlist */
	list_cut_before(&wlist, &sem->wait_list, &waiter->list);

	/* one grant per reader, minus the one already added at try_reader_grant */
	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
	if (list_empty(&sem->wait_list)) {
		/* hit end of list above */
		/*
		 * No writer was queued, so every waiter is being woken and
		 * the wait list is now empty: remove RWSEM_WAITING_BIAS so
		 * the count no longer signals waiters.
		 */
		adjustment -= RWSEM_WAITING_BIAS;
	}

	/* apply the net change computed above to the count in one step */
	if (adjustment)
		atomic_long_add(adjustment, &sem->count);

	/* 2nd pass */
	list_for_each_entry_safe(waiter, tmp, &wlist, list) {
		struct task_struct *tsk;

		tsk = waiter->task;
		get_task_struct(tsk);

		/*
		 * Ensure calling get_task_struct() before setting the reader
		 * waiter to nil such that rwsem_down_read_failed() cannot
		 * race with do_exit() by always holding a reference count
		 * to the task to wakeup.
		 */
		smp_store_release(&waiter->task, NULL);
		/*
		 * Ensure issuing the wakeup (either by us or someone else)
		 * after setting the reader waiter to nil.
		 */
		wake_q_add(wake_q, tsk);	/* collect the task for wake_up_q() */
		/* wake_q_add() already take the task ref */
		put_task_struct(tsk);
	}
}
读者释放锁
/*
 * Release a read lock.
 *
 * Atomically decrements sem->count with release ordering.  rwsem_wake()
 * is called only when the new count is below -1 and its active part
 * (the bits under RWSEM_ACTIVE_MASK) is zero.
 */
static inline void __up_read(struct rw_semaphore *sem)
{
	long remaining = atomic_long_dec_return_release(&sem->count);

	if (unlikely(remaining < -1 && !(remaining & RWSEM_ACTIVE_MASK)))
		rwsem_wake(sem);
}
读者释放锁的时候需要再减去1。在64位系统中 RWSEM_ACTIVE_MASK 是 0xffffffff,所以 (tmp & RWSEM_ACTIVE_MASK) == 0 表示 count 的低32位为0,也就是已经没有活跃的读者或写者;tmp < -1 表示 count 是负值,也就是已经加过 RWSEM_WAITING_BIAS,等待队列中有等待的任务。两个条件同时满足,说明最后一个持锁者已经离开、并且有任务在等待,于是调用 rwsem_wake 函数去唤醒。rwsem_wake 函数中有内存屏障,rwsem_wake 也是调用 __rwsem_mark_wake 唤醒任务。
写者获取信号量
/*
 * Take the semaphore for writing (fast path).
 *
 * Atomically adds RWSEM_ACTIVE_WRITE_BIAS to sem->count with acquire
 * ordering.  Any result other than RWSEM_ACTIVE_WRITE_BIAS means the
 * count was not zero beforehand, so the slow path
 * rwsem_down_write_failed() is taken.
 */
static inline void __down_write(struct rw_semaphore *sem)
{
	long newcount = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
						       &sem->count);

	if (unlikely(newcount != RWSEM_ACTIVE_WRITE_BIAS))
		rwsem_down_write_failed(sem);
}
当前 count 加上 RWSEM_ACTIVE_WRITE_BIAS 之后,如果 tmp 不等于 RWSEM_ACTIVE_WRITE_BIAS,说明相加之前 count 不是0,也就是已经有读者或者写者拿到了这个锁(或者等待队列中有任务),所以获取锁失败,
此时调用 rwsem_down_write_failed 函数
static inline struct rw_semaphore *
__rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
{
long count;
bool waiting = true; /* any queued threads before us */
struct rwsem_waiter waiter;
struct rw_semaphore *ret = sem;
DEFINE_WAKE_Q(wake_q);
bool is_first_waiter = false;
/* undo write bias from down_write operation, stop active locking */
count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count); //之前尝试获取锁的加上去的,
这里需要在减去。
/* do optimistic spinning and steal lock if possible */
if (rwsem_optimistic_spin(sem)) //这里尝试使用自旋锁一直等待锁释放,
这里会偷锁。前面读者防止偷锁和这里在自旋锁种偷锁有关系,这里并不挂到等待队列种
return sem;
/*
* Optimistic spinning failed, proceed to the slowpath
* and block until we can acquire the sem.
*/
waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_WRITE;
raw_spin_lock_irq(&sem->wait_lock);
/* account for this before adding a new element to the list */
if (list_empty(&sem->wait_list))
waiting = false;
/*
* is_first_waiter == true means we are first in the queue,
* so there is no read locks that were queued ahead of us.
*/
is_first_waiter = rwsem_list_add_per_prio(&waiter, sem);
//在rwsem_list_add_per_prio 函数中,等待队列如果不为空,优先级小于默认优先级120,会先
写者按照优先级顺序放在队列中(返回true)。如果优先级大于等于120,就直接放入到队列尾部(返回false)。如果队列是空的返回也是(true)
/* we're now waiting on the lock, but no longer actively locking */
if (waiting) { ///队列不是空会进来
count = atomic_long_read(&sem->count);
/*
* If there were already threads queued before us and there are
* no active writers, the lock must be read owned; so we try to
* wake any read locks that were queued ahead of us.
*/
if (!is_first_waiter && count > RWSEM_WAITING_BIAS) {//写者直接放在等待队列的尾部
,并且等待队列中有读者成员,只唤醒读者。
__rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
/*
* The wakeup is normally called _after_ the wait_lock
* is released, but given that we are proactively waking
* readers we can deal with the wake_q overhead as it is
* similar to releasing and taking the wait_lock again
* for attempting rwsem_try_write_lock().
*/
wake_up_q(&wake_q);
/*
* Reinitialize wake_q after use.
*/
wake_q_init(&wake_q);
}
} else
count = atomic_long_add_return(RWSEM_WAITING_BIAS, &sem->count);
/* wait until we successfully acquire the lock */
set_current_state(state);
while (true) {
if (rwsem_try_write_lock(count, sem))
break;
raw_spin_unlock_irq(&sem->wait_lock);
/* Block until there are no active lockers. */
do {
if (signal_pending_state(state, current))
goto out_nolock;
schedule();
set_current_state(state);
} while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK);
raw_spin_lock_irq(&sem->wait_lock);
}
__set_current_state(TASK_RUNNING);
list_del(&waiter.list);
raw_spin_unlock_irq(&sem->wait_lock);
return ret;
out_nolock:
__set_current_state(TASK_RUNNING);
raw_spin_lock_irq(&sem->wait_lock);
list_del(&waiter.list);
if (list_empty(&sem->wait_list))
atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
else
__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
raw_spin_unlock_irq(&sem->wait_lock);
wake_up_q(&wake_q);
return ERR_PTR(-EINTR);
}
尝试乐观自旋(optimistic spinning,注意这并不是 spinlock)
/*
 * Try to take the write lock by spinning instead of sleeping.
 *
 * With preemption disabled, takes the OSQ lock and then keeps spinning
 * for as long as rwsem_spin_on_owner() returns true, attempting
 * rwsem_try_write_lock_unqueued() on every iteration.
 *
 * @sem: the semaphore to acquire for writing
 *
 * Returns true if the write lock was taken, false otherwise.
 */
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	bool taken = false;

	preempt_disable();	/* no preemption while spinning */

	/* sem->wait_lock should not be held when doing optimistic spinning */
	/*
	 * NOTE(review): the original article describes
	 * rwsem_can_spin_on_owner() as checking that owner is set and that
	 * the owning writer is currently running on a CPU, since spinning
	 * is only worthwhile then; confirm against its definition, which
	 * is not shown here.
	 */
	if (!rwsem_can_spin_on_owner(sem))
		goto done;

	if (!osq_lock(&sem->osq))
		goto done;

	/*
	 * Optimistically spin on the owner field and attempt to acquire the
	 * lock whenever the owner changes. Spinning will be stopped when:
	 *  1) the owning writer isn't running; or
	 *  2) readers own the lock as we can't determine if they are
	 *     actively running or not.
	 */
	while (rwsem_spin_on_owner(sem)) {
		/*
		 * Try to acquire the lock
		 */
		if (rwsem_try_write_lock_unqueued(sem)) {
			taken = true;
			break;
		}

		/*
		 * When there's no owner, we might have preempted between the
		 * owner acquiring the lock and setting the owner field. If
		 * we're an RT task that will live-lock because we won't let
		 * the owner complete.
		 */
		if (!sem->owner && (need_resched() || rt_task(current)))
			break;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}
	osq_unlock(&sem->osq);
done:
	preempt_enable();
	return taken;
}