Continuing from the previous article: https://blog.csdn.net/qq_42693685/article/details/129469179
Definition of struct mutex:
struct mutex {
/* 1: unlocked, 0: locked, negative: locked, possible waiters */
atomic_t count;
spinlock_t wait_lock;
struct list_head wait_list;
#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
struct task_struct *owner;
#endif
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
struct optimistic_spin_queue osq; /* Spinner MCS lock */
#endif
};
count: atomic counter. 1 means the lock is free; 0 means it is held; a negative value means it is held and there are waiters on the wait queue.
wait_lock: a spinlock protecting wait_list, the sleep wait queue.
wait_list: the list of all processes sleeping on this mutex; a process that fails to acquire the lock sleeps on this list.
owner: points to the task_struct of the lock holder. As the #if above shows, the field exists when CONFIG_DEBUG_MUTEXES or CONFIG_SMP is enabled; the optimistic-spinning code selected by CONFIG_MUTEX_SPIN_ON_OWNER reads it.
osq: the MCS-style OSQ lock used to queue optimistic spinners.
Mutex initialization:
#define __MUTEX_INITIALIZER(lockname) \
{ .count = ATOMIC_INIT(1) \
, .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
, .wait_list = LIST_HEAD_INIT(lockname.wait_list) \
__DEBUG_MUTEX_INITIALIZER(lockname) \
__DEP_MAP_MUTEX_INITIALIZER(lockname) }
#define DEFINE_MUTEX(mutexname) \
struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
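For completeness, a minimal usage sketch. DEFINE_MUTEX, mutex_init, mutex_lock and mutex_unlock are the real kernel API; my_lock, my_data and my_update are made-up names for illustration:
#include <linux/mutex.h>

static DEFINE_MUTEX(my_lock);   /* statically initialized: count = 1, empty wait_list */
static int my_data;             /* shared state protected by my_lock */

static void my_update(int v)
{
	mutex_lock(&my_lock);   /* may sleep, so process context only */
	my_data = v;            /* critical section */
	mutex_unlock(&my_lock);
}
For a mutex embedded in a dynamically allocated object, mutex_init(&obj->lock) performs the same initialization at runtime.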
mutex_lock:
void __sched mutex_lock(struct mutex *lock)
{
might_sleep();
/*
* The locking fastpath is the 1->0 transition from
* 'unlocked' into 'locked' state.
*/
/* A 1->0 transition of count means nobody held the lock before us */
__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
mutex_set_owner(lock);
}
static inline void
__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
{
/* Decrement count; a negative result means someone already holds the lock, so take the slowpath */
if (unlikely(atomic_dec_return(count) < 0))
fail_fn(count);
}
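To make the count transitions concrete, here is a hedged userspace model of the fastpath/slowpath split using C11 atomics. It only mirrors the logic above, not the kernel code: the real fastpaths are per-architecture atomic code, and the two slowpath stubs stand in for the queueing and wakeup paths.
#include <stdatomic.h>
#include <stdio.h>

static atomic_int count = 1;   /* 1: unlocked, 0: locked, <0: locked + waiters */

static void slowpath_lock(void)   { /* would queue on wait_list and sleep */ }
static void slowpath_unlock(void) { /* would wake the first waiter */ }

static void model_lock(void)
{
	/* 1 -> 0 is the uncontended acquire; any other old value
	 * leaves count negative and sends us to the slowpath */
	if (atomic_fetch_sub(&count, 1) - 1 < 0)
		slowpath_lock();
}

static void model_unlock(void)
{
	/* 0 -> 1 is the uncontended release; a result <= 0 means
	 * waiters are queued and must be woken */
	if (atomic_fetch_add(&count, 1) + 1 <= 0)
		slowpath_unlock();
}

int main(void)
{
	model_lock();
	printf("locked, count = %d\n", atomic_load(&count));    /* 0 */
	model_unlock();
	printf("unlocked, count = %d\n", atomic_load(&count));  /* 1 */
	return 0;
}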
Entering the slowpath: __mutex_lock_slowpath -> __mutex_lock_common
__visible void __sched
__mutex_lock_slowpath(atomic_t *lock_count)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0,
NULL, _RET_IP_, NULL, 0);
}
__mutex_lock_common works as follows (the code is below):
mutex_can_spin_on_owner: decides whether optimistic spinning is worthwhile, i.e., whether the lock holder is currently running in its critical section (in that case the waiter and the holder are necessarily running on different CPUs).
osq_lock: if spinning is worthwhile, first take the OSQ lock, which guarantees that only one task at a time spins waiting for the mutex.
mutex_spin_on_owner: then spin, waiting for the owner to release the lock.
If spinning is not possible, the task is added to the wait_list and calls schedule_preempt_disabled to give up the CPU.
static __always_inline int __sched
__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
struct lockdep_map *nest_lock, unsigned long ip,
struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
{
struct task_struct *task = current;
struct mutex_waiter waiter;
unsigned long flags;
int ret;
preempt_disable();/* disable kernel preemption */
mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
/*
* Optimistic spinning.
*
* We try to spin for acquisition when we find that there are no
* pending waiters and the lock owner is currently running on a
* (different) CPU.
*
* The rationale is that if the lock owner is running, it is likely to
* release the lock soon.
*
* Since this needs the lock owner, and this mutex implementation
* doesn't track the owner atomically in the lock field, we need to
* track it non-atomically.
*
* We can't do this for DEBUG_MUTEXES because that relies on wait_lock
* to serialize everything.
*
* The mutex spinners are queued up using MCS lock so that only one
* spinner can compete for the mutex. However, if mutex spinning isn't
* going to happen, there is no point in going through the lock/unlock
* overhead.
*/
/* Check whether optimistic spinning is worthwhile at all */
if (!mutex_can_spin_on_owner(lock))
goto slowpath;
/*
* Take the OSQ lock before spinning on the owner. The OSQ lock
* is an MCS-style queue: only the task at the head of the queue
* spins waiting for the mutex, while every other would-be
* spinner waits on its own per-CPU queue node. So at most one
* task at a time competes for the mutex's cacheline.
* osq_lock() returns true on success and false on failure.
*/
if (!osq_lock(&lock->osq))
goto slowpath;
for (;;) {
struct task_struct *owner;
if (use_ww_ctx && ww_ctx->acquired > 0) {
struct ww_mutex *ww;
ww = container_of(lock, struct ww_mutex, base);
/*
* If ww->ctx is set the contents are undefined, only
* by acquiring wait_lock there is a guarantee that
* they are not invalid when reading.
*
* As such, when deadlock detection needs to be
* performed the optimistic spinning cannot be done.
*/
if (ACCESS_ONCE(ww->ctx))
break;
}
/*
* If there's an owner, wait for it to either
* release the lock or go to sleep.
*/
owner = ACCESS_ONCE(lock->owner);
if (owner && !mutex_spin_on_owner(lock, owner))
break;
/* count == 1 means nobody holds the lock; if so, try to grab it with cmpxchg */
if ((atomic_read(&lock->count) == 1) &&
(atomic_cmpxchg(&lock->count, 1, 0) == 1)) {
lock_acquired(&lock->dep_map, ip);
if (use_ww_ctx) {
struct ww_mutex *ww;
ww = container_of(lock, struct ww_mutex, base);
ww_mutex_set_context_fastpath(ww, ww_ctx);
}
mutex_set_owner(lock);
osq_unlock(&lock->osq);
preempt_enable();
return 0;
}
/*
* When there's no owner, we might have preempted between the
* owner acquiring the lock and setting the owner field. If
* we're an RT task that will live-lock because we won't let
* the owner complete.
*/
/*
* If there is no owner and the current task either needs
* rescheduling or is an RT task, stop spinning and fall
* through to the sleep path; otherwise keep spinning.
*/
if (!owner && (need_resched() || rt_task(task)))
break;
/*
* The cpu_relax() call is a compiler barrier which forces
* everything in this loop to be re-loaded. We don't need
* memory barriers as we'll eventually observe the right
* values at the cost of a few extra spins.
*/
arch_mutex_cpu_relax();
}
osq_unlock(&lock->osq);
slowpath:
/*
* If we fell out of the spin path because of need_resched(),
* reschedule now, before we try-lock the mutex. This avoids getting
* scheduled out right after we obtained the mutex.
*/
if (need_resched())
schedule_preempt_disabled();
#endif
/* We are about to manipulate the wait queue, so take the spinlock that protects it */
spin_lock_mutex(&lock->wait_lock, flags);
/* once more, can we acquire the lock? */
if (MUTEX_SHOW_NO_WAITER(lock) && (atomic_xchg(&lock->count, 0) == 1))
goto skip_wait;
debug_mutex_lock_common(lock, &waiter);
debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
/* add waiting tasks to the end of the waitqueue (FIFO): */
list_add_tail(&waiter.list, &lock->wait_list);
waiter.task = task;/* waiter.task points to the current task */
lock_contended(&lock->dep_map, ip);
for (;;) {
/*
* Lets try to take the lock again - this is needed even if
* we get here for the first time (shortly after failing to
* acquire the lock), to make sure that we get a wakeup once
* it's unlocked. Later on, if we sleep, this is the
* operation that gives us the lock. We xchg it to -1, so
* that when we release the lock, we properly wake up the
* other waiters:
*/
/* Try to take the lock again on every loop iteration */
if (MUTEX_SHOW_NO_WAITER(lock) &&
(atomic_xchg(&lock->count, -1) == 1))
break;
/*
* got a signal? (This code gets eliminated in the
* TASK_UNINTERRUPTIBLE case.)
*/
if (unlikely(signal_pending_state(state, task))) {
ret = -EINTR;
goto err;
}
if (use_ww_ctx && ww_ctx->acquired > 0) {
ret = __mutex_lock_check_stamp(lock, ww_ctx);
if (ret)
goto err;
}
__set_task_state(task, state);
/* didn't get the lock, go to sleep: */
spin_unlock_mutex(&lock->wait_lock, flags);
/* give up the CPU */
schedule_preempt_disabled();
spin_lock_mutex(&lock->wait_lock, flags);
}
mutex_remove_waiter(lock, &waiter, current_thread_info());
/* set it to 0 if there are no waiters left: */
if (likely(list_empty(&lock->wait_list)))
atomic_set(&lock->count, 0);
debug_mutex_free_waiter(&waiter);
skip_wait:
/* got the lock - cleanup and rejoice! */
lock_acquired(&lock->dep_map, ip);
mutex_set_owner(lock);
if (use_ww_ctx) {
struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
struct mutex_waiter *cur;
/*
* This branch gets optimized out for the common case,
* and is only important for ww_mutex_lock.
*/
ww_mutex_lock_acquired(ww, ww_ctx);
ww->ctx = ww_ctx;
/*
* Give any possible sleeping processes the chance to wake up,
* so they can recheck if they have to back off.
*/
list_for_each_entry(cur, &lock->wait_list, list) {
debug_mutex_wake_waiter(lock, cur);
wake_up_process(cur->task);
}
}
spin_unlock_mutex(&lock->wait_lock, flags);
preempt_enable();
return 0;
err:
mutex_remove_waiter(lock, &waiter, task_thread_info(task));
spin_unlock_mutex(&lock->wait_lock, flags);
debug_mutex_free_waiter(&waiter);
mutex_release(&lock->dep_map, 1, ip);
preempt_enable();
return ret;
}
mutex_can_spin_on_owner: is spinning worthwhile?
static inline int mutex_can_spin_on_owner(struct mutex *lock)
{
struct task_struct *owner;
int retval = 1;
if (need_resched())/* the current task itself needs to be rescheduled */
return 0;
rcu_read_lock();
owner = ACCESS_ONCE(lock->owner);
if (owner)
retval = owner->on_cpu;/* on_cpu == 1 means the holder is running on a CPU right now */
rcu_read_unlock();
/*
* if lock->owner is not set, the mutex owner may have just acquired
* it and not set the owner yet or the mutex has been released.
*/
return retval;
}
static noinline
int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
{
rcu_read_lock();
/*
* owner_running() returns false in two cases:
* 1. lock->owner changed, i.e., it no longer points to the task
*    we were watching because ownership changed hands;
* 2. the holder has not released the lock but was scheduled out
*    inside its critical section, so owner->on_cpu became 0.
* In both cases the current task should stop spinning. It should
* also stop when need_resched() says another task wants this CPU.
*/
while (owner_running(lock, owner)) {
if (need_resched())
break;
arch_mutex_cpu_relax();
}
rcu_read_unlock();
/*
* We break out the loop above on need_resched() and when the
* owner changed, which is a sign for heavy contention. Return
* success only when lock->owner is NULL.
*/
/* owner is set to NULL when the lock is released */
return lock->owner == NULL;
}
Because kernel preemption was disabled on entry (preempt_count was raised), schedule_preempt_disabled is used: it brings preempt_count back to 0 so the task can safely be scheduled out, and disables preemption again as soon as the task is scheduled back in. A small model after the code below traces this bookkeeping.
void __sched schedule_preempt_disabled(void)
{
sched_preempt_enable_no_resched();
schedule();
preempt_disable();
}
#define sched_preempt_enable_no_resched() \
do { \
barrier(); \
preempt_count_dec(); \
} while (0)
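A hedged userspace model of that preempt_count bookkeeping, assuming the count was 0 before mutex_lock. Everything here (the trailing-underscore names, the printed BUG message) is illustrative, not kernel API:
#include <stdio.h>

static int preempt_count;   /* model of the per-task preempt counter */

static void preempt_disable_(void) { preempt_count++; }
static void preempt_enable_(void)  { preempt_count--; }

static void schedule_(void)
{
	/* scheduling with preemption disabled would be a bug */
	if (preempt_count != 0)
		printf("BUG: scheduling while atomic (count=%d)\n", preempt_count);
}

static void schedule_preempt_disabled_(void)
{
	preempt_count--;     /* sched_preempt_enable_no_resched() */
	schedule_();         /* safe: count is back to 0 */
	preempt_count++;     /* preempt_disable() on wakeup */
}

int main(void)
{
	preempt_disable_();              /* entry to __mutex_lock_common */
	schedule_preempt_disabled_();    /* sleep in the wait loop */
	preempt_enable_();               /* lock acquired, leave */
	printf("final preempt_count = %d\n", preempt_count);  /* 0: balanced */
	return 0;
}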
mutex_unlock:
void __sched mutex_unlock(struct mutex *lock)
{
/*
* The unlocking fastpath is the 0->1 transition from 'locked'
* into 'unlocked' state:
*/
#ifndef CONFIG_DEBUG_MUTEXES
/*
* When debugging is enabled we must not clear the owner before time,
* the slow path will always be taken, and that clears the owner field
* after verifying that it was indeed current.
*/
mutex_clear_owner(lock);
#endif
__mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath);
}
static inline void
__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
{
/*
* Fastpath: increment count. A result <= 0 means there are
* waiters queued, so the slowpath must run to wake one up;
* a result > 0 means nobody is waiting.
*/
if (unlikely(atomic_inc_return(count) <= 0))
fail_fn(count);
}
As you can see, unlocking is simple: increment lock->count and wake up the task at the head of the wait queue.
Since osq_lock is already released the moment mutex_lock succeeds, there is no OSQ lock to release here.
__visible void
__mutex_unlock_slowpath(atomic_t *lock_count)
{
__mutex_unlock_common_slowpath(lock_count, 1);
}
static inline void
__mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
unsigned long flags;
/*
* some architectures leave the lock unlocked in the fastpath failure
* case, others need to leave it locked. In the latter case we have to
* unlock it here
*/
if (__mutex_slowpath_needs_to_unlock())
atomic_set(&lock->count, 1);
spin_lock_mutex(&lock->wait_lock, flags);
mutex_release(&lock->dep_map, nested, _RET_IP_);
debug_mutex_unlock(lock);
/* If the wait queue is not empty, take the first waiter and wake it up */
if (!list_empty(&lock->wait_list)) {
/* get the first entry from the wait-list: */
struct mutex_waiter *waiter =
list_entry(lock->wait_list.next,
struct mutex_waiter, list);
debug_mutex_wake_waiter(lock, waiter);
wake_up_process(waiter->task);
}
spin_unlock_mutex(&lock->wait_lock, flags);
}
Why the mutex implementation is much more efficient than a semaphore:
A mutex has an optimistic-spinning mechanism: while the holder is running, one waiter spins in place instead of sleeping, saving the cost of two context switches for short critical sections.
Before sleeping, i.e., before giving up the CPU, a waiter always tries once more to take the lock.
The MCS-based OSQ lock keeps multiple CPUs from all spinning on the same lock word, which would make the lock's cacheline bounce between CPUs; the sketch below illustrates the idea.
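On the cacheline point: with a naive spinlock every waiter spins on the same lock word, so every release invalidates a cacheline that all waiting CPUs are reading. In an MCS lock each waiter spins on a flag in its own queue node, and the releaser writes only to its successor's node. A hedged, simplified userspace sketch of the idea (this is the classic MCS algorithm, not the kernel's osq_lock):
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct mcs_node {
	_Atomic(struct mcs_node *) next;
	atomic_bool locked;        /* each waiter spins only on its OWN node */
};

static _Atomic(struct mcs_node *) tail;   /* NULL: lock is free */

static void mcs_lock(struct mcs_node *me)
{
	struct mcs_node *prev;

	atomic_store(&me->next, NULL);
	atomic_store(&me->locked, true);

	prev = atomic_exchange(&tail, me);   /* join the tail of the queue */
	if (prev) {
		atomic_store(&prev->next, me);
		while (atomic_load(&me->locked))
			;                    /* spin on our own cacheline */
	}
}

static void mcs_unlock(struct mcs_node *me)
{
	struct mcs_node *next = atomic_load(&me->next);

	if (!next) {
		struct mcs_node *expected = me;
		/* no visible successor: try to mark the lock free */
		if (atomic_compare_exchange_strong(&tail, &expected, NULL))
			return;
		/* a successor is mid-enqueue: wait for its next pointer */
		while (!(next = atomic_load(&me->next)))
			;
	}
	atomic_store(&next->locked, false);  /* touch only the successor's node */
}
In the kernel's OSQ the queue nodes are per-CPU (struct optimistic_spin_node) and the spin loop can additionally bail out when need_resched() is set, but the cacheline story is the same: a release writes to one waiter's node instead of invalidating a line that every CPU is spinning on.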
Mutex usage rules:
At any moment only one thread can hold the mutex (mutual exclusion).
A mutex may sleep, so it can only be used in process context.
Only the holder may unlock it: you must not take the lock in one process and release it in another. Spinlocks and semaphores have no such restriction. Note that nothing at this level of the code enforces the rule; it is a contract the caller must keep, and with CONFIG_DEBUG_MUTEXES the unlock slowpath does verify that the unlocker is the owner.
Recursive locking and unlocking are not allowed; the sketch at the end of this section shows why re-locking self-deadlocks.
A process must not exit while holding a mutex. This does not mean the holder cannot be scheduled out: preemption is re-enabled once the lock is taken, so the holder may sleep or call schedule inside the critical section (that is exactly the on_cpu == 0 case the spinning code checks for); it just forces optimistic spinners to stop spinning and queue up.
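To illustrate the no-recursion rule above: a kernel mutex is not reentrant, so taking a mutex you already hold just decrements count again and puts the task to sleep on its own lock, a self-deadlock nothing can ever resolve (foo_lock, foo and foo_helper are hypothetical names):
static DEFINE_MUTEX(foo_lock);

static void foo_helper(void)
{
	mutex_lock(&foo_lock);   /* BUG when foo_lock is already held by us:
	                          * count goes 0 -> -1 and we sleep forever,
	                          * because only we could ever unlock it */
	/* ... */
	mutex_unlock(&foo_lock);
}

static void foo(void)
{
	mutex_lock(&foo_lock);
	foo_helper();            /* self-deadlock: same task, same mutex */
	mutex_unlock(&foo_lock); /* never reached */
}
With lockdep (CONFIG_PROVE_LOCKING) enabled, this kind of recursive acquisition is reported the first time it happens.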