While reading the book Operating Systems: Three Easy Pieces I picked up some feel for how locks have evolved, so now let's trace the lock code in the Linux kernel.
I. Spinlocks
1. The lock structure
The kernel's spinlock structure is defined as follows:
typedef struct spinlock {
	union {
		struct raw_spinlock rlock;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
		struct {
			u8 __padding[LOCK_PADSIZE];
			struct lockdep_map dep_map;
		};
#endif
	};
} spinlock_t;
We can see that struct spinlock is really just a struct raw_spinlock:
typedef struct raw_spinlock {
	arch_spinlock_t raw_lock;
#ifdef CONFIG_DEBUG_SPINLOCK
	unsigned int magic, owner_cpu;
	void *owner;
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map dep_map;
#endif
} raw_spinlock_t;
Going one level further, struct raw_spinlock boils down to an arch_spinlock_t, which is where the architecture-specific implementation lives. Many architectures, however, including x86 and arm64, do not provide their own implementation and use the generic qspinlock instead:
typedef struct qspinlock {
	union {
		atomic_t val;

		/*
		 * By using the whole 2nd least significant byte for the
		 * pending bit, we can allow better optimization of the lock
		 * acquisition for the pending bit holder.
		 */
#ifdef __LITTLE_ENDIAN
		struct {
			u8	locked;
			u8	pending;
		};
		struct {
			u16	locked_pending;
			u16	tail;
		};
#else
		struct {
			u16	tail;
			u16	locked_pending;
		};
		struct {
			u8	reserved[2];
			u8	pending;
			u8	locked;
		};
#endif
	};
} arch_spinlock_t;
struct qspinlock is a union, so at its heart it is still just one 32-bit integer; the union only changes the field layout depending on whether the CPU is little- or big-endian. Because all the members share the same storage, reading lock->val returns the combination locked + pending + tail (equivalently, locked_pending + tail) in one go. Locking and unlocking then come down to instruction-level atomic operations on this word, using whatever support the CPU's hardware provides.
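To make the layout concrete, here is a small user-space sketch (my own illustration, not kernel code; it assumes a little-endian machine and C11 anonymous structs) showing how locked, pending and tail overlay the single 32-bit val:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the little-endian arch_spinlock_t layout. */
union demo_qspinlock {
	uint32_t val;
	struct {
		uint8_t  locked;	/* bits  0..7   */
		uint8_t  pending;	/* bits  8..15  */
		uint16_t tail;		/* bits 16..31  */
	};
};

int main(void)
{
	union demo_qspinlock l = { .val = 0 };

	l.locked  = 1;		/* someone holds the lock          */
	l.pending = 1;		/* one waiter spins on pending     */
	l.tail    = 0x81;	/* encoded (cpu, idx) of the tail  */

	/* val is simply locked | pending << 8 | tail << 16 */
	printf("val = 0x%08x\n", l.val);	/* prints 0x00810101 here */
	return 0;
}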
2. Initializing the lock
A spinlock is initialized with spin_lock_init:
#define spin_lock_init(_lock)					\
do {								\
	spinlock_check(_lock);		/* compile-time type check */	\
	raw_spin_lock_init(&(_lock)->rlock);	/* the real initialisation */	\
} while (0)
spinlock_check is not really a runtime check: it simply returns &lock->rlock, so its only job is to make the compiler verify that the argument really is a spinlock_t (passing anything else fails to build):
static __always_inline raw_spinlock_t *spinlock_check(spinlock_t *lock)
{
	return &lock->rlock;
}
raw_spin_lock_init is what actually initializes the lock:
# define raw_spin_lock_init(lock)				\
	do { *(lock) = __RAW_SPIN_LOCK_UNLOCKED(lock); } while (0)

#define __RAW_SPIN_LOCK_UNLOCKED(lockname)	\
	(raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)

#define __RAW_SPIN_LOCK_INITIALIZER(lockname)	\
	{					\
	.raw_lock = __ARCH_SPIN_LOCK_UNLOCKED,	\
	SPIN_DEBUG_INIT(lockname)		\
	SPIN_DEP_MAP_INIT(lockname) }
SPIN_DEBUG_INIT initializes the members needed for spinlock debugging; with debugging disabled it expands to nothing, so we can ignore it. The same goes for SPIN_DEP_MAP_INIT:
#ifdef CONFIG_DEBUG_SPINLOCK
# define SPIN_DEBUG_INIT(lockname) \
.magic = SPINLOCK_MAGIC, \
.owner_cpu = -1, \
.owner = SPINLOCK_OWNER_INIT,
#else
# define SPIN_DEBUG_INIT(lockname)
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
#else
# define SPIN_DEP_MAP_INIT(lockname)
#endif
To sum up: spin_lock_init type-checks its argument and sets the lock to the unlocked initial value (__ARCH_SPIN_LOCK_UNLOCKED); the remaining fields are only needed for the debug configurations and expand to nothing otherwise.
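As a usage aside, a spinlock can be initialized either at runtime with spin_lock_init or statically with DEFINE_SPINLOCK. A minimal kernel-style sketch (struct my_dev and my_dev_setup are made-up names for illustration):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(global_lock);	/* static initialisation */

struct my_dev {
	spinlock_t lock;
	int counter;
};

static void my_dev_setup(struct my_dev *dev)
{
	spin_lock_init(&dev->lock);	/* runtime initialisation */
	dev->counter = 0;
}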
3. Locking
The common locking calls are the following (a short usage sketch follows the list):
- spin_lock(lock) // take the lock, busy-waiting until it succeeds
- spin_lock_irqsave(lock, flags) // take the lock and disable hard interrupts, saving the previous interrupt state in flags
- spin_lock_bh(lock) // take the lock and disable softirqs
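A rough sketch of when each variant is typically used (struct my_data and the update_* functions are made-up names; this only illustrates the pattern):

#include <linux/spinlock.h>

struct my_data {
	spinlock_t lock;
	int value;
};

/* Data only touched in process context: a plain spin_lock is enough. */
static void update_from_task(struct my_data *d, int v)
{
	spin_lock(&d->lock);
	d->value = v;
	spin_unlock(&d->lock);
}

/* Data also touched from a hard-IRQ handler: disable interrupts while holding it. */
static void update_irq_shared(struct my_data *d, int v)
{
	unsigned long flags;

	spin_lock_irqsave(&d->lock, flags);
	d->value = v;
	spin_unlock_irqrestore(&d->lock, flags);
}

/* Data also touched from a softirq/tasklet: disabling bottom halves is enough. */
static void update_bh_shared(struct my_data *d, int v)
{
	spin_lock_bh(&d->lock);
	d->value = v;
	spin_unlock_bh(&d->lock);
}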
Let's start with spin_lock, which ultimately ends up in __raw_spin_lock:
static __always_inline void spin_lock(spinlock_t *lock)
{
raw_spin_lock(&lock->rlock);
}
#define raw_spin_lock(lock) _raw_spin_lock(lock)
#define _raw_spin_lock(lock) __raw_spin_lock(lock)
static inline void __raw_spin_lock(raw_spinlock_t *lock)
{
	preempt_disable();				/* disable preemption */
	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);	/* lockdep annotation; a no-op without lockdep */
	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);	/* the actual acquisition */
}
spin_lock calls __raw_spin_lock, which first disables preemption, then runs the lockdep annotation spin_acquire (a no-op without lockdep), and finally, the important part, calls LOCK_CONTENDED to actually take the lock:
#define LOCK_CONTENDED(_lock, try, lock) \
do { \
if (!try(_lock)) { \
lock_contended(&(_lock)->dep_map, _RET_IP_); \
lock(_lock); \
} \
lock_acquired(&(_lock)->dep_map, _RET_IP_); \
} while (0)
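Note that this is the CONFIG_LOCK_STAT flavour of the macro. If I remember the lockdep header correctly, without lock statistics LOCK_CONTENDED degenerates to just calling the real lock function:

#define LOCK_CONTENDED(_lock, try, lock) \
	lock(_lock)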
The LOCK_CONTENDED macro first attempts do_raw_spin_trylock; if that succeeds we are done. If it fails, lock_contended merely records the contention in the lockdep/lockstat statistics, and the real acquisition is done by the lock argument, i.e. do_raw_spin_lock. lock_acquired is another statistics hook; with lock statistics disabled both hooks do nothing. Let's look at lock_contended first:
void lock_contended(struct lockdep_map *lock, unsigned long ip)
{
	unsigned long flags;

	if (unlikely(!lock_stat || !debug_locks))
		return;
	if (unlikely(current->lockdep_recursion))
		return;

	raw_local_irq_save(flags);		/* save the IRQ state and disable interrupts */
	check_flags(flags);
	current->lockdep_recursion = 1;		/* guard against lockdep recursing into itself */
	trace_lock_contended(lock, ip);
	__lock_contended(lock, ip);		/* record the contention statistics */
	current->lockdep_recursion = 0;
	raw_local_irq_restore(flags);		/* restore the IRQ state */
}
EXPORT_SYMBOL_GPL(lock_contended);
So lock_contended saves the IRQ state and disables interrupts, records the contention through __lock_contended, and then restores the IRQ state with raw_local_irq_restore. It is bookkeeping, not lock acquisition. A quick look at __lock_contended:
static void
__lock_contended(struct lockdep_map *lock, unsigned long ip)
{
struct task_struct *curr = current;
struct held_lock *hlock;
struct lock_class_stats *stats;
unsigned int depth;
int i, contention_point, contending_point;
depth = curr->lockdep_depth;//number of locks this task currently holds
/*
* Whee, we contended on this lock, except it seems we're not
* actually trying to acquire anything much at all..
*/
if (DEBUG_LOCKS_WARN_ON(!depth))
return;
hlock = find_held_lock(curr, lock, depth, &i);
if (!hlock) {
print_lock_contention_bug(curr, lock, ip);
return;
}
if (hlock->instance != lock)
return;
hlock->waittime_stamp = lockstat_clock();
contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
contending_point = lock_point(hlock_class(hlock)->contending_point,
lock->ip);
stats = get_lock_stats(hlock_class(hlock));
if (contention_point < LOCKSTAT_POINTS)
stats->contention_point[contention_point]++;
if (contending_point < LOCKSTAT_POINTS)
stats->contending_point[contending_point]++;
if (lock->cpu != smp_processor_id())
stats->bounces[bounce_contended + !!hlock->read]++;
}
__lock_contended looks up the held_lock entry for this lock, timestamps the start of the wait and bumps the per-lock-class contention counters, which is exactly the statistics gathering mentioned above. The real locking work is in the lock callback, do_raw_spin_lock:
void do_raw_spin_lock(raw_spinlock_t *lock)
{
	debug_spin_lock_before(lock);		/* sanity checks on the lock (debug builds) */
	arch_spin_lock(&lock->raw_lock);	/* the real locking */
	debug_spin_lock_after(lock);		/* record owner information (debug builds) */
}
#define arch_spin_lock(l) queued_spin_lock(l)
static __always_inline void queued_spin_lock(struct qspinlock *lock)
{
	u32 val;

	val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);	/* fast path: take the lock if it is completely free */
	if (likely(val == 0))
		return;
	queued_spin_lock_slowpath(lock, val);	/* otherwise fall back to the slow, queued path */
}
atomic_cmpxchg_acquire expands as follows:
#define atomic_cmpxchg_acquire(v, old, new) \
cmpxchg_acquire(&((v)->counter), (old), (new))
#define cmpxchg_acquire(...) __cmpxchg_wrapper(_acq, __VA_ARGS__)
#define __cmpxchg_wrapper(sfx, ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
__cmpxchg##sfx((ptr), (unsigned long)(o), \
(unsigned long)(n), sizeof(*(ptr))); \
__ret; \
})
atomic_cmpxchg_acquire compares lock->val with 0; if they are equal it sets lock->val to _Q_LOCKED_VAL and returns the old value (0), otherwise it returns the current value without modifying it. Since val overlays all the union members, val == 0 means nobody holds the lock, nobody owns the pending bit and the wait queue is empty, so the lock can be taken directly by setting locked to 1.
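The same fast path can be sketched in user space with C11 atomics (a simplified illustration, not the kernel implementation; _Q_LOCKED_VAL is 1 as in the kernel):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define _Q_LOCKED_VAL	1U

struct demo_qspinlock {
	_Atomic uint32_t val;
};

/* Fast path: if nobody holds the lock, nobody is pending and the queue is
 * empty (val == 0), take the lock with one acquire compare-and-swap. */
static bool demo_trylock_fastpath(struct demo_qspinlock *lock)
{
	uint32_t expected = 0;

	return atomic_compare_exchange_strong_explicit(&lock->val,
			&expected, _Q_LOCKED_VAL,
			memory_order_acquire, memory_order_relaxed);
}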
Otherwise we fall into queued_spin_lock_slowpath:
/**
* queued_spin_lock_slowpath - acquire the queued spinlock
* @lock: Pointer to queued spinlock structure
* @val: Current value of the queued spinlock 32-bit word
*
* (queue tail, pending bit, lock value)
*
* fast : slow : unlock
* : :
* uncontended (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0)
* : | ^--------.------. / :
* : v \ \ | :
* pending : (0,1,1) +--> (0,1,0) \ | :
* : | ^--' | | :
* : v | | :
* uncontended : (n,x,y) +--> (n,0,0) --' | :
* queue : | ^--' | :
* : v | :
* contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -' :
* queue : ^--' :
*/
void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
{
struct mcs_spinlock *prev, *next, *node;
u32 old, tail;
int idx;
BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
if (pv_enabled())
goto pv_queue;
if (virt_spin_lock(lock))
return;
/*
* Wait for in-progress pending->locked hand-overs with a bounded
* number of spins so that we guarantee forward progress.
*
* 0,1,0 -> 0,0,1
*/
//If the fast path saw only the pending bit set, a pending->locked hand-over is in progress: spin a bounded number of times waiting for it to finish, then re-read val.
if (val == _Q_PENDING_VAL) {
int cnt = _Q_PENDING_LOOPS;
val = atomic_cond_read_relaxed(&lock->val,
(VAL != _Q_PENDING_VAL) || !cnt--);
}
/*
* If we observe any contention; queue.
*/
//If any contention beyond the locked bit is visible (pending set or a queued waiter), go queue up; otherwise fall through and try to take the lock here.
if (val & ~_Q_LOCKED_MASK)
goto queue;
/*
* trylock || pending
*
* 0,0,0 -> 0,0,1 ; trylock
* 0,0,1 -> 0,1,1 ; pending
*/
//Try to claim the pending bit: atomically set it and fetch the previous value.
val = queued_fetch_set_pending_acquire(lock);
/*
* If we observe any contention; undo and queue.
*/
//If contention appeared in the meantime (pending already owned or a queue exists), undo the pending bit if we were the one who set it, and go queue up.
if (unlikely(val & ~_Q_LOCKED_MASK)) {
if (!(val & _Q_PENDING_MASK))
clear_pending(lock);
goto queue;
}
/*
* We're pending, wait for the owner to go away.
*
* 0,1,1 -> 0,1,0
*
* this wait loop must be a load-acquire such that we match the
* store-release that clears the locked bit and create lock
* sequentiality; this is because not all
* clear_pending_set_locked() implementations imply full
* barriers.
*/
//We own the pending bit now; if the lock is still held, wait for the current owner to drop the locked byte.
if (val & _Q_LOCKED_MASK)
atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_MASK));
/*
* take ownership and clear the pending bit.
*
* 0,1,0 -> 0,0,1
*/
clear_pending_set_locked(lock);//take the lock: clear pending and set locked (0,1,0 -> 0,0,1)
qstat_inc(qstat_lock_pending, true);//qspinlock statistics; a no-op unless the stats config is enabled
return;
/*
* End of pending bit optimistic spinning and beginning of MCS
* queuing.
*/
queue:
qstat_inc(qstat_lock_slowpath, true);//qspinlock statistics; a no-op unless the stats config is enabled
pv_queue:
node = this_cpu_ptr(&mcs_nodes[0]);
idx = node->count++;
tail = encode_tail(smp_processor_id(), idx);
node += idx;
/*
* Ensure that we increment the head node->count before initialising
* the actual node. If the compiler is kind enough to reorder these
* stores, then an IRQ could overwrite our assignments.
*/
barrier();//compiler barrier, see the comment above
node->locked = 0;
node->next = NULL;
pv_init_node(node);//paravirt hook; a no-op without PV spinlocks
/*
* We touched a (possibly) cold cacheline in the per-cpu queue node;
* attempt the trylock once more in the hope someone let go while we
* weren't watching.
*/
if (queued_spin_trylock(lock))//one more trylock before actually queuing
goto release;
/*
* Ensure that the initialisation of @node is complete before we
* publish the updated tail via xchg_tail() and potentially link
* @node into the waitqueue via WRITE_ONCE(prev->next, node) below.
*/
smp_wmb();//write barrier: publish the node initialisation before linking the node
/*
* Publish the updated tail.
* We have already touched the queueing cacheline; don't bother with
* pending stuff.
*
* p,*,* -> n,*,*
*/
old = xchg_tail(lock, tail);//publish our node as the new queue tail
next = NULL;
/*
* if there was a previous node; link it and wait until reaching the
* head of the waitqueue.
*/
if (old & _Q_TAIL_MASK) {
prev = decode_tail(old);
/* Link @node into the waitqueue. */
WRITE_ONCE(prev->next, node);
pv_wait_node(node, prev);
arch_mcs_spin_lock_contended(&node->locked);
/*
* While waiting for the MCS lock, the next pointer may have
* been set by another lock waiter. We optimistically load
* the next pointer & prefetch the cacheline for writing
* to reduce latency in the upcoming MCS unlock operation.
*/
next = READ_ONCE(node->next);
if (next)
prefetchw(next);
}
/*
* we're at the head of the waitqueue, wait for the owner & pending to
* go away.
*
* *,x,y -> *,0,0
*
* this wait loop must use a load-acquire such that we match the
* store-release that clears the locked bit and create lock
* sequentiality; this is because the set_locked() function below
* does not imply a full barrier.
*
* The PV pv_wait_head_or_lock function, if active, will acquire
* the lock and return a non-zero value. So we have to skip the
* atomic_cond_read_acquire() call. As the next PV queue head hasn't
* been designated yet, there is no way for the locked value to become
* _Q_SLOW_VAL. So both the set_locked() and the
* atomic_cmpxchg_relaxed() calls will be safe.
*
* If PV isn't active, 0 will be returned instead.
*
*/
if ((val = pv_wait_head_or_lock(lock, node)))
goto locked;
val = atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK));
locked:
/*
* claim the lock:
*
* n,0,0 -> 0,0,1 : lock, uncontended
* *,*,0 -> *,*,1 : lock, contended
*
* If the queue head is the only one in the queue (lock value == tail)
* and nobody is pending, clear the tail code and grab the lock.
* Otherwise, we only need to grab the lock.
*/
/*
* In the PV case we might already have _Q_LOCKED_VAL set.
*
* The atomic_cond_read_acquire() call above has provided the
* necessary acquire semantics required for locking.
*/
if (((val & _Q_TAIL_MASK) == tail) &&
atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
goto release; /* No contention */
/* Either somebody is queued behind us or _Q_PENDING_VAL is set */
set_locked(lock);
/*
* contended path; wait for next if not observed yet, release.
*/
if (!next)
next = smp_cond_load_relaxed(&node->next, (VAL));
arch_mcs_spin_unlock_contended(&next->locked);
pv_kick_node(lock, next);
release:
/*
* release the node
*/
__this_cpu_dec(mcs_nodes[0].count);
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);
The comment block at the top of queued_spin_lock_slowpath summarises the state transitions in (tail, pending, locked) form. By the time we get here the fast path has failed, so val is no longer 0: either the lock is held, the pending bit is taken, or there is already a queue. The annotated code above then walks through the two cases: spinning on the pending bit, or joining the MCS wait queue.
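The queuing half of the slow path is essentially an MCS lock: every waiting CPU spins on a flag inside its own per-CPU node instead of hammering the shared lock word, which keeps cacheline bouncing down. A stripped-down, stand-alone MCS sketch in user-space C (simplified: the real qspinlock also packs the tail into lock->val and handles the pending bit):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct mcs_node {
	struct mcs_node *_Atomic next;
	_Atomic bool locked;		/* set to true when we are handed the lock */
};

struct mcs_lock {
	struct mcs_node *_Atomic tail;
};

static void mcs_acquire(struct mcs_lock *lock, struct mcs_node *node)
{
	struct mcs_node *prev;

	atomic_store_explicit(&node->next, NULL, memory_order_relaxed);
	atomic_store_explicit(&node->locked, false, memory_order_relaxed);

	/* Publish ourselves as the new tail (like xchg_tail in the kernel). */
	prev = atomic_exchange_explicit(&lock->tail, node, memory_order_acq_rel);
	if (prev) {
		/* There is a predecessor: link in behind it and spin on our own node. */
		atomic_store_explicit(&prev->next, node, memory_order_release);
		while (!atomic_load_explicit(&node->locked, memory_order_acquire))
			;	/* spinning on a private cacheline */
	}
	/* No predecessor: the lock is ours immediately. */
}

static void mcs_release(struct mcs_lock *lock, struct mcs_node *node)
{
	struct mcs_node *next =
		atomic_load_explicit(&node->next, memory_order_acquire);

	if (!next) {
		/* If we are still the tail, clear it and we are done. */
		struct mcs_node *expected = node;
		if (atomic_compare_exchange_strong_explicit(&lock->tail,
				&expected, NULL,
				memory_order_release, memory_order_relaxed))
			return;
		/* A successor is in the middle of linking itself in; wait for the link. */
		while (!(next = atomic_load_explicit(&node->next,
						     memory_order_acquire)))
			;
	}
	/* Hand the lock to the next waiter. */
	atomic_store_explicit(&next->locked, true, memory_order_release);
}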
Finally, let's look at how spin_lock_irqsave differs from spin_lock:
#define spin_lock_irqsave(lock, flags) \
do { \
raw_spin_lock_irqsave(spinlock_check(lock), flags); \
} while (0)
#define raw_spin_lock_irqsave(lock, flags) \
do { \
typecheck(unsigned long, flags); \
flags = _raw_spin_lock_irqsave(lock); \
} while (0)
#define _raw_spin_lock_irqsave(lock) __raw_spin_lock_irqsave(lock)
static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock)
{
unsigned long flags;
local_irq_save(flags);//save the interrupt state in flags and disable local interrupts
preempt_disable();
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
/*
* On lockdep we dont want the hand-coded irq-enable of
* do_raw_spin_lock_flags() code, because lockdep assumes
* that interrupts are not re-enabled during lock-acquire:
*/
#ifdef CONFIG_LOCKDEP
LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
#else
do_raw_spin_lock_flags(lock, &flags);
#endif
return flags;
}
So the only difference is a local_irq_save before spin_acquire and LOCK_CONTENDED, which saves the interrupt state in flags and disables local interrupts; the sketch below shows why that is needed.
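The reason this matters: if an interrupt handler on the same CPU takes the same lock, a plain spin_lock in process context can deadlock, because the handler interrupts the lock holder and then spins forever on a lock that can never be released. A sketch of the usual pattern (my_irq_handler, my_state and my_state_reset are made-up names for illustration):

#include <linux/spinlock.h>
#include <linux/interrupt.h>

static DEFINE_SPINLOCK(my_state_lock);
static int my_state;

/* Runs in hard-IRQ context and takes the lock. */
static irqreturn_t my_irq_handler(int irq, void *dev_id)
{
	spin_lock(&my_state_lock);	/* interrupts are already off here */
	my_state++;
	spin_unlock(&my_state_lock);
	return IRQ_HANDLED;
}

/* Process context must use the irqsave variant, otherwise my_irq_handler
 * could fire while we hold the lock and spin on it forever. */
static void my_state_reset(void)
{
	unsigned long flags;

	spin_lock_irqsave(&my_state_lock, flags);
	my_state = 0;
	spin_unlock_irqrestore(&my_state_lock, flags);
}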
Let's also compare spin_lock_bh with spin_lock:
static __always_inline void spin_lock_bh(spinlock_t *lock)
{
raw_spin_lock_bh(&lock->rlock);
}
#define raw_spin_lock_bh(lock) _raw_spin_lock_bh(lock)
#define _raw_spin_lock_bh(lock) __raw_spin_lock_bh(lock)
static inline void __raw_spin_lock_bh(raw_spinlock_t *lock)
{
__local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET);
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
}
The only difference here is the __local_bh_disable_ip call before spin_acquire and LOCK_CONTENDED, which disables softirq processing. Let's see what __local_bh_disable_ip does:
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
unsigned long flags;
WARN_ON_ONCE(in_irq());
raw_local_irq_save(flags);
/*
* The preempt tracer hooks into preempt_count_add and will break
* lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
* is set and before current->softirq_enabled is cleared.
* We must manually increment preempt_count here and manually
* call the trace_preempt_off later.
*/
__preempt_count_add(cnt);
/*
* Were softirqs turned off above:
*/
if (softirq_count() == (cnt & SOFTIRQ_MASK))
trace_softirqs_off(ip);
raw_local_irq_restore(flags);
if (preempt_count() == cnt) {
#ifdef CONFIG_DEBUG_PREEMPT
current->preempt_disable_ip = get_lock_parent_ip();
#endif
trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
}
}
4. Unlocking
- spin_unlock(lock) // release the lock
- spin_unlock_irqrestore(lock, flags) // release the lock and restore the saved interrupt state
- spin_unlock_bh(lock) // release the lock and re-enable softirqs
Unlocking never has to compete with anyone, so spin_unlock is much simpler:
static __always_inline void spin_unlock(spinlock_t *lock)
{
raw_spin_unlock(&lock->rlock);
}
#define raw_spin_unlock(lock) _raw_spin_unlock(lock)
#define _raw_spin_unlock(lock) __raw_spin_unlock(lock)
static inline void __raw_spin_unlock(raw_spinlock_t *lock)
{
spin_release(&lock->dep_map, 1, _RET_IP_);//lockdep annotation; a no-op without lockdep
do_raw_spin_unlock(lock);//the actual unlock
preempt_enable();//re-enable preemption
}
static inline void do_raw_spin_unlock(raw_spinlock_t *lock)
{
arch_spin_unlock(&lock->raw_lock);
__release(lock);
}
On the architectures discussed here, which use qspinlock, arch_spin_unlock maps to queued_spin_unlock, which simply does a release store of 0 to the locked byte:
#define arch_spin_unlock(l)	queued_spin_unlock(l)
static __always_inline void queued_spin_unlock(struct qspinlock *lock)
{
	/*
	 * unlock() needs release semantics:
	 */
	smp_store_release(&lock->locked, 0);
}
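Paired with the compare-and-swap sketch from the locking side, the user-space counterpart of this unlock is just a release store (again only an illustration; the kernel clears only the locked byte so that pending and tail survive, while this toy lock has nothing else to preserve):

#include <stdatomic.h>
#include <stdint.h>

struct demo_qspinlock {
	_Atomic uint32_t val;
};

/* Release the lock: a release store makes every write in the critical
 * section visible before the lock is observed as free. */
static void demo_unlock(struct demo_qspinlock *lock)
{
	atomic_store_explicit(&lock->val, 0, memory_order_release);
}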
spin_unlock_irqrestore differs from spin_unlock only in calling local_irq_restore after do_raw_spin_unlock:
static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
{
raw_spin_unlock_irqrestore(&lock->rlock, flags);
}
#define raw_spin_unlock_irqrestore(lock, flags) \
do { \
typecheck(unsigned long, flags); \
_raw_spin_unlock_irqrestore(lock, flags); \
} while (0)
#define _raw_spin_unlock_irqrestore(lock, flags) __raw_spin_unlock_irqrestore(lock, flags)
static inline void __raw_spin_unlock_irqrestore(raw_spinlock_t *lock,
unsigned long flags)
{
spin_release(&lock->dep_map, 1, _RET_IP_);
do_raw_spin_unlock(lock);
local_irq_restore(flags);
preempt_enable();
}
II. How ARMv8 spinlocks work at the lowest level: exclusive access instructions
The ARMv8 architecture provides a mechanism called exclusive access (also known as exclusive monitors, or load-exclusive/store-exclusive) for implementing atomic operations on multiprocessor (multi-core) systems. It lets a core perform an atomic read-modify-write on shared memory, preventing race conditions and inconsistent data, and it is exactly what a spinlock needs for its core operation: the atomic update of the lock word.
Load-Exclusive (LDXR) loads data from memory into a register and marks that memory location as being in the exclusive state. Store-Exclusive (STXR) writes a register back to memory only if the location is still in the exclusive state; on success the write happens and the exclusive state is released, while if another processor has touched the location in the meantime, the store is not performed and the instruction reports failure in a status register.
Because LDXR and STXR work this way, the sequence load-exclusive, modify, store-exclusive behaves as an atomic read-modify-write. In a multiprocessor system, if the store-exclusive fails (another processor accessed the same memory location), software simply retries the sequence until it succeeds, and this retry loop is what guarantees atomicity. Marking the location exclusive means only one processor can complete the update, which is how data inconsistency and race conditions are avoided.
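As an illustration of the pattern (a sketch, not kernel code), an atomic increment built from LDXR/STXR in GCC-style inline assembly for AArch64 looks roughly like this; the loop simply retries whenever the store-exclusive reports failure:

/* Atomically add 'inc' to '*counter' using load/store-exclusive. */
static inline void atomic_add_exclusive(int *counter, int inc)
{
	int newval;
	int failed;

	asm volatile(
	"1:	ldxr	%w0, [%2]\n"		/* exclusive load of *counter       */
	"	add	%w0, %w0, %w3\n"	/* compute the new value            */
	"	stxr	%w1, %w0, [%2]\n"	/* try to store it back exclusively */
	"	cbnz	%w1, 1b\n"		/* non-zero status: lost exclusivity, retry */
	: "=&r" (newval), "=&r" (failed)
	: "r" (counter), "r" (inc)
	: "memory");
}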
Back to the source code: the Linux implementation is painful to follow because of the layers of macros, and once you finally reach the assembly you have to backtrack to figure out what the arguments were, so let's look at ATF or OP-TEE instead; the OP-TEE spinlock is very simple. Here is the lowest-level spinlock assembly from OP-TEE 3.21, in core/arch/arm/kernel/spin_lock_a64.S:
/* void __cpu_spin_lock(unsigned int *lock); */
FUNC __cpu_spin_lock , :
mov w2, #SPINLOCK_LOCK
sevl
l1: wfe
l2: ldaxr w1, [x0]
cbnz w1, l1
stxr w1, w2, [x0]
cbnz w1, l2
ret
END_FUNC __cpu_spin_lock
/* unsigned int __cpu_spin_trylock(unsigned int *lock); */
FUNC __cpu_spin_trylock , :
mov x1, x0
mov w2, #SPINLOCK_LOCK
.loop: ldaxr w0, [x1]
cbnz w0, .cpu_spin_trylock_out
stxr w0, w2, [x1]
cbnz w0, .loop
.cpu_spin_trylock_out:
ret
END_FUNC __cpu_spin_trylock
/* void __cpu_spin_unlock(unsigned int *lock); */
FUNC __cpu_spin_unlock , :
stlr wzr, [x0]
ret
END_FUNC __cpu_spin_unlock
A quick note on sevl: it is "send event local". Unlike sev, it does not signal other cores; it only sets the current core's event register, so that the first wfe that follows falls straight through instead of putting the core to sleep.
With that, __cpu_spin_lock reads naturally: SPINLOCK_LOCK (which is in fact 1) is loaded into w2, and sevl primes the local event register so the first wfe does not stall. At l2 the lock word is loaded with ldaxr (load-exclusive with acquire semantics); if it is non-zero the lock is held, so we go back to l1 and sleep in wfe until an event arrives, for example when the holder's stlr in __cpu_spin_unlock writes the word that our load-exclusive is monitoring. If the word is 0 we try to write w2 with stxr; a non-zero status in w1 means we lost exclusivity, so we retry from l2, otherwise the lock is ours and we return.
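For comparison, here is a rough C equivalent of __cpu_spin_lock and __cpu_spin_unlock written with GCC atomic builtins (a sketch assuming SPINLOCK_LOCK == 1; the real assembly additionally uses WFE/SEV so a waiting core can doze instead of burning cycles, which plain C cannot express):

#define SPINLOCK_LOCK	1U
#define SPINLOCK_UNLOCK	0U

/* Equivalent of __cpu_spin_lock: spin until the word goes from 0 to 1. */
static void cpu_spin_lock_c(unsigned int *lock)
{
	unsigned int expected;

	do {
		expected = SPINLOCK_UNLOCK;
		/* acquire on success, mirroring the ldaxr/stxr pair */
	} while (!__atomic_compare_exchange_n(lock, &expected, SPINLOCK_LOCK,
					      0, __ATOMIC_ACQUIRE,
					      __ATOMIC_RELAXED));
}

/* Equivalent of __cpu_spin_unlock: a release store of 0, like "stlr wzr". */
static void cpu_spin_unlock_c(unsigned int *lock)
{
	__atomic_store_n(lock, SPINLOCK_UNLOCK, __ATOMIC_RELEASE);
}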