出于性能考虑，请尽量减少配对的 LDREX 与 STREX 指令之间的指令数量：两条指令间隔越长，独占访问标记在此期间被清除、导致 STREX 失败并重试的概率就越大。
举例：ARM 架构下 spin_lock 的源码（arch_spin_lock）：
/*
 * arch_spin_lock - acquire a ticket-style spinlock on ARM, waiting for our turn.
 * @lock: the spinlock to acquire.
 *
 * Step 1 (inline asm): add (1 << TICKET_SHIFT) to lock->slock with an
 * LDREX/STREX retry loop. lockval keeps the value read *before* the add, so
 * lockval.tickets.next is the ticket number this caller drew.
 * Step 2 (while loop): wait until the lock's owner field equals that ticket.
 * Step 3: issue smp_mb() once the lock is held.
 *
 * NOTE(review): the original annotation says slock is the 4-byte view of a
 * union, with next in the high 16 bits and owner in the low 16 bits. The
 * definitions of arch_spinlock_t and TICKET_SHIFT are not visible here, so
 * confirm the layout against them.
 */
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned long tmp; /* STREX status: 0 = store succeeded, non-zero = must retry */
u32 newval; /* lock word with the ticket increment applied */
arch_spinlock_t lockval; /* snapshot of the lock word as loaded by LDREX */
prefetchw(&lock->slock); /* prefetch the lock word ahead of the update (presumably "prefetch for write" - confirm) */
__asm__ __volatile__( /* LDREX/STREX are ARM's exclusive load / conditional store instructions */
"1: ldrex %0, [%3]\n" /* lockval = lock->slock, and mark that address for exclusive access */
" add %1, %0, %4\n" /* newval = lockval + (1 << TICKET_SHIFT): adds to the whole word, which bumps the next field if next starts at bit TICKET_SHIFT */
" strex %2, %1, [%3]\n" /* if the exclusive mark is still held: lock->slock = newval and tmp = 0; otherwise nothing is stored and tmp = 1 */
" teq %2, #0\n" /* compare tmp with 0 (sets the condition flags) */
" bne 1b" /* tmp != 0: the store failed, so retry from label 1 until the increment lands */
: "=&r" (lockval), "=&r" (newval), "=&r" (tmp) /* %0-%2: early-clobber register outputs */
: "r" (&lock->slock), "I" (1 << TICKET_SHIFT) /* %3: address of the lock word; %4: increment as an immediate */
: "cc"); /* teq changes the condition flags */
/* Our ticket is lockval.tickets.next; wait until owner catches up to it. */
while (lockval.tickets.next != lockval.tickets.owner) {
wfe(); /* pause between polls (presumably the ARM wait-for-event instruction - confirm) */
lockval.tickets.owner = ACCESS_ONCE(lock->tickets.owner); /* refresh owner from the lock itself; ACCESS_ONCE is expected to force a real reload each iteration */
}
smp_mb(); /* memory barrier after the lock has been acquired, before the caller's critical section */
}