Linux中的锁机制 —— osq lock

jianchi88

已于 2022-03-08 11:38:12 修改

阅读量3.5k

点赞数

分类专栏：内核同步　文章标签：linux 运维服务器

于 2022-03-07 15:53:31 首次发布

本文链接：https://blog.csdn.net/jianchi88/article/details/123249958

版权

内核同步专栏收录该内容

7 篇文章 5 订阅

订阅专栏

osq 数据结构

6  /*
7   * An MCS like lock especially tailored for optimistic spinning for sleeping
8   * lock implementations (mutex, rwsem, etc).
9   *
10   * Using a single mcs node per CPU is safe because sleeping locks should not be
11   * called from interrupt context and we have preemption disabled while
12   * spinning.
13   */

作为 MCS 锁的衍化，专门为 mutex, rwsem 等睡眠锁量身定制了可以乐观自旋的 osq 锁（optimistic spinning queue）。

9  struct optimistic_spin_node {
10  	struct optimistic_spin_node *next, *prev;-------------------next和prev指针可以组成一个双向链表。
11  	int locked; /* 1 if lock acquired */------------------------表示加锁状态。
12  	int cpu; /* encoded CPU # + 1 value */----------------------编码后的 CPU 编号（CPU 编号 + 1），表示该 node 在哪个 CPU 上。
13  };
14  
15  struct optimistic_spin_queue {
16  	/*
17  	 * Stores an encoded value of the CPU # of the tail node in the queue.
18  	 * If the queue is empty, then it's set to OSQ_UNLOCKED_VAL.
19  	 */
20  	atomic_t tail;
21  };

struct optimistic_spin_node数据结构会定义成 per-CPU 变量，即每个 CPU 有一个 node 结构。

static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node);

osq 初始化

把队列 tail 设置为 OSQ_UNLOCKED_VAL，即 0。

23  #define OSQ_UNLOCKED_VAL (0)
24  
25  /* Init macro and function. */
26  #define OSQ_LOCK_UNLOCKED { ATOMIC_INIT(OSQ_UNLOCKED_VAL) }
27  
28  static inline void osq_lock_init(struct optimistic_spin_queue *lock)
29  {
30  	atomic_set(&lock->tail, OSQ_UNLOCKED_VAL);
31  }

加锁/解锁

90  bool osq_lock(struct optimistic_spin_queue *lock)
91  {
92  	struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);-----------node 指向当前 CPU 的 struct optimistic_spin_node 节点。
93  	struct optimistic_spin_node *prev, *next;
94  	int curr = encode_cpu(smp_processor_id());-----------------------------表示当前 CPU 编号，0 表示没有 CPU，1 表示 CPU0，以此类推。
95  	int old;
96  
97  	node->locked = 0;
98  	node->next = NULL;
99  	node->cpu = curr;
100  
101  	/*
102  	 * We need both ACQUIRE (pairs with corresponding RELEASE in
103  	 * unlock() uncontended, or fastpath) and RELEASE (to publish
104  	 * the node fields we just initialised) semantics when updating
105  	 * the lock tail.
106  	 */
107  	old = atomic_xchg(&lock->tail, curr);-----------使用原子交换函数 atomic_xchg() 把当前 CPU 的编码值写入全局 lock->tail，并取回 lock->tail 的旧值。如果旧值等于初始值 OSQ_UNLOCKED_VAL，说明没有人持锁，此时 lock->tail 已等于当前 CPU 的编码值，表示成功持锁。（atomic_xchg 是将新值存入变量，并将变量的旧值返回）
108  	if (old == OSQ_UNLOCKED_VAL) -------------------如果 lock->tail 的旧值等于初始值 OSQ_UNLOCKED_VAL，说明没有人持锁
109  		return true;
110  
111  	prev = decode_cpu(old);-------------------------之前获取锁失败，prev 表示 old 指向的 CPU 所属节点的 struct optimistic_spin_node 数据结构。
112  	node->prev = prev;
113  
114  	/*
115  	 * osq_lock()			unqueue
116  	 *
117  	 * node->prev = prev		osq_wait_next()
118  	 * WMB				MB
119  	 * prev->next = node		next->prev = prev // unqueue-C
120  	 *
121  	 * Here 'node->prev' and 'next->prev' are the same variable and we need
122  	 * to ensure these stores happen in-order to avoid corrupting the list.
123  	 */
124  	smp_wmb();
125  
126  	WRITE_ONCE(prev->next, node);
127  
128  	/*
129  	 * Normally @prev is untouchable after the above store; because at that
130  	 * moment unlock can proceed and wipe the node element from stack.
131  	 *
132  	 * However, since our nodes are static per-cpu storage, we're
133  	 * guaranteed their existence -- this allows us to apply
134  	 * cmpxchg in an attempt to undo our queueing.
135  	 */
136  
137  	while (!READ_ONCE(node->locked)) {------------一直查询当前节点 node->locked 是否变成了1，因为前继节点 prev 释放锁时会把它的下一个节点中的 locked 成员置为1，然后才能成功释放锁。
138  		/*
139  		 * If we need to reschedule bail... so we can block.
140  		 * Use vcpu_is_preempted() to avoid waiting for a preempted
141  		 * lock holder:
142  		 */
143  		if (need_resched() || vcpu_is_preempted(node_cpu(node->prev)))-------------------------在自旋等待过程中，如果当前 CPU 需要重新调度（例如有更高优先级进程要抢占），或者前驱节点所在的 vCPU 已被抢占，就应该放弃自旋等待，退出 MCS 链表，跳转到 unqueue 标签处处理 MCS 链表删除节点的情况。
144  			goto unqueue;
145  
146  		cpu_relax();
147  	}
148  	return true;
149  
150  unqueue:
151  	/*
152  	 * Step - A  -- stabilize @prev
153  	 *
154  	 * Undo our @prev->next assignment; this will make @prev's
155  	 * unlock()/unqueue() wait for a next pointer since @lock points to us
156  	 * (or later).
157  	 */
158  
159  	for (;;) {
160  		if (prev->next == node &&
161  		    cmpxchg(&prev->next, node, NULL) == node)---------------如果 prev->next 等于 node，就把 NULL 赋值给 prev->next（Undo our @prev->next assignment）返回 node
162  			break;
163  
164  		/*
165  		 * We can only fail the cmpxchg() racing against an unlock(),
166  		 * in which case we should observe @node->locked becomming
167  		 * true.
168  		 */
169  		if (smp_load_acquire(&node->locked))
170  			return true;
171  
172  		cpu_relax();
173  
174  		/*
175  		 * Or we race against a concurrent unqueue()'s step-B, in which
176  		 * case its step-C will write us a new @node->prev pointer.
177  		 */
178  		prev = READ_ONCE(node->prev);
179  	}
180  
181  	/*
182  	 * Step - B -- stabilize @next
183  	 *
184  	 * Similar to unlock(), wait for @node->next or move @lock from @node
185  	 * back to @prev.
186  	 */
187  
188  	next = osq_wait_next(lock, node, prev);
189  	if (!next)
190  		return false;
191  
192  	/*
193  	 * Step - C -- unlink
194  	 *
195  	 * @prev is stable because its still waiting for a new @prev->next
196  	 * pointer, @next is stable because our @node->next pointer is NULL and
197  	 * it will wait in Step-A.
198  	 */
199  
200  	WRITE_ONCE(next->prev, prev);
201  	WRITE_ONCE(prev->next, next);
202  
203  	return false;
204  }

41  static inline struct optimistic_spin_node *
42  osq_wait_next(struct optimistic_spin_queue *lock,
43  	      struct optimistic_spin_node *node,
44  	      struct optimistic_spin_node *prev)
45  {
46  	struct optimistic_spin_node *next = NULL;
47  	int curr = encode_cpu(smp_processor_id());
48  	int old;
49  
50  	/*
51  	 * If there is a prev node in queue, then the 'old' value will be
52  	 * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if
53  	 * we're currently last in queue, then the queue will then become empty.
54  	 */
55  	old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;--------------如果 prev 节点存在，old 取前驱节点的 CPU 编码值；如果没有 prev，则 old 为 0（OSQ_UNLOCKED_VAL），表示当前节点若是队列中最后一个，移除后队列将变为空。
56  
57  	for (;;) {
58  		if (atomic_read(&lock->tail) == curr &&
59  		    atomic_cmpxchg_acquire(&lock->tail, curr, old) == curr) {-----------tail 等于当前 CPU 的编码值，说明当前节点是队列最后一个，把 old 值（prev 的 CPU 编码值；没有 prev 时为 OSQ_UNLOCKED_VAL）设置给 tail，然后跳出循环，返回 next = NULL。
60  			/*
61  			 * We were the last queued, we moved @lock back. @prev
62  			 * will now observe @lock and will complete its
63  			 * unlock()/unqueue().
64  			 */
65  			break;
66  		}
67  
68  		/*
69  		 * We must xchg() the @node->next value, because if we were to
70  		 * leave it in, a concurrent unlock()/unqueue() from
71  		 * @node->next might complete Step-A and think its @prev is
72  		 * still valid.
73  		 *
74  		 * If the concurrent unlock()/unqueue() wins the race, we'll
75  		 * wait for either @lock to point to us, through its Step-B, or
76  		 * wait for a new @node->next from its Step-C.
77  		 */
78  		if (node->next) {-----------------------如果不是队列最后一个 node, 使用 xchg 设置 node->next =NULL，跳出
79  			next = xchg(&node->next, NULL);
80  			if (next)
81  				break;
82  		}
83  
84  		cpu_relax();
85  	}
86  
87  	return next;
88  }

osq加锁有几种情况：

无人持有锁，那是最理想的状态，直接返回；
有人持有锁，将当前的Node加入到 osq 队列中，在没有高优先级任务抢占时，自旋等待前驱节点释放锁；
自旋等待过程中，如果遇到高优先级任务抢占，那么需要做的事情就是将之前加入到 osq 队列中的当前节点，从 osq 队列中移除，移除的过程又分为三个步骤，分别是处理 prev 前驱节点的 next 指针指向、当前节点 Node 的 next 指针指向、以及将 prev 节点与 next 后继节点连接；

加锁过程中使用了原子操作，来确保正确性；
在这里插入图片描述
解锁时也分为几种情况：

无人争用该锁，那直接可以释放锁；
获取当前节点指向的下一个节点，如果下一个节点不为NULL，则将下一个节点解锁；
当前节点的下一个节点为NULL，则调用osq_wait_next，来等待获取下一个节点，并在获取成功后对下一个节点进行解锁；

从解锁的情况可以看出，这个过程相当于锁的传递，从上一个节点传递给下一个节点；
在这里插入图片描述
在加锁和解锁的过程中，由于可能有其他 CPU 正在并发地修改 osq 队列（入队或退出队列），因此都调用了 osq_wait_next 来获取一个确定的后继节点：（理论上只是短暂的等待）

unqueue:

step A:
如果 prev->next 为 node，prev->next = NULL
如果 prev->next 不为 node, spin
如果 node->locked 为 1，获取锁
step B:
如果 node 为队列最后一个，把 tail 改回 prev 的 CPU 编码值（即 osq_wait_next 中的 old；没有 prev 时才是 0）
如果 node->next 为空，spin
如果 node->next 非空，node->next = NULL
step C:
unlink, next->prev=prev; prev->next=next;

场景 step-by-step

下图是两个任务竞争退出队列（unqueue）的情况：第二个和第三个节点几乎同时执行 step-A，随后第二个节点以微小的时间差先进入 step-B 并开始 spin。

第二个节点在 step-A 中将 prev->next 置为 null，断开与前一个节点的连接：prev->next=null。
第三个节点在 step-A 中将 prev->next 置为 null，断开与前一个节点的连接：prev->next=null。
第二个节点在 step-B 中发现 node->next 为空，因此一直 spin，直到 node->next 不为空。
第三个节点在 step-B 中发现 node->next 不为 null，于是将其替换为 null，断开与下一个节点的连接。
6 第三个节点执行 step-C：next->prev = prev; prev->next = next;
第二个节点在 spin 过程中，一旦发现 step-B 中的 node->next 变为非空，就将其置为 null，断开与下一个节点的连接。
9 第二个节点执行 step-C：next->prev = prev; prev->next = next;

在这里插入图片描述
下图同样是两个任务竞争退出队列（unqueue）的情况，不过这次第二个节点进入得稍早一些：在第三个节点进入 step-A 之前，第二个节点已经进入了 step-B。

第二个节点在 step-A 中将 prev->next 置为 null，断开与前一个节点的连接：prev->next=null。
第二个节点在 step-B 中将 node->next 替换为 null，断开与下一个节点的连接：node->next=null。
第三个节点在 step-A 中发现 prev->next 为空，因此一直 spin，直到 prev->next 不为空。
5 第二个节点执行 step-C：将前一个节点和下一个节点相互连接。
第三个节点在 spin 过程中，一旦发现 step-A 中的 prev->next 变为非空，就将其置为 null，断开与前一个节点的连接。
第三个节点在 step-B 中将 node->next 替换为 null，断开与下一个节点的连接。
9 第三个节点执行 step-C：将前一个节点和下一个节点相互连接。

在这里插入图片描述
下图是在 mutex 的 midpath（中速路径）中，同时有多个加锁请求时，调用 osq_lock() 函数获取 OSQ（MCS）锁的过程。

图中红色的 locked=1 表示：调用 osq_unlock() 时，会把 OSQ 队列中第二个节点的 locked 置为 1，使该节点得以退出 osq_lock() 中的自旋。

a 时刻，cpu-2 获取 osq lock
b 时刻，cpu-1 排队要获取 osq lock，进入 osq spin
c 时刻，cpu-0 排队要获取 osq lock，进入 osq spin
d 时刻，cpu-3 排队要获取 osq lock，进入 osq spin
e 时刻，cpu-2 释放锁，cpu-1获取锁，此时队列第一个节点 cpu-0 locked=1, 且 cpu-0 还在 spin
f 时刻，cpu-1释放锁，cpu-0获取锁，此时队列第一个节点 cpu-3 locked=1, 且 cpu3 还在 spin
g 时刻，cpu-0释放锁，cpu-3获取锁，此时队列只有 cpu-3，tail = 4
h 时刻，cpu-3释放锁，tail = 0

tail 始终记录队尾节点所在 CPU 的真实编号 + 1；队列为空时为 0（OSQ_UNLOCKED_VAL）。

在这里插入图片描述

参考：
http://jake.dothome.co.kr/mutex/
https://www.cnblogs.com/LoyenWang/p/12826811.html

jianchi88

关注

0
点赞
踩
8

收藏

觉得还不错? 一键收藏
0
评论
Linux中的锁机制 —— osq lock

osq 数据结构6 /*7 * An MCS like lock especially tailored for optimistic spinning for sleeping8 * lock implementations (mutex, rwsem, etc).9 *10 * Using a single mcs node per CPU is safe because sleeping locks should not be11 * called from inte
复制链接

扫一扫

专栏目录