顺序锁和读写锁代码走读

最新推荐文章于 2024-03-08 16:43:07 发布

flc2762

最新推荐文章于 2024-03-08 16:43:07 发布

阅读量454

点赞数

本文链接：https://blog.csdn.net/flc2762/article/details/107790178

版权

在移植speculative page fault代码时发现开发者巧妙地使用seqcount的特性来快速判断和标识vma区的变化，由于vma修改时的写写互斥已由mmap_sem保证，所以开发者只使用了seqcount而没有使用seqlock。
将vma作为保护区，在开始结束位置调用begin和end。通过seqcount的值来快速判断vma区是否改变，只要seqcount值改变了就说明vma区改变了。
对vma区进行保护的mmap_sem是rw_semaphore，该信号量适用于读多写少情况。
借熟悉移植代码的机会，熟悉了一下seqlock和rw_semaphore的代码。

spin_lock无法区分对保护区的访问是读还是写。对于读多写少的区域使用spin_lock显得不合算。
对于读多写少的临界区进行保护时，使用顺序锁（写优先，写阻塞读）是个很好的方法。

针对读多写少的情况，读写信号量和顺序锁都能有很好的性能。两个都是读并发；顺序锁实现了写优先，写阻塞读；而读写信号量则是读能阻塞写，写也能阻塞读，在读写同时发生时写优先。由此可知在特别强调写优先时使用顺序锁。

1.顺序锁
实现思路，写写互斥，读读互不影响。写进入临界区获取spin_lock，seqcount++；操作临界区；seqcount++，退出临界区。读进入临界区前先保存seqcount值，判断seqcount是不是偶数，偶数说明没有写在临界区，非偶数就循环等待seqcount变成偶数，读取临界区，判断此时seqcount值是否和开始值一致，一致说明此次读取值有效，如果不一致，就再读一次。

具体实现
1.1 定义
在include/linux/seqlock.h中定义了seqlock
405 typedef struct {
406 struct seqcount seqcount;
407 spinlock_t lock; //为了保证写写互斥，增加spin_lock
408 } seqlock_t;

seqcount定义如下：
48 typedef struct seqcount {
49 unsigned sequence; //整型计数
50 #ifdef CONFIG_DEBUG_LOCK_ALLOC
51 struct lockdep_map dep_map; //死锁检测
52 #endif
53 } seqcount_t;

1.2 初始化
420 #define seqlock_init(x) \
421 do { \
422 seqcount_init(&(x)->seqcount); \ //初始化计数
423 spin_lock_init(&(x)->lock); \ //初始化写写互斥的spin_lock
424 } while (0)

55 static inline void __seqcount_init(seqcount_t *s, const char *name,
56 struct lock_class_key *key)
57 {
58 /*
59 * Make sure we are not reinitializing a held lock:
60 */
61 lockdep_init_map(&s->dep_map, name, key, 0); //初始化死锁检测内容
62 s->sequence = 0; //初始化计数为0
63 }

69 # define seqcount_init(s) \
70 do { \
71 static struct lock_class_key __key; \
72 __seqcount_init((s), #s, &__key); \
73 } while (0)

如没定义CONFIG_DEBUG_LOCK_ALLOC，# define seqcount_init(s) __seqcount_init(s, NULL, NULL)

1.3 写操作
write_seqlock / write_sequnlock //获取锁 / 释放锁
write_seqlock_irq / write_sequnlock_irq //获取锁的同时disable中断 / 释放锁，enable中断
write_seqlock_bh / write_sequnlock_bh //获取锁同时disable中断下半部 / 释放锁，enable中断下半部
write_seqlock_irqsave / write_sequnlock_irqrestore //保存中断标志位，disable中断，获取锁 / 释放锁，恢复之前保存的中断标志

以write_seqlock为例：
写获取锁
447 static inline void write_seqlock(seqlock_t *sl)
448 {
449 spin_lock(&sl->lock); //获取spin_lock
450 write_seqcount_begin(&sl->seqcount); //计数加1
451 }

381 static inline void write_seqcount_begin(seqcount_t *s)
382 {
383 write_seqcount_begin_nested(s, 0);
384 }

375 static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
376 {
377 raw_write_seqcount_begin(s);
378 seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); //死锁检测
379 }

226 static inline void raw_write_seqcount_begin(seqcount_t *s)
227 {
228 s->sequence++; //计数加1
229 smp_wmb(); //添加写屏障。保证临界区的操作和计数加1操作不会被smp乱序执行。保证是seqcount计数增加后才操作临界区。
230 }

写释放锁
453 static inline void write_sequnlock(seqlock_t *sl)
454 {
455 write_seqcount_end(&sl->seqcount); //先计数加1
456 spin_unlock(&sl->lock); //然后才释放spin_lock
457 }

386 static inline void write_seqcount_end(seqcount_t *s)
387 {
388 seqcount_release(&s->dep_map, 1, _RET_IP_);
389 raw_write_seqcount_end(s);
390 }

232 static inline void raw_write_seqcount_end(seqcount_t *s)
233 {
234 smp_wmb(); //添加写屏障，保证临界区的操作和计数加1操作不会被smp乱序执行。保证临界区操作完成后seqcount计数才增加。
235 s->sequence++; //计数加1
236 }

1.4 读操作
read_seqbegin / read_seqretry //获取seqcount，当没有写操作时返回seqcount值 / 判断seqcount值是否变化.如果变化则读操作失败,重读

432 static inline unsigned read_seqbegin(const seqlock_t *sl)
433 {
434 return read_seqcount_begin(&sl->seqcount); //读取seqcount值
435 }

162 static inline unsigned read_seqcount_begin(const seqcount_t *s)
163 {
164 seqcount_lockdep_reader_access(s);
165 return raw_read_seqcount_begin(s);
166 }

146 static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
147 {
148 unsigned ret = __read_seqcount_begin(s);
149 smp_rmb();
150 return ret;
151 }

108 static inline unsigned __read_seqcount_begin(const seqcount_t *s)
109 {
110 unsigned ret;
111
112 repeat:
113 ret = READ_ONCE(s->sequence); //最终的读操作
114 if (unlikely(ret & 1)) { //如果seqcount值为奇数说明有写操作正在临界区，就执行cpu_relax()忙等（并不让出cpu），然后重读seqcount。达到写阻塞读的效果
115 cpu_relax();
116 goto repeat;
117 }
118 return ret;
119 }

seqcount值判断
437 static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
438 {
439 return read_seqcount_retry(&sl->seqcount, start);
440 }

218 static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
219 {
220 smp_rmb();
221 return __read_seqcount_retry(s, start);
222 }

203 static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
204 {
205 return unlikely(s->sequence != start); //如果seqcount值和开始值不一致则返回1，即需要重做返回1，不需要重做返回0
206 }

一般read_seqretry和read_seqbegin配套使用。在读取临界区前调用read_seqbegin获取seqcount值，在结束位置用read_seqretry进行判断，如果read_seqretry返回值为1则说明在读期间临界区有变化需要重新读。
一般操作如下：
do {
seqcount = read_seqbegin(&seq_lock); //进入临界区前先获取seqcount值
do_something();
} while (read_seqretry(&seq_lock, seqcount)); //如果为1，则需要重新读

2 读写信号量
实现思路，写写互斥，读写互斥，读读并发。原子操作计数count表示是读还是写，-1表示1个写操作，0标识没有读写操作，大于0标识读操作的个数。通过对count计数和wait_list的判断来实现，读并发，写互斥。

2.1 定义
在文件include/linux/rwsem.h定义如下：
30 struct rw_semaphore {
31 atomic_long_t count; //原子操作计数
32 struct list_head wait_list; //等待列表
33 raw_spinlock_t wait_lock; //对列表操作时加锁
34 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
35 struct optimistic_spin_queue osq; /* spinner MCS lock */
36 /*
37 * Write owner. Used as a speculative check to see
38 * if the owner is running on the cpu.
39 */
40 struct task_struct *owner; //锁的持有者
41 #endif
42 #ifdef CONFIG_DEBUG_LOCK_ALLOC
43 struct lockdep_map dep_map; //死锁检测
44 #endif
45 };

2.2 初始化

99 #define init_rwsem(sem) \
100 do { \
101 static struct lock_class_key __key; \
102 \
103 __init_rwsem((sem), #sem, &__key); \
104 } while (0)

41 void __init_rwsem(struct rw_semaphore *sem, const char *name,
42 struct lock_class_key *key)
43 {
44 #ifdef CONFIG_DEBUG_LOCK_ALLOC
45 /*
46 * Make sure we are not reinitializing a held semaphore:
47 */
48 debug_check_no_locks_freed((void *)sem, sizeof(*sem));
49 lockdep_init_map(&sem->dep_map, name, key, 0);
50 #endif
51 sem->count = 0; //计数初始值为0，表示没有读写操作
52 raw_spin_lock_init(&sem->wait_lock); //初始化spin_lock，保护count计数和wait_list
53 INIT_LIST_HEAD(&sem->wait_list); //初始化列表
54 }
55 EXPORT_SYMBOL(__init_rwsem);

2.3 读操作
读加锁
21 void __sched down_read(struct rw_semaphore *sem)
22 {
23 might_sleep();
24 rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
25
26 LOCK_CONTENDED(sem, __down_read_trylock, __down_read); //进行读加锁，先进行try，try失败（存在竞争）时才调用__down_read进行真实的读加锁操作
27 rwsem_set_reader_owned(sem);
28 }

609 #define LOCK_CONTENDED(_lock, try, lock) \
610 do { \
611 if (!try(_lock)) { \
612 lock_contended(&(_lock)->dep_map, _RET_IP_); \
613 lock(_lock); \ //真实读操作
614 } \
615 lock_acquired(&(_lock)->dep_map, _RET_IP_); \
616 } while (0)

178 void __sched __down_read(struct rw_semaphore *sem)
179 {
180 __down_read_common(sem, TASK_UNINTERRUPTIBLE);
181 }

130 int __sched __down_read_common(struct rw_semaphore *sem, int state)
131 {
132 struct rwsem_waiter waiter;
133 unsigned long flags;
134
135 raw_spin_lock_irqsave(&sem->wait_lock, flags); //获取sem中的spin_lock,保证count和waitlist值使用时无其他人修改
136
137 if (sem->count >= 0 && list_empty(&sem->wait_list)) { //判断计数，如无写操作（count值不为-1），且wait_list为空，则读操作可以进行
138 /* granted */
139 sem->count++; //count加1，表示读操作多了1个
140 raw_spin_unlock_irqrestore(&sem->wait_lock, flags); //spin_lock解锁
141 goto out;
142 }
143
144 /* set up my own style of waitqueue */
145 waiter.task = current;
146 waiter.type = RWSEM_WAITING_FOR_READ; //表示此操作是读，一个读操作加入到了等待队列
147 get_task_struct(current);
148
149 list_add_tail(&waiter.list, &sem->wait_list); //加入队列
150
151 /* wait to be given the lock */
152 for (;;) {
153 if (!waiter.task) //task为null时跳出循环，在__rwsem_do_wake中wake up读操作时会设置task为null
154 break;
155 if (signal_pending_state(state, current))
156 goto out_nolock;
157 set_current_state(state); //设置task状态为TASK_UNINTERRUPTIBLE
158 raw_spin_unlock_irqrestore(&sem->wait_lock, flags); //spin_lock解锁
159 schedule(); //切换出去，等待唤醒
160 raw_spin_lock_irqsave(&sem->wait_lock, flags);
161 }
162
163 raw_spin_unlock_irqrestore(&sem->wait_lock, flags); //spin_lock解锁
164 out:
165 return 0;
166
167 out_nolock:
168 /*
169 * We didn't take the lock, so that there is a writer, which
170 * is owner or the first waiter of the sem. If it's a waiter,
171 * it will be woken by current owner. Not need to wake anybody.
172 */
173 list_del(&waiter.list);
174 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
175 return -EINTR;
176 }

读解锁
101 void up_read(struct rw_semaphore *sem)
102 {
103 rwsem_release(&sem->dep_map, 1, _RET_IP_);
104
105 __up_read(sem);
106 }
107
108 EXPORT_SYMBOL(up_read);

295 void __up_read(struct rw_semaphore *sem)
296 {
297 unsigned long flags;
298
299 raw_spin_lock_irqsave(&sem->wait_lock, flags);
300
301 if (--sem->count == 0 && !list_empty(&sem->wait_list)) //count减1，判断是否为0，不为0说明还有读操作在，不唤醒写；没有读操作，且wait_list不为空，则唤醒wait_list中的第一个（此时第一个肯定是写，读可以并发，在读操作过程中能加入到wait_list第一个肯定是写）。
302 sem = __rwsem_wake_one_writer(sem); //唤醒写操作，唤醒写操作后写操作会将count值改为-1，标识有写操作
303
304 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
305 }

116 static inline struct rw_semaphore *
117 __rwsem_wake_one_writer(struct rw_semaphore *sem)
118 {
119 struct rwsem_waiter *waiter;
120
121 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); //获取第一个task
122 wake_up_process(waiter->task); //wake up task
123
124 return sem;
125 }

2.4 写操作
写加锁
51 void __sched down_write(struct rw_semaphore *sem)
52 {
53 might_sleep();
54 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
55
56 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
57 rwsem_set_owner(sem);
58 }
59
60 EXPORT_SYMBOL(down_write);

261 void __sched __down_write(struct rw_semaphore *sem)
262 {
263 __down_write_common(sem, TASK_UNINTERRUPTIBLE);
264 }

213 int __sched __down_write_common(struct rw_semaphore *sem, int state)
214 {
215 struct rwsem_waiter waiter;
216 unsigned long flags;
217 int ret = 0;
218
219 raw_spin_lock_irqsave(&sem->wait_lock, flags);
220
221 /* set up my own style of waitqueue */
222 waiter.task = current;
223 waiter.type = RWSEM_WAITING_FOR_WRITE; //标识自己是写操作的等待
224 list_add_tail(&waiter.list, &sem->wait_list); //写操作先将自己加入到wait_list中，阻塞后面的读操作，先加入list再判断可以更早的阻塞读
225
226 /* wait for someone to release the lock */
227 for (;;) {
228 /*
229 * That is the key to support write lock stealing: allows the
230 * task already on CPU to get the lock soon rather than put
231 * itself into sleep and waiting for system woke it or someone
232 * else in the head of the wait list up.
233 */
234 if (sem->count == 0) //等待所有读操作结束
235 break;
236 if (signal_pending_state(state, current))
237 goto out_nolock;
238
239 set_current_state(state);
240 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
241 schedule(); //切换出去，等待唤醒
242 raw_spin_lock_irqsave(&sem->wait_lock, flags);
243 }
244 /* got the lock */
245 sem->count = -1; //设置count值为-1，表示有写操作
246 list_del(&waiter.list); //将自己从wait_list中删除
247
248 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
249
250 return ret;
251
252 out_nolock:
253 list_del(&waiter.list);
254 if (!list_empty(&sem->wait_list) && sem->count >= 0)
255 __rwsem_do_wake(sem, 0);
256 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
257
258 return -EINTR;
259 }

写解锁
113 void up_write(struct rw_semaphore *sem)
114 {
115 rwsem_release(&sem->dep_map, 1, _RET_IP_);
116
117 rwsem_clear_owner(sem);
118 __up_write(sem);
119 }

310 void __up_write(struct rw_semaphore *sem)
311 {
312 unsigned long flags;
313
314 raw_spin_lock_irqsave(&sem->wait_lock, flags);
315
316 sem->count = 0; //设置count值为0
317 if (!list_empty(&sem->wait_list)) //在写操作时，读操作和写操作都可能加入到wait_list,唤醒wait_list中等待的task
318 sem = __rwsem_do_wake(sem, 1);
319
320 raw_spin_unlock_irqrestore(&sem->wait_lock, flags); //在wake过程中需要修改count所以需要加spin_lock
321 }

66 static inline struct rw_semaphore *
67 __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
68 {
69 struct rwsem_waiter *waiter;
70 struct task_struct *tsk;
71 int woken;
72
73 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
74
75 if (waiter->type == RWSEM_WAITING_FOR_WRITE) { //唤醒写操作的等待任务，写互斥所以只能唤醒一个，然后就结束操作
76 if (wakewrite)
77 /* Wake up a writer. Note that we do not grant it the
78 * lock - it will have to acquire it when it runs. */
79 wake_up_process(waiter->task); //唤醒写操作，在唤醒后，写操作会修改count值为-1，标识写操作正在进行
80 goto out;
81 }
82
83 /* grant an infinite number of read locks to the front of the queue */
84 woken = 0; //设置读操作计数为0
85 do {
86 struct list_head *next = waiter->list.next;
87
88 list_del(&waiter->list);
89 tsk = waiter->task;
90 /*
91 * Make sure we do not wakeup the next reader before
92 * setting the nil condition to grant the next reader;
93 * otherwise we could miss the wakeup on the other
94 * side and end up sleeping again. See the pairing
95 * in rwsem_down_read_failed().
96 */
97 smp_mb();
98 waiter->task = NULL; //设置task为null，让读操作跳出for循环
99 wake_up_process(tsk); //唤醒读操作
100 put_task_struct(tsk);
101 woken++; //唤醒的读操作计数加1
102 if (next == &sem->wait_list) //wait_list为空跳出循环
103 break;
104 waiter = list_entry(next, struct rwsem_waiter, list);
105 } while (waiter->type != RWSEM_WAITING_FOR_WRITE); //一次唤醒wait_list上所有连续的读操作，直到又遇到写等待
106
107 sem->count += woken; //修改count值，表示一共唤醒了多少个读操作，在上一层加上了spin_lock，这里修改是安全的。
108
109 out:
110 return sem;
111 }

2.5 rw_semaphore使用例子
1.一个A读操作，获取到rw_semaphore，进入临界区，count++，count=1.
2.一个B写操作，由于A读操作存在导致无法获取rw_semaphore，写操作task加入到wait_list，count=1值不变.
3.C、D两个读操作，由于wait_list不为空（B写操作在wait_list）,C、D读操作task加入到wait_list,count=1值不变。
4.A读操作解锁，count--，发现wait_list不为空，唤醒写操作B，count=0。
5.B写操作被唤醒，发现count==0，跳出for循环，设置count=-1，标识有写操作在临界区，然后将自己从wait_list中删除，此时C读操作成为wait_list中第一个task，count=-1。
6.B写操作解锁，count=0，发现wait_list不为空，唤醒wait_list中task，count=0。
7.在wait_list中唤醒第一个task（即C读操作），发现下一个task D也是读操作，继续唤醒D读操作，wait_list为空，跳出唤醒task循环，设置count值为2，count=2（表示有2个读操作）.
8.C读操作解锁，count--，count=2-1=1，count不为0，直接结束，count=1；
9.D读操作解锁，count--，count=1-1=0，count为0，wait_list为空，直接结束，count=0（恢复到最开始状态）；

代码理解上如有错误，欢迎各个大牛指正。