1 等待队列的实现类似于进程中异步中断的原理,都是为了最大化cpu的利用率,避免了cpu在资源无法满足时的轮询或空等待。
等待队列的实现原理举例:cpu向磁盘发出读数据请求,由于磁盘速度慢,当前进程通过wait_queue_t结构睡眠在内核数据缓冲区的wait_queue_head_t中,并且发生进程切换;当磁盘准备好数据,向cpu发出异步中断请求,在中断处理过程中的软中断部分,cpu将数据拷贝到内核数据缓冲区中,然后唤醒睡眠在等待队列中的进程,在调度的适当时机,该进程得到执行。等待队列在System V进程间通信和网络通信中被大量使用。
2 结构体
2.1 等待队列头部
/*
 * Wait queue head: a spinlock protecting the list, plus the
 * doubly-linked list of waiters (wait_queue_t entries).
 */
struct __wait_queue_head {
spinlock_t lock; /* protects task_list against concurrent add/remove */
struct list_head task_list; /* list of sleeping wait_queue_t entries */
};
typedef struct __wait_queue_head wait_queue_head_t;
2.2 等待队列项
/*
 * Wait queue entry: one sleeping waiter on a wait_queue_head_t.
 */
struct __wait_queue {
unsigned int flags;
#define WQ_FLAG_EXCLUSIVE 0x01 /* exclusive waiter: a wakeup wakes at most one such entry */
void *private; /* usually points to the waiting task's task_struct */
wait_queue_func_t func; /* wake-up callback (e.g. default_wake_function) */
struct list_head task_list; /* links this entry into wait_queue_head_t.task_list */
};
typedef struct __wait_queue wait_queue_t;
3 等待队列使用流程
3-1 初始化等待队列头部
静态初始化:
/* Static initialization: define and initialize a wait queue head in one step. */
#define DECLARE_WAIT_QUEUE_HEAD(name) \
wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
/* Initializer: unlocked spinlock, and a list head pointing at itself (empty list). */
#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
.task_list = { &(name).task_list, &(name).task_list } }
动态初始化
/* Example: a wait queue head to be initialized at run time. */
static wait_queue_head_t head;
/* Dynamic initialization; the static __key gives lockdep a lock class per call site. */
#define init_waitqueue_head(q) \
do { \
static struct lock_class_key __key; \
\
__init_waitqueue_head((q), &__key); \
} while (0)
3-2 初始化等待队列项
静态初始化
/* Static init of a wait queue entry for the current task, using
 * autoremove_wake_function (the entry removes itself from the queue on wakeup). */
#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
#define DEFINE_WAIT_FUNC(name, function) \
wait_queue_t name = { \
.private = current, \
.func = function, \
.task_list = LIST_HEAD_INIT((name).task_list), \
}
动态初始化
/* Example: a wait queue entry to be initialized at run time. */
static wait_queue_t item;
/*
 * init_waitqueue_entry - initialize @q to wait on behalf of task @p,
 * as a non-exclusive waiter using the default wake function.
 */
static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
{
q->flags = 0;
q->private = p;
q->func = default_wake_function;
}
3.3 将当前任务加入到等待队列,使进程睡眠
3.3.1 主函数 wait_event(wait_queue_head_t,c 表达式条件)
/**
 * wait_event - sleep until a condition gets true
 * @wq: the waitqueue to wait on
 * @condition: a C expression for the event to wait for
 *
 * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
 * @condition evaluates to true. The @condition is checked each time
 * the waitqueue @wq is woken up.
 *
 * wake_up() has to be called after changing any variable that could
 * change the result of the wait condition.
 */
/* Fast path: if the condition is already true, skip sleeping entirely. */
#define wait_event(wq, condition) \
do { \
if (condition) \
break; \
__wait_event(wq, condition); \
} while (0)
3.3.2
/*
 * __wait_event - slow path of wait_event().
 * Builds a wait_queue_t on the stack (with .private = current), then loops:
 * enqueue + set TASK_UNINTERRUPTIBLE, re-check the condition, and call
 * schedule() until the condition holds.  finish_wait() then restores
 * TASK_RUNNING and removes the entry from the queue.
 *
 * NOTE(review): the original had free-text annotations placed AFTER the
 * backslash line-continuations, which breaks the macro; they are now
 * proper comments.
 */
#define __wait_event(wq, condition)					\
do {									\
	/* statically build a wait_queue_t; .private = current */	\
	DEFINE_WAIT(__wait);						\
									\
	for (;;) {							\
		/* enqueue and mark the task TASK_UNINTERRUPTIBLE */	\
		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);	\
		if (condition)						\
			break;						\
		schedule();						\
	}								\
	/* condition met: set TASK_RUNNING and dequeue the entry */	\
	finish_wait(&wq, &__wait);					\
} while (0)
3.3.3 prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE)执行
/*
 * Note: we use "set_current_state()" _after_ the wait-queue add,
 * because we need a memory barrier there on SMP, so that any
 * wake-function that tests for the wait-queue being active
 * will be guaranteed to see waitqueue addition _or_ subsequent
 * tests in this thread will see the wakeup having taken place.
 *
 * The spin_unlock() itself is semi-permeable and only protects
 * one way (it only protects stuff inside the critical region and
 * stops them from bleeding out - it would still allow subsequent
 * loads to move into the critical region).
 */
void
prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
wait->flags &= ~WQ_FLAG_EXCLUSIVE; /* non-exclusive waiter */
spin_lock_irqsave(&q->lock, flags);
if (list_empty(&wait->task_list))
__add_wait_queue(q, wait); /* i.e. list_add(&wait->task_list, &q->task_list) */
set_current_state(state); /* after the add -- see the barrier note above */
spin_unlock_irqrestore(&q->lock, flags);
}
3.3.4 finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
/*
 * finish_wait - clean up after waiting in a queue
 * @q: waitqueue waited on
 * @wait: wait descriptor
 *
 * Sets current thread back to running state and removes
 * the wait descriptor from the given waitqueue if still
 * queued.
 */
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
__set_current_state(TASK_RUNNING); /* mark runnable before dequeueing */
/*
 * We can check for list emptiness outside the lock
 * IFF:
 * - we use the "careful" check that verifies both
 * the next and prev pointers, so that there cannot
 * be any half-pending updates in progress on other
 * CPU's that we haven't seen yet (and that might
 * still change the stack area.
 * and
 * - all other users take the lock (ie we can only
 * have _one_ other CPU that looks at or modifies
 * the list).
 */
if (!list_empty_careful(&wait->task_list)) {
spin_lock_irqsave(&q->lock, flags);
list_del_init(&wait->task_list);
spin_unlock_irqrestore(&q->lock, flags);
}
}
4 等待队列进程睡眠的其他函数
4-1 可中断睡眠
/*
 * wait_event_interruptible - sleep (TASK_INTERRUPTIBLE) until @condition
 * is true or a signal is delivered.  Evaluates to 0 when woken because
 * the condition became true, or -ERESTARTSYS when woken by a signal.
 *
 * NOTE(review): the original collapsed "({" onto the first line after
 * the backslash, breaking the continuation; layout restored.
 */
#define wait_event_interruptible(wq, condition)				\
({									\
	int __ret = 0;							\
	if (!(condition))						\
		__wait_event_interruptible(wq, condition, __ret);	\
	__ret;								\
})
该等待进程如果由于condition满足被唤醒,返回0,如果由于信号被唤醒(当进程收到信号后,该进程的
thread->flag |= TIF_SIGPENDING ),返回-ERESTARTSYS
4-2 超时唤醒的睡眠
/*
 * wait_event_timeout - sleep until @condition is true or @timeout jiffies
 * elapse.  Evaluates to the remaining jiffies (> 0) when the condition
 * became true, or 0 on timeout.
 *
 * NOTE(review): the original placed a "//" annotation after the backslash
 * continuation, breaking the macro; it is now a proper comment.
 */
#define wait_event_timeout(wq, condition, timeout)			\
({									\
	long __ret = timeout;						\
	/* exits when __ret reaches 0 or the condition holds */		\
	if (!(condition))						\
		__wait_event_timeout(wq, condition, __ret);		\
	__ret;								\
})
/*
 * schedule_timeout - sleep until @timeout jiffies have elapsed, or until
 * woken earlier.  Returns the remaining jiffies (0 on full expiry).
 * MAX_SCHEDULE_TIMEOUT means "no timer": sleep until explicitly woken.
 */
signed long __sched schedule_timeout(signed long timeout)
{
struct timer_list timer;
unsigned long expire;
switch (timeout)
{
case MAX_SCHEDULE_TIMEOUT:
/*
 * These two special cases are useful to be comfortable
 * in the caller. Nothing more. We could take
 * MAX_SCHEDULE_TIMEOUT from one of the negative value
 * but I'd like to return a valid offset (>=0) to allow
 * the caller to do everything it want with the retval.
 */
schedule();
goto out;
default:
/*
 * Another bit of PARANOID. Note that the retval will be
 * 0 since no piece of kernel is supposed to do a check
 * for a negative retval of schedule_timeout() (since it
 * should never happens anyway). You just have the printk()
 * that will tell you if something is gone wrong and where.
 */
if (timeout < 0) {
printk(KERN_ERR "schedule_timeout: wrong timeout "
"value %lx\n", timeout);
dump_stack();
current->state = TASK_RUNNING;
goto out;
}
}
/* arm an on-stack timer that will wake this task at expiry */
expire = timeout + jiffies;
setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
__mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
schedule();
del_singleshot_timer_sync(&timer);
/* Remove the timer from the object tracker */
destroy_timer_on_stack(&timer);
timeout = expire - jiffies; /* remaining time; <= 0 if the timer fired */
out:
return timeout < 0 ? 0 : timeout;
}
当定时器唤醒该进程时,由于返回值timeout=0,所以该进程从睡眠等待中退出
4-3 超时或者信号均可唤醒的睡眠过程
/*
 * wait_event_interruptible_timeout - sleep until @condition is true, a
 * signal arrives, or @timeout jiffies elapse.  Evaluates to the remaining
 * jiffies (> 0) if the condition became true, 0 on timeout, or
 * -ERESTARTSYS if woken by a signal.
 */
#define wait_event_interruptible_timeout(wq, condition, timeout) \
({ \
long __ret = timeout; \
if (!(condition)) \
__wait_event_interruptible_timeout(wq, condition, __ret); \
__ret; \
})
返回0代表超时;返回正数返回剩余时间数,此时condition满足;返回-ERESTARTSYS代表被信号唤醒
6 完成量
完成量类似信号量,但其实现基于等待队列。完成量结构中有两类参与者:若干个进程等待某个操作完成,而一个进程在操作完成时发出声明,从而唤醒等待的进程。
6-1 完成量结构体 文件:include/linux/completion.h
/**
 * struct completion - structure used to maintain state for a "completion"
 *
 * This is the opaque structure used to maintain the state for a "completion".
 * Completions currently use a FIFO to queue threads that have to wait for
 * the "completion" event.
 *
 * See also: complete(), wait_for_completion() (and friends _timeout,
 * _interruptible, _interruptible_timeout, and _killable), init_completion(),
 * and macros DECLARE_COMPLETION(), DECLARE_COMPLETION_ONSTACK(), and
 * INIT_COMPLETION().
 */
struct completion {
unsigned int done; /* count of completions signalled but not yet consumed */
wait_queue_head_t wait; /* tasks blocked waiting for done > 0 */
};
done >= 0,其含义如下:
当done大于0时,表示操作已经完成但尚未被消费的次数,每当事件完成一次done += 1。
当done等于0时,表示该操作尚未完成,等待进程睡眠。当另一进程完成该操作时,它执行done += 1使done > 0,并调度唤醒睡眠进程;被唤醒的进程在继续执行前会执行done -= 1,使done重新为0,这样之后到来的进程仍会等待。而如果完成发生时没有进程在睡眠,done保持为1,之后再有进程等待该操作时,只需执行done -= 1即可直接继续执行而无需睡眠。
6-2 等待操作完成而睡眠的函数
extern void wait_for_completion(struct completion *); /* uninterruptible wait */
extern int wait_for_completion_interruptible(struct completion *x); /* 0 or -ERESTARTSYS */
extern int wait_for_completion_killable(struct completion *x); /* only fatal signals interrupt */
extern unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout); /* remaining jiffies, 0 on timeout */
extern unsigned long wait_for_completion_interruptible_timeout(struct completion *x, unsigned long timeout);
extern bool try_wait_for_completion(struct completion *x); /* non-blocking: consume one if available */
下面分析 wait_for_completion_interruptible
/* Interruptible wait: returns 0 when the completion fired, -ERESTARTSYS on signal. */
int __sched wait_for_completion_interruptible(struct completion *x)
{
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
if (t == -ERESTARTSYS) /* woken by a signal */
return t;
return 0; /* the completion was done */
}
/* Takes the completion's wait-queue lock around the actual wait loop. */
static long __sched
wait_for_common(struct completion *x, long timeout, int state)
{
might_sleep();
spin_lock_irq(&x->wait.lock); /* protects x->done and the wait queue */
timeout = do_wait_for_common(x, timeout, state);
spin_unlock_irq(&x->wait.lock);
return timeout;
}
/*
 * Core completion wait loop.  Called with x->wait.lock held; the lock is
 * dropped around schedule_timeout() and re-taken afterwards.
 */
static inline long __sched
do_wait_for_common(struct completion *x, long timeout, int state)
{
if (!x->done) { /* completion NOT yet signalled: we must sleep */
DECLARE_WAITQUEUE(wait, current); /* on-stack entry for the current task */
wait.flags |= WQ_FLAG_EXCLUSIVE; /* a wakeup wakes at most one such waiter */
__add_wait_queue_tail(&x->wait, &wait); /* append at the tail: FIFO order */
do {
if (signal_pending_state(state, current)) { /* woken by a signal */
timeout = -ERESTARTSYS;
break;
}
__set_current_state(state);
spin_unlock_irq(&x->wait.lock);
timeout = schedule_timeout(timeout);
spin_lock_irq(&x->wait.lock);
} while (!x->done && timeout); /* loop until x->done != 0 or timeout hits 0 */
__remove_wait_queue(&x->wait, &wait);
if (!x->done)
return timeout; /* remaining jiffies, 0 on timeout, or -ERESTARTSYS */
}
x->done--; /* consume one completion event */
return timeout ?: 1;
}
6-3 操作完成而唤醒睡眠进程的函数
extern void complete(struct completion *);
/* complete_all(): like complete(), but does x->done += UINT_MAX/2 so that
 * every waiter (not just one) sees the completion.  (The original line had
 * this note as raw prose after the ';', which is not valid C.) */
extern void complete_all(struct completion *);
分析
/* Signal one completion event: bump done and wake one exclusive waiter. */
void complete(struct completion *x)
{
unsigned long flags;
spin_lock_irqsave(&x->wait.lock, flags);
x->done++; /* record one finished operation */
__wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL); /* nr_exclusive = 1: wake at most one */
spin_unlock_irqrestore(&x->wait.lock, flags);
}
/*
 * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
 * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
 * number) then we wake all the non-exclusive tasks and one exclusive task.
 *
 * There are circumstances in which we can try to wake a task which has already
 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
 * zero in this (rare) case, and we handle it by continuing to scan the queue.
 */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int wake_flags, void *key)
{
wait_queue_t *curr, *next;
list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
unsigned flags = curr->flags; /* snapshot: curr may remove itself in func() */
if (curr->func(curr, mode, wake_flags, key) && /* per-entry wake callback */
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
}
/**
* list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
* @pos: the type * to use as a loop cursor.
* @n: another type * to use as temporary storage
* @head: the head for your list.
* @member: the name of the list_struct within the struct.
*/
#define list_for_each_entry_safe(pos, n, head, member) \
for (pos = list_entry((head)->next, typeof(*pos), member), \
n = list_entry(pos->member.next, typeof(*pos), member); \
&pos->member != (head); \
pos = n, n = list_entry(n->member.next, typeof(*n), member))
6-4 完成量举例