Linux等待队列waitqueue

最新推荐文章于 2023-12-13 10:51:42 发布

瑜珈山神

最新推荐文章于 2023-12-13 10:51:42 发布

阅读量1.5k

点赞数

分类专栏： Linux 文章标签： linux 内核

本文链接：https://blog.csdn.net/fickyou/article/details/73277514

版权

Linux 专栏收录该内容

40 篇文章 3 订阅

订阅专栏

waitqueue

- waitqueue
  - 创建一个等待队列
  - 让当前进程开始等待
- 唤醒等待队列上的进程

内核中提供了等待队列，作用是实现阻塞操作。等待队列用于使进程等待某一特定的事件发生而无需频繁的轮询，进程在等待期间睡眠，在某些事件发生时，由内核自动唤醒。

首先，Linux中所有的进程都是由task_struct这个结构管理。在生成进程的时候会分配一个task_struct结构，之后将通过这个结构对进程进行管理。task_struct位于独立的连续区间。task_struct结构中有一个state成员，有下面几种状态：

状态	说明
TASK_RUNNING	可运行状态（正在运行，或在运行队列中等待被调度）
TASK_INTERRUPTIBLE	等待状态，可接受信号
TASK_UNINTERRUPTIBLE	等待状态，不能接受信号
TASK_ZOMBIE	僵尸状态，exit后的状态
TASK_STOPPED	延缓状态

1. 创建一个等待队列

Linux内核中，wait_queue_head_t代表一个等待队列头，wait_queue_head_t数据结构如下：

/* Head of a wait queue: a lock plus the list that queued entries hang off. */
struct __wait_queue_head {
    spinlock_t lock;            // spinlock that keeps operations on the list atomic
    struct list_head task_list; // list of queued entries
};
typedef struct __wait_queue_head wait_queue_head_t;

等待队列中每个元素用wait_queue_t来表示，wait_queue_t数据结构如下：

/* One entry (one waiter) on a wait queue. */
typedef struct __wait_queue wait_queue_t;
struct __wait_queue {
    unsigned int flags;         // WQ_FLAG_EXCLUSIVE: the waiter wants to be woken exclusively; 0: may be woken together with other waiters
#define WQ_FLAG_EXCLUSIVE 0x01  // a macro defined inside a struct behaves like any other macro; it sits here to show that flags uses it
    void *private;              // points to the waiting task's task_struct
    wait_queue_func_t func;     // callback used to wake the suspended task
    struct list_head task_list; // list node linking this entry into wait_queue_head_t.task_list
};

① 可以调用init_waitqueue_head接口来初始化此队列，init_waitqueue_head主要是将wait_queue_head_t结构体中的两个成员进行初始化。

staitc wait_queue_head_t prod_wq;
init_waitqueue_head(&prod_wq);

/*
 * Runtime initializer for a wait queue head.
 * Each expansion declares its own static lock_class_key and passes it,
 * together with the stringified argument (#q) as the name, to
 * __init_waitqueue_head().
 */
#define init_waitqueue_head(q)                  \
    do {                                        \
        static struct lock_class_key __key;     \
                                                \
        __init_waitqueue_head((q), #q, &__key); \
    } while (0)

/*
 * Initialize both members of a wait queue head.
 * @q:    wait queue head to initialize
 * @name: name handed to lockdep_set_class_and_name() for q->lock
 * @key:  lock class key handed to lockdep_set_class_and_name() for q->lock
 */
void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key)
{
    spin_lock_init(&q->lock);   // initialize the spinlock
    lockdep_set_class_and_name(&q->lock, key, name);    // lockdep bookkeeping (related to deadlock prevention)
    INIT_LIST_HEAD(&q->task_list);  // initialize the list
}

② 也可以使用DECLARE_WAIT_QUEUE_HEAD来定义和初始化等待队列头。

/*
 * Static initializer for a wait queue head called `name`: the lock starts
 * unlocked and both task_list pointers point back at task_list itself.
 */
#define __WAIT_QUEUE_HEAD_INITIALIZER(name) {                           \
        .lock           = __SPIN_LOCK_UNLOCKED(name.lock),              \
        .task_list      = { &(name).task_list, &(name).task_list } }

/* Define a wait_queue_head_t variable `name` and initialize it in one step. */
#define DECLARE_WAIT_QUEUE_HEAD(name) \
        wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)

③ 定义和初始化等待队列项

/*
 * Static initializer for a wait queue entry: `tsk` is stored in .private,
 * the wake callback is default_wake_function, and the list node pointers
 * start out as NULL.
 */
#define __WAITQUEUE_INITIALIZER(name, tsk) {                            \
        .private        = tsk,                                          \
        .func           = default_wake_function,                        \
        .task_list      = { NULL, NULL } }

/* Define a wait_queue_t variable `name` for task `tsk` and initialize it. */
#define DECLARE_WAITQUEUE(name, tsk)                                    \
        wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)

2. 让当前进程开始等待

内核提供了如下的接口来让当前进程在条件不满足的情况下，阻塞等待：

wait_event(wq, condition)
wait_event_timeout(wq, condition, timeout)
wait_event_interruptible(wq, condition)
wait_event_interruptible_timeout(wq, condition, timeout)

2.1 wait_event

wait_event的实现如下：

/*
 * Queue @wait on @q as an exclusive waiter.
 * Sets WQ_FLAG_EXCLUSIVE on the entry, then, holding q->lock with the
 * interrupt state saved, adds it via __add_wait_queue_tail().
 */
void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
{
        unsigned long flags;

        wait->flags |= WQ_FLAG_EXCLUSIVE;
        spin_lock_irqsave(&q->lock, flags);
        __add_wait_queue_tail(q, wait);
        spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue_exclusive);

/*
 * Queue @wait on @q as a non-exclusive waiter and set the current task's
 * state to @state.
 * WQ_FLAG_EXCLUSIVE is cleared first. Under q->lock the entry is added only
 * if its list node is currently empty (i.e. it is not already queued), so
 * calling this repeatedly from a wait loop does not queue it twice.
 */
void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
        unsigned long flags;

        wait->flags &= ~WQ_FLAG_EXCLUSIVE;
        spin_lock_irqsave(&q->lock, flags);
        if (list_empty(&wait->task_list))
                __add_wait_queue(q, wait);
        set_current_state(state);
        spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait);

/*
 * Finish a wait: put the current task back into TASK_RUNNING and, if @wait
 * is still linked, remove it from @q under q->lock.
 */
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
{
        unsigned long flags;

        __set_current_state(TASK_RUNNING);
        /*
         * We can check for list emptiness outside the lock
         * IFF:
         *  - we use the "careful" check that verifies both
         *    the next and prev pointers, so that there cannot
         *    be any half-pending updates in progress on other
         *    CPU's that we haven't seen yet (and that might
         *    still change the stack area.
         * and
         *  - all other users take the lock (ie we can only
         *    have _one_ other CPU that looks at or modifies
         *    the list).
         */
        if (!list_empty_careful(&wait->task_list)) {
                spin_lock_irqsave(&q->lock, flags);
                list_del_init(&wait->task_list);
                spin_unlock_irqrestore(&q->lock, flags);
        }
}

/*
 * Slow path of wait_event().
 * Defines a local wait entry with DEFINE_WAIT, then loops: prepare_to_wait()
 * with TASK_UNINTERRUPTIBLE, leave the loop if `condition` is true, otherwise
 * call schedule(). finish_wait() runs once the loop exits.
 * `condition` is re-evaluated after prepare_to_wait() on every iteration.
 */
#define __wait_event(wq, condition)                                     \
do {                                                                    \
        DEFINE_WAIT(__wait);                                            \
                                                                        \
        for (;;) {                                                      \
                prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);    \
                if (condition)                                          \
                        break;                                          \
                schedule();                                             \
        }                                                               \
        finish_wait(&wq, &__wait);                                      \
} while (0)

/*
 * Block until `condition` is true.
 * Fast path: if `condition` already holds, do nothing; otherwise fall into
 * __wait_event(). `condition` is evaluated more than once.
 */
#define wait_event(wq, condition)                                       \
do {                                                                    \
        if (condition)                                                  \
                break;                                                  \
        __wait_event(wq, condition);                                    \
} while (0)

里面有个宏定义即DEFINE_WAIT，详细如下：

/*
 * Define a wait_queue_t called `name` whose .private is `current`, whose
 * wake callback is `function`, and whose list node is initialized with
 * LIST_HEAD_INIT.
 */
#define DEFINE_WAIT_FUNC(name, function)                                \
        wait_queue_t name = {                                           \
                .private        = current,                              \
                .func           = function,                             \
                .task_list      = LIST_HEAD_INIT((name).task_list),     \
        }

/* DEFINE_WAIT_FUNC with autoremove_wake_function as the wake callback. */
#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)

可以看到private成员是当前task对象的地址current， func成员是autoremove_wake_function(在下面wake_up时再做说明)。
所以整个wait_event的逻辑就是：
① 首先判断条件是否满足，如果满足，直接退出；如果不满足，调用__wait_event

② __wait_event中首先基于当前进程构建一个等待队列项；然后进入死循环：
- 调用prepare_to_wait，该函数将新建的等待队列项加入到等待队列中，并修改当前任务的state为TASK_UNINTERRUPTIBLE；（注，该函数flags的结果必然是0，也就是说这个函数是将非独占进程添加到等待队列当中。而add_wait_queue_exclusive函数则是将独占进程添加到等待队列的尾部，也就是说一个等待队列，非独占进程总是在前面，独占进程总是在后面）
- 判断condition条件，满足就退出循环，不满足继续
- 调用schedule()进行任务调度后，重新开始循环

③ 退出循环后调用finish_wait，将当前任务的state设置为TASK_RUNNING，并将新建的等待队列项从等待队列中删除。

2.2 wait_event_timeout

wait_event_timeout 的实现如下：

/*
 * Call schedule() with an on-stack timer armed to expire `timeout` jiffies
 * from now; the timer's callback is process_timeout with `current` as its
 * argument.
 *
 * This function does not set the task state itself (except to TASK_RUNNING
 * on the error path); the wait_event_* macros in this file do that through
 * prepare_to_wait() before calling it.
 *
 * @timeout: timeout in jiffies. MAX_SCHEDULE_TIMEOUT means schedule()
 *           without arming any timer. A negative value is reported with
 *           printk()/dump_stack() and the function returns without sleeping.
 *
 * Returns expire - jiffies computed after schedule() returns, clamped so
 * that a negative result becomes 0. For MAX_SCHEDULE_TIMEOUT the value
 * passed in is returned.
 */
signed long __sched schedule_timeout(signed long timeout)
{
        struct timer_list timer;
        unsigned long expire;

        switch (timeout)
        {
        case MAX_SCHEDULE_TIMEOUT:
                /*
                 * These two special cases are useful to be comfortable
                 * in the caller. Nothing more. We could take
                 * MAX_SCHEDULE_TIMEOUT from one of the negative value
                 * but I' d like to return a valid offset (>=0) to allow
                 * the caller to do everything it want with the retval.
                 */
                schedule();
                goto out;
        default:
                /*
                 * Another bit of PARANOID. Note that the retval will be
                 * 0 since no piece of kernel is supposed to do a check
                 * for a negative retval of schedule_timeout() (since it
                 * should never happens anyway). You just have the printk()
                 * that will tell you if something is gone wrong and where.
                 */
                if (timeout < 0) {
                        printk(KERN_ERR "schedule_timeout: wrong timeout "
                              "value %lx\n", timeout);
                        dump_stack();
                        current->state = TASK_RUNNING;
                        goto out;
                }
        }

        /* Absolute expiry time in jiffies. */
        expire = timeout + jiffies;

        setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
        __mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
        schedule();
        del_singleshot_timer_sync(&timer);

        /* Remove the timer from the object tracker */
        destroy_timer_on_stack(&timer);

        /* Jiffies left until expiry; negative if the expiry time has passed. */
        timeout = expire - jiffies;

 out:
        return timeout < 0 ? 0 : timeout;
}

/*
 * Slow path of wait_event_timeout().
 * `ret` must be an lvalue holding the timeout in jiffies on entry. Each
 * iteration calls prepare_to_wait() with TASK_UNINTERRUPTIBLE, leaves the
 * loop if `condition` is true, and otherwise stores the result of
 * schedule_timeout(ret) back into `ret`, leaving the loop when it reaches 0.
 * After the loop, if `ret` is 0 but `condition` is now true, `ret` is set
 * to 1 so the caller sees a non-zero (success) value.
 */
#define __wait_event_timeout(wq, condition, ret)                        \
do {                                                                    \
        DEFINE_WAIT(__wait);                                            \
                                                                        \
        for (;;) {                                                      \
                prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);    \
                if (condition)                                          \
                        break;                                          \
                ret = schedule_timeout(ret);                            \
                if (!ret)                                               \
                        break;                                          \
        }                                                               \
        if (!ret && (condition))                                        \
                ret = 1;                                                \
        finish_wait(&wq, &__wait);                                      \
} while (0)

/*
 * Wait until `condition` is true or `timeout` jiffies have elapsed.
 * Evaluates to a long: `timeout` unchanged when `condition` already holds,
 * otherwise whatever __wait_event_timeout() leaves in __ret (0 on timeout
 * with `condition` still false, non-zero otherwise).
 */
#define wait_event_timeout(wq, condition, timeout)                      \
({                                                                      \
        long __ret = timeout;                                           \
        if (!(condition))                                               \
                __wait_event_timeout(wq, condition, __ret);             \
        __ret;                                                          \
})

wait_event_timeout 和 wait_event逻辑类似，就一个地方差异较大，即schedule_timeout。
schedule_timeout中构建了一个定时器，该定时器到期后将调用process_timeout(通过中断的形式)，传入的参数则是当前进程的指针current。然后调用schedule，等待调度器回到该位置(由于任务状态为TASK_UNINTERRUPTIBLE，不能通过调度或信号回到该位置)。这个时候就有两种情况(唤醒在后面wake_up部分详细说明)：
① 超时了，调用process_timeout函数，该函数调用wake_up_process函数，核心代码类似wake_up_xxx(current)
② 在其他任务中调用了wake_up_xxx(wq)函数，将任务状态修改为TASK_RUNNING
一旦任务状态为TASK_RUNNING，就又回到了cpu的run queue中，可以通过调度回到函数中的schedule位置。

wait_event_timeout 返回值如下：
- 大于0: 表示condition满足，返回值表示距离设定超时还有多久(jiffies)
- 等于0: 表示超时发生

2.3 wait_event_interruptible

wait_event_interruptible 的实现如下：

/*
 * Slow path of wait_event_interruptible().
 * Each iteration calls prepare_to_wait() with TASK_INTERRUPTIBLE and leaves
 * the loop if `condition` is true. Otherwise, if signal_pending(current)
 * is zero it calls schedule() and loops again; if a signal is pending it
 * stores -ERESTARTSYS in `ret` and leaves the loop.
 * `ret` is written only on the signal path.
 */
#define __wait_event_interruptible(wq, condition, ret)                  \
do {                                                                    \
        DEFINE_WAIT(__wait);                                            \
                                                                        \
        for (;;) {                                                      \
                prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);      \
                if (condition)                                          \
                        break;                                          \
                if (!signal_pending(current)) {                         \
                        schedule();                                     \
                        continue;                                       \
                }                                                       \
                ret = -ERESTARTSYS;                                     \
                break;                                                  \
        }                                                               \
        finish_wait(&wq, &__wait);                                      \
} while (0)

/*
 * Wait until `condition` is true or a signal is pending.
 * Evaluates to an int: 0 when `condition` became true, -ERESTARTSYS when
 * the wait loop was left because of a pending signal.
 */
#define wait_event_interruptible(wq, condition)                         \
({                                                                      \
        int __ret = 0;                                                  \
        if (!(condition))                                               \
                __wait_event_interruptible(wq, condition, __ret);       \
        __ret;                                                          \
})

wait_event_interruptible的实现和wait_event类似，区别是有多了一个signal_pending操作。
signal_pending检查给定进程是否有信号需要处理，返回0表示没有信号需要处理。
所以此时退出循环的条件是：满足 condition 和有信号两者之一就行 (如果执行到schedule，需要另外一个进程调用wake_up_xxx(&wq)操作，或者该进程收到了信号，将任务加入到run queue中。)

wait_event_interruptible 返回值如下：
- -ERESTARTSYS: 表示等待过程被信号打断。该错误码的含义是：系统调用在睡眠等待期间被信号中断，待信号处理完成后，该系统调用可以被重新执行一次。
- 等于0：表示condition满足

2.4 wait_event_interruptible_timeout

wait_event_interruptible_timeout 的实现如下：

/*
 * Slow path of wait_event_interruptible_timeout().
 * `ret` must be an lvalue holding the timeout in jiffies on entry. Each
 * iteration calls prepare_to_wait() with TASK_INTERRUPTIBLE and leaves the
 * loop if `condition` is true. With no signal pending it stores
 * schedule_timeout(ret) back into `ret` and leaves the loop when that is 0;
 * with a signal pending it stores -ERESTARTSYS in `ret` and leaves the loop.
 * After the loop, if `ret` is 0 but `condition` is now true, `ret` is set
 * to 1.
 */
#define __wait_event_interruptible_timeout(wq, condition, ret)          \
do {                                                                    \
        DEFINE_WAIT(__wait);                                            \
                                                                        \
        for (;;) {                                                      \
                prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);      \
                if (condition)                                          \
                        break;                                          \
                if (!signal_pending(current)) {                         \
                        ret = schedule_timeout(ret);                    \
                        if (!ret)                                       \
                                break;                                  \
                        continue;                                       \
                }                                                       \
                ret = -ERESTARTSYS;                                     \
                break;                                                  \
        }                                                               \
        if (!ret && (condition))                                        \
                ret = 1;                                                \
        finish_wait(&wq, &__wait);                                      \
} while (0)

/*
 * Wait until `condition` is true, a signal is pending, or `timeout` jiffies
 * have elapsed.
 * Evaluates to a long: `timeout` unchanged when `condition` already holds,
 * otherwise the value left by __wait_event_interruptible_timeout()
 * (-ERESTARTSYS on signal, 0 on timeout with `condition` still false,
 * positive otherwise).
 */
#define wait_event_interruptible_timeout(wq, condition, timeout)        \
({                                                                      \
        long __ret = timeout;                                           \
        if (!(condition))                                               \
                __wait_event_interruptible_timeout(wq, condition, __ret); \
        __ret;                                                          \
})

wait_event_interruptible_timeout的实现和上面wait_event等类似，退出循环的条件是：满足 condition、timeout 和有信号三者之一(如果执行到schedule，需要另外一个进程调用wake_up_xxx(&wq)操作，或者超时了，或者该进程收到了信号，这三者都会将任务加入到run queue中。)。

wait_event_interruptible_timeout 返回值如下：
- -ERESTARTSYS: 表示被信号激活唤醒
- 大于0: 表示condition满足，返回值表示距离设定超时还有多久(jiffies)
- 等于0: 表示超时发生

3. 唤醒等待队列上的进程

内核提供了如下接口来唤醒等待队列上的进程：

/*
 * Wake-up entry points. All of them forward to __wake_up(),
 * __wake_up_locked() or __wake_up_sync() and differ in two arguments:
 *  - mode: TASK_NORMAL for the plain variants, TASK_INTERRUPTIBLE for the
 *    *_interruptible variants;
 *  - the exclusive-waiter count: 1 for the plain variants, `nr` for the
 *    *_nr variants, 0 for the *_all variants.
 */
#define wake_up(x)                      __wake_up(x, TASK_NORMAL, 1, NULL)
#define wake_up_nr(x, nr)               __wake_up(x, TASK_NORMAL, nr, NULL)
#define wake_up_all(x)                  __wake_up(x, TASK_NORMAL, 0, NULL)
#define wake_up_locked(x)               __wake_up_locked((x), TASK_NORMAL, 1)
#define wake_up_all_locked(x)           __wake_up_locked((x), TASK_NORMAL, 0)
#define wake_up_interruptible(x)        __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
#define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
#define wake_up_interruptible_all(x)    __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL)
#define wake_up_interruptible_sync(x)   __wake_up_sync((x), TASK_INTERRUPTIBLE, 1)

可以看到这些接口调用了三个函数__wake_up，__wake_up_locked，__wake_up_sync。先看看__wake_up的实现:

/*
 * Walk every entry queued on @q and invoke its wake callback.
 *
 * @q:            wait queue head to scan
 * @mode:         passed through to each entry's callback
 * @nr_exclusive: the scan stops once this many exclusive entries have had
 *                their callback return non-zero; with 0 the counter never
 *                reaches zero in practice, so the whole list is scanned
 * @wake_flags:   passed through to each entry's callback
 * @key:          passed through to each entry's callback
 */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
                        int nr_exclusive, int wake_flags, void *key)
{
        wait_queue_t *entry, *upcoming;

        list_for_each_entry_safe(entry, upcoming, &q->task_list, task_list) {
                /*
                 * Read the flags before running the callback.
                 * NOTE(review): presumably because the entry may no longer
                 * be safe to touch once its task has been woken - confirm.
                 */
                unsigned entry_flags = entry->flags;

                /* Callback reported nothing woken: move on. */
                if (!entry->func(entry, mode, wake_flags, key))
                        continue;

                /* Non-exclusive entries never count against the quota. */
                if (!(entry_flags & WQ_FLAG_EXCLUSIVE))
                        continue;

                /* Quota of exclusive wake-ups used up: stop scanning. */
                if (--nr_exclusive == 0)
                        break;
        }
}

/*
 * Wake waiters queued on @q.
 * Takes q->lock with the interrupt state saved, runs __wake_up_common()
 * with wake_flags fixed at 0, then releases the lock.
 *
 * @q:            wait queue head
 * @mode:         forwarded to __wake_up_common()
 * @nr_exclusive: forwarded to __wake_up_common()
 * @key:          forwarded to __wake_up_common()
 */
void __wake_up(wait_queue_head_t *q, unsigned int mode,
                        int nr_exclusive, void *key)
{
        unsigned long irq_state;

        spin_lock_irqsave(&q->lock, irq_state);
        __wake_up_common(q, mode, nr_exclusive, 0, key);
        spin_unlock_irqrestore(&q->lock, irq_state);
}

可以看到__wake_up会调到__wake_up_common函数，该函数的逻辑是，遍历等待队列上的wait_queue_t结构体，进行如下的操作：
① 获取curr->flags值放入flags中。
② 进行判断，如果同时满足三个条件就退出循环。（注对于if来说，如果前面有一项不满足，后续的判断就不会做）
第一个条件是curr->func的返回结果，依据前面的说明，该函数实际上就是autoremove_wake_function，其详细说明如下，如果返回1，表明已经将相关的任务加入到cpu的run queue，并修改任务的状态成功。依据前面定义的wait_event_xxx的实现，该项正常来说均返回1。
第二个条件是flags & WQ_FLAG_EXCLUSIVE，如果该wait_queue_t是独占的，就为真。对于一个任务队列来说，只有前面的非互斥项执行curr->func之后，才轮到互斥进程，也只有互斥进程flags & WQ_FLAG_EXCLUSIVE才为真。
第三个条件是!--nr_exclusive，即先将nr_exclusive减1，结果为0时该项为真。如果nr_exclusive初始为0，减1后为负数，依据常理，该项始终为假；如果nr_exclusive为1，则第一次就为真；如果nr_exclusive为一个正整数nr，则第nr次，该项为真。
所以可以得出：
wake_up 唤醒全部的非独占任务，唤醒一个独占任务。
wake_up_nr 唤醒全部的非独占任务，唤醒nr个独占任务。
wake_up_all 唤醒全部的非独占任务，唤醒全部独占任务。
wake_up_interruptible_xxx等函数类似上面。

/*
 * Wake up task @p if its current state matches the @state mask.
 *
 * @p:          task to wake
 * @state:      mask of task states that may be woken; if p->state has none
 *              of these bits set, nothing is done
 * @wake_flags: wake-up flags, forwarded to the helpers below
 *
 * Returns 1 if p->state matched @state (the task is being woken),
 * 0 otherwise. p->pi_lock is held for the duration of the function.
 */
static int
try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
{
        unsigned long flags;
        int cpu, success = 0;

        /*
         * If we are going to wake up a thread waiting for CONDITION we
         * need to ensure that CONDITION=1 done by the caller can not be
         * reordered with p->state check below. This pairs with mb() in
         * set_current_state() the waiting thread does.
         */
        smp_mb__before_spinlock();
        raw_spin_lock_irqsave(&p->pi_lock, flags);
        if (!(p->state & state))
                goto out;

        success = 1; /* we're going to change ->state */
        cpu = task_cpu(p);  /* CPU that last ran this task */

        if (p->on_rq && ttwu_remote(p, wake_flags))
                goto stat;

        /*
         * SMP support: this section can be ignored on configurations built
         * without CONFIG_SMP. It decides whether the task should be moved
         * to another CPU's run queue (load balancing).
         */
#ifdef CONFIG_SMP
        /*
         * If the owning (remote) cpu is still in the middle of schedule() with
         * this task as prev, wait until its done referencing the task.
         */
        while (p->on_cpu)
                cpu_relax();
        /*
         * Pairs with the smp_wmb() in finish_lock_switch().
         */
        smp_rmb();

        p->sched_contributes_to_load = !!task_contributes_to_load(p);
        p->state = TASK_WAKING;

        if (p->sched_class->task_waking)
                p->sched_class->task_waking(p);

        cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
        if (task_cpu(p) != cpu) {
                wake_flags |= WF_MIGRATED;
                set_task_cpu(p, cpu);
        }
#endif /* CONFIG_SMP */

        ttwu_queue(p, cpu);
stat:
        ttwu_stat(p, cpu, wake_flags);
out:
        raw_spin_unlock_irqrestore(&p->pi_lock, flags);

        return success;
}

/*
 * Default wake callback for a wait queue entry: wake the task stored in
 * the entry's private pointer.
 *
 * @curr:       wait queue entry being woken
 * @mode:       state mask forwarded to try_to_wake_up()
 * @wake_flags: forwarded to try_to_wake_up()
 * @key:        unused here
 *
 * Returns whatever try_to_wake_up() returns.
 */
int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
                          void *key)
{
        struct task_struct *waiter = curr->private;

        return try_to_wake_up(waiter, mode, wake_flags);
}

/*
 * Wake callback that also unlinks the entry from its wait queue when the
 * wake-up succeeded.
 *
 * @wait: wait queue entry being woken
 * @mode: forwarded to default_wake_function()
 * @sync: forwarded to default_wake_function() as its wake_flags
 * @key:  forwarded to default_wake_function()
 *
 * Returns the result of default_wake_function().
 */
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
        int woken = default_wake_function(wait, mode, sync, key);

        /* Nothing was woken: leave the entry queued. */
        if (!woken)
                return woken;

        /* Wake-up succeeded: take the entry off the wait queue. */
        list_del_init(&wait->task_list);
        return woken;
}

以上是autoremove_wake_function的实现，具体看try_to_wake_up函数。
该函数有3个参数：
- p 任务结构体指针
- state 需要唤醒的进程状态掩码，即需要唤醒符合该状态掩码的进程
- wake_flags 此处等待队列传过来的值为0。表示是同步唤醒sync，还是异步唤醒 async；

瑜珈山神

关注

0
点赞
踩
4

收藏

觉得还不错? 一键收藏
1
评论
Linux等待队列waitqueue

waitqueuewaitqueue创建一个等待队列让当前进程开始等待1 wait_event2 wait_event_timeout3 wait_event_interruptible4 wait_event_interruptible_timeout唤醒等待队列上的进程内核中提供了等待队列，作用是实现阻塞操作。等待队列用于使进程等待某一特定的事件发生而无需频繁的轮询，进程在等待
复制链接

扫一扫

专栏目录