linux基础之completion

最新推荐文章于 2024-03-13 00:35:39 发布

chenpuo

最新推荐文章于 2024-03-13 00:35:39 发布

阅读量644

点赞数

分类专栏： kernel

本文链接：https://blog.csdn.net/chenpuo/article/details/54287408

版权

kernel 专栏收录该内容

49 篇文章 3 订阅

订阅专栏

completion也是一种同步机制，与信号量类似。

completion 是基于等待队列实现的,只是多了一个done

/*
 * Synchronisation object made of a counter plus a wait queue head.
 */
struct completion {
unsigned int done; /* incremented by complete(); decremented by a waiter that consumes it */
wait_queue_head_t wait; /* queue of sleeping waiters; wait.lock is held while done is changed */

};

信号量可以用于实现同步，但往往可能会出现一些不好的结果。例如：当进程A分配了一个临时信号量变量，把它初始化为关闭的MUTEX，并把其地址传递给进程B，然后在A之上调用down()，进程A打算一旦被唤醒就撤销该信号量。随后，运行在不同CPU上的进程B在同一个信号量上调用up()。然而，up()和down()的目前实现还允许这两个函数在同一个信号量上并发。因此，进程A可以被唤醒并撤销临时信号量，而进程B还在运行up()函数。结果up()可能试图访问一个已经不存在的数据结构，这样就会出现错误。为了防止发生这种错误，内核专门设计了completion机制用于同步。

关于completion为什么比信号量完善，专业解释如下：

http://bbs.chinaunix.net/thread-4156398-1-1.html

引入completion时（应该是2.4.7版本）的semaphore确实存在问题，问题模型为：
http://lkml.iu.edu//hypermail/linux/kernel/0107.3/0674.html，google上应该还有其它的邮件。
那时的semaphore是没有锁保护的。至少我手上2.6.24的源码里就没有。

现在semaphore已经没有问题了。甚至有人还提议用semaphore重新实现completion：
http://lwn.net/Articles/277621/
从git来看，最终社区并没有接纳。

现在高版本的semaphore 已经不存在问题了。只是从概念层面，completion 和semaphore 应该被用于不同的
场合，completion 是用来等待一个条件成立，而semaphore应该是等待一个资源。条件是没有数量的，而资源
是有数量的。从实现来说，completion更轻（我没有验证过）
请在http://www.makelinux.net/ldd3/chp-5-sect-4里搜lightweight。

个人理解如下：

相同点：在拿锁睡眠流程上是一样的， down是发现count>0就减一后成功拿取，否则定义semaphore_waiter加入链表。

wait completion也是发现done>0就减一后成功拿取，否则就定义 DECLARE_WAITQUEUE(wait, current);加入等待队列头里。

不同点：信号量up时是先判断链表是否为空，为空的话count++，不为空就唤醒链表里的等待者，此时count不增加。也就是只有在没有等待者时才将count++。

完成量complete时直接先将done++，若等待队列为空就不再处理；若等待队列里有等待者，则唤醒它，被唤醒者随后执行done--，又减回去了。

完成量可以complete all，但是信号量似乎没法一下子唤醒所有等待者。

信号量改变count时需要加锁保护；完成量修改done时其实同样是在自旋锁保护下进行的（见下文complete()和__wait_for_common()中对x->wait.lock的加锁），只是它直接复用了等待队列头自带的锁，没有额外再定义一把锁。这也许就是完成量轻量级的一个体现吧，复杂重大场合还得上信号量，简单场合用完成量。完成量的done初始化肯定为0，信号量可自定义。

两者还是相当的像，一个是count表示资源数，一个是done表示完成数

常用API

wait_for_completion等待在completion上；如果加了interruptible，就表示线程等待可被外部发来的信号打断；如果加了killable，就表示线程只可被kill信号打断；如果加了timeout，表示等待超出一定时间会自动结束等待，timeout的单位是jiffies（系统时钟节拍，一个节拍的时长由HZ决定，例如HZ=1000时为1ms）。try_wait_for_completion则是非阻塞地获取completion；completion_done检查是否有线程阻塞在completion上；

complete唤醒阻塞在completion上的首个线程；complete_all唤醒阻塞在completion上的所有线程。

看源码就知道了：

1. 两种方式定义都是初始化done成员为0；初始化一个spinlock和一个list_head

1）动态初始化completion

/*
 * Zero-filled definition only.
 * NOTE(review): {0} does not run init_waitqueue_head(), so this presumably
 * relies on init_completion(&marlin_ack) being called before first use - confirm.
 */
static struct completion marlin_ack = {0};

/kernel/include/linux/completion.h
/*
 * init_completion - prepare a completion for first use
 * @x: completion to initialise
 *
 * Sets up the embedded wait queue head and clears the done counter, so the
 * first waiter will sleep until complete() is called.
 */
static inline void init_completion(struct completion *x)
{
	init_waitqueue_head(&x->wait);
	x->done = 0;
}

/*
 * Initialise the wait queue head @q.
 * Every expansion site gets its own static lock_class_key; it is handed to
 * __init_waitqueue_head() together with the stringified argument (#q).
 */
#define init_waitqueue_head(q)                          \
        do {                                            \
                static struct lock_class_key __key;     \
                                                        \
                __init_waitqueue_head((q), #q, &__key); \
        } while (0)

/*
 * __init_waitqueue_head - set up a wait queue head
 * @q:    wait queue head to initialise
 * @name: name passed on to lockdep for the head's lock
 * @key:  lock class key passed on to lockdep for the head's lock
 *
 * Leaves @q with an empty waiter list and an initialised spinlock.
 */
void __init_waitqueue_head(wait_queue_head_t *q, const char *name,
			   struct lock_class_key *key)
{
	/* Start with no waiters queued. */
	INIT_LIST_HEAD(&q->task_list);

	/* The lock must be initialised before its lockdep class is assigned. */
	spin_lock_init(&q->lock);
	lockdep_set_class_and_name(&q->lock, key, name);
}

2）静态宏定义方式

/*
 * Static definition of a completion.
 *
 * DECLARE_COMPLETION(work) defines "struct completion work" with done == 0
 * and an empty wait queue whose list head points back at itself.
 */
#define DECLARE_COMPLETION(work) \
	struct completion work = COMPLETION_INITIALIZER(work)

/*
 * The initialiser must directly follow the continuation backslash: a blank
 * line in between ends the macro and leaves it expanding to nothing.
 */
#define COMPLETION_INITIALIZER(work) \
	{ 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }

/* Initialiser for a wait queue head: unlocked spinlock, empty self-linked list. */
#define __WAIT_QUEUE_HEAD_INITIALIZER(name) {				\
	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),		\
	.task_list	= { &(name).task_list, &(name).task_list } }

1.2 在wait completion时，静态定义队列项加入完成量的等待队列头里，然后睡下去。下面就是队列项实现

/*
 * One entry (waiter) on a wait queue.
 */
struct __wait_queue {
        unsigned int            flags;          /* tested against WQ_FLAG_EXCLUSIVE by __wake_up_common() */
        void                    *private;       /* set to the waiting task by __WAITQUEUE_INITIALIZER */
        wait_queue_func_t       func;           /* wake-up callback invoked by __wake_up_common() */
        struct list_head        task_list;      /* link in the wait queue head's task_list */
};
/*
 * Initialiser for a wait queue entry that belongs to task @tsk.
 *
 * default_wake_function is the callback that __wake_up_common() later
 * invokes through ->func, i.e. the function that actually wakes the sleeper.
 * Comments must stay outside the macro body: text after a continuation
 * backslash stops it from joining the next line.
 */
#define __WAITQUEUE_INITIALIZER(name, tsk) {				\
	.private	= tsk,						\
	.func		= default_wake_function,			\
	.task_list	= { NULL, NULL } }

/* Define a wait queue entry called @name for task @tsk. */
#define DECLARE_WAITQUEUE(name, tsk)					\
	wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)

kernel/kernel/sched/core.c

/**
 * wait_for_completion - block until the completion is signalled
 * @x: completion to wait on
 *
 * Waits for a specific task to signal completion. The wait is NOT
 * interruptible and never times out: it is issued in TASK_UNINTERRUPTIBLE
 * state with MAX_SCHEDULE_TIMEOUT.
 *
 * Variants with a timeout or with interrupt capability exist as well
 * (e.g. wait_for_completion_timeout()); the signalling side is complete().
 */
void __sched wait_for_completion(struct completion *x)
{
	/* The remaining-time result is meaningless for an unbounded wait. */
	(void)wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_for_completion);

/*
 * wait_for_common - wait on @x, sleeping via schedule_timeout()
 * @x:       completion to wait on
 * @timeout: timeout value handed to schedule_timeout()
 * @state:   task state to sleep in
 *
 * Returns whatever __wait_for_common() returns.
 */
static long __sched
wait_for_common(struct completion *x, long timeout, int state)
{
	long remaining;

	remaining = __wait_for_common(x, schedule_timeout, timeout, state);
	return remaining;
}

/*
 * __wait_for_common - locked wrapper around do_wait_for_common()
 * @x:       completion to wait on
 * @action:  function used to sleep (takes and returns a timeout)
 * @timeout: initial timeout passed to @action
 * @state:   task state to sleep in
 *
 * Returns the value produced by do_wait_for_common().
 */
static inline long __sched
__wait_for_common(struct completion *x,
		  long (*action)(long), long timeout, int state)
{
	long remaining;

	might_sleep();

	/*
	 * The wait queue lock covers the list manipulation done before going
	 * to sleep and after waking up. do_wait_for_common() drops it around
	 * the call to @action, so sleeping does not happen with the lock held
	 * and no deadlock results.
	 */
	spin_lock_irq(&x->wait.lock);
	remaining = do_wait_for_common(x, action, timeout, state);
	spin_unlock_irq(&x->wait.lock);

	return remaining;
}

/*
 * do_wait_for_common - core of every "wait for completion" API.
 * @x:       completion to wait on
 * @action:  function used to sleep; takes the current timeout and returns
 *           the new one
 * @timeout: initial timeout
 * @state:   task state to sleep in
 *
 * Called with x->wait.lock held (see __wait_for_common()). If done is zero,
 * a wait queue entry for the current task is declared locally and appended
 * to the completion's wait queue, then the task sleeps via @action until
 * done becomes non-zero, the timeout reaches zero, or a signal is pending
 * for @state.
 *
 * Returns @timeout unchanged from the loop (0 after a timeout, or
 * -ERESTARTSYS after a signal) when done is still zero; otherwise done is
 * decremented and the remaining timeout is returned, or 1 if that is 0.
 */
static inline long __sched
do_wait_for_common(struct completion *x,
long (*action)(long), long timeout, int state)

{

/*
 * This is where a completion differs from a bare wait queue: complete() may
 * run first, and a later wait then finds done != 0 and skips the sleep
 * because the condition is already satisfied. Author's note: a plain wait
 * queue does not remember an earlier wake-up, so a wait issued afterwards
 * would still sleep; in this respect a completion resembles a semaphore or
 * a mutex.
 */

if (!x->done) {

/* Declare a wait queue entry for the current task and queue it on the completion, waiting to be woken. */

DECLARE_WAITQUEUE(wait, current);

__add_wait_queue_tail_exclusive(&x->wait, &wait);
do {
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
break;
}
__set_current_state(state);
spin_unlock_irq(&x->wait.lock); /* lock dropped here: preemption and interrupts are enabled again before sleeping */
timeout = action(timeout); /* action is schedule_timeout() when called via wait_for_common(); the task sleeps in here until it is woken or the timeout expires */
spin_lock_irq(&x->wait.lock);
} while (!x->done && timeout);
__remove_wait_queue(&x->wait, &wait);
if (!x->done)
return timeout;
}
x->done--;
return timeout ?: 1;
}

这里action对应的 schedule_timeout 定义如下，可以看到是通过timer实现的。

schedule_timeout这个函数除了对当前进程调用schedule之外，还有一个功能，如同其名字中暗示的，在指定的时间到期后(timeout了)将进程唤醒。我们知道，进程一旦进入睡眠状态，就会从cpu的run queue中移走，直觉是系统将不会维护散落到系统各处（等待队列等）的这些睡眠进程的时间信息，那么如何在指定的时间到期时唤醒这些进程呢？Linux内核使用了timer机制来完成，timer不依赖于进程，依赖于处理器的中断

/*
 * schedule_timeout - give up the CPU, optionally bounded by a timer
 * @timeout: timeout in jiffies, or MAX_SCHEDULE_TIMEOUT for no timer
 *
 * MAX_SCHEDULE_TIMEOUT: just calls schedule() and returns the value
 * unchanged. A negative value is reported with printk()/dump_stack(), the
 * task is put back to TASK_RUNNING and 0 is returned. Otherwise an on-stack
 * timer with callback process_timeout is armed for jiffies + timeout before
 * calling schedule().
 *
 * Returns the jiffies left until the expiry time, clamped to 0.
 */
signed long __sched schedule_timeout(signed long timeout)
{
struct timer_list timer;
unsigned long expire;

switch (timeout)
{
case MAX_SCHEDULE_TIMEOUT:
/*
* These two special cases are useful to be comfortable
* in the caller. Nothing more. We could take
* MAX_SCHEDULE_TIMEOUT from one of the negative value
* but I' d like to return a valid offset (>=0) to allow
* the caller to do everything it want with the retval.
*/

/*
 * Author's note: schedule() picks a task from the run queue list (rq) and
 * then gives the CPU to that task.
 * For details see http://blog.chinaunix.net/uid-20341252-id-1702110.html
 */

schedule();
goto out;
default:
/*Another bit of PARANOID. Note that the retval will be
* 0 since no piece of kernel is supposed to do a check
* for a negative retval of schedule_timeout() (since it
* should never happens anyway). You just have the printk()
* that will tell you if something is gone wrong and where. */
if (timeout < 0) {
printk(KERN_ERR "schedule_timeout: wrong timeout "
"value %lx\n", timeout);
dump_stack();
current->state = TASK_RUNNING;
goto out;
}
}

/* Absolute expiry time of the wake-up timer. */
expire = timeout + jiffies;

/* Arm an on-stack timer whose callback receives the current task as its argument. */
setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
__mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
schedule();
/* Back from schedule(): the timer is removed before its stack storage goes away. */
del_singleshot_timer_sync(&timer);

/* Remove the timer from the object tracker */
destroy_timer_on_stack(&timer);

/* Time left until expiry; negative when the expiry time has already passed. */
timeout = expire - jiffies;

out:
return timeout < 0 ? 0 : timeout;
}

唤醒流程

/**
 * complete - signal a single thread waiting on this completion
 * @x: completion whose state is updated
 *
 * Bumps the done counter and wakes up one thread waiting on @x. Waiters are
 * woken in the order in which they were queued.
 *
 * Related routines: complete_all(), wait_for_completion() and friends.
 *
 * A write memory barrier before the task state change may be assumed if and
 * only if at least one task is actually woken up.
 */
void complete(struct completion *x)
{
	unsigned long irq_flags;

	/* Both the counter update and the wake-up happen under the queue lock. */
	spin_lock_irqsave(&x->wait.lock, irq_flags);
	x->done += 1;
	__wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
	spin_unlock_irqrestore(&x->wait.lock, irq_flags);
}

/*
* The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
* wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
* number) then we wake all the non-exclusive tasks and one exclusive task.
*
* There are circumstances in which we can try to wake a task which has already
* started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
* zero in this (rare) case, and we handle it by continuing to scan the queue.
*
* @q:            wait queue head whose task_list is scanned
* @mode:         passed through to each entry's wake callback
* @nr_exclusive: number of exclusive entries to wake before stopping
* @wake_flags:   passed through to each entry's wake callback
* @key:          passed through to each entry's wake callback
*/
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int wake_flags, void *key)
{
wait_queue_t *curr, *next;

list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
/* Snapshot the flags before invoking the callback. */
unsigned flags = curr->flags;

/*
 * For entries created with DECLARE_WAITQUEUE, func is default_wake_function.
 * Author's note: apart from complete_all(), the complete APIs wake only one
 * queue entry, in first-in-first-out order, the same as a semaphore.
 * The scan stops once a callback succeeded on an exclusive entry and the
 * nr_exclusive budget has dropped to zero.
 */
if (curr->func(curr, mode, wake_flags, key) &&
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
}