completion也是一种同步机制,与信号量类似。
completion 是基于等待队列实现的,只是多了一个done
/* A completion: a counter paired with a wait queue head. */
struct completion {
/* Pending-completion count: complete() increments it, a successful wait decrements it. */
unsigned int done;
/* Queue of waiters; the lock embedded in this head is the one held around accesses to done. */
wait_queue_head_t wait;
};
信号量可以用于实现同步,但往往可能会出现一些不好的结果。例如:当进程A分配了一个临时信号量变量,把它初始化为关闭的MUTEX,并把其地址传递给进程B,然后在该信号量上调用down(),进程A打算一旦被唤醒就撤销该信号量。随后,运行在不同CPU上的进程B在同一个信号量上调用up()。然而,up()和down()的目前实现还允许这两个函数在同一个信号量上并发执行。因此,进程A可以被唤醒并撤销临时信号量,而进程B还在运行up()函数。结果up()可能试图访问一个不存在的数据结构,这样就会出现错误。为了防止发生这种错误,内核专门设计了completion机制用于同步。
关于completion为什么比信号量更完善,专业解释如下:
http://bbs.chinaunix.net/thread-4156398-1-1.html
| |
个人理解如下:
相同点: 在拿锁睡眠流程上是一样的, down是发现count>0就减一后成功拿取,否则定义semaphore_waiter加入链表。
wait completion也是发现done>0就减一后成功拿取,否则就定义 DECLARE_WAITQUEUE(wait, current);加入等待队列头里。
不同点: 信号量up时是先判断链表是否为空,为空的话count++,不为空就唤醒链表里的等待者,此时count不增加。也就是只有没有等待者时才会将count++。
完成量complete时直接先将done++,若等待队列为空就不再处理;若等待队列里有等待者则唤醒之,被唤醒的等待者随后执行done--,又减回去了。
完成量可以complete all,但是信号量似乎没法一下子唤醒所有等待者。
信号量改变count时需要加锁保护;完成量修改done时同样有锁保护,只是没有单独的锁,而是复用等待队列头里的自旋锁wait.lock(见下文complete()和__wait_for_common()中的spin_lock调用)。完成量的结构更简单,这也许就是完成量轻量级的一个体现吧,复杂重大场合还得上信号量,简单场合用完成量。完成量的done初始化肯定为0,信号量的count可自定义。
两者还是相当像的,一个是count表示资源数,一个是done表示完成数。
常用API
wait_for_completion等待在completion上;如果加了interruptible,就表示线程等待可被外部发来的信号打断;如果加了killable,就表示线程只可被kill信号打断;如果加了timeout,表示等待超出一定时间会自动结束等待,timeout的单位是jiffies(系统时钟节拍数,1个jiffy等于1/HZ秒,HZ为1000时即1ms)。try_wait_for_completion则是非阻塞地获取completion;completion_done检查是否有线程阻塞在completion上;
complete唤醒阻塞在completion上的首个线程;complete_all唤醒阻塞在completion上的所有线程。
看源码就知道了:
1. 两种方式定义都是初始化done成员为0;初始化一个spinlock和一个list_head
1)动态初始化completion
/*
 * Zero-initialized completion object, so done starts at 0.
 * NOTE(review): "= {0}" by itself does not run init_waitqueue_head(), so
 * the wait queue's list pointers are NULL rather than self-linked;
 * presumably init_completion(&marlin_ack) is called before first use --
 * confirm against the driver this line was taken from.
 */
static struct completion marlin_ack = {0};
/kernel/include/linux/completion.h
/*
 * init_completion - set up a completion at run time
 * @x: completion to initialize
 *
 * Initializes the embedded wait queue head and clears the done counter.
 */
static inline void init_completion(struct completion *x)
{
	init_waitqueue_head(&x->wait);
	x->done = 0;
}
/*
 * init_waitqueue_head - run-time initializer for a wait queue head.
 * @q: pointer to the wait queue head
 *
 * Declares a block-local static lock_class_key (so every expansion site has
 * its own key object) and passes the head, the stringified argument text
 * and that key on to __init_waitqueue_head().
 */
#define init_waitqueue_head(q) \
do { \
static struct lock_class_key __key; \
\
__init_waitqueue_head((q), #q, &__key); \
} while (0)
/*
 * __init_waitqueue_head - worker behind init_waitqueue_head()
 * @q:    wait queue head to initialize
 * @name: name handed to lockdep for the queue lock
 * @key:  lock class key handed to lockdep for the queue lock
 */
void __init_waitqueue_head(wait_queue_head_t *q, const char *name,
			   struct lock_class_key *key)
{
	/* Start with an initialized (waiter-free) list head. */
	INIT_LIST_HEAD(&q->task_list);

	/* Initialize the queue lock, then attach its lockdep class and name. */
	spin_lock_init(&q->lock);
	lockdep_set_class_and_name(&q->lock, key, name);
}
2)静态宏定义方式
/*
 * DECLARE_COMPLETION - define a struct completion named @work and
 * initialize it statically through COMPLETION_INITIALIZER().
 */
#define DECLARE_COMPLETION(work) \
struct completion work = COMPLETION_INITIALIZER(work)
/*
 * COMPLETION_INITIALIZER - positional static initializer for struct
 * completion: the first member (done) is 0, the second (wait) is built by
 * __WAIT_QUEUE_HEAD_INITIALIZER() from @work's own wait member.
 */
#define COMPLETION_INITIALIZER(work) \
{ 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
/*
 * __WAIT_QUEUE_HEAD_INITIALIZER - static initializer for a wait queue head.
 * The lock starts out unlocked and both task_list pointers refer back to
 * task_list itself, i.e. the list is self-linked with no entries.
 */
#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
.task_list = { &(name).task_list, &(name).task_list } }
1.2 在wait completion时, 静态定义队列项加入完成量的等待队列头里,然后睡下去。 下面就是队列项实现
/* One entry (waiter) linked onto a wait queue head. */
struct __wait_queue {
/* Flag bits; __wake_up_common() tests WQ_FLAG_EXCLUSIVE here. */
unsigned int flags;
/* Opaque payload; __WAITQUEUE_INITIALIZER() stores the waiting task here. */
void *private;
/* Wake callback invoked by __wake_up_common() for this entry. */
wait_queue_func_t func;
/* Linkage into the head's task_list. */
struct list_head task_list;
};
/*
 * __WAITQUEUE_INITIALIZER - static initializer for a wait queue entry.
 * Stores @tsk in .private, selects default_wake_function as the wake
 * callback and leaves the list linkage NULL (not yet queued). @name is not
 * used by the expansion.
 */
#define __WAITQUEUE_INITIALIZER(name, tsk) { \
.private = tsk, \
.func = default_wake_function, \
.task_list = { NULL, NULL } }
/*
 * DECLARE_WAITQUEUE - define a wait queue entry called @name for task @tsk,
 * initialized with __WAITQUEUE_INITIALIZER().
 */
#define DECLARE_WAITQUEUE(name, tsk) \
wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
/*
 * __WAITQUEUE_INITIALIZER - static initializer for a wait queue entry.
 * @name: entry being initialized (not used by the expansion)
 * @tsk:  task stored in .private
 *
 * .func is set to default_wake_function; that is the callback
 * __wake_up_common() finally invokes to wake the waiter.  This note is kept
 * above the #define on purpose: text placed after a line-continuation
 * backslash stops the backslash from splicing lines and breaks the macro.
 */
#define __WAITQUEUE_INITIALIZER(name, tsk) {				\
	.private	= tsk,						\
	.func		= default_wake_function,			\
	.task_list	= { NULL, NULL } }
kernel/kernel/sched/core.c
/**
 * wait_for_completion - block until a completion is signaled
 * @x: completion to wait on
 *
 * The caller sleeps in TASK_UNINTERRUPTIBLE with MAX_SCHEDULE_TIMEOUT, so
 * the wait can neither be interrupted nor time out.
 *
 * Variants with a timeout and/or interruptibility exist (for example
 * wait_for_completion_timeout()); the signaling side is complete().
 */
void __sched wait_for_completion(struct completion *x)
{
	/* The returned remaining time is deliberately discarded. */
	(void)wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_for_completion);
/*
 * wait_for_common - wait on @x using schedule_timeout() as the sleep
 * primitive, for at most @timeout in task state @state.  Returns whatever
 * __wait_for_common() reports.
 */
static long __sched
wait_for_common(struct completion *x, long timeout, int state)
{
	long remaining = __wait_for_common(x, schedule_timeout, timeout, state);

	return remaining;
}
/*
 * __wait_for_common - locked wrapper around do_wait_for_common()
 * @x:       completion to wait on
 * @action:  sleep primitive, called with the remaining timeout
 * @timeout: maximum time to wait
 * @state:   task state to sleep in
 *
 * Returns the value produced by do_wait_for_common().
 */
static inline long __sched
__wait_for_common(struct completion *x,
		  long (*action)(long), long timeout, int state)
{
	long remaining;

	might_sleep();

	/*
	 * The wait queue lock covers the queue manipulation done before going
	 * to sleep and after waking up.  do_wait_for_common() drops it right
	 * before calling action() and retakes it afterwards, so sleeping with
	 * this lock "held" here does not deadlock.
	 */
	spin_lock_irq(&x->wait.lock);
	remaining = do_wait_for_common(x, action, timeout, state);
	spin_unlock_irq(&x->wait.lock);

	return remaining;
}
/*
 * do_wait_for_common - core of the completion wait path.
 * @x:       completion to wait on
 * @action:  sleep primitive, called with the remaining timeout
 * @timeout: maximum time to wait
 * @state:   task state to sleep in
 *
 * Entered with x->wait.lock held (see __wait_for_common()); the lock is
 * dropped only around the call to action().  If done is already non-zero the
 * function consumes it without sleeping.  Otherwise an on-stack wait queue
 * entry is queued on x->wait and the task sleeps until done becomes
 * non-zero, the timeout runs out, or a signal is pending for @state.
 *
 * Returns -ERESTARTSYS if stopped by a pending signal, 0 if the timeout ran
 * out with done still zero, otherwise the remaining timeout (at least 1)
 * after consuming one count of done.
 */
static inline long __sched
do_wait_for_common(struct completion *x,
long (*action)(long), long timeout, int state)
{
// If complete() already ran, done is non-zero and the sleep below is skipped entirely: the condition is already satisfied.
if (!x->done) {
// Build a wait queue entry for the current task and append it, as an exclusive waiter, to the completion's wait queue.
DECLARE_WAITQUEUE(wait, current);
__add_wait_queue_tail_exclusive(&x->wait, &wait);
do {
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
break;
}
__set_current_state(state);
spin_unlock_irq(&x->wait.lock); // lock released (and interrupts re-enabled) before sleeping
timeout = action(timeout); // action is schedule_timeout when reached via wait_for_common(); the task sleeps in this call
spin_lock_irq(&x->wait.lock);
} while (!x->done && timeout);
__remove_wait_queue(&x->wait, &wait);
// Still not completed: stopped by a signal or the timeout expired.
if (!x->done)
return timeout;
}
// Consume one completion.
x->done--;
// GNU "?:" shorthand: return timeout if it is non-zero, else 1, so success is never reported as 0.
return timeout ?: 1;
}
这里action对应的 schedule_timeout 定义如下,可以看到是通过timer实现的。
schedule_timeout这个函数除了对当前进程调用schedule之外,还有一个功能,如同其名字中暗示的,在指定的时间到期后(timeout了)将进程唤醒。我们知道,进程一旦进入睡眠状态,就会从cpu的run queue中移走,直觉是系统将不会维护散落到系统各处(等待队列等)的这些睡眠进程的时间信息,那么如何在指定的时间到期时唤醒这些进程呢?Linux内核使用了timer机制来完成,timer不依赖于进程,依赖于处理器的中断
/*
 * schedule_timeout - give up the CPU until woken or until a timeout expires.
 * @timeout: time to sleep; it is added to jiffies to form the expiry
 *
 * MAX_SCHEDULE_TIMEOUT: calls schedule() without arming a timer and leaves
 * through the common exit.
 * Negative @timeout: logs an error, dumps the stack, sets the task state to
 * TASK_RUNNING and returns 0 without sleeping.
 * Otherwise: arms an on-stack timer with process_timeout() as callback and
 * current as its argument, calls schedule(), tears the timer down and
 * returns expire - jiffies, clamped to 0 if negative.
 */
signed long __sched schedule_timeout(signed long timeout)
{
struct timer_list timer;
unsigned long expire;
switch (timeout)
{
case MAX_SCHEDULE_TIMEOUT:
/*
* These two special cases are useful to be comfortable
* in the caller. Nothing more. We could take
* MAX_SCHEDULE_TIMEOUT from one of the negative value
* but I'd like to return a valid offset (>=0) to allow
* the caller to do everything it want with the retval.
*/
/* schedule() selects another task from the run queue and hands the CPU to it.
For details see http://blog.chinaunix.net/uid-20341252-id-1702110.html */
schedule();
goto out;
default:
/*Another bit of PARANOID. Note that the retval will be
* 0 since no piece of kernel is supposed to do a check
* for a negative retval of schedule_timeout() (since it
* should never happens anyway). You just have the printk()
* that will tell you if something is gone wrong and where. */
if (timeout < 0) {
printk(KERN_ERR "schedule_timeout: wrong timeout "
"value %lx\n", timeout);
dump_stack();
current->state = TASK_RUNNING;
goto out;
}
}
/* Expiry for the on-stack timer, relative to the current jiffies value. */
expire = timeout + jiffies;
setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
__mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
schedule();
del_singleshot_timer_sync(&timer);
/* Remove the timer from the object tracker */
destroy_timer_on_stack(&timer);
/* Time left until the expiry; negative if it has already passed. */
timeout = expire - jiffies;
out:
return timeout < 0 ? 0 : timeout;
}
唤醒流程
/**
 * complete - signal one thread waiting on a completion
 * @x: completion being signaled
 *
 * Increments the done counter and wakes a single waiter; waiters are woken
 * in the order in which they were queued.  Both steps run under the wait
 * queue lock with the interrupt state saved and restored.
 *
 * Related routines: complete_all(), wait_for_completion().
 *
 * A write memory barrier ahead of the task-state change may be assumed if
 * and only if a task is actually woken up.
 */
void complete(struct completion *x)
{
	unsigned long irq_flags;

	spin_lock_irqsave(&x->wait.lock, irq_flags);
	x->done += 1;
	__wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
	spin_unlock_irqrestore(&x->wait.lock, irq_flags);
}
/*
 * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
 * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
 * number) then we wake all the non-exclusive tasks and one exclusive task.
 *
 * There are circumstances in which we can try to wake a task which has already
 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
 * zero in this (rare) case, and we handle it by continuing to scan the queue.
 *
 * @q:            wait queue head whose entries are scanned in list order
 * @mode:         passed through to each entry's wake callback
 * @nr_exclusive: number of exclusive entries to wake before stopping
 * @wake_flags:   passed through to each entry's wake callback
 * @key:          passed through to each entry's wake callback
 */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int wake_flags, void *key)
{
wait_queue_t *curr, *next;
list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
/* Read flags before calling func; the loop already uses the _safe iterator with a saved next pointer. */
unsigned flags = curr->flags;
// func is default_wake_function for entries built with DECLARE_WAITQUEUE. With nr_exclusive == 1, as complete() passes, the scan stops after the first exclusive entry whose callback reports success, so one waiter is woken in queue (first-in, first-out) order.
if (curr->func(curr, mode, wake_flags, key) &&
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
}