工作队列 work queue

最新推荐文章于 2021-03-31 20:15:59 发布

lamdoc

最新推荐文章于 2021-03-31 20:15:59 发布

阅读量1.4k

点赞数

分类专栏： interrupt_delay 文章标签：工作 struct delay timer list 数据结构

本文链接：https://blog.csdn.net/lamdoc/article/details/7663043

版权

interrupt_delay 专栏收录该内容

20 篇文章 0 订阅

订阅专栏

工作队列是实现延期执行的另一种手段，它是通过守护进程在进程上下文中执行的，函数可以睡眠任意长时间。
它和其他类型的延期执行方式都不一样，工作队列可以把工作推后，交由一个内核线程去执行。
选择使用工作队列还是软中断/tasklet 的要点:
如果推后执行的任务需要睡眠，那就选择工作队列。
如果推后执行的任务不需要睡眠，那就选择软中断/tasklet。
如果需要用一个新的线程来执行你的下半部处理，那就应该使用工作队列。工作队列是唯一能在进程上下文中运行的下半部实现机制，工作队列能睡眠。
这意味着，在中断下半部中，如果你需要获得大量的内存，或者需要获取信号量，或者需要执行阻塞式的I/O操作时，都得使用工作队列。
如果不需要一个内核线程来推后执行，那就可以使用tasklet。

工作队列子系统是一个用于创建内核线程的接口，他创建的内核线程负责执行排在工作队列里的任务。
工作队列其实就是把需要推后执行的任务交给特定的通用线程来处理的一种实现方式。这种线程也叫工作者线程（worker thread）.
每个处理器对应一个工作队列线程,名为 events/n.
例如单处理器的系统只有 events/0; 双处理器系统会多一个 events/1 线程。

1. 工作队列数据结构

工作队列（workqueue）用 workqueue_struct 表示（工作者本身用后文的 struct worker 表示）：

/*
 * The externally visible workqueue abstraction is an array of
 * per-CPU workqueues:
 *
 * NOTE(review): the "I:", "W:" and "F:" prefixes in the field comments
 * look like locking annotations; their key is not part of this excerpt.
 */
struct workqueue_struct {
	unsigned int		flags;		/* I: WQ_* flags */
	union {
		struct cpu_workqueue_struct __percpu	*pcpu;   // used when there are multiple cores
		struct cpu_workqueue_struct		*single;  // used when there is a single core
		unsigned long				v;
	} cpu_wq;				/* I: cwq's */
	struct list_head	list;		/* W: list of all workqueues */

	struct mutex		flush_mutex;	/* protects wq flushing */
	int			work_color;	/* F: current work color */
	int			flush_color;	/* F: current flush color */
	atomic_t		nr_cwqs_to_flush; /* flush in progress */
	struct wq_flusher	*first_flusher;	/* F: first flusher */
	struct list_head	flusher_queue;	/* F: flush waiters */
	struct list_head	flusher_overflow; /* F: flush overflow list */

	mayday_mask_t		mayday_mask;	/* cpus requesting rescue */
	struct worker		*rescuer;	/* I: rescue worker */

	int			saved_max_active; /* W: saved cwq max_active */
	const char		*name;		/* I: workqueue name */
#ifdef CONFIG_LOCKDEP
	struct lockdep_map	lockdep_map;
#endif
};

workqueue_struct 的主要内容是 cpu_wq 联合体，它指向 cpu_workqueue_struct：多核时使用 per-CPU 的 pcpu，单核时使用 single。
工作队列在每个 CPU 上各有一个 cpu_workqueue_struct，它把所属的 workqueue_struct（wq）和该 CPU 的 global_cwq（gcwq）关联起来；真正执行工作的工作者线程则挂在各 CPU 的 global_cwq 之下。
cpu_workqueue_struct 定义如下：

/*
 * The per-CPU workqueue.  The lower WORK_STRUCT_FLAG_BITS of
 * work_struct->data are used for flags and thus cwqs need to be
 * aligned at two's power of the number of flag bits.
 *
 * Each instance ties one workqueue (->wq) to one cpu's global_cwq
 * (->gcwq); __alloc_workqueue_key() fills both in for every cwq cpu.
 */
struct cpu_workqueue_struct {
	struct global_cwq	*gcwq;		/* I: the associated gcwq */
	struct workqueue_struct *wq;		/* I: the owning workqueue */
	int			work_color;	/* L: current color */
	int			flush_color;	/* L: flushing color */
	int			nr_in_flight[WORK_NR_COLORS];
						/* L: nr of in_flight works */
	int			nr_active;	/* L: nr of active works */
	int			max_active;	/* L: max active works */
	struct list_head	delayed_works;	/* L: delayed works */
						/* __queue_work() parks works here once nr_active reaches max_active */
};

global_cwq 定义如下：

/*
 * Global per-cpu workqueue.  There's one and only one for each cpu
 * and all works are queued and processed here regardless of their
 * target workqueues.
 */
struct global_cwq {
	spinlock_t		lock;		/* the gcwq lock */
	struct list_head	worklist;	/* L: list of pending works */
						/* worker_thread() always takes the first entry of this list */
	unsigned int		cpu;		/* I: the associated cpu */
	unsigned int		flags;		/* L: GCWQ_* flags */

	int			nr_workers;	/* L: total number of workers */
	int			nr_idle;	/* L: currently idle ones */

	/* workers are chained either in the idle_list or busy_hash */
	struct list_head	idle_list;	/* X: list of idle workers */
	struct hlist_head	busy_hash[BUSY_WORKER_HASH_SIZE];
						/* L: hash of busy workers */

	struct timer_list	idle_timer;	/* L: worker idle timeout */
	struct timer_list	mayday_timer;	/* L: SOS timer for dworkers */

	struct ida		worker_ida;	/* L: for worker IDs */

	struct task_struct	*trustee;	/* L: for gcwq shutdown */
	unsigned int		trustee_state;	/* L: trustee state */
	wait_queue_head_t	trustee_wait;	/* trustee wait */
	struct worker		*first_idle;	/* L: first idle worker */
} ____cacheline_aligned_in_smp;

2. 表示工作的数据结构：
工作用 work_struct 结构体表示：

/*
 * A single unit of deferred work: the handler to run plus the
 * bookkeeping needed to queue it.
 */
struct work_struct {
	atomic_long_t data;		/* flag bits (WORK_STRUCT_*) live in the low bits; accessed via work_data_bits() */
	struct list_head entry;		/* list linkage while the work is queued */
	work_func_t func;		/* handler; process_one_work() calls it as func(work) */
#ifdef CONFIG_LOCKDEP
	struct lockdep_map lockdep_map;	/* copied by process_one_work() before the handler runs */
#endif
};

这些工作会被连接成链表list_head, 每个处理器上的每种类型的队列都对应这样一个链表。
所有的工作(work_struct)都会在工作者线程（worker_thread()）中，被工作者（worker）执行。
工作者用 worker 结构体表示：

/*
 * The poor guys doing the actual heavy lifting.  All on-duty workers
 * are either serving the manager role, on idle list or on busy hash.
 */
struct worker {
	/* on idle list while idle, on busy hash table while busy */
	union {
		struct list_head	entry;	/* L: while idle */
		struct hlist_node	hentry;	/* L: while busy */
	};

	/* current_work/current_cwq are set and cleared by process_one_work() */
	struct work_struct	*current_work;	/* L: work being processed */
	struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
	struct list_head	scheduled;	/* L: scheduled works */
	struct task_struct	*task;		/* I: worker task */
	struct global_cwq	*gcwq;		/* I: the associated gcwq */
	/* 64 bytes boundary on 64bit, 32 on 32bit */
	unsigned long		last_active;	/* L: last active timestamp */
	unsigned int		flags;		/* X: flags */
	int			id;		/* I: worker id */
	struct work_struct	rebind_work;	/* L: rebind worker to cpu */
};

所有的工作者线程都是用普通的内核线程实现的，它们都要执行 worker_thread() 函数：

/**
 * worker_thread - the worker thread function
 * @__worker: self
 *
 * The gcwq worker thread function.  There's a single dynamic pool of
 * these per each cpu.  These workers process all works regardless of
 * their specific target workqueue.  The only exception is works which
 * belong to workqueues with a rescuer which will be explained in
 * rescuer_thread().
 *
 * Returns 0, and only when WORKER_DIE is seen right after a wake-up;
 * otherwise control keeps cycling through the woke_up label.
 */
static int worker_thread(void *__worker)
{
	struct worker *worker = __worker;
	struct global_cwq *gcwq = worker->gcwq;

	/* tell the scheduler that this is a workqueue worker */
	worker->task->flags |= PF_WQ_WORKER;
woke_up:
	spin_lock_irq(&gcwq->lock);

	/* DIE can be set only while we're idle, checking here is enough */
	if (worker->flags & WORKER_DIE) {
		spin_unlock_irq(&gcwq->lock);
		worker->task->flags &= ~PF_WQ_WORKER;
		return 0;
	}

	worker_leave_idle(worker);
recheck:
	/* no more worker necessary? */
	if (!need_more_worker(gcwq))
		goto sleep;

	/* do we need to manage? */
	if (unlikely(!may_start_working(gcwq)) && manage_workers(worker))
		goto recheck;

	/*
	 * ->scheduled list can only be filled while a worker is
	 * preparing to process a work or actually processing it.
	 * Make sure nobody diddled with it while I was sleeping.
	 */
	BUG_ON(!list_empty(&worker->scheduled));

	/*
	 * When control reaches this point, we're guaranteed to have
	 * at least one idle worker or that someone else has already
	 * assumed the manager role.
	 */
	worker_clr_flags(worker, WORKER_PREP);

	/* drain the gcwq worklist, always taking its first entry */
	do {
		struct work_struct *work =
			list_first_entry(&gcwq->worklist,
					 struct work_struct, entry);

		if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
			/* optimization path, not strictly necessary */
			process_one_work(worker, work);
			if (unlikely(!list_empty(&worker->scheduled)))
				process_scheduled_works(worker);
		} else {
			/* linked works are moved to ->scheduled and run from there */
			move_linked_works(work, &worker->scheduled, NULL);
			process_scheduled_works(worker);
		}
	} while (keep_working(gcwq));

	worker_set_flags(worker, WORKER_PREP, false);
sleep:
	if (unlikely(need_to_manage_workers(gcwq)) && manage_workers(worker))
		goto recheck;

	/*
	 * gcwq->lock is held and there's no work to process and no
	 * need to manage, sleep.  Workers are woken up only while
	 * holding gcwq->lock or from local cpu, so setting the
	 * current state before releasing gcwq->lock is enough to
	 * prevent losing any event.
	 */
	worker_enter_idle(worker);
	__set_current_state(TASK_INTERRUPTIBLE);
	spin_unlock_irq(&gcwq->lock);
	schedule();   // the thread calls schedule() and goes to sleep here until woken up
	goto woke_up;
}

该函数就是工作者线程，内容是：
完成初始化后，线程在 do ... while() 循环中不断从 gcwq->worklist 取出工作并处理，直到 keep_working() 返回假；随后进入休眠，被唤醒后通过 goto woke_up 重新开始，如此往复。
当有操作被加入到队列中的时候，线程会被唤醒，以便执行这些操作。
当没有剩余操作的时候，他又会继续睡眠。
该函数最终会调用 process_scheduled_works(struct worker *worker) 函数，就是处理被调度的工作。

/**
 * process_scheduled_works - run everything on a worker's scheduled list
 * @worker: self
 *
 * Keeps taking the first entry of @worker->scheduled and handing it to
 * process_one_work() until the list is empty.  The head is re-read on
 * every pass because the list may change while a work is being
 * processed.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
 * multiple times.
 */
static void process_scheduled_works(struct worker *worker)
{
	struct list_head *pending = &worker->scheduled;

	for (;;) {
		struct work_struct *head;

		/* nothing left to run: done */
		if (list_empty(pending))
			break;

		/* always take the current first entry, never a cached cursor */
		head = list_first_entry(pending, struct work_struct, entry);
		process_one_work(worker, head);
	}
}

process_one_work(worker, work); 定义如下：

/**
 * process_one_work - process single work
 * @worker: self
 * @work: work to process
 *
 * Process @work.  This function contains all the logics necessary to
 * process a single work including synchronization against and
 * interaction with other workers on the same cpu, queueing and
 * flushing.  As long as context requirement is met, any worker can
 * call this function to process a work.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock) which is released and regrabbed.
 */
static void process_one_work(struct worker *worker, struct work_struct *work)
__releases(&gcwq->lock)
__acquires(&gcwq->lock)
{
	struct cpu_workqueue_struct *cwq = get_work_cwq(work);
	struct global_cwq *gcwq = cwq->gcwq;
	struct hlist_head *bwh = busy_worker_head(gcwq, work);
	bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE;
	/* cache the handler now; @work must not be dereferenced after it runs */
	work_func_t f = work->func;
	int work_color;
	struct worker *collision;
#ifdef CONFIG_LOCKDEP
	/*
	 * It is permissible to free the struct work_struct from
	 * inside the function that is called from it, this we need to
	 * take into account for lockdep too.  To avoid bogus "held
	 * lock freed" warnings as well as problems when looking into
	 * work->lockdep_map, make a copy and use that here.
	 */
	struct lockdep_map lockdep_map = work->lockdep_map;
#endif
	/*
	 * A single work shouldn't be executed concurrently by
	 * multiple workers on a single cpu.  Check whether anyone is
	 * already processing the work.  If so, defer the work to the
	 * currently executing one.
	 */
	collision = __find_worker_executing_work(gcwq, bwh, work);
	if (unlikely(collision)) {
		move_linked_works(work, &collision->scheduled, NULL);
		return;
	}

	/* claim and process */
	debug_work_deactivate(work);
	hlist_add_head(&worker->hentry, bwh);
	worker->current_work = work;                // record @work as the work this worker is processing
	worker->current_cwq = cwq;                   
	work_color = get_work_color(work);          // save the color now; it is needed after the handler has run

	/* record the current cpu number in the work data and dequeue */
	set_work_cpu(work, gcwq->cpu);
	list_del_init(&work->entry);

	/*
	 * If HIGHPRI_PENDING, check the next work, and, if HIGHPRI,
	 * wake up another worker; otherwise, clear HIGHPRI_PENDING.
	 */
	if (unlikely(gcwq->flags & GCWQ_HIGHPRI_PENDING)) {
		struct work_struct *nwork = list_first_entry(&gcwq->worklist,
						struct work_struct, entry);

		/* nwork is only dereferenced after the list_empty() check */
		if (!list_empty(&gcwq->worklist) &&
		    get_work_cwq(nwork)->wq->flags & WQ_HIGHPRI)
			wake_up_worker(gcwq);
		else
			gcwq->flags &= ~GCWQ_HIGHPRI_PENDING;
	}

	/*
	 * CPU intensive works don't participate in concurrency
	 * management.  They're the scheduler's responsibility.
	 */
	if (unlikely(cpu_intensive))
		worker_set_flags(worker, WORKER_CPU_INTENSIVE, true);

	/* the handler runs with gcwq->lock dropped */
	spin_unlock_irq(&gcwq->lock);

	work_clear_pending(work);
	lock_map_acquire(&cwq->wq->lockdep_map);
	lock_map_acquire(&lockdep_map);
	trace_workqueue_execute_start(work);
	f(work);
	/*
	 * While we must be careful to not use "work" after this, the trace
	 * point will only record its address.
	 */
	trace_workqueue_execute_end(work);
	lock_map_release(&lockdep_map);
	lock_map_release(&cwq->wq->lockdep_map);

	/* complain if the handler returned in atomic context or with locks held */
	if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
		printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
		       "%s/0x%08x/%d\n",
		       current->comm, preempt_count(), task_pid_nr(current));
		printk(KERN_ERR "    last function: ");
		print_symbol("%s\n", (unsigned long)f);
		debug_show_held_locks(current);
		dump_stack();
	}

	spin_lock_irq(&gcwq->lock);

	/* clear cpu intensive status */
	if (unlikely(cpu_intensive))
		worker_clr_flags(worker, WORKER_CPU_INTENSIVE);

	/* we're done with it, release */
	hlist_del_init(&worker->hentry);
	worker->current_work = NULL;
	worker->current_cwq = NULL;
	cwq_dec_nr_in_flight(cwq, work_color, false);
}

该函数把 work 赋给了 worker 中的当前工作 worker->current_work，随后释放 gcwq->lock 并调用 work->func(work)，即执行被推后的处理函数。
下图是工作者，工作，工作队列，工作者线程之间的关系：

3. 使用缺省的events工作队列
3.1 创建推后的工作：
可以使用：
DECLARE_WORK(name, func);
静态地创建一个名为 name、处理函数为 func 的 work_struct 结构体。
（较早版本内核的接口是 DECLARE_WORK(name, void (*func)(void *), void *data)，带有 data 参数；下面列出的定义只接受 name 和 func 两个参数，处理函数的参数是 work_struct 指针。）
函数定义如下

/*
 * DECLARE_WORK - define a struct work_struct named @n and initialize it
 * statically with __WORK_INITIALIZER so that @f is its handler.
 */
#define DECLARE_WORK(n, f)					\
	struct work_struct n = __WORK_INITIALIZER(n, f)

也可以使用：
INIT_WORK(struct work_struct *work, func);
动态地初始化一个由 work 指向的工作，处理函数为 func。
（较早版本内核的接口是 INIT_WORK(work, void (*func)(void *), void *data)，带有 data 参数；下面列出的定义只接受 work 和 func 两个参数。）
函数定义如下：

/*
 * INIT_WORK - initialize the work item pointed to by @_work at run time
 * with @_func as its handler.  Passes 0 as the _onstack argument of
 * __INIT_WORK().
 */
#define INIT_WORK(_work, _func)					\
	do {							\
		__INIT_WORK((_work), (_func), 0);		\
	} while (0)

/*
 * initialize all of a work item in one go
 *
 * NOTE! No point in using "atomic_long_set()": using a direct
 * assignment of the work data initializer allows the compiler
 * to generate better code.
 *
 * Both variants forward @_onstack to __init_work(), reset ->data to
 * WORK_DATA_INIT(), make ->entry an empty list head and install @_func
 * through PREPARE_WORK().  The CONFIG_LOCKDEP variant additionally
 * declares a static lock_class_key at the expansion site and registers
 * ->lockdep_map under the stringified @_work expression.
 */
#ifdef CONFIG_LOCKDEP
#define __INIT_WORK(_work, _func, _onstack)				\
	do {								\
		static struct lock_class_key __key;			\
									\
		__init_work((_work), _onstack);				\
		(_work)->data = (atomic_long_t) WORK_DATA_INIT();	\
		lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0);\
		INIT_LIST_HEAD(&(_work)->entry);			\
		PREPARE_WORK((_work), (_func));				\
	} while (0)
#else
#define __INIT_WORK(_work, _func, _onstack)				\
	do {								\
		__init_work((_work), _onstack);				\
		(_work)->data = (atomic_long_t) WORK_DATA_INIT();	\
		INIT_LIST_HEAD(&(_work)->entry);			\
		PREPARE_WORK((_work), (_func));				\
	} while (0)
#endif

3.2 工作队列处理函数：
原型：
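void work_handler(struct work_struct *work);
（较早版本内核中原型为 void work_handler(void *data)；在本文列出的代码里，处理函数以 f(work) 的形式被调用，参数是 work_struct 指针。）
该函数运行在进程上下文中，允许中断，可以睡眠。但不能访问用户空间，因为这是一个内核线程，在用户空间没有相关的内存映射。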
通常只有发生系统调用时，内核此时才能访问用户空间，才会在用户空间有内存映射。
3.3 调度工作队列：
创建工作后，就可以调度它了，也就是把工作的处理函数，交给缺省的events工作线程。
schedule_work(&work);
work会被马上调度，一旦其所在处理器上的工作者线程被唤醒，他就会被执行。
函数定义如下：

/**
 * schedule_work - put work task in global workqueue
 * @work: job to be done
 *
 * Queues @work on system_wq, the kernel-global workqueue.  If @work is
 * already queued there it is left in its current position.
 *
 * Returns zero if @work was already on the kernel-global workqueue and
 * non-zero otherwise.
 */
int schedule_work(struct work_struct *work)
{
	int queued;

	/* simply add @work to the system default workqueue */
	queued = queue_work(system_wq, work);

	return queued;
}
EXPORT_SYMBOL(schedule_work);

/**
 * queue_work - queue work on a workqueue
 * @wq: workqueue to use
 * @work: work to queue
 *
 * The work is queued to the CPU it was submitted on, but if that CPU
 * dies it can be processed by another CPU.
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
	/* get_cpu()/put_cpu() bracket the call so the cpu number stays meaningful */
	int this_cpu = get_cpu();
	/* enqueue @work on the workqueue of the cpu we are running on */
	int queued = queue_work_on(this_cpu, wq, work);

	put_cpu();
	return queued;
}
EXPORT_SYMBOL_GPL(queue_work);

/**
 * queue_work_on - queue work on specific cpu
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @work: work to queue
 *
 * The work is queued to a specific CPU; the caller must ensure that
 * CPU can't go away.
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int
queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
{
	/* PENDING was already set: the work is queued, leave it alone */
	if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
		return 0;

	/* we set PENDING ourselves, so the actual queueing is ours to do */
	__queue_work(cpu, wq, work);
	return 1;
}
EXPORT_SYMBOL_GPL(queue_work_on);

/*
 * __queue_work - pick a gcwq for @work and insert it there
 * @cpu: requested cpu, or WORK_CPU_UNBOUND
 * @wq: workqueue @work is being queued on
 * @work: work to insert
 *
 * queue_work_on() calls this only after it has set
 * WORK_STRUCT_PENDING_BIT itself.  Every branch of the gcwq selection
 * below ends with exactly one gcwq->lock held (irqs saved in @flags);
 * that lock is dropped at the end of the function.
 */
static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
			 struct work_struct *work)
{
	struct global_cwq *gcwq;
	struct cpu_workqueue_struct *cwq;
	struct list_head *worklist;
	unsigned int work_flags;
	unsigned long flags;

	debug_work_activate(work);

	/* refuse (with a one-time warning) to queue on a dying workqueue */
	if (WARN_ON_ONCE(wq->flags & WQ_DYING))
		return;

	/* determine gcwq to use */
	if (!(wq->flags & WQ_UNBOUND)) {
		struct global_cwq *last_gcwq;

		if (unlikely(cpu == WORK_CPU_UNBOUND))
			cpu = raw_smp_processor_id();

		/*
		 * It's multi cpu.  If @wq is non-reentrant and @work
		 * was previously on a different cpu, it might still
		 * be running there, in which case the work needs to
		 * be queued on that cpu to guarantee non-reentrance.
		 */
		gcwq = get_gcwq(cpu);
		if (wq->flags & WQ_NON_REENTRANT &&
		    (last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) {
			struct worker *worker;

			spin_lock_irqsave(&last_gcwq->lock, flags);

			worker = find_worker_executing_work(last_gcwq, work);

			/* still running there: keep last_gcwq->lock and queue on it */
			if (worker && worker->current_cwq->wq == wq)
				gcwq = last_gcwq;
			else {
				/* meh... not running there, queue here */
				spin_unlock_irqrestore(&last_gcwq->lock, flags);
				spin_lock_irqsave(&gcwq->lock, flags);
			}
		} else
			spin_lock_irqsave(&gcwq->lock, flags);
	} else {
		gcwq = get_gcwq(WORK_CPU_UNBOUND);
		spin_lock_irqsave(&gcwq->lock, flags);
	}

	/* gcwq determined, get cwq and queue */
	cwq = get_cwq(gcwq->cpu, wq);
	trace_workqueue_queue_work(cpu, cwq, work);

	BUG_ON(!list_empty(&work->entry));

	cwq->nr_in_flight[cwq->work_color]++;
	work_flags = work_color_to_flags(cwq->work_color);

	/* below max_active the work becomes active; otherwise it is parked as delayed */
	if (likely(cwq->nr_active < cwq->max_active)) {
		trace_workqueue_activate_work(work);
		cwq->nr_active++;
		worklist = gcwq_determine_ins_pos(gcwq, cwq);
	} else {
		work_flags |= WORK_STRUCT_DELAYED;
		worklist = &cwq->delayed_works;
	}

	insert_work(cwq, work, worklist, work_flags);

	spin_unlock_irqrestore(&gcwq->lock, flags);
}

有时并不希望马上执行工作，而是希望经过一段延时，再执行。
这种情况可以使用：
schedule_delayed_work(&work, delay);
delay 就是添加的时间延时。
定义如下：

/**
 * schedule_delayed_work - put work task in global workqueue after delay
 * @dwork: job to be done
 * @delay: number of jiffies to wait or 0 for immediate execution
 *
 * Hands @dwork to queue_delayed_work() on system_wq, the kernel-global
 * workqueue, so that it is queued once @delay has elapsed.  The return
 * value is whatever queue_delayed_work() returns.
 */
int schedule_delayed_work(struct delayed_work *dwork,
					unsigned long delay)
{
	int queued;

	queued = queue_delayed_work(system_wq, dwork, delay);

	return queued;
}
EXPORT_SYMBOL(schedule_delayed_work);

/**
 * queue_delayed_work - queue work on a workqueue after delay
 * @wq: workqueue to use
 * @dwork: delayable work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * A @delay of zero skips the timer and queues the embedded work
 * directly; any other value goes through queue_delayed_work_on() with
 * -1 as the cpu argument.
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int queue_delayed_work(struct workqueue_struct *wq,
			struct delayed_work *dwork, unsigned long delay)
{
	if (delay != 0)
		return queue_delayed_work_on(-1, wq, dwork, delay);

	/* no delay requested: queue the embedded work directly */
	return queue_work(wq, &dwork->work);
}
EXPORT_SYMBOL_GPL(queue_delayed_work);

/**
 * queue_delayed_work_on - queue work on specific CPU after delay
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Arms @dwork's timer to expire @delay jiffies from now with
 * delayed_work_timer_fn as its callback.  A non-negative @cpu places
 * the timer on that cpu via add_timer_on(); otherwise add_timer() is
 * used.
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
			struct delayed_work *dwork, unsigned long delay)
{
	struct work_struct *work = &dwork->work;
	struct timer_list *timer = &dwork->timer;
	unsigned int lcpu;

	/* PENDING already set: nothing to do */
	if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
		return 0;

	BUG_ON(timer_pending(timer));
	BUG_ON(!list_empty(&work->entry));

	timer_stats_timer_set_start_info(&dwork->timer);

	/*
	 * This stores cwq for the moment, for the timer_fn.
	 * Note that the work's gcwq is preserved to allow
	 * reentrance detection for delayed works.
	 */
	if (wq->flags & WQ_UNBOUND) {
		lcpu = WORK_CPU_UNBOUND;
	} else {
		struct global_cwq *gcwq = get_work_gcwq(work);

		if (gcwq && gcwq->cpu != WORK_CPU_UNBOUND)
			lcpu = gcwq->cpu;
		else
			lcpu = raw_smp_processor_id();
	}

	set_work_cwq(work, get_cwq(lcpu, wq), 0);

	/* arm the timer; its callback receives @dwork through ->data */
	timer->expires = jiffies + delay;
	timer->data = (unsigned long)dwork;
	timer->function = delayed_work_timer_fn;

	if (unlikely(cpu >= 0))
		add_timer_on(timer, cpu);
	else
		add_timer(timer);

	return 1;
}
EXPORT_SYMBOL_GPL(queue_delayed_work_on);

3.4 刷新工作队列
排入队列的工作，会在工作者线程下一次被唤醒时执行。
刷新工作队列，用来保证在进行下一步工作之前，所有工作队列中的工作都已经执行完毕。
void flush_scheduled_work(void);
该函数会一直等待wait_for_completion()，直到队列中所有对象都被执行以后才返回。
在等待所有待处理工作执行时，该函数会进入睡眠，所以只有在进程上下文中才能使用它。

/**
 * flush_scheduled_work - ensure that any scheduled work has run to completion.
 *
 * Forces execution of the kernel-global workqueue and blocks until its
 * completion.
 *
 * Think twice before calling this function!  It's very easy to get into
 * trouble if you don't take great care.  Either of the following situations
 * will lead to deadlock:
 *
 *	One of the work items currently on the workqueue needs to acquire
 *	a lock held by your code or its caller.
 *
 *	Your code is running in the context of a work routine.
 *
 * They will be detected by lockdep when they occur, but the first might not
 * occur very often.  It depends on what work items are on the workqueue and
 * what locks they need, which you have no control over.
 *
 * In most situations flushing the entire workqueue is overkill; you merely
 * need to know that a particular work item isn't queued and isn't running.
 * In such cases you should use cancel_delayed_work_sync() or
 * cancel_work_sync() instead.
 */
void flush_scheduled_work(void)
{
	struct workqueue_struct *global_wq = system_wq;

	/* this is nothing more than a flush of the system workqueue */
	flush_workqueue(global_wq);
}
EXPORT_SYMBOL(flush_scheduled_work);

/**
 * flush_workqueue - ensure that any scheduled work has run to completion.
 * @wq: workqueue to flush
 *
 * Forces execution of the workqueue and blocks until its completion.
 * This is typically used in driver shutdown handlers.
 *
 * We sleep until all works which were queued on entry have been handled,
 * but we are not livelocked by new incoming ones.
 *
 * The function runs in two phases: a start-to-wait phase that assigns
 * this caller a flush color (or parks it on the overflow list) under
 * wq->flush_mutex, and, after wait_for_completion(), a
 * wake-up-and-cascade phase that only the first flusher executes.
 */
void flush_workqueue(struct workqueue_struct *wq)
{
    /* on-stack record for this caller; it is linked into @wq's flusher lists */
    struct wq_flusher this_flusher = {
        .list = LIST_HEAD_INIT(this_flusher.list),
        .flush_color = -1,
        .done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
    };
    int next_color;

    /* NOTE(review): back-to-back acquire/release looks like a lockdep-only annotation - confirm */
    lock_map_acquire(&wq->lockdep_map);
    lock_map_release(&wq->lockdep_map);

    mutex_lock(&wq->flush_mutex);

    /*
     * Start-to-wait phase
     */
    next_color = work_next_color(wq->work_color);

    if (next_color != wq->flush_color) {
        /*
         * Color space is not full.  The current work_color
         * becomes our flush_color and work_color is advanced
         * by one.
         */
        BUG_ON(!list_empty(&wq->flusher_overflow));
        this_flusher.flush_color = wq->work_color;
        wq->work_color = next_color;

        if (!wq->first_flusher) {
            /* no flush in progress, become the first flusher */
            BUG_ON(wq->flush_color != this_flusher.flush_color);

            wq->first_flusher = &this_flusher;

            if (!flush_workqueue_prep_cwqs(wq, wq->flush_color,
                               wq->work_color)) {
                /* nothing to flush, done */
                wq->flush_color = next_color;
                wq->first_flusher = NULL;
                goto out_unlock;
            }
        } else {
            /* wait in queue */
            BUG_ON(wq->flush_color == this_flusher.flush_color);
            list_add_tail(&this_flusher.list, &wq->flusher_queue);
            flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
        }
    } else {
        /*
         * Oops, color space is full, wait on overflow queue.
         * The next flush completion will assign us
         * flush_color and transfer to flusher_queue.
         */
        list_add_tail(&this_flusher.list, &wq->flusher_overflow);
    }

    mutex_unlock(&wq->flush_mutex);

    /* sleep until this flusher's completion is signalled */
    wait_for_completion(&this_flusher.done);

    /*
     * Wake-up-and-cascade phase
     *
     * First flushers are responsible for cascading flushes and
     * handling overflow.  Non-first flushers can simply return.
     */
    if (wq->first_flusher != &this_flusher)
        return;

    mutex_lock(&wq->flush_mutex);

    /* we might have raced, check again with mutex held */
    if (wq->first_flusher != &this_flusher)
        goto out_unlock;

    wq->first_flusher = NULL;

    BUG_ON(!list_empty(&this_flusher.list));
    BUG_ON(wq->flush_color != this_flusher.flush_color);

    /* each pass retires one flush color; loop until a color still has work or no flusher waits */
    while (true) {
        struct wq_flusher *next, *tmp;

        /* complete all the flushers sharing the current flush color */
        list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
            if (next->flush_color != wq->flush_color)
                break;
            list_del_init(&next->list);
            complete(&next->done);
        }

        BUG_ON(!list_empty(&wq->flusher_overflow) &&
               wq->flush_color != work_next_color(wq->work_color));

        /* this flush_color is finished, advance by one */
        wq->flush_color = work_next_color(wq->flush_color);

        /* one color has been freed, handle overflow queue */
        if (!list_empty(&wq->flusher_overflow)) {
            /*
             * Assign the same color to all overflowed
             * flushers, advance work_color and append to
             * flusher_queue.  This is the start-to-wait
             * phase for these overflowed flushers.
             */
            list_for_each_entry(tmp, &wq->flusher_overflow, list)
                tmp->flush_color = wq->work_color;

            wq->work_color = work_next_color(wq->work_color);

            list_splice_tail_init(&wq->flusher_overflow,
                          &wq->flusher_queue);
            flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
        }

        if (list_empty(&wq->flusher_queue)) {
            BUG_ON(wq->flush_color != wq->work_color);
            break;
        }

        /*
         * Need to flush more colors.  Make the next flusher
         * the new first flusher and arm cwqs.
         */
        BUG_ON(wq->flush_color == wq->work_color);
        BUG_ON(wq->flush_color != next->flush_color);

        list_del_init(&next->list);
        wq->first_flusher = next;

        /* non-zero: this color still has work in flight; the new first flusher takes over */
        if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1))
            break;

        /*
         * Meh... this color is already done, clear first
         * flusher and repeat cascading.
         */
        wq->first_flusher = NULL;
    }

out_unlock:
    mutex_unlock(&wq->flush_mutex);
}
EXPORT_SYMBOL_GPL(flush_workqueue);

注意该函数并不取消任何延时执行的工作。
也就是任何通过 schedule_delayed_work()调度的工作，如果其延迟时间未结束，他并不会因为调用flush_scheduled_work()而被取消掉。
3.5 取消延迟执行的工作
bool cancel_delayed_work(struct delayed_work *work);
该函数可以取消与 delayed_work 相关的、定时器尚未到期的挂起工作（参数是 delayed_work 指针，而不是 work_struct 指针）。

/*
 * Kill off a pending schedule_delayed_work().  Note that the work callback
 * function may still be running on return from cancel_delayed_work(), unless
 * it returns 1 and the work doesn't re-arm itself. Run flush_workqueue() or
 * cancel_work_sync() to wait on it.
 */
static inline bool cancel_delayed_work(struct delayed_work *work)
{
	bool ret;

	ret = del_timer_sync(&work->timer);
	if (ret)
		work_clear_pending(&work->work);  //取消工作的挂起状态
	return ret;
}

/**
 * work_clear_pending - for internal use only, mark a work item as not pending
 * @work: The work item in question
 *
 * Clears WORK_STRUCT_PENDING_BIT in the word returned by
 * work_data_bits(@work).
 */
#define work_clear_pending(work) \
	clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))

/*
 * The first word is the work queue pointer and the flags rolled into
 * one
 *
 * work_data_bits() exposes ->data as an unsigned long pointer so the
 * bit operations (test_and_set_bit(), clear_bit()) can be applied to it.
 */
#define work_data_bits(work) ((unsigned long *)(&(work)->data))

/*
 * Bit positions, masks and special values for the flag bits kept in the
 * low part of work_struct->data, plus the special cpu IDs and the
 * work_busy() return bits.
 */
enum {
	WORK_STRUCT_PENDING_BIT	= 0,	/* work item is pending execution */
	WORK_STRUCT_DELAYED_BIT	= 1,	/* work item is delayed */
	WORK_STRUCT_CWQ_BIT	= 2,	/* data points to cwq */
	WORK_STRUCT_LINKED_BIT	= 3,	/* next work is linked to this one */
#ifdef CONFIG_DEBUG_OBJECTS_WORK
	WORK_STRUCT_STATIC_BIT	= 4,	/* static initializer (debugobjects) */
	WORK_STRUCT_COLOR_SHIFT	= 5,	/* color for workqueue flushing */
#else
	WORK_STRUCT_COLOR_SHIFT	= 4,	/* color for workqueue flushing */
#endif

	WORK_STRUCT_COLOR_BITS	= 4,

	WORK_STRUCT_PENDING	= 1 << WORK_STRUCT_PENDING_BIT,
	WORK_STRUCT_DELAYED	= 1 << WORK_STRUCT_DELAYED_BIT,
	WORK_STRUCT_CWQ		= 1 << WORK_STRUCT_CWQ_BIT,
	WORK_STRUCT_LINKED	= 1 << WORK_STRUCT_LINKED_BIT,
#ifdef CONFIG_DEBUG_OBJECTS_WORK
	WORK_STRUCT_STATIC	= 1 << WORK_STRUCT_STATIC_BIT,
#else
	WORK_STRUCT_STATIC	= 0,
#endif

	/*
	 * The last color is no color used for works which don't
	 * participate in workqueue flushing.
	 */
	WORK_NR_COLORS		= (1 << WORK_STRUCT_COLOR_BITS) - 1,
	WORK_NO_COLOR		= WORK_NR_COLORS,

	/* special cpu IDs */
	WORK_CPU_UNBOUND	= NR_CPUS,
	WORK_CPU_NONE		= NR_CPUS + 1,
	WORK_CPU_LAST		= WORK_CPU_NONE,

	/*
	 * Reserve the flag bits off of the cwq pointer.  With
	 * debugobjects turned off that is WORK_STRUCT_COLOR_SHIFT (4) +
	 * WORK_STRUCT_COLOR_BITS (4) = 8 bits, which makes cwqs aligned
	 * to 256 bytes and allows 15 workqueue flush colors.  With
	 * debugobjects turned on the shift is 5, i.e. 9 bits.
	 */
	WORK_STRUCT_FLAG_BITS	= WORK_STRUCT_COLOR_SHIFT +
				  WORK_STRUCT_COLOR_BITS,

	WORK_STRUCT_FLAG_MASK	= (1UL << WORK_STRUCT_FLAG_BITS) - 1,
	WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
	WORK_STRUCT_NO_CPU	= WORK_CPU_NONE << WORK_STRUCT_FLAG_BITS,

	/* bit mask for work_busy() return values */
	WORK_BUSY_PENDING	= 1 << 0,
	WORK_BUSY_RUNNING	= 1 << 1,
};

4. 创建新的工作队列
如果缺省的工作队列不能满足要求，你可以创建一个新的工作队列和与之相应的工作者线程。
由于这么做会在每一个处理器上都创建一个工作者线程，所以必须要确认这套线程能提高性能，再创建自己的工作队列。
4.1 创建一个新的工作队列和与之对应的工作者线程：
struct workqueue_struct *create_workqueue(const char *name);
name ---表示创建的守护进程在进程列表中显示的名称。
这样就会给每个处理器创建一个工作者线程，并且做好开始处理工作的准备。

/*
 * create_workqueue - allocate a workqueue called @name with the
 * WQ_MEM_RECLAIM flag and a max_active of 1.
 */
#define create_workqueue(name)					\
	alloc_workqueue((name), WQ_MEM_RECLAIM, 1)

/*
 * alloc_workqueue - front end of __alloc_workqueue_key()
 *
 * With CONFIG_LOCKDEP every expansion site gets its own static
 * lock_class_key, and the lock name is @name itself when it is a
 * compile-time constant, otherwise the stringified argument
 * expression.  Without CONFIG_LOCKDEP both the key and the lock name
 * are passed as NULL.
 */
#ifdef CONFIG_LOCKDEP
#define alloc_workqueue(name, flags, max_active)		\
({								\
	static struct lock_class_key __key;			\
	const char *__lock_name;				\
								\
	if (__builtin_constant_p(name))				\
		__lock_name = (name);				\
	else							\
		__lock_name = #name;				\
								\
	__alloc_workqueue_key((name), (flags), (max_active),	\
			      &__key, __lock_name);		\
})
#else
#define alloc_workqueue(name, flags, max_active)		\
	__alloc_workqueue_key((name), (flags), (max_active), NULL, NULL)
#endif

/*
 * __alloc_workqueue_key - allocate and register a workqueue
 * @name: workqueue name, stored in wq->name (the pointer is kept, not copied)
 * @flags: WQ_* flags
 * @max_active: max active works per cwq; 0 selects WQ_DFL_ACTIVE
 * @key: lockdep class key handed to lockdep_init_map()
 * @lock_name: lockdep name handed to lockdep_init_map()
 *
 * Returns the new workqueue, or NULL if any allocation or the rescuer
 * thread creation fails.
 */
struct workqueue_struct *__alloc_workqueue_key(const char *name,
					       unsigned int flags,
					       int max_active,
					       struct lock_class_key *key,
					       const char *lock_name)
{
	struct workqueue_struct *wq;
	unsigned int cpu;

	/*
	 * Workqueues which may be used during memory reclaim should
	 * have a rescuer to guarantee forward progress.
	 */
	if (flags & WQ_MEM_RECLAIM)
		flags |= WQ_RESCUER;

	/*
	 * Unbound workqueues aren't concurrency managed and should be
	 * dispatched to workers immediately.
	 */
	if (flags & WQ_UNBOUND)
		flags |= WQ_HIGHPRI;

	/* GNU "?:" shorthand: a max_active of 0 falls back to the default */
	max_active = max_active ?: WQ_DFL_ACTIVE;
	max_active = wq_clamp_max_active(max_active, flags, name);

	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
	if (!wq)
		goto err;

	wq->flags = flags;
	wq->saved_max_active = max_active;
	mutex_init(&wq->flush_mutex);
	atomic_set(&wq->nr_cwqs_to_flush, 0);
	INIT_LIST_HEAD(&wq->flusher_queue);
	INIT_LIST_HEAD(&wq->flusher_overflow);

	wq->name = name;
	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
	INIT_LIST_HEAD(&wq->list);

	if (alloc_cwqs(wq) < 0)
		goto err;

	/* wire every cwq to its owning workqueue and to its cpu's gcwq */
	for_each_cwq_cpu(cpu, wq) {
		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
		struct global_cwq *gcwq = get_gcwq(cpu);

		/* the low flag bits of the cwq address must be free */
		BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK);
		cwq->gcwq = gcwq;
		cwq->wq = wq;
		cwq->flush_color = -1;
		cwq->max_active = max_active;
		INIT_LIST_HEAD(&cwq->delayed_works);
	}

	if (flags & WQ_RESCUER) {
		struct worker *rescuer;

		if (!alloc_mayday_mask(&wq->mayday_mask, GFP_KERNEL))
			goto err;

		wq->rescuer = rescuer = alloc_worker();
		if (!rescuer)
			goto err;

		rescuer->task = kthread_create(rescuer_thread, wq, "%s", name);
		if (IS_ERR(rescuer->task))
			goto err;

		rescuer->task->flags |= PF_THREAD_BOUND;
		wake_up_process(rescuer->task);
	}

	/*
	 * workqueue_lock protects global freeze state and workqueues
	 * list.  Grab it, set max_active accordingly and add the new
	 * workqueue to workqueues list.
	 */
	spin_lock(&workqueue_lock);

	if (workqueue_freezing && wq->flags & WQ_FREEZEABLE)
		for_each_cwq_cpu(cpu, wq)
			get_cwq(cpu, wq)->max_active = 0;

	list_add(&wq->list, &workqueues);

	spin_unlock(&workqueue_lock);

	return wq;
err:
	/* wq is NULL here when kzalloc() itself failed; wq was zero-allocated, so unset members are 0/NULL */
	if (wq) {
		free_cwqs(wq);
		free_mayday_mask(wq->mayday_mask);
		kfree(wq->rescuer);
		kfree(wq);
	}
	return NULL;
}
EXPORT_SYMBOL_GPL(__alloc_workqueue_key);

如：系统缺省的工作队列events定义：

	system_wq = alloc_workqueue("events", 0, 0);

这里events 就是工作队列名name。
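上面的定义与下面的写法类似，但并不完全等价：按前面的宏定义，create_workqueue("events") 会展开为 alloc_workqueue("events", WQ_MEM_RECLAIM, 1)，即带有 WQ_MEM_RECLAIM 标志且 max_active 为 1：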

system_wq = create_workqueue("events");

4.2 创建工作时，无需考虑工作队列的类型。
4.3 创建完工作队列和工作后，就可以调用下面函数，
int queue_work(struct workqueue_struct *wq, struct work_struct *work);
int queue_delayed_work(struct workqueue_struct *wq,
struct delayed_work *dwork,
unsigned long delay);
上面有定义。
这两个函数作用是，向一个工作队列中添加 work_struct 实例。
这两个函数与 schedule_work() 和 schedule_delayed_work()类似，唯一的区别在于他们针对的是指定的工作队列，而不是系统默认的events工作队列。
4.4 刷新指定的工作队列:
void flush_workqueue(struct workqueue_struct *wq);
刷新指定的工作队列，和前面讨论的 flush_scheduled_work()作用相同。
上面有定义。

5. 内核创建了一个标准的工作队列，称为events。

内核的各个部分中，凡是没必要创建独立的工作队列的，均可以使用该队列。

内核提供了下面2个函数，用来将新的工作添加到该标准队列中。

    /**
     * schedule_work - put work task in global workqueue
     * @work: job to be done
     *
     * Queues @work on keventd_wq, the kernel-global workqueue, and
     * returns whatever queue_work() returns.
     */
    int fastcall schedule_work(struct work_struct *work)
    {
        int queued;

        queued = queue_work(keventd_wq, work);

        return queued;
    }
    EXPORT_SYMBOL(schedule_work);
    /**
     * schedule_delayed_work - put work task in global workqueue after delay
     * @dwork: job to be done
     * @delay: number of jiffies to wait or 0 for immediate execution
     *
     * Records timer start info for @dwork's timer, then hands @dwork to
     * queue_delayed_work() on keventd_wq, the kernel-global workqueue.
     */
    int fastcall schedule_delayed_work(struct delayed_work *dwork,
                        unsigned long delay)
    {
        int queued;

        /* timer statistics bookkeeping comes first, as before */
        timer_stats_timer_set_start_info(&dwork->timer);

        queued = queue_delayed_work(keventd_wq, dwork, delay);
        return queued;
    }
    EXPORT_SYMBOL(schedule_delayed_work);

lamdoc

关注

0
点赞
踩
4

收藏

觉得还不错? 一键收藏
0
评论
工作队列 work queue

工作队列是实现延期执行的另一种手段，它是通过守护进程在进程上下文中执行的，函数可以睡眠任意长时间。它和其他类型的延期执行方式都不一样，工作队列可以把工作推后，交由一个内核线程去执行。选择使用工作队列还是软中断/tasklet 的要点:如果推后执行的任务需要睡眠，那就选择工作队列。如果推后执行的任务不需要睡眠，那就选择软中断/tasklet。如果需要用一个新的线程来执行你
复制链接

扫一扫