1 Workqueue interface functions
include/linux/workqueue.h
#define alloc_workqueue(fmt, flags, max_active, args...)                \
({                                                                      \
        static struct lock_class_key __key;                             \
        const char *__lock_name;                                        \
                                                                        \
        if (__builtin_constant_p(fmt))                                  \
                __lock_name = (fmt);                                    \
        else                                                            \
                __lock_name = #fmt;                                     \
                                                                        \
        __alloc_workqueue_key((fmt), (flags), (max_active),             \
                              &__key, __lock_name, ##args);             \
})
#define INIT_WORK(_work, _func)                                         \
        do {                                                            \
                __INIT_WORK((_work), (_func), 0);                       \
        } while (0)

#define __INIT_WORK(_work, _func, _onstack)                             \
        do {                                                            \
                static struct lock_class_key __key;                     \
                                                                        \
                __init_work((_work), _onstack);                         \
                (_work)->data = (atomic_long_t) WORK_DATA_INIT();       \
                lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0); \
                INIT_LIST_HEAD(&(_work)->entry);                        \
                PREPARE_WORK((_work), (_func));                         \
        } while (0)
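A side note on the _onstack parameter: when a work item lives on the caller's stack, the kernel provides INIT_WORK_ONSTACK(), which expands to __INIT_WORK(..., 1) so that the object-debugging code in __init_work() treats the item as a stack object. A minimal, hedged sketch of that pattern (the handler my_sync_fn is hypothetical):

struct work_struct work;

INIT_WORK_ONSTACK(&work, my_sync_fn);
schedule_work(&work);
flush_work(&work);               /* must finish before 'work' leaves scope */
destroy_work_on_stack(&work);    /* tells debugobjects the object is gone  */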
/**
 * queue_work - queue work on a workqueue
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns %false if @work was already on a queue, %true otherwise.
 *
 * We queue the work to the CPU on which it was submitted, but if the CPU dies
 * it can be processed by another CPU.
 */
static inline bool queue_work(struct workqueue_struct *wq,
                              struct work_struct *work)
{
        return queue_work_on(WORK_CPU_UNBOUND, wq, work);
}
The typical usage of these interfaces is: create a workqueue with alloc_workqueue, initialize a work item with INIT_WORK, then submit the work item to the workqueue with queue_work.
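Putting the three together, a minimal, hedged sketch of a driver using this pattern (my_wq, my_work and my_work_fn are illustrative names, not from the kernel source):

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;
static struct work_struct my_work;

static void my_work_fn(struct work_struct *work)
{
        /* runs later in process context, in a kworker thread; may sleep */
        pr_info("my_work executed\n");
}

static int __init my_init(void)
{
        my_wq = alloc_workqueue("my_wq", 0, 0);  /* per-CPU, default max_active */
        if (!my_wq)
                return -ENOMEM;

        INIT_WORK(&my_work, my_work_fn);
        queue_work(my_wq, &my_work);             /* submit; returns immediately */
        return 0;
}

static void __exit my_exit(void)
{
        flush_workqueue(my_wq);                  /* wait for pending work */
        destroy_workqueue(my_wq);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");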
2 Internal implementation of workqueues
kernel/workqueue.c
4121 struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
4122                                                unsigned int flags,
4123                                                int max_active,
4124                                                struct lock_class_key *key,
4125                                                const char *lock_name, ...)
4126 {
4127         size_t tbl_size = 0;
4128         va_list args;
4129         struct workqueue_struct *wq;
4130         struct pool_workqueue *pwq;
4131
4132         /* allocate wq and format name */
4133         if (flags & WQ_UNBOUND)
4134                 tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]);
4135
4136         wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
4137         if (!wq)
4138                 return NULL;
4139
4140         if (flags & WQ_UNBOUND) {
4141                 wq->unbound_attrs = alloc_workqueue_attrs(GFP_KERNEL);
4142                 if (!wq->unbound_attrs)
4143                         goto err_free_wq;
4144         }
4145
4146         va_start(args, lock_name);
4147         vsnprintf(wq->name, sizeof(wq->name), fmt, args);
4148         va_end(args);
4149
4150         max_active = max_active ?: WQ_DFL_ACTIVE;
4151         max_active = wq_clamp_max_active(max_active, flags, wq->name);
4152
4153         /* init wq */
4154         wq->flags = flags;
4155         wq->saved_max_active = max_active;
4156         mutex_init(&wq->mutex);
4157         atomic_set(&wq->nr_pwqs_to_flush, 0);
4158         INIT_LIST_HEAD(&wq->pwqs);
4159         INIT_LIST_HEAD(&wq->flusher_queue);
4160         INIT_LIST_HEAD(&wq->flusher_overflow);
4161         INIT_LIST_HEAD(&wq->maydays);
4162
4163         lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
4164         INIT_LIST_HEAD(&wq->list);
4165
4166         if (alloc_and_link_pwqs(wq) < 0)
4167                 goto err_free_wq;
4168
4169         /*
4170          * Workqueues which may be used during memory reclaim should
4171          * have a rescuer to guarantee forward progress.
4172          */
4173         if (flags & WQ_MEM_RECLAIM) {
4174                 struct worker *rescuer;
4175
4176                 rescuer = alloc_worker();
4177                 if (!rescuer)
4178                         goto err_destroy;
4179
4180                 rescuer->rescue_wq = wq;
4181                 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s",
4182                                                wq->name);
4183                 if (IS_ERR(rescuer->task)) {
4184                         kfree(rescuer);
4185                         goto err_destroy;
4186                 }
4187
4188                 wq->rescuer = rescuer;
4189                 rescuer->task->flags |= PF_NO_SETAFFINITY;
4190                 wake_up_process(rescuer->task);
4191         }
4192
4193         if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq))
4194                 goto err_destroy;
4195
4196         /*
4197          * wq_pool_mutex protects global freeze state and workqueues list.
4198          * Grab it, adjust max_active and add the new @wq to workqueues
4199          * list.
4200          */
4201         mutex_lock(&wq_pool_mutex);
4202
4203         mutex_lock(&wq->mutex);
4204         for_each_pwq(pwq, wq)
4205                 pwq_adjust_max_active(pwq);
4206         mutex_unlock(&wq->mutex);
4207
4208         list_add(&wq->list, &workqueues);
4209
4210         mutex_unlock(&wq_pool_mutex);
4211
4212         return wq;
4213
4214 err_free_wq:
4215         free_workqueue_attrs(wq->unbound_attrs);
4216         kfree(wq);
4217         return NULL;
4218 err_destroy:
4219         destroy_workqueue(wq);
4220         return NULL;
4221 }
4222 EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
Lines 4127-4207 initialize the workqueue_struct: the structure is allocated, its name formatted, max_active defaulted and clamped, the pool_workqueues allocated and linked, and (for WQ_MEM_RECLAIM workqueues) a rescuer thread created.
Line 4208 adds the new workqueue to the global workqueues list.
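The flags argument selects the code paths seen above: WQ_UNBOUND allocates the per-NUMA-node numa_pwq_tbl (line 4134) and the unbound attrs (lines 4140-4144), WQ_MEM_RECLAIM creates the rescuer thread (lines 4173-4191), and max_active is defaulted and clamped at lines 4150-4151 (0 means WQ_DFL_ACTIVE). A hedged sketch of common combinations (the names are illustrative):

/* per-CPU workqueue, default concurrency (max_active 0 -> WQ_DFL_ACTIVE) */
wq = alloc_workqueue("percpu_wq", 0, 0);

/* unbound workqueue: items may run on any CPU, pwqs are per NUMA node */
wq = alloc_workqueue("unbound_wq", WQ_UNBOUND, 0);

/* on the memory-reclaim path: the rescuer guarantees forward progress
 * even when new workers cannot be created under memory pressure */
wq = alloc_workqueue("reclaim_wq", WQ_MEM_RECLAIM, 1);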
1414 bool queue_work_on(int cpu, struct workqueue_struct *wq,
1415                    struct work_struct *work)
1416 {
1417         bool ret = false;
1418         unsigned long flags;
1419
1420         local_irq_save(flags);
1421
1422         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1423                 __queue_work(cpu, wq, work);
1424                 ret = true;
1425         }
1426
1427         local_irq_restore(flags);
1428         return ret;
1429 }
1430 EXPORT_SYMBOL(queue_work_on);
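queue_work_on is the real entry point; queue_work merely passes WORK_CPU_UNBOUND. The test_and_set_bit on WORK_STRUCT_PENDING_BIT at line 1422 makes queueing idempotent: a work item that is already pending cannot be queued a second time. A hedged sketch of the observable behavior (my_wq and my_work as in the earlier example):

bool first  = queue_work(my_wq, &my_work);  /* true: PENDING was clear    */
bool second = queue_work(my_wq, &my_work);  /* false while still pending: */
                                            /* the item is not re-queued  */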
1296 static void __queue_work(int cpu, struct workqueue_struct *wq,
1297                          struct work_struct *work)
1298 {
1299         struct pool_workqueue *pwq;
1300         struct worker_pool *last_pool;
1301         struct list_head *worklist;
1302         unsigned int work_flags;
1303         unsigned int req_cpu = cpu;
1304
1305         /*
1306          * While a work item is PENDING && off queue, a task trying to
1307          * steal the PENDING will busy-loop waiting for it to either get
1308          * queued or lose PENDING. Grabbing PENDING and queueing should
1309          * happen with IRQ disabled.
1310          */
1311         WARN_ON_ONCE(!irqs_disabled());
1312
1313         debug_work_activate(work);
1314
1315         /* if dying, only works from the same workqueue are allowed */
1316         if (unlikely(wq->flags & __WQ_DRAINING) &&
1317             WARN_ON_ONCE(!is_chained_work(wq)))
1318                 return;
1319 retry:
1320         if (req_cpu == WORK_CPU_UNBOUND)
1321                 cpu = raw_smp_processor_id();
1322
1323         /* pwq which will be used unless @work is executing elsewhere */
1324         if (!(wq->flags & WQ_UNBOUND))
1325                 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
1326         else
1327                 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
1328
1329         /*
1330          * If @work was previously on a different pool, it might still be
1331          * running there, in which case the work needs to be queued on that
1332          * pool to guarantee non-reentrancy.
1333          */
1334         last_pool = get_work_pool(work);
1335         if (last_pool && last_pool != pwq->pool) {
1336                 struct worker *worker;
1337
1338                 spin_lock(&last_pool->lock);
1339
1340                 worker = find_worker_executing_work(last_pool, work);
1341
1342                 if (worker && worker->current_pwq->wq == wq) {
1343                         pwq = worker->current_pwq;
1344                 } else {
1345                         /* meh... not running there, queue here */
1346                         spin_unlock(&last_pool->lock);
1347                         spin_lock(&pwq->pool->lock);
1348                 }
1349         } else {
1350                 spin_lock(&pwq->pool->lock);
1351         }
1352
1353         /*
1354          * pwq is determined and locked. For unbound pools, we could have
1355          * raced with pwq release and it could already be dead. If its
1356          * refcnt is zero, repeat pwq selection. Note that pwqs never die
1357          * without another pwq replacing it in the numa_pwq_tbl or while
1358          * work items are executing on it, so the retrying is guaranteed to
1359          * make forward-progress.
1360          */
1361         if (unlikely(!pwq->refcnt)) {
1362                 if (wq->flags & WQ_UNBOUND) {
1363                         spin_unlock(&pwq->pool->lock);
1364                         cpu_relax();
1365                         goto retry;
1366                 }
1367                 /* oops */
1368                 WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
1369                           wq->name, cpu);
1370         }
1371
1372         /* pwq determined, queue */
1373         trace_workqueue_queue_work(req_cpu, pwq, work);
1374 #ifdef CONFIG_MTK_WQ_DEBUG
1375         mttrace_workqueue_queue_work(cpu, work);
1376 #endif //CONFIG_MTK_WQ_DEBUG
1377
1378         if (WARN_ON(!list_empty(&work->entry))) {
1379                 spin_unlock(&pwq->pool->lock);
1380                 return;
1381         }
1382
1383         pwq->nr_in_flight[pwq->work_color]++;
1384         work_flags = work_color_to_flags(pwq->work_color);
1385
1386         if (likely(pwq->nr_active < pwq->max_active)) {
1387                 trace_workqueue_activate_work(work);
1388 #ifdef CONFIG_MTK_WQ_DEBUG
1389                 mttrace_workqueue_activate_work(work);
1390 #endif //CONFIG_MTK_WQ_DEBUG
1391                 pwq->nr_active++;
1392                 worklist = &pwq->pool->worklist;
1393         } else {
1394                 work_flags |= WORK_STRUCT_DELAYED;
1395                 worklist = &pwq->delayed_works;
1396         }
1397
1398         insert_work(pwq, work, worklist, work_flags);
1399
1400         spin_unlock(&pwq->pool->lock);
1401 }
__queue_work locates the target pool_workqueue (per-CPU for bound workqueues at line 1325, per-NUMA-node for unbound ones at line 1327), re-checks the pool the work last ran on to preserve non-reentrancy (lines 1334-1351), and finally calls insert_work.
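The req_cpu handling at lines 1320-1327 is what distinguishes the two submission variants. A hedged sketch (illustrative, reusing my_wq/my_work from above):

/* default: queue to the pool of the CPU this code is running on */
queue_work(my_wq, &my_work);        /* cpu == WORK_CPU_UNBOUND */

/* explicitly target CPU 1's worker pool (CPU 1 should be online) */
queue_work_on(1, my_wq, &my_work);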
1259 static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
1260                         struct list_head *head, unsigned int extra_flags)
1261 {
1262         struct worker_pool *pool = pwq->pool;
1263
1264         /* we own @work, set data and link */
1265         set_work_pwq(work, pwq, extra_flags);
1266         list_add_tail(&work->entry, head);
1267         get_pwq(pwq);
1268
1269         /*
1270          * Ensure either wq_worker_sleeping() sees the above
1271          * list_add_tail() or we see zero nr_running to avoid workers lying
1272          * around lazily while there are works to be processed.
1273          */
1274         smp_mb();
1275
1276         if (__need_more_worker(pool))
1277                 wake_up_worker(pool);
1278 }
insert_work links the work item onto the chosen list and, if the pool needs another running worker, wakes one up.
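Once insert_work has linked the item and woken a worker, the submitter is done; if it needs to synchronize with the work's completion it can use the standard helpers, sketched here:

flush_work(&my_work);         /* wait until my_work finishes executing */
cancel_work_sync(&my_work);   /* un-queue if pending, wait if running  */
flush_workqueue(my_wq);       /* wait for everything queued on my_wq   */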
3 Worker pools
4968 static int __init init_workqueues(void)
4969 {
4970         int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
4971         int i, cpu;
4972
4973         /* make sure we have enough bits for OFFQ pool ID */
4974         BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) <
4975                      WORK_CPU_END * NR_STD_WORKER_POOLS);
4976
4977         WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
4978
4979         pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
4980
4981         cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
4982         hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
4983
4984         wq_numa_init();
4985
4986         /* initialize CPU pools */
4987         for_each_possible_cpu(cpu) {
4988                 struct worker_pool *pool;
4989
4990                 i = 0;
4991                 for_each_cpu_worker_pool(pool, cpu) {
4992                         BUG_ON(init_worker_pool(pool));
4993                         pool->cpu = cpu;
4994                         cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
4995                         pool->attrs->nice = std_nice[i++];
4996                         pool->node = cpu_to_node(cpu);
4997
4998                         /* alloc pool ID */
4999                         mutex_lock(&wq_pool_mutex);
5000                         BUG_ON(worker_pool_assign_id(pool));
5001                         mutex_unlock(&wq_pool_mutex);
5002                 }
5003         }
5004
5005         /* create the initial worker */
5006         for_each_online_cpu(cpu) {
5007                 struct worker_pool *pool;
5008
5009                 for_each_cpu_worker_pool(pool, cpu) {
5010                         pool->flags &= ~POOL_DISASSOCIATED;
5011                         BUG_ON(create_and_start_worker(pool) < 0);
5012                 }
5013         }
5014
5015         /* create default unbound wq attrs */
5016         for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
5017                 struct workqueue_attrs *attrs;
5018
5019                 BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
5020                 attrs->nice = std_nice[i];
5021                 unbound_std_wq_attrs[i] = attrs;
5022         }
5023
5024         system_wq = alloc_workqueue("events", 0, 0);
5025         system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
5026         system_long_wq = alloc_workqueue("events_long", 0, 0);
5027         system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
5028                                             WQ_UNBOUND_MAX_ACTIVE);
5029         system_freezable_wq = alloc_workqueue("events_freezable",
5030                                               WQ_FREEZABLE, 0);
5031         BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
5032                !system_unbound_wq || !system_freezable_wq);
5033         return 0;
5034 }
5035 early_initcall(init_workqueues);
init_workqueues creates the per-CPU worker pools, starts an initial worker in each, and then creates the system workqueues.
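Lines 5024-5030 create the system-wide workqueues that most kernel code uses implicitly; schedule_work(), for instance, is just a wrapper around queue_work(system_wq, ...). A hedged sketch:

/* shorthand for queue_work(system_wq, &my_work) */
schedule_work(&my_work);

/* no execution-CPU affinity: served by the unbound worker pools */
queue_work(system_unbound_wq, &my_work);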
Now look at create_and_start_worker:
1795 static int create_and_start_worker(struct worker_pool *pool)
1796 {
1797         struct worker *worker;
1798
1799         mutex_lock(&pool->manager_mutex);
1800
1801         worker = create_worker(pool);
1802         if (worker) {
1803                 spin_lock_irq(&pool->lock);
1804                 start_worker(worker);
1805                 spin_unlock_irq(&pool->lock);
1806         }
1807
1808         mutex_unlock(&pool->manager_mutex);
1809
1810         return worker ? 0 : -ENOMEM;
1811 }
1697 static struct worker *create_worker(struct worker_pool *pool)
1698 {
1699         struct worker *worker = NULL;
1700         int id = -1;
1701         char id_buf[16];
1702
1703         lockdep_assert_held(&pool->manager_mutex);
1704
1705         /*
1706          * ID is needed to determine kthread name. Allocate ID first
1707          * without installing the pointer.
1708          */
1709         idr_preload(GFP_KERNEL);
1710         spin_lock_irq(&pool->lock);
1711
1712         id = idr_alloc(&pool->worker_idr, NULL, 0, 0, GFP_NOWAIT);
1713
1714         spin_unlock_irq(&pool->lock);
1715         idr_preload_end();
1716         if (id < 0)
1717                 goto fail;
1718
1719         worker = alloc_worker();
1720         if (!worker)
1721                 goto fail;
1722
1723         worker->pool = pool;
1724         worker->id = id;
1725
1726         if (pool->cpu >= 0)
1727                 snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
1728                          pool->attrs->nice < 0 ? "H" : "");
1729         else
1730                 snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
1731
1732         worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
1733                                               "kworker/%s", id_buf);
1734         if (IS_ERR(worker->task))
1735                 goto fail;
1736
1737         /*
1738          * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
1739          * online CPUs. It'll be re-applied when any of the CPUs come up.
1740          */
1741         set_user_nice(worker->task, pool->attrs->nice);
1742         set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
1743
1744         /* prevent userland from meddling with cpumask of workqueue workers */
1745         worker->task->flags |= PF_NO_SETAFFINITY;
1746
1747         /*
1748          * The caller is responsible for ensuring %POOL_DISASSOCIATED
1749          * remains stable across this function. See the comments above the
1750          * flag definition for details.
1751          */
1752         if (pool->flags & POOL_DISASSOCIATED)
1753                 worker->flags |= WORKER_UNBOUND;
1754
1755         /* successful, commit the pointer to idr */
1756         spin_lock_irq(&pool->lock);
1757         idr_replace(&pool->worker_idr, worker, worker->id);
1758         spin_unlock_irq(&pool->lock);
1759
1760         return worker;
1761
1762 fail:
1763         if (id >= 0) {
1764                 spin_lock_irq(&pool->lock);
1765                 idr_remove(&pool->worker_idr, id);
1766                 spin_unlock_irq(&pool->lock);
1767         }
1768         kfree(worker);
1769         return NULL;
1770 }
Line 1732 creates a kernel thread that will run worker_thread.
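The create-then-wake split seen here (kthread_create_on_node at line 1732, wake_up_process later in start_worker) is the standard idiom for configuring a kernel thread before it runs. A generic, hedged sketch of the same idiom (my_thread_fn is illustrative):

#include <linux/kthread.h>
#include <linux/sched.h>

static int my_thread_fn(void *data)
{
        while (!kthread_should_stop()) {
                set_current_state(TASK_INTERRUPTIBLE);
                schedule();              /* sleep until woken or stopped */
        }
        __set_current_state(TASK_RUNNING);
        return 0;
}

struct task_struct *task;

task = kthread_create(my_thread_fn, NULL, "my_thread");
if (!IS_ERR(task)) {
        set_user_nice(task, -5);         /* set attributes before it runs  */
        wake_up_process(task);           /* only now does the thread start */
}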
1781 static void start_worker(struct worker *worker)
1782 {
1783         worker->flags |= WORKER_STARTED;
1784         worker->pool->nr_workers++;
1785         worker_enter_idle(worker);
1786         wake_up_process(worker->task);
1787 }
2268 static int worker_thread(void *__worker)
2269 {
2270         struct worker *worker = __worker;
2271         struct worker_pool *pool = worker->pool;
2272
2273         /* tell the scheduler that this is a workqueue worker */
2274         worker->task->flags |= PF_WQ_WORKER;
2275 woke_up:
2276         spin_lock_irq(&pool->lock);
2277
2278         /* am I supposed to die? */
2279         if (unlikely(worker->flags & WORKER_DIE)) {
2280                 spin_unlock_irq(&pool->lock);
2281                 WARN_ON_ONCE(!list_empty(&worker->entry));
2282                 worker->task->flags &= ~PF_WQ_WORKER;
2283                 return 0;
2284         }
2285
2286         worker_leave_idle(worker);
2287 recheck:
2288         /* no more worker necessary? */
2289         if (!need_more_worker(pool))
2290                 goto sleep;
2291
2292         /* do we need to manage? */
2293         if (unlikely(!may_start_working(pool)) && manage_workers(worker))
2294                 goto recheck;
2295
2296         /*
2297          * ->scheduled list can only be filled while a worker is
2298          * preparing to process a work or actually processing it.
2299          * Make sure nobody diddled with it while I was sleeping.
2300          */
2301         WARN_ON_ONCE(!list_empty(&worker->scheduled));
2302
2303         /*
2304          * Finish PREP stage. We're guaranteed to have at least one idle
2305          * worker or that someone else has already assumed the manager
2306          * role. This is where @worker starts participating in concurrency
2307          * management if applicable and concurrency management is restored
2308          * after being rebound. See rebind_workers() for details.
2309          */
2310         worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);
2311
2312         do {
2313                 struct work_struct *work =
2314                         list_first_entry(&pool->worklist,
2315                                          struct work_struct, entry);
2316
2317                 if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
2318                         /* optimization path, not strictly necessary */
2319                         process_one_work(worker, work);
2320                         if (unlikely(!list_empty(&worker->scheduled)))
2321                                 process_scheduled_works(worker);
2322                 } else {
2323                         move_linked_works(work, &worker->scheduled, NULL);
2324                         process_scheduled_works(worker);
2325                 }
2326         } while (keep_working(pool));
2327
2328         worker_set_flags(worker, WORKER_PREP, false);
2329 sleep:
2330         if (unlikely(need_to_manage_workers(pool)) && manage_workers(worker))
2331                 goto recheck;
2332
2333         /*
2334          * pool->lock is held and there's no work to process and no need to
2335          * manage, sleep. Workers are woken up only while holding
2336          * pool->lock or from local cpu, so setting the current state
2337          * before releasing pool->lock is enough to prevent losing any
2338          * event.
2339          */
2340         worker_enter_idle(worker);
2341         __set_current_state(TASK_INTERRUPTIBLE);
2342         spin_unlock_irq(&pool->lock);
2343         schedule();
2344         goto woke_up;
2345 }
The work items on the pool's worklist are processed at line 2319 or 2321, via process_one_work and process_scheduled_works.
From the above analysis we can see that each work item is executed in one of the pool's kernel worker threads.
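This is easy to observe: a work handler that prints its execution context reports a kworker task, not the submitter. A hedged sketch:

#include <linux/sched.h>
#include <linux/workqueue.h>

static void ctx_work_fn(struct work_struct *work)
{
        /* typically prints something like "executed by kworker/0:1 (pid 42)" */
        pr_info("executed by %s (pid %d)\n", current->comm, current->pid);
}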