参考:
本文中代码内核版本:3.2.0
kthreadd:这种内核线程只有一个,它的作用是管理调度其它的内核线程。这个线程不能关闭。它在内核初始化的时候被创建,会循环运行一个叫做kthreadd的函数,该函数的作用是运行kthread_create_list全局链表中维护的kthread。其他任务或代码想创建内核线程时需要调用kthread_create(或kthread_create_on_node)创建一个kthread,该kthread会被加入到kthread_create_list链表中(增链表),同时kthread_create会wake up kthreadd_task(即kthreadd)。kthreadd在执行kthread时会调用老的接口——kernel_thread运行一个名叫“kthread”的内核线程去运行创建的kthread,被执行过的kthread会从kthread_create_list链表中删除(减链表);每当链表为空时,kthreadd就调用schedule()让出CPU。新创建的kthread通过complete(&create->done)通知创建者后,kthread_create()从wait_for_completion()返回,随后继续执行最初调用它的任务或代码。
创建
在linux启动的C阶段,start_kernel()的最后会调用rest_init(),rest_init()会开启两个进程:kernel_init和kthreadd,之后主线程变成idle线程(代码位于init/main.c)。
linux下的3个特殊的进程:idle进程(PID=0),init进程(PID=1)和kthreadd(PID=2)。
* idle进程由系统自动创建, 运行在内核态 PID=0
idle进程其pid=0,其前身是系统创建的第一个进程,也是唯一一个没有通过fork或者kernel_thread产生的进程。完成加载系统后,演变为进程调度、交换。
* init进程由idle通过kernel_thread创建,在内核空间完成初始化后, 加载init程序, 并最终用户空间运行 PID=1 PPID=0
由0进程创建,完成系统的初始化. 是系统中所有其它用户进程的祖先进程 。
Linux中的所有用户进程都是由init进程(或其后代)创建并运行的。首先Linux内核启动,然后在用户空间中启动init进程,再启动其他系统进程。在系统启动完成后,init将变为守护进程监视系统其他进程。
* kthreadd进程由idle通过kernel_thread创建,并始终运行在内核空间, 负责所有内核线程的调度和管理 PID=2 PPID=0
它的任务就是管理和调度其他内核线程(kernel_thread),会循环执行一个kthreadd的函数,该函数的作用就是运行kthread_create_list全局链表中维护的kthread。我们调用kthread_create创建的内核线程会被加入到此链表中,因此所有的内核线程都是直接或者间接地以kthreadd为父进程。所有的内核线程的PPID都是2。
注:所有的内核线程在大部分时间里都处于阻塞状态(TASK_INTERRUPTIBLE),只有在系统满足进程需要的某种资源的情况下才会运行。
/*
* We need to finalize in a non-__init function, or else race conditions
* between the root thread and the init thread may cause start_kernel to
* be reaped by free_initmem before the root thread has proceeded to
* cpu_idle.
*
* gcc-3.4 accidentally inlines this function, so use noinline.
*/
static__initdata DECLARE_COMPLETION(kthreadd_done);static noinline void __init_refok rest_init(void)
{intpid;
rcu_scheduler_starting();/** We need to spawn init first so that it obtains pid 1, however
* the init task will end up wanting to create kthreads, which, if
* we schedule it before we create kthreadd, will OOPS.*/kernel_thread(kernel_init, NULL, CLONE_FS|CLONE_SIGHAND);
numa_default_policy();
pid= kernel_thread(kthreadd, NULL, CLONE_FS |CLONE_FILES);
rcu_read_lock();
kthreadd_task= find_task_by_pid_ns(pid, &init_pid_ns);
rcu_read_unlock();
complete(&kthreadd_done);/** The boot idle thread must execute schedule()
* at least once to get things moving:*/init_idle_bootup_task(current);
preempt_enable_no_resched();
schedule();/*Call into cpu_idle with preempt disabled*/preempt_disable();
cpu_idle();
}
kthreadd任务
函数体定义在kernel/kthread.c中。
static DEFINE_SPINLOCK(kthread_create_lock);
static LIST_HEAD(kthread_create_list);
struct task_struct *kthreadd_task;
structkthread_create_info
{/*Information passed to kthread() from kthreadd.*/
int (*threadfn)(void *data);void *data;intnode;/*Result passed back to kthread_create() from kthreadd.*/
struct task_struct *result;structcompletion done;structlist_head list;
};structkthread {intshould_stop;void *data;structcompletion exited;
};
int kthreadd(void *unused)
{struct task_struct *tsk =current;/*Setup a clean context for our children to inherit.*/set_task_comm(tsk,"kthreadd");
ignore_signals(tsk);
set_cpus_allowed_ptr(tsk, cpu_all_mask);
set_mems_allowed(node_states[N_HIGH_MEMORY]);
current->flags |= PF_NOFREEZE |PF_FREEZER_NOSIG;for(;;) {
set_current_state(TASK_INTERRUPTIBLE);if (list_empty(&kthread_create_list))
schedule();
__set_current_state(TASK_RUNNING);
spin_lock(&kthread_create_lock);while (!list_empty(&kthread_create_list)) {struct kthread_create_info *create;
create=list_entry(kthread_create_list.next,structkthread_create_info, list);
list_del_init(&create->list);
spin_unlock(&kthread_create_lock);
create_kthread(create);
spin_lock(&kthread_create_lock);
}
spin_unlock(&kthread_create_lock);
}return 0;
}
static void create_kthread(struct kthread_create_info *create)
{intpid;
#ifdef CONFIG_NUMA
current->pref_node_fork = create->node;#endif
/*We want our own signal handler (we take no signals by default).*/pid= kernel_thread(kthread, create, CLONE_FS | CLONE_FILES |SIGCHLD);if (pid < 0) {
create->result =ERR_PTR(pid);
complete(&create->done);
}
}
kthread任务
static int kthread(void *_create)
{/*Copy data: it's on kthread's stack*/
struct kthread_create_info *create =_create;int (*threadfn)(void *data) = create->threadfn;void *data = create->data;structkthread self;intret;
self.should_stop= 0;
self.data=data;
init_completion(&self.exited);
current->vfork_done = &self.exited;/*OK, tell user we're spawned, wait for stop or wakeup*/__set_current_state(TASK_UNINTERRUPTIBLE);
create->result =current;
complete(&create->done);
schedule();
ret= -EINTR;if (!self.should_stop)
ret=threadfn(data);/*we can't just return, we must preserve "self" on stack*/do_exit(ret);
}
/**
* kthread_create_on_node - create a kthread.
* @threadfn: the function to run until signal_pending(current).
* @data: data ptr for @threadfn.
* @node: memory node number.
* @namefmt: printf-style name for the thread.
*
* Description: This helper function creates and names a kernel
* thread. The thread will be stopped: use wake_up_process() to start
* it. See also kthread_run().
*
* If thread is going to be bound on a particular cpu, give its node
* in @node, to get NUMA affinity for kthread stack, or else give -1.
* When woken, the thread will run @threadfn() with @data as its
* argument. @threadfn() can either call do_exit() directly if it is a
* standalone thread for which no one will call kthread_stop(), or
* return when 'kthread_should_stop()' is true (which means
* kthread_stop() has been called). The return value should be zero
* or a negative error number; it will be passed to kthread_stop().
*
* Returns a task_struct or ERR_PTR(-ENOMEM).*/
struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),void *data,intnode,const charnamefmt[],
...)
{structkthread_create_info create;
create.threadfn=threadfn;
create.data=data;
create.node=node;
init_completion(&create.done);
spin_lock(&kthread_create_lock);
list_add_tail(&create.list, &kthread_create_list);
spin_unlock(&kthread_create_lock);
wake_up_process(kthreadd_task);
wait_for_completion(&create.done);if (!IS_ERR(create.result)) {static const struct sched_param param = { .sched_priority = 0};
va_list args;
va_start(args, namefmt);
vsnprintf(create.result->comm, sizeof(create.result->comm),
namefmt, args);
va_end(args);/** root may have changed our (kthreadd's) priority or CPU mask.
* The kernel thread should not inherit these properties.*/sched_setscheduler_nocheck(create.result, SCHED_NORMAL,¶m);
set_cpus_allowed_ptr(create.result, cpu_all_mask);
}returncreate.result;
}
EXPORT_SYMBOL(kthread_create_on_node);
kernel/kthread.c的头文件include/linux/kthread.h定义kthread_create():
/*
 * kthread_create - create a kthread without a memory node preference.
 *
 * Expands to kthread_create_on_node() with -1 as the node argument; the
 * thread function, its data pointer, the name format and any format
 * arguments are forwarded unchanged.
 */
#define kthread_create(threadfn, data, namefmt, arg...) \
	kthread_create_on_node(threadfn, data, -1, namefmt, ##arg)