linux内核线程的祖先,Kernel 内核线程

最新推荐文章于 2023-03-22 10:37:07 发布

FigureVideo

最新推荐文章于 2023-03-22 10:37:07 发布

阅读量323

点赞数

文章标签： linux内核线程的祖先

Linux的内核线程本质上是运行在内核态的进程，没有用户态地址空间，跟所有其他线程一起共享内核态地址空间，一同参与进程调度。同软中断和tasklet相同的是，内核线程可以将任务延后执行(软中断的守护进程ksoftirqd也是基于内核线程实现的)。不同的是，软中断和tasklet运行在中断上下文，不能耗时很长，更不能休眠，而内核线程可以执行耗时很长的任务，也能够休眠。

内核线程的底层创建接口跟创建普通进程一样，也是通过kernel_thread/fork完成的。只是内核线程在创建的过程中不会拷贝父进程的用户地址空间。这是如何实现的？我们知道，内核在初始化启动之后，会首先创建一个0号进程(也叫idle进程)，该进程运行在内核地址空间，没有用户态地址空间，负责原始进程环境的初始化。那么接下来，0号进程在调用 rest_init()的过程中会创建两个特殊的系统进程：1号进程和2号进程。

1号进程(init进程)：系统中所有用户态进程的根进程。0号进程在 rest_init() 中首先调用 kernel_thread()/fork() 创建 1号进程，此时1号进程和0号进程还未分开，共享内核地址空间，并没有用户态地址空间。然后，1号进程在运行的过程中，调用 exec() 函数，加载 /sbin/init 等用户态可执行程序并执行，1号进程开始拥有自己的用户态地址空间。这样，在后续系统运行过程中，每个用户态进程都从1号进程直接或间接 fork 出来，1号进程称为所有用户态进程的根进程。

2号进程(kthreadd)：系统中所有内核线程的父进程。0号进程在 rest_init()中创建完1号进程之后，同样通过 kernel_thread()/fork() 创建 2号进程，只不过2号进程的执行函数是 kthreadd()，该函数不会调用任何 exec() 函数。因此，2号进程只有内核态地址空间，没有用户态地址空间。2号进程负责系统内其他所有内核线程的创建，那么所有内核线程都是只有内核态地址空间，没有用户态地址空间。

1. 2号进程 kthreadd 的创建：

/*
 * rest_init - final stage of boot-time initialisation.
 *
 * Spawns two kernel threads (kernel_init, then kthreadd), records the
 * task_struct of kthreadd in kthreadd_task, signals kthreadd_done, and
 * finally turns the calling context into the idle loop via
 * cpu_startup_entry().
 */
static noinline void __init_refok rest_init(void)
{
	int pid;

	rcu_scheduler_starting();
	smpboot_thread_init();

	/*
	 * We need to spawn init first so that it obtains pid 1, however
	 * the init task will end up wanting to create kthreads, which, if
	 * we schedule it before we create kthreadd, will OOPS.
	 */
	kernel_thread(kernel_init, NULL, CLONE_FS);	/* creates process 1 */
	numa_default_policy();

	/* creates kthreadd (process 2 according to the surrounding article) */
	pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);

	/* Translate the returned pid into a task_struct pointer under RCU. */
	rcu_read_lock();
	kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
	rcu_read_unlock();

	/* kthreadd_task is now published; release anyone waiting on it. */
	complete(&kthreadd_done);

	/*
	 * The boot idle thread must execute schedule()
	 * at least once to get things moving:
	 */
	init_idle_bootup_task(current);
	schedule_preempt_disabled();

	/* Call into cpu_idle with preempt disabled */
	cpu_startup_entry(CPUHP_ONLINE);
}

/*
 * kernel_thread - create a new task that starts executing fn(arg).
 * @fn:    entry function of the new task
 * @arg:   opaque argument handed to @fn
 * @flags: clone flags; CLONE_VM and CLONE_UNTRACED are always OR-ed in
 *
 * Thin wrapper around _do_fork(): @fn and @arg are passed through as
 * unsigned long values. Returns whatever _do_fork() returns.
 */
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
	return _do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
		(unsigned long)arg, NULL, NULL, 0);
}

_do_fork() -> copy_process() -> copy_mm() // _do_fork()最终会调用 copy_mm()拷贝父进程地址空间

static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)

{

struct mm_struct *mm, *oldmm;

int retval;

tsk->min_flt = tsk->maj_flt = 0;

tsk->nvcsw = tsk->nivcsw = 0;

#ifdef CONFIG_DETECT_HUNG_TASK

tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;

#endif

tsk->mm = NULL;

tsk->active_mm = NULL;

* Are we cloning a kernel thread?

* We need to steal a active VM for that..

// 如果父进程是2号进程，那么无需拷贝父进程用户态地址空间，直接返回。如果是1号线程，继续往下，拷贝父进程用户态地址空间。

oldmm = current->mm;

if (!oldmm)

return 0;

/* initialize the new vmacache entries */

vmacache_flush(tsk);

if (clone_flags & CLONE_VM) {

atomic_inc(&oldmm->mm_users);

mm = oldmm;

goto good_mm;

}

retval = -ENOMEM;

mm = dup_mm(tsk);

if (!mm)

goto fail_nomem;

good_mm:

tsk->mm = mm;

tsk->active_mm = mm;

return 0;

fail_nomem:

return retval;

}

2，内核线程的创建过程：

所有的内核线程都是通过2号进程 kthreadd 创建出来的，那么用户接口又是什么呢？内核设计了如下内核线程的创建框架：内核线程的创建是异步执行的，提供一个系统范围的创建请求链表，其他内核模块或线程提交请求到链表，由2号进程 kthreadd 统一处理链表的所有请求。这样实际的创建都由2号进程 kthreadd 来完成。

(1)内核线程创建请求数据结构 struct kthread_create_info：

/* One pending kernel-thread creation request, queued on kthread_create_list. */
struct kthread_create_info

{

/* Information passed to kthread() from kthreadd. */

int (*threadfn)(void *data); // function the new kernel thread will run

void *data; // argument passed to threadfn

int node; // node value from the request; create_kthread() copies it to current->pref_node_fork under CONFIG_NUMA

/* Result passed back to kthread_create() from kthreadd. */

struct task_struct *result; // outcome of the asynchronous creation (task pointer or ERR_PTR)

struct completion *done; // completed when creation finishes, to wake the requester

struct list_head list; // linkage into kthread_create_list

};

static LIST_HEAD(kthread_create_list); // global list of pending creation requests

(2)内核线程创建用户接口 kthread_create：

/*
 * kthread_create - request creation of a kernel thread.
 *
 * Forwards to kthread_create_on_node() with NUMA_NO_NODE as the node
 * argument; the thread function, its data, the name format and any format
 * arguments are passed through unchanged.
 *
 * The continuation line must directly follow the trailing backslash,
 * otherwise the macro body is empty.
 */
#define kthread_create(threadfn, data, namefmt, arg...) \
	kthread_create_on_node(threadfn, data, NUMA_NO_NODE, namefmt, ##arg)

struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),

void *data, int node,

const char namefmt[],

...)

{

DECLARE_COMPLETION_ONSTACK(done); // 完成量，用于等待异步创建完成，由kthreadd唤醒

struct task_struct *task;

struct kthread_create_info *create = kmalloc(sizeof(*create),

GFP_KERNEL); // 分配创建请求

if (!create)

return ERR_PTR(-ENOMEM);

create->threadfn = threadfn;

create->data = data;

create->node = node;

create->done = &done;

spin_lock(&kthread_create_lock);

list_add_tail(&create->list, &kthread_create_list); // 加入全局创建链表

spin_unlock(&kthread_create_lock);

wake_up_process(kthreadd_task); // 唤醒 kthreadd 线程执行实际的创建

* Wait for completion in killable state, for I might be chosen by

* the OOM killer while kthreadd is trying to allocate memory for

* new kernel thread.

if (unlikely(wait_for_completion_killable(&done))) {

* If I was SIGKILLed before kthreadd (or new kernel thread)

* calls complete(), leave the cleanup of this structure to

* that thread.

if (xchg(&create->done, NULL))

return ERR_PTR(-EINTR);

* kthreadd (or new kernel thread) will call complete()

* shortly.

wait_for_completion(&done); // 睡眠在完成量上，等待 kthreadd 唤醒

}

task = create->result; // 已经被唤醒，接下来检查内核线程创建结果

if (!IS_ERR(task)) {

static const struct sched_param param = { .sched_priority = 0 };

va_list args;

va_start(args, namefmt);

vsnprintf(task->comm, sizeof(task->comm), namefmt, args);

va_end(args);

* root may have changed our (kthreadd's) priority or CPU mask.

* The kernel thread should not inherit these properties.

sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);

set_cpus_allowed_ptr(task, cpu_all_mask);

}

kfree(create);

return task;

}

(3)内核线程的实际创建 kthreadd:

int kthreadd(void *unused)

{

struct task_struct *tsk = current;

/* Setup a clean context for our children to inherit. */

set_task_comm(tsk, "kthreadd");

ignore_signals(tsk);

set_cpus_allowed_ptr(tsk, cpu_all_mask);

set_mems_allowed(node_states[N_MEMORY]);

current->flags |= PF_NOFREEZE;

for (;;) { // kthreadd是个守护进程，循环轮询创建链表

set_current_state(TASK_INTERRUPTIBLE);

if (list_empty(&kthread_create_list))

schedule(); // 如果此时链表为空，放弃处理器，调度其他线程运行

__set_current_state(TASK_RUNNING);

spin_lock(&kthread_create_lock);

while (!list_empty(&kthread_create_list)) { // 创建链表不空，有新的内核线程等待创建

struct kthread_create_info *create;

create = list_entry(kthread_create_list.next,

struct kthread_create_info, list);

list_del_init(&create->list); // 从创建链表中取出一个请求

spin_unlock(&kthread_create_lock);

create_kthread(create); // 调用 create_kthread() 执行真正的线程创建

spin_lock(&kthread_create_lock);

}

spin_unlock(&kthread_create_lock);

}

return 0;

}

/*
 * create_kthread - carry out one queued creation request.
 * @create: the request taken off kthread_create_list
 *
 * Starts a new task running kthread() with @create as its argument. Only
 * the failure path is handled here: if the requester has already given up
 * (create->done was cleared), the request is freed; otherwise the error is
 * stored in create->result and the requester is woken. On success nothing
 * further is done in this function (presumably the new thread completes
 * the request itself; that code is not shown here).
 */
static void create_kthread(struct kthread_create_info *create)
{
	int pid;

#ifdef CONFIG_NUMA
	current->pref_node_fork = create->node;
#endif
	/* We want our own signal handler (we take no signals by default). */
	pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
	if (pid < 0) {
		/* If user was SIGKILLed, I release the structure. */
		struct completion *done = xchg(&create->done, NULL);

		if (!done) {
			kfree(create);
			return;
		}
		create->result = ERR_PTR(pid);
		/* Wake the thread that submitted the creation request. */
		complete(done);
	}
}

以上就是内核线程的创建机制，可见linux内核的设计是多么的优雅！0号线程(idle线程)创建特殊的1号和2号线程后，作为系统调度线程；1号线程作为系统所有用户态进程的祖先进程；2号线程作为系统的所有内核态线程的父线程。所有用户通过 kthread_create() 接口提交内核线程创建请求，统一由 2号线程 kthreadd 完成内核线程的创建。