Kernel 内核线程

最新推荐文章于 2023-10-14 14:56:39 发布

weixin_34096182

最新推荐文章于 2023-10-14 14:56:39 发布

阅读量375

点赞数

文章标签： python 数据结构与算法

原文链接：https://my.oschina.net/yepanl/blog/3050797

版权

2019独角兽企业重金招聘Python工程师标准>>>

Linux的内核线程本质上是运行在内核态的进程，没有用户态地址空间，跟所有其他线程一起共享内核态地址空间，一同参与进程调度。同软中断和tasklet相同的是，内核线程可以将任务延后执行（软中断的守护进程ksoftirqd也是基于内核线程实现的）。不同的是，软中断和tasklet运行在中断上下文，不能耗时很长，更不能休眠，而内核线程可以执行耗时很长的任务，也能够休眠。

内核线程的底层创建接口跟创建普通进程一样，也是通过kernel_thread/fork完成的。只是内核线程在创建的过程中不会拷贝父进程的用户地址空间。这是如何实现的？我们知道，内核在初始化启动之后，会首先创建一个0号进程（也叫idle进程），该进程运行在内核地址空间，没有用户态地址空间，负责原始进程环境的初始化。那么接下来，0号进程在调用 rest_init()的过程中会创建两个特殊的系统进程：1号进程和2号进程。

1号进程（init进程）：系统中所有用户态进程的根进程。0号进程在 rest_init() 中首先调用 kernel_thread()/fork() 创建 1号进程，此时1号进程和0号进程还未分开，共享内核地址空间，并没有用户态地址空间。然后，1号进程在运行的过程中，调用 exec() 函数，加载 /sbin/init 等用户态可执行程序并执行，1号进程开始拥有自己的用户态地址空间。这样，在后续系统运行过程中，每个用户态进程都从1号进程直接或间接 fork 出来，1号进程成为所有用户态进程的根进程。

2号进程（kthreadd）：系统中所有内核线程的父进程。0号进程在 rest_init() 中创建完1号进程之后，同样通过 kernel_thread()/fork() 创建 2号进程，只不过2号进程的执行函数是 kthreadd()，该函数不会调用任何 exec() 函数。因此，2号进程只有内核态地址空间，没有用户态地址空间。2号进程负责系统内其他所有内核线程的创建，那么所有内核线程都是只有内核态地址空间，没有用户态地址空间。

1，2号进程kthreadd的创建：

/*
 * Spawns the init thread (kernel_init) and kthreadd, stores kthreadd's
 * task_struct in kthreadd_task, then schedules once and enters
 * cpu_startup_entry(). The order of the two kernel_thread() calls is
 * deliberate; see the comment inside the function.
 */
static noinline void __init_refok rest_init(void)
{
int pid;

   rcu_scheduler_starting();
   smpboot_thread_init();
   /*
   * We need to spawn init first so that it obtains pid 1, however
   * the init task will end up wanting to create kthreads, which, if
   * we schedule it before we create kthreadd, will OOPS.
   */
   kernel_thread(kernel_init, NULL, CLONE_FS); // create process 1 (init)
   numa_default_policy();
   pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); // create process 2 (kthreadd)
   rcu_read_lock();
   kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns); // look up kthreadd's task by the pid just returned
   rcu_read_unlock();
   complete(&kthreadd_done); // signal kthreadd_done now that kthreadd_task has been assigned

   /*
   * The boot idle thread must execute schedule()
   * at least once to get things moving:
   */
   init_idle_bootup_task(current);
   schedule_preempt_disabled();
   /* Call into cpu_idle with preempt disabled */
   cpu_startup_entry(CPUHP_ONLINE);
}

/*
 * Create a kernel thread.
 *
 * Thin wrapper around _do_fork(): the caller's clone flags are always
 * combined with CLONE_VM and CLONE_UNTRACED, and the entry function and
 * its argument are handed over as unsigned long values.
 *
 * Returns whatever _do_fork() returns.
 */
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
	const unsigned long clone_flags = flags | CLONE_VM | CLONE_UNTRACED;
	const unsigned long entry = (unsigned long)fn;
	const unsigned long entry_arg = (unsigned long)arg;

	return _do_fork(clone_flags, entry, entry_arg, NULL, NULL, 0);
}

_do_fork() -> copy_process() -> copy_mm() // _do_fork()最终会调用 copy_mm()拷贝父进程地址空间

/*
 * Set up the address space of a task being forked.
 *
 * @clone_flags: clone flags of the fork request; only CLONE_VM is examined.
 * @tsk:         the new task.
 *
 * Resets the fault and context-switch counters of @tsk and clears its mm
 * pointers. If the forking task (current) has no mm, @tsk is left without
 * one as well. Otherwise @tsk either shares current's mm (CLONE_VM, with
 * mm_users incremented) or receives the result of dup_mm().
 *
 * Returns 0 on success, -ENOMEM if dup_mm() returns NULL.
 */
static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
{
	struct mm_struct *parent_mm;
	struct mm_struct *child_mm;

	/* A new task starts with clean fault and context-switch accounting. */
	tsk->maj_flt = 0;
	tsk->min_flt = 0;
	tsk->nivcsw = 0;
	tsk->nvcsw = 0;
#ifdef CONFIG_DETECT_HUNG_TASK
	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
#endif

	tsk->mm = NULL;
	tsk->active_mm = NULL;

	/*
	 * Are we cloning a kernel thread?
	 *
	 * A forking task without an mm has nothing to share or duplicate,
	 * so the new task keeps the NULL mm pointers set above.
	 */
	parent_mm = current->mm;
	if (parent_mm == NULL)
		return 0;

	/* initialize the new vmacache entries */
	vmacache_flush(tsk);

	if (clone_flags & CLONE_VM) {
		/* Share the parent's mm and account for the extra user. */
		atomic_inc(&parent_mm->mm_users);
		child_mm = parent_mm;
	} else {
		/* Separate address space requested: duplicate it. */
		child_mm = dup_mm(tsk);
		if (child_mm == NULL)
			return -ENOMEM;
	}

	tsk->mm = child_mm;
	tsk->active_mm = child_mm;
	return 0;
}

2，内核线程的创建过程：

所有的内核线程都是通过2号进程 kthreadd 创建出来的，那么用户接口又是什么呢？内核设计了如下内核线程的创建框架：内核线程的创建是异步执行的，提供一个系统范围的创建请求链表，其他内核模块或线程提交请求到链表，由2号进程 kthreadd 统一处理链表的所有请求。这样实际的创建都由2号进程 kthreadd 来完成。

（1）内核线程创建请求数据结构 struct kthread_create_info：

/* One kernel-thread creation request, queued on kthread_create_list. */
struct kthread_create_info
{
   /* Information passed to kthread() from kthreadd. */
   int (*threadfn)(void *data); // function the new kernel thread will run
   void *data;      // argument passed to threadfn
   int node;        // copied to current->pref_node_fork by create_kthread() under CONFIG_NUMA

   /* Result passed back to kthread_create() from kthreadd. */
   struct task_struct *result; // outcome of the asynchronous creation
   struct completion *done; // completed when creation finishes, to wake the requester

struct list_head list; // linkage into the creation request list
};

static LIST_HEAD(kthread_create_list); // global list of pending creation requests

（2）内核线程创建用户接口 kthread_create：

/*
 * Convenience wrapper around kthread_create_on_node() that passes
 * NUMA_NO_NODE as the node argument.
 */
#define kthread_create(threadfn, data, namefmt, arg...) \
kthread_create_on_node(threadfn, data, NUMA_NO_NODE, namefmt, ##arg)

/*
 * Queue a request asking kthreadd to create a kernel thread that runs
 * threadfn(data), wait for the outcome, and on success name the task from
 * namefmt/... and reset its scheduling policy and allowed CPUs.
 *
 * Returns the value stored in create->result (the new task or an error
 * pointer), ERR_PTR(-ENOMEM) if the request cannot be allocated, or
 * ERR_PTR(-EINTR) if the killable wait is interrupted before anyone has
 * claimed create->done; in that last case the request is not freed here.
 */
struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
                   void *data, int node,
                   const char namefmt[],
                   ...)
{
   DECLARE_COMPLETION_ONSTACK(done); // completion used to wait for the asynchronous creation to finish
   struct task_struct *task;
   struct kthread_create_info *create = kmalloc(sizeof(*create),
                       GFP_KERNEL); // allocate the creation request

   if (!create)
       return ERR_PTR(-ENOMEM);
   create->threadfn = threadfn;
   create->data = data;
   create->node = node;
   create->done = &done;

   spin_lock(&kthread_create_lock);
   list_add_tail(&create->list, &kthread_create_list); // append to the global creation list
   spin_unlock(&kthread_create_lock);

   wake_up_process(kthreadd_task); // wake kthreadd so it processes the request
   /*
   * Wait for completion in killable state, for I might be chosen by
   * the OOM killer while kthreadd is trying to allocate memory for
   * new kernel thread.
   */
   if (unlikely(wait_for_completion_killable(&done))) {
       /*
       * If I was SIGKILLed before kthreadd (or new kernel thread)
       * calls complete(), leave the cleanup of this structure to
       * that thread.
       */
       if (xchg(&create->done, NULL))
           return ERR_PTR(-EINTR);
       /*
       * kthreadd (or new kernel thread) will call complete()
       * shortly.
       */
       wait_for_completion(&done); // block until complete() is called on done
   }
   task = create->result; // woken up: fetch the creation result
   if (!IS_ERR(task)) {
       static const struct sched_param param = { .sched_priority = 0 };
       va_list args;

       va_start(args, namefmt);
       vsnprintf(task->comm, sizeof(task->comm), namefmt, args);
       va_end(args);
       /*
       * root may have changed our (kthreadd's) priority or CPU mask.
       * The kernel thread should not inherit these properties.
       */
       sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
       set_cpus_allowed_ptr(task, cpu_all_mask);
   }
   kfree(create);
   return task;
}
（3）内核线程的实际创建 kthreadd:

/*
 * Thread function of kthreadd. After setting up its own task attributes it
 * loops forever: sleep while kthread_create_list is empty, otherwise take
 * requests off the list one at a time and pass each to create_kthread().
 * The for (;;) loop has no exit, so the trailing return is never reached.
 */
int kthreadd(void *unused)
{
struct task_struct *tsk = current;

   /* Setup a clean context for our children to inherit. */
   set_task_comm(tsk, "kthreadd");
   ignore_signals(tsk);
   set_cpus_allowed_ptr(tsk, cpu_all_mask);
   set_mems_allowed(node_states[N_MEMORY]);

current->flags |= PF_NOFREEZE;

   for (;;) { // daemon loop: repeatedly drain the creation list
       /*
        * NOTE(review): the state is set before the emptiness check,
        * presumably so that a wake_up_process() issued between the
        * check and schedule() is not lost - confirm.
        */
       set_current_state(TASK_INTERRUPTIBLE);
       if (list_empty(&kthread_create_list))
           schedule(); // nothing queued: give up the CPU
       __set_current_state(TASK_RUNNING);

       spin_lock(&kthread_create_lock);
       while (!list_empty(&kthread_create_list)) { // requests are pending
           struct kthread_create_info *create;

           create = list_entry(kthread_create_list.next,
                   struct kthread_create_info, list);
           list_del_init(&create->list); // detach the first request from the list
           spin_unlock(&kthread_create_lock);

create_kthread(create); // do the actual thread creation, with the list lock released

           spin_lock(&kthread_create_lock);
       }
       spin_unlock(&kthread_create_lock);
   }

return 0;
}

/*
 * Handle one creation request: start a new task running kthread() with
 * @create as its argument.
 *
 * If kernel_thread() succeeds nothing more is done here. If it fails, the
 * error is reported to the requester through create->result and its
 * completion - unless the requester has already cleared create->done, in
 * which case the request is freed here instead.
 */
static void create_kthread(struct kthread_create_info *create)
{
	struct completion *waiter;
	int pid;

#ifdef CONFIG_NUMA
	current->pref_node_fork = create->node;
#endif
	/* We want our own signal handler (we take no signals by default). */
	pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
	if (pid >= 0)
		return;

	/* Creation failed: atomically claim the requester's completion. */
	waiter = xchg(&create->done, NULL);
	if (waiter == NULL) {
		/* If user was SIGKILLed, I release the structure. */
		kfree(create);
		return;
	}

	/* Hand the error back and wake the thread that submitted the request. */
	create->result = ERR_PTR(pid);
	complete(waiter);
}

以上就是内核线程的创建机制，可见linux内核的设计是多么的优雅！0号线程（idle线程）创建特殊的1号和2号线程后，作为系统调度线程；1号线程作为系统所有用户态进程的祖先进程；2号线程作为系统的所有内核态线程的父线程。所有用户通过 kthread_create() 接口提交内核线程创建请求，统一由 2号线程 kthreadd 完成内核线程的创建。

转载于:https://my.oschina.net/yepanl/blog/3050797