Linux 初始化的时候使用 init_task 进程, 该进程是静态创建的, 别的进程都是(直接或间接)通过该进程克隆出来的。
静态创建的含义是 current 指向 init_task 进程, 并且内核堆栈使用的是 init_task 的堆栈。
下文以 Linux 4.16.3 为例, 描述 init_task 和 current 是如何撑起这个艰难任务的。
current_task 是 per-CPU 变量, 每个 CPU 各有一份, 初始值都指向 init_task, 定义如下:
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
DECLARE_PER_CPU(struct task_struct *, current_task);
current_task的使用方法:
static __always_inline struct task_struct *get_current(void)
{
return this_cpu_read_stable(current_task);
}
#define current get_current()
init_task定义如下:
struct task_struct init_task
{
... ...
.stack = init_stack,
... ...
}
init_stack 为 init_task 的堆栈, 其地址由链接脚本中的如下宏确定 (init_stack 与 init_thread_union 是同一个地址, 整个区域大小为 THREAD_SIZE):
#define INIT_TASK_DATA(align) \
. = ALIGN(align); \
VMLINUX_SYMBOL(__start_init_task) = .; \
VMLINUX_SYMBOL(init_thread_union) = .; \
VMLINUX_SYMBOL(init_stack) = .; \
*(.data..init_task) \
*(.data..init_thread_info) \
. = VMLINUX_SYMBOL(__start_init_task) + THREAD_SIZE; \
VMLINUX_SYMBOL(__end_init_task) = .;
还有一个有用的宏定义
#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
TOP_OF_KERNEL_STACK_PADDING)
堆栈的关联
__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
.x86_tss = {
/*
* .sp0 is only used when entering ring 0 from a lower
* privilege level. Since the init task never runs anything
* but ring 0 code, there is no need for a valid value here.
* Poison it.
*/
.sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
#ifdef CONFIG_X86_64
/*
* .sp1 is cpu_current_top_of_stack. The init task never
* runs user code, but cpu_current_top_of_stack should still
* be well defined before the first context switch.
*/
.sp1 = TOP_OF_INIT_STACK, // sp1 设置堆栈
#endif
#ifdef CONFIG_X86_32
.ss0 = __KERNEL_DS,
.ss1 = __KERNEL_CS,
.io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
#endif
},
#ifdef CONFIG_X86_32
/*
* Note that the .io_bitmap member must be extra-big. This is because
* the CPU will access an additional byte beyond the end of the IO
* permission bitmap. The extra byte must be all 1 bits, and must
* be within the limit.
*/
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
#endif
};
EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
内核启动时, 在 head_64.S 里面设置堆栈的代码如下:
.code64
.globl startup_64
startup_64:
... ...
/* Set up the stack for verify_cpu(), similar to initial_stack below */
leaq (__end_init_task - SIZEOF_PTREGS)(%rip), %rsp
... ...
/* Setup a boot time stack */
movq initial_stack(%rip), %rsp
如上汇编都是将内核的堆栈设置到了init_task对应的内核堆栈上, 其中 initial_stack 的定义如下
GLOBAL(initial_stack)
/*
* The SIZEOF_PTREGS gap is a convention which helps the in-kernel
* unwinder reliably detect the end of the stack.
*/
.quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
所以当 current 指向 init_task, 并且当前的 rsp 位于 init_task 的堆栈范围之内的时候, 就可以认为当前的执行进程为 init_task。
最后看看 init_task 如何克隆别的进程
void __ref rest_init(void)
{
pid = kernel_thread(kernel_init, NULL, CLONE_FS);
=>pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
return _do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
(unsigned long)arg, NULL, NULL, 0);
=>long _do_fork(unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
int __user *parent_tidptr,
int __user *child_tidptr,
unsigned long tls)
{
p = copy_process(clone_flags, stack_start, stack_size,
child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
=>struct task_struct *copy_process(
unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
int __user *child_tidptr,
struct pid *pid,
int trace,
unsigned long tls,
int node)
{
struct task_struct *p = dup_task_struct(current, node); // 这时 current 为 init_task, 所以克隆的是 init_task 的信息
}
}
}
pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
}
内核中current实现
https://www.cnblogs.com/tsecer/p/10487596.html
一步一步学linux操作系统: 11 进程数据结构_task_struct字段_内核栈
https://blog.csdn.net/leacock1991/article/details/106977793
Linux操作系统学习笔记(五)进程的核心——task_struct
https://ty-chen.github.io/linux-kernel-task-struct/