新进程创建后，怎么执行的？

qxqxa

已于 2023-12-23 21:55:28 修改

阅读量982

点赞数 15

分类专栏： Linux内核之进程管理文章标签： linux

于 2023-12-23 21:52:59 首次发布

本文链接：https://blog.csdn.net/weixin_44586903/article/details/135174619

版权

Linux内核之进程管理专栏收录该内容

2 篇文章 0 订阅

订阅专栏

复制寄存器

上篇说到copy_process创建进程的时候，会调用copy_thread来复制一些寄存器：对于内核线程，X19存放线程函数的地址，X20存放线程函数的参数；对于用户进程，主要是保存用户模式的各种寄存器状态，便于从内核返回用户模式时恢复现场

/*
 * copy_thread - set up the saved register state of a newly forked task p.
 *
 * For a task without PF_KTHREAD, the current task's pt_regs are copied into
 * the child's pt_regs and the child's x0 is cleared. For a kernel thread the
 * child's pt_regs are zeroed and stack_start / stk_sz are stored in the saved
 * x19 / x20. In both cases the saved pc is set to ret_from_fork and the saved
 * sp to the child's pt_regs.
 *
 * Returns 0 on success, or -EINVAL when a non-compat task is given a
 * stack_start that is not 16-byte aligned.
 */
int copy_thread(unsigned long clone_flags, unsigned long stack_start,
		unsigned long stk_sz, struct task_struct *p)
{
	struct pt_regs *childregs = task_pt_regs(p);

	memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));

	/*
	 * In case p was allocated the same task_struct pointer as some
	 * other recently-exited task, make sure p is disassociated from
	 * any cpu that may have run that now-exited task recently.
	 * Otherwise we could erroneously skip reloading the FPSIMD
	 * registers for p.
	 */
	fpsimd_flush_task_state(p);

	if (likely(!(p->flags & PF_KTHREAD))) { // user task: PF_KTHREAD is not set
		*childregs = *current_pt_regs(); // copy the current task's pt_regs into the child's pt_regs
		childregs->regs[0] = 0; // clear the child's x0; x0 carries the syscall return value, so the child returns 0

		/*
		 * Read the current TLS pointer from tpidr_el0 as it may be
		 * out-of-sync with the saved value.
		 */
		asm("mrs %0, tpidr_el0" : "=r" (*task_user_tls(p)));

		if (stack_start) {
			if (is_compat_thread(task_thread_info(p)))
				childregs->compat_sp = stack_start;
			/* 16-byte aligned stack mandatory on AArch64 */
			else if (stack_start & 15)
				return -EINVAL;
			else
				childregs->sp = stack_start;
		}

		/*
		 * If a TLS pointer was passed to clone (4th argument), use it
		 * for the new thread.
		 */
		if (clone_flags & CLONE_SETTLS)
			p->thread.tp_value = childregs->regs[3];
	} else {// kernel thread: PF_KTHREAD is set
		memset(childregs, 0, sizeof(struct pt_regs));
		childregs->pstate = PSR_MODE_EL1h;
		/*
		 * stack_start and stk_sz are stashed in the saved x19 / x20:
		 * ret_from_fork does "blr x19" with x0 = x20 when x19 is
		 * non-zero, so they act as the thread function and its
		 * argument.
		 */
		p->thread.cpu_context.x19 = stack_start;
		p->thread.cpu_context.x20 = stk_sz;
	}
	/*
	 * cpu_switch_to loads the saved pc into lr and the saved sp into sp
	 * before returning, so the first switch to p lands in ret_from_fork
	 * with sp pointing at childregs.
	 */
	p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
	p->thread.cpu_context.sp = (unsigned long)childregs;

	ptrace_hw_copy_thread(p);

	return 0;
}

thread_struct

struct thread_struct保存任务的CPU相关状态（CPU-specific state of this task），其中cpu_context里的x19-x28是arm64过程调用标准（AAPCS64）规定的被调用者保存（callee-saved）寄存器，上下文切换时需要和fp、sp、pc一起保存和恢复

/*
 * CPU-specific state of a task.
 *
 * cpu_context holds the registers saved and restored by cpu_switch_to.
 * tp_value is written by copy_thread from the 4th clone argument when
 * CLONE_SETTLS is set.
 */
struct thread_struct {
	struct cpu_context	cpu_context;	/* cpu context */
	unsigned long		tp_value;	/* TLS register */
#ifdef CONFIG_COMPAT
	unsigned long		tp2_value;	/* NOTE(review): presumably a second TLS value for compat tasks - confirm */
#endif
	struct fpsimd_state	fpsimd_state;
	unsigned long		fault_address;	/* fault info */
	unsigned long		fault_code;	/* ESR_EL1 value */
	struct debug_info	debug;		/* debugging */
};
/*
 * Register set saved and restored by cpu_switch_to on a context switch.
 *
 * cpu_switch_to stores and loads these with consecutive stp/ldp pairs
 * (x19/x20 ... x27/x28, then x29/sp, then lr), so the field order here
 * must match that sequence: fp receives x29 and pc receives lr.
 */
struct cpu_context {
	unsigned long x19;
	unsigned long x20;
	unsigned long x21;
	unsigned long x22;
	unsigned long x23;
	unsigned long x24;
	unsigned long x25;
	unsigned long x26;
	unsigned long x27;
	unsigned long x28;
	unsigned long fp;	/* x29 */
	unsigned long sp;
	unsigned long pc;	/* lr at the time of the switch */
};

唤醒进程

然后唤醒新进程：主要是设置进程状态，将进程插入运行队列，再检查新进程是否可以抢占当前进程；如果不能抢占，就只能等待后续调度

/*
 * wake_up_new_task - wake up a newly created task for the first time.
 *
 * This function will do some initial scheduler statistics housekeeping
 * that must be done for every newly created context, then puts the task
 * on the runqueue and wakes it.
 *
 * Runs with p->pi_lock held (interrupts saved in rf.flags) and, from
 * __task_rq_lock() onwards, with the task's runqueue locked.
 */
void wake_up_new_task(struct task_struct *p)
{
	struct rq_flags rf;
	struct rq *rq;

	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
	p->state = TASK_RUNNING; // mark the task as TASK_RUNNING
#ifdef CONFIG_SMP
	/*
	 * Fork balancing, do it here and not earlier because:
	 *  - cpus_allowed can change in the fork path
	 *  - any previously selected CPU might disappear through hotplug
	 *
	 * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
	 * as we're not fully set-up yet.
	 */
	p->recent_used_cpu = task_cpu(p);
	__set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); // pick a CPU via select_task_rq(SD_BALANCE_FORK); __set_task_cpu() deliberately bypasses migrate_task_rq
#endif
	rq = __task_rq_lock(p, &rf); // lock the task's runqueue
	update_rq_clock(rq);
	post_init_entity_util_avg(&p->se);

	activate_task(rq, p, ENQUEUE_NOCLOCK); // insert the new task into the runqueue
	p->on_rq = TASK_ON_RQ_QUEUED;
	trace_sched_wakeup_new(p);
	check_preempt_curr(rq, p, WF_FORK); // check whether the new task should preempt the current one
#ifdef CONFIG_SMP
	if (p->sched_class->task_woken) {
		/*
		 * Nothing relies on rq->lock after this, so its fine to
		 * drop it.
		 */
		rq_unpin_lock(rq, &rf);
		p->sched_class->task_woken(rq, p); // call the scheduling class's task_woken hook
		rq_repin_lock(rq, &rf);
	}
#endif
	task_rq_unlock(rq, p, &rf); // release the runqueue lock (presumably p->pi_lock too; no other unlock is visible here)
}

ret_from_fork

copy_thread中的p->thread.cpu_context.pc = (unsigned long)ret_from_fork;决定了新进程第一次执行时的pc就是ret_from_fork，也就是说，不管是抢占后立即执行，还是之后被调度执行，新进程首先执行的都是ret_from_fork；当然，如果进程已经运行过，这个pc保存的就是它上次被切换出去时的返回地址（cpu_switch_to中保存的lr）

/*
 * This is how we return from a fork.
 *
 * First code run by a new task (copy_thread sets the saved pc to this
 * symbol). x19/x20 are the values copy_thread stored in cpu_context:
 * non-zero x19 means a kernel thread whose function is called with x20
 * as its argument; zero x19 means a user task that goes straight back
 * to user mode.
 */
ENTRY(ret_from_fork)
	bl	schedule_tail // scheduler cleanup work after the switch
	cbz	x19, 1f			// x19 == 0: not a kernel thread, skip the call
	mov	x0, x20			// x20 becomes the first argument
	blr	x19      // kernel thread: call the thread function held in x19
1:	get_thread_info tsk // load sp_el0 into tsk (x28)
	b	ret_to_user // return to user mode
ENDPROC(ret_from_fork)

获取thread_info

get_thread_info宏把EL0栈指针寄存器（SP_EL0）的值读到目标寄存器\rd中；内核态下SP_EL0并不当作栈指针使用，而是被cpu_switch_to用来存放当前进程的task_struct/thread_info地址

/*
 * Return the current thread_info.
 *
 * Reads sp_el0 into \rd. cpu_switch_to writes sp_el0 on every context
 * switch (the next task_struct, or the base of the next task's stack
 * when CONFIG_THREAD_INFO_IN_TASK is not set).
 */
        .macro  get_thread_info, rd
        mrs     \rd, sp_el0
        .endm

// Register aliases used by the entry code; for example, tsk is an alias for x28
sc_nr   .req    x25             // number of system calls
scno    .req    x26             // syscall number
stbl    .req    x27             // syscall table pointer
tsk     .req    x28             // current thread_info

寄存器保存

那么sp_el0的值又是在哪里赋值的？上下文切换时会调用cpu_switch_to(prev, next)，参数next就是下一个进程的进程描述符，也就是第二个参数x1，它在cpu_switch_to里被赋值给了sp_el0
处理器状态切换时，先将前一个进程的x19-x28、fp、sp、pc保存到其进程描述符的cpu_context中，然后将即将执行的进程的cpu_context中的x19-x28、fp、sp、pc恢复到相应寄存器中，并且将即将执行的进程的进程描述符task_struct的地址存放在sp_el0中，用于通过current找到当前进程，这样就完成了处理器的状态切换

/*
 * Register switch for AArch64. The callee-saved registers need to be saved
 * and restored. On entry:
 *   x0 = previous task_struct (must be preserved across the switch)
 *   x1 = next task_struct
 * Previous and next are guaranteed not to be the same.
 *
 * x19-x28, x29 (fp), sp and lr are written to prev's save area and then
 * reloaded from next's, in the field order of struct cpu_context. The
 * final ret therefore continues at next's saved lr (ret_from_fork for a
 * task that has never run, as set up by copy_thread).
 */
ENTRY(cpu_switch_to)
        // x10 = THREAD_CPU_CONTEXT, presumably the offset of thread.cpu_context in task_struct - confirm
        mov     x10, #THREAD_CPU_CONTEXT
        // x8 = save area of prev
        add     x8, x0, x10
        // sp cannot be used directly as an stp operand, so copy it to x9
        mov     x9, sp
        stp     x19, x20, [x8], #16             // store callee-saved registers
        stp     x21, x22, [x8], #16
        stp     x23, x24, [x8], #16
        stp     x25, x26, [x8], #16
        stp     x27, x28, [x8], #16
        // fp (x29) and sp
        stp     x29, x9, [x8], #16
        // lr goes into the pc slot
        str     lr, [x8]
        // x8 = save area of next
        add     x8, x1, x10
        ldp     x19, x20, [x8], #16             // restore callee-saved registers
        ldp     x21, x22, [x8], #16
        ldp     x23, x24, [x8], #16
        ldp     x25, x26, [x8], #16
        ldp     x27, x28, [x8], #16
        ldp     x29, x9, [x8], #16
        ldr     lr, [x8]
        // switch to next's kernel stack pointer
        mov     sp, x9
#ifdef CONFIG_THREAD_INFO_IN_TASK
        // sp_el0 = next task_struct
        msr     sp_el0, x1
#else
        // sp_el0 = next's sp rounded down to a THREAD_SIZE boundary
        and     x9, x9, #~(THREAD_SIZE - 1)
        msr     sp_el0, x9
#endif
        // return to next's saved lr
        ret
ENDPROC(cpu_switch_to)

装载程序

在用户空间调用fork后会陷入内核，新创建的子进程从ret_from_fork返回用户模式，之后通常会调用execve来装载要运行的程序

execve/execveat系统调用
	do_execveat_common
		do_open_execat //打开可执行文件
		sched_exec //执行一次负载均衡
		bprm_mm_init //创建新的内存描述符（mm_struct）
			mm_alloc
			__bprm_mm_init
		prepare_binprm //设置进程凭证（cred），读文件的前128bytes到缓冲区
		copy_strings //把文件名称，环境字符串，参数字符串压到用户栈
		exec_binprm //寻找注册过的二进制格式的处理程序，识别到即调用struct linux_binfmt->load_binary来加载程序
			search_binary_handler

装载elf程序

ELF是一种二进制格式，它对应的处理程序elf_format（struct linux_binfmt）也会注册到内核中；装载的如果是ELF程序，就会匹配到这个linux_binfmt，并调用其中的成员load_binary

/*
 * Binary-format descriptor for ELF: wires the ELF loader callbacks
 * (load_elf_binary for executables, load_elf_library for shared
 * libraries, elf_core_dump for core dumps) into a struct linux_binfmt.
 */
static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
};

对应的就是load_elf_binary，其框架如下

最后就会调用start_thread，来设置结构体pt_regs中的pc为程序入口，sp为用户栈指针，返回到用户模式，就会按照pc，sp来正常执行程序了

/*
 * Reset a pt_regs for a freshly loaded program: zero every saved
 * register, call forget_syscall() on it, then set the saved pc to the
 * given entry address. The memset must come first because it also
 * clears pc.
 */
static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
{
	memset(regs, 0, sizeof(*regs));
	forget_syscall(regs);
	regs->pc = pc;
}

/*
 * Prepare regs so that the task starts a new program at pc with stack
 * pointer sp: the registers are reset through start_thread_common(),
 * pstate is set to PSR_MODE_EL0t, and PSR_SSBS_BIT is added unless the
 * SSBD state is ARM64_SSBD_FORCE_ENABLE.
 */
static inline void start_thread(struct pt_regs *regs, unsigned long pc,
				unsigned long sp)
{
	start_thread_common(regs, pc);
	regs->pstate = PSR_MODE_EL0t;

	/* Set SSBS in pstate unless the SSBD state is force-enable. */
	if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE)
		regs->pstate |= PSR_SSBS_BIT;

	regs->sp = sp;
}

装载总结

1.创建一个独立的虚拟地址空间：此处的创建虚拟地址空间并不是真正的空间，而是创建映射函数所需要的相应的数据结构，比如页表，它存着虚拟空间和物理空间的联系。

2.读取可执行文件头并且建立虚拟空间与可执行文件的映射关系：当程序发生页错误时，操作系统将从物理内存中分配一个物理页，然后将该“缺页”从磁盘中读取到内存中，再设置缺页的虚拟页和物理页的映射关系，这样程序才得以正常运行。设置缺页的虚拟页和物理页的映射关系（数据结构）是装载的核心。

3.将CPU的指令寄存器设置成可执行文件的入口地址，启动运行。

进程状态

进程从开始到结束，中间会经历多个状态；Linux虽然定义了不少进程状态，但其中很多是中间状态，或者是若干状态的组合

/*
 * Task state bits. TASK_RUNNING is 0; every other base state is a
 * distinct single bit, so the convenience macros further down can OR
 * several of them together.
 */
/* Used in tsk->state: */
#define TASK_RUNNING			0x0000
#define TASK_INTERRUPTIBLE		0x0001
#define TASK_UNINTERRUPTIBLE		0x0002
#define __TASK_STOPPED			0x0004
#define __TASK_TRACED			0x0008
/* Used in tsk->exit_state: */
#define EXIT_DEAD			0x0010
#define EXIT_ZOMBIE			0x0020
/* Both exit bits set: 0x0030 */
#define EXIT_TRACE			(EXIT_ZOMBIE | EXIT_DEAD)
/* Used in tsk->state again: */
#define TASK_PARKED			0x0040
#define TASK_DEAD			0x0080
#define TASK_WAKEKILL			0x0100
#define TASK_WAKING			0x0200
#define TASK_NOLOAD			0x0400
#define TASK_NEW			0x0800
#define TASK_STATE_MAX			0x1000

/* Convenience macros for the sake of set_current_state: */
#define TASK_KILLABLE			(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
#define TASK_STOPPED			(TASK_WAKEKILL | __TASK_STOPPED)
#define TASK_TRACED			(TASK_WAKEKILL | __TASK_TRACED)

/* Uninterruptible state combined with TASK_NOLOAD */
#define TASK_IDLE			(TASK_UNINTERRUPTIBLE | TASK_NOLOAD)

/* Convenience macros for the sake of wake_up(): */
#define TASK_NORMAL			(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)

/* get_task_state(): */
#define TASK_REPORT			(TASK_RUNNING | TASK_INTERRUPTIBLE | \
					 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
					 __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
					 TASK_PARKED)

我们一般就关注这几个状态就行

几个重要的状态的转换流程如下

qxqxa

关注

15
点赞
踩
21

收藏

觉得还不错? 一键收藏
1
评论
新进程创建后，怎么执行的？

1.创建一个独立的虚拟地址空间：此处的创建虚拟地址空间并不是真正的空间，而是创建映射函数所需要的相应的数据结构，比如页表，它存着虚拟空间和物理空间的联系。2.读取可执行文件头并且建立虚拟空间与可执行文件的映射关系：当程序发生页错误时，操作系统将从物理内存中分配一个物理页，然后将该”缺页“从磁盘中读取到内存中，再设置缺页的虚拟页和物理页的映射关系，这样程序才得以正常运行。设置缺页的虚拟页和物理页的映射关系（数据结构）是装载的核心。3.将CPU的指令寄存器设置成可执行文件的入口地址，启动运行。
复制链接

扫一扫