Linux kernel boot analysis (Part 1)

Recently I have been using the Phytium E2000 development board a lot at work and have hit a few boot problems, so I traced the Linux kernel boot flow. We cannot read the Kylin sources, but we can read Phytium's open-source kernel directly; click here to jump to the Gitee repository. We use the 5.10 branch.

Assembly background you need for this article:

  1. .quad defines an 8-byte value; .long defines a 4-byte value.
  2. SYM_CODE_START defines a function (an assembly routine); once defined, it can be reached with bl or b.

If some of the instructions look unfamiliar, see the next article: notes on the assembly instructions needed to read the Linux kernel boot flow.

The Phytium E2000 development board can boot with either U-Boot or UEFI. Whichever loader is used, once the Linux kernel is loaded, execution starts at _head in arch/arm64/kernel/head.S. Let's start the analysis:

1. _head

	__HEAD
_head:
	/*
	 * DO NOT MODIFY. Image header expected by Linux boot-loaders.
	 */
#ifdef CONFIG_EFI
	/*
	 * This add instruction has no meaningful effect except that
	 * its opcode forms the magic "MZ" signature required by UEFI.
	 */
	add	x13, x18, #0x16
	b	primary_entry
#else
	b	primary_entry			// branch to kernel start, magic
	.long	0				// reserved
#endif
	.quad	0				// Image load offset from start of RAM, little-endian
	le64sym	_kernel_size_le			// Effective size of kernel image, little-endian
	le64sym	_kernel_flags_le		// Informative flags, little-endian
	.quad	0				// reserved
	.quad	0				// reserved
	.quad	0				// reserved
	.ascii	ARM64_IMAGE_MAGIC		// Magic number
#ifdef CONFIG_EFI
	.long	pe_header - _head		// Offset to the PE header.

pe_header:
	__EFI_PE_HEADER
#else
	.long	0				// reserved
#endif
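
This header follows the arm64 boot protocol (Documentation/arm64/booting.rst): two instruction words, then a series of .quad (8-byte) and .long (4-byte) fields. As a rough C sketch of the 64-byte layout (the struct and field names below are invented for readability, not the kernel's own definitions):

#include <stdint.h>
#include <stdio.h>

/* Illustrative layout of the 64-byte arm64 Image header built by _head above.
 * The widths follow the directives used there (.long = 4 bytes, .quad = 8 bytes). */
struct arm64_image_header {
	uint32_t code0;			/* "add x13, x18, #0x16": its opcode doubles as the "MZ" magic for UEFI */
	uint32_t code1;			/* "b primary_entry" */
	uint64_t text_offset;		/* image load offset from start of RAM */
	uint64_t image_size;		/* effective size of the kernel image */
	uint64_t flags;			/* informative flags (endianness, page size, ...) */
	uint64_t res2, res3, res4;	/* reserved */
	uint32_t magic;			/* ARM64_IMAGE_MAGIC, the "ARM\x64" string */
	uint32_t res5;			/* offset to the PE header when CONFIG_EFI is enabled */
};

int main(void)
{
	/* the header laid out in _head is exactly 64 bytes */
	printf("header size = %zu\n", sizeof(struct arm64_image_header));
	return 0;
}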

The only work done at _head is to branch to primary_entry:

SYM_CODE_START(primary_entry)
	bl	preserve_boot_args		// preserve the arguments passed in by the bootloader into boot_args
	bl	el2_setup				// check whether we are at EL1 or EL2:
									// EL1 is the simple case, just configure sctlr_el1;
									// EL2 needs more work: configure sctlr_el2, memory, hcr, gic
	adrp	x23, __PHYS_OFFSET
	and	x23, x23, MIN_KIMG_ALIGN - 1	// KASLR offset, defaults to 0
	bl	set_cpu_boot_mode_flag		// record the EL this CPU booted in into __boot_cpu_mode
	bl	__create_page_tables		// create the page tables
	/*
	 * The following calls CPU setup code, see arch/arm64/mm/proc.S for
	 * details.
	 * On return, the CPU will be ready for the MMU to be turned on and
	 * the TCR will have been set.
	 */
	bl	__cpu_setup			// initialise the processor so the MMU can be turned on
	b	__primary_switch	// set TTBR0/TTBR1, enable the MMU, relocate the kernel image, jump to __primary_switched

SYM_CODE_END(primary_entry)

primary_entry performs the following steps:

  1. Call preserve_boot_args to preserve the arguments passed in by the bootloader into boot_args.
  2. Call el2_setup to check whether we are at EL1 or EL2. EL1 is the simple case: just configure sctlr_el1. EL2 needs more work: configure sctlr_el2, memory, hcr and the gic.
  3. Call set_cpu_boot_mode_flag to record the exception level this CPU booted in into __boot_cpu_mode.
  4. Call __create_page_tables to create the page tables.
  5. Call __cpu_setup to initialise the processor so the MMU can be turned on.
  6. Call __primary_switch to set TTBR0 and TTBR1, enable the MMU, relocate the kernel image, and jump to __primary_switched.

1. preserve_boot_args

SYM_CODE_START_LOCAL(preserve_boot_args)
	mov	x21, x0				// x21=FDT; stash the FDT address in x21 so x0 is free as a scratch register

	adr_l	x0, boot_args			// x0 = address of the boot_args variable
	stp	x21, x1, [x0]			// store x0 (the FDT) and x1 into boot_args[0] and boot_args[1]
	stp	x2, x3, [x0, #16]		// store x2 and x3 into boot_args[2] and boot_args[3]

	dmb	sy				// needed before dc ivac with
						// MMU off

	mov	x1, #0x20			// 4 x 8 bytes
	b	__inval_dcache_area		// invalidate the data cache over [boot_args, boot_args + 0x20)
SYM_CODE_END(preserve_boot_args)
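
On the C side, boot_args is an array of four u64s in arch/arm64/kernel/setup.c; setup_arch() later checks that x1..x3 were zero, as the boot protocol requires. A minimal C sketch of what the assembly above does (illustrative only; the function name is made up):

#include <stdint.h>

/* Illustrative only: boot_args really lives in arch/arm64/kernel/setup.c
 * and is filled by the assembly above, not by C code. */
static uint64_t boot_args[4];

static void preserve_boot_args_c(uint64_t x0, uint64_t x1,
				 uint64_t x2, uint64_t x3)
{
	boot_args[0] = x0;	/* physical address of the FDT (device tree) */
	boot_args[1] = x1;	/* must be 0 per the arm64 boot protocol */
	boot_args[2] = x2;	/* must be 0 */
	boot_args[3] = x3;	/* must be 0 */
	/* the real code then invalidates the D-cache lines covering boot_args
	 * (4 * 8 = 0x20 bytes), since the MMU and caches are still off */
}

int main(void)
{
	preserve_boot_args_c(0x48000000, 0, 0, 0);	/* hypothetical FDT address */
	return (int)(boot_args[1] | boot_args[2] | boot_args[3]);	/* expect 0 */
}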

1.1 __inval_dcache_area

//__inval_dcache_area(kaddr, size)
SYM_FUNC_START_PI(__inval_dcache_area)
	/* FALLTHROUGH */

/*
 *	__dma_inv_area(start, size)
 *	- start   - virtual start address of region
 *	- size    - size in question
 */
	add	x1, x1, x0			// x1 = kaddr + size (end of the range)
	dcache_line_size x2, x3
	sub	x3, x2, #1
	tst	x1, x3				// end cache line aligned?
	bic	x1, x1, x3
	b.eq	1f
	dc	civac, x1			// clean & invalidate D / U line
1:	tst	x0, x3				// start cache line aligned?
	bic	x0, x0, x3
	b.eq	2f
	dc	civac, x0			// clean & invalidate D / U line
	b	3f
2:	dc	ivac, x0			// invalidate D / U line
3:	add	x0, x0, x2
	cmp	x0, x1
	b.lo	2b
	dsb	sy
	ret
SYM_FUNC_END_PI(__inval_dcache_area)
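
The alignment handling is easier to follow in C. A sketch of the same loop, with a hypothetical dcache_op() standing in for the dc civac/ivac instructions:

#include <stdint.h>
#include <stddef.h>

/* Sketch of __inval_dcache_area's loop: cache lines that only partially
 * overlap [kaddr, kaddr + size) are cleaned and invalidated (like "dc civac"),
 * fully covered lines are simply invalidated (like "dc ivac"). */
enum op { CLEAN_AND_INVALIDATE, INVALIDATE };

static void dcache_op(uintptr_t line, enum op op) { (void)line; (void)op; }

static void inval_dcache_area(uintptr_t kaddr, size_t size, size_t line_size)
{
	uintptr_t start = kaddr;
	uintptr_t end   = kaddr + size;
	uintptr_t mask  = line_size - 1;

	if (end & mask)				/* end not line aligned */
		dcache_op(end & ~mask, CLEAN_AND_INVALIDATE);
	end &= ~mask;

	if (start & mask) {			/* start not line aligned */
		start &= ~mask;
		dcache_op(start, CLEAN_AND_INVALIDATE);
		start += line_size;
	}
	for (; start < end; start += line_size)
		dcache_op(start, INVALIDATE);
}

int main(void)
{
	inval_dcache_area(0x1008, 0x20, 64);	/* e.g. boot_args: 0x20 bytes, 64-byte lines */
	return 0;
}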

2. el2_setup

/*
 * If we're fortunate enough to boot at EL2, ensure that the world is
 * sane before dropping to EL1.
 *
 * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
 * booted in EL1 or EL2 respectively.
 */
SYM_FUNC_START(el2_setup)
	msr	SPsel, #1			// write 1 to SPsel: use SP_ELx as the stack pointer
	mrs	x0, CurrentEL		// read the current exception level
	cmp	x0, #CurrentEL_EL2	// is it EL2?
	b.eq	1f				// if so, branch to 1f
	mov_q	x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
	msr	sctlr_el1, x0		// configure the EL1 system control register
	mov	w0, #BOOT_CPU_MODE_EL1		// return value goes in w0
	isb								// instruction synchronization barrier
	ret								// return

	// from here on we know we are at EL2
1:	mov_q	x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
	msr	sctlr_el2, x0		// configure the EL2 system control register

#ifdef CONFIG_ARM64_VHE
	/*
	 * Check for VHE being present. For the rest of the EL2 setup,
	 * x2 being non-zero indicates that we do have VHE, and that the
	 * kernel is intended to run at EL2.
	 */
	mrs	x2, id_aa64mmfr1_el1	// read the memory model feature register
	ubfx	x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4	// extract the VHE (Virtualization Host Extensions) field
#else
	mov	x2, xzr
#endif

	/* Hyp configuration. */	// hypervisor configuration register
	mov_q	x0, HCR_HOST_NVHE_FLAGS	// flags for a non-VHE host (kernel will run at EL1)
	cbz	x2, set_hcr		// x2 == 0 (no VHE, classic split mode): branch to set_hcr with the nVHE flags
	mov_q	x0, HCR_HOST_VHE_FLAGS	// VHE: route exceptions to EL2 and enable the EL2 host facilities
set_hcr:	// both paths converge here
	msr	hcr_el2, x0		// write hcr_el2
	isb					// synchronization barrier

	/*
	 * Allow Non-secure EL1 and EL0 to access physical timer and counter.
	 * This is not necessary for VHE, since the host kernel runs in EL2,
	 * and EL0 accesses are configured in the later stage of boot process.
	 * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout
	 * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined
	 * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1
	 * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
	 * EL2.
	 */
	cbnz	x2, 1f			// x2 != 0 (VHE present): skip the timer access setup below
	mrs	x0, cnthctl_el2		// read the EL2 timer control register
	orr	x0, x0, #3			// Enable EL1 physical timers
	msr	cnthctl_el2, x0
1:
	msr	cntvoff_el2, xzr		// no offset between the physical and virtual counters

#ifdef CONFIG_ARM_GIC_V3
	/* GICv3 system register access */
	mrs	x0, id_aa64pfr0_el1		// read the processor feature register
	ubfx	x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
	cbz	x0, 3f		// no GIC system-register interface (not GICv3/v4): skip to 3f

	// GICv3 or GICv4 is present
	mrs_s	x0, SYS_ICC_SRE_EL2		// read the interrupt controller system register enable register
	orr	x0, x0, #ICC_SRE_EL2_SRE	// Set ICC_SRE_EL2.SRE==1
	orr	x0, x0, #ICC_SRE_EL2_ENABLE	// Set ICC_SRE_EL2.Enable==1
	msr_s	SYS_ICC_SRE_EL2, x0
	isb					// Make sure SRE is now set
	mrs_s	x0, SYS_ICC_SRE_EL2		// Read SRE back,
	tbz	x0, #0, 3f			// and check that it sticks
	msr_s	SYS_ICH_HCR_EL2, xzr		// Reset ICC_HCR_EL2 to defaults

3:
#endif

	/* Populate ID registers. */
	// populate the virtualization ID registers
	mrs	x0, midr_el1
	mrs	x1, mpidr_el1
	msr	vpidr_el2, x0	// virtualization processor ID register
	msr	vmpidr_el2, x1	// virtualization multiprocessor ID register

#ifdef CONFIG_COMPAT
	msr	hstr_el2, xzr			// Disable CP15 traps to EL2
#endif

	/* EL2 debug */	
	mrs	x1, id_aa64dfr0_el1		// read the AArch64 debug feature register
	sbfx	x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
	cmp	x0, #1
	b.lt	4f				// Skip if no PMU present
	mrs	x0, pmcr_el0			// read the performance monitors control register
	ubfx	x0, x0, #11, #5			// extract PMCR_EL0.N, the number of PMU counters
4:
	csel	x3, xzr, x0, lt			// all PMU counters from EL1

	/* Statistical profiling */
	ubfx	x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
	cbz	x0, 7f				// Skip if SPE not present
	cbnz	x2, 6f				// VHE?
	mrs_s	x4, SYS_PMBIDR_EL1		// If SPE available at EL2,
	and	x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
	cbnz	x4, 5f				// then permit sampling of physical
	mov	x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
		      1 << SYS_PMSCR_EL2_PA_SHIFT)
	msr_s	SYS_PMSCR_EL2, x4		// addresses and physical counter
5:
	mov	x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
	orr	x3, x3, x1			// If we don't have VHE, then
	b	7f				// use EL1&0 translation.
6:						// For VHE, use EL2 translation
	orr	x3, x3, #MDCR_EL2_TPMS		// and disable access from EL1
7:
	msr	mdcr_el2, x3			// Configure debug traps

	/* LORegions */
	mrs	x1, id_aa64mmfr1_el1	// AArch64 memory model feature register
	ubfx	x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
	cbz	x0, 1f
	msr_s	SYS_LORC_EL1, xzr
1:

	/* Stage-2 translation */
	msr	vttbr_el2, xzr		// clear the virtualization translation table base register

	cbz	x2, install_el2_stub

	mov	w0, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
	isb
	ret

SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL)
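
el2_setup keeps repeating one pattern: read an ID register with mrs, pull a 4-bit feature field out with ubfx, then decide what to configure. In C the extraction is just a shift and a mask; a small sketch (the register value below is made up):

#include <stdint.h>
#include <stdio.h>

/* ubfx xD, xS, #shift, #width  is  (xS >> shift) & ((1 << width) - 1) */
static uint64_t field(uint64_t reg, unsigned shift, unsigned width)
{
	return (reg >> shift) & ((1ull << width) - 1);
}

int main(void)
{
	/* hypothetical ID_AA64MMFR1_EL1 value with the VHE field (bits 11:8) = 1 */
	uint64_t id_aa64mmfr1 = 0x0000000000000100ull;
	unsigned vhe_shift = 8;		/* ID_AA64MMFR1_VHE_SHIFT */

	if (field(id_aa64mmfr1, vhe_shift, 4))
		printf("VHE present: the kernel can run at EL2\n");
	else
		printf("no VHE: drop to EL1 and install the EL2 stub\n");
	return 0;
}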

3. set_cpu_boot_mode_flag

SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag)
	adr_l	x1, __boot_cpu_mode	// x1 = address of __boot_cpu_mode
	cmp	w0, #BOOT_CPU_MODE_EL2	// did this CPU boot in EL2?
	b.ne	1f					// no (EL1): store into __boot_cpu_mode[0]
	add	x1, x1, #4				// yes (EL2): store into __boot_cpu_mode[1]
1:	str	w0, [x1]			// record the boot mode (w0) in __boot_cpu_mode
	dmb	sy
	dc	ivac, x1			// Invalidate potentially stale cache line
	ret
SYM_FUNC_END(set_cpu_boot_mode_flag)

set_cpu_boot_mode_flag records the exception level this CPU booted in, i.e. the value in w0, into __boot_cpu_mode.
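
__boot_cpu_mode is a two-entry array: a CPU that booted in EL1 writes slot 0, one that booted in EL2 writes slot 1, so later code such as is_hyp_mode_available() can tell whether every CPU came up in EL2. A rough C sketch of that idea (the real definitions live in head.S and asm/virt.h; this is not the kernel's code):

#include <stdint.h>
#include <stdbool.h>

#define BOOT_CPU_MODE_EL1	0xe11
#define BOOT_CPU_MODE_EL2	0xe12

/* Initial values as in head.S: if a CPU ever boots in EL1, slot 0 flips to
 * EL1 and "hyp mode available" becomes false. */
static uint32_t boot_cpu_mode[2] = { BOOT_CPU_MODE_EL2, BOOT_CPU_MODE_EL1 };

static void set_cpu_boot_mode_flag(uint32_t mode)
{
	/* EL1 boot -> slot 0, EL2 boot -> slot 1 */
	boot_cpu_mode[mode == BOOT_CPU_MODE_EL2 ? 1 : 0] = mode;
}

/* roughly what is_hyp_mode_available() checks later */
static bool hyp_mode_available(void)
{
	return boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 &&
	       boot_cpu_mode[1] == BOOT_CPU_MODE_EL2;
}

int main(void)
{
	set_cpu_boot_mode_flag(BOOT_CPU_MODE_EL2);	/* boot CPU came up in EL2 */
	return hyp_mode_available() ? 0 : 1;		/* 0: both slots now read EL2 */
}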

4. __create_page_tables

SYM_FUNC_START_LOCAL(__create_page_tables)
	mov	x28, lr	// lr is the link register; save the return address in x28

	/*
	 * Invalidate the init page tables to avoid potential dirty cache lines
	 * being evicted. Other page tables are allocated in rodata as part of
	 * the kernel image, and thus are clean to the PoC per the boot
	 * protocol.
	 */
	adrp	x0, init_pg_dir	// start address of the kernel's init page tables
	adrp	x1, init_pg_end	// end address of the kernel's init page tables
	sub	x1, x1, x0
	bl	__inval_dcache_area	// invalidate the D-cache over this range

	/*
	 * Clear the init page tables.
	 */
	// zero the memory from init_pg_dir to init_pg_end,
	// i.e. clear the kernel's init page tables
	adrp	x0, init_pg_dir
	adrp	x1, init_pg_end
	sub	x1, x1, x0
1:	stp	xzr, xzr, [x0], #16	// store zeros at [x0], then advance x0 by 16
	stp	xzr, xzr, [x0], #16
	stp	xzr, xzr, [x0], #16
	stp	xzr, xzr, [x0], #16
	subs	x1, x1, #64
	b.ne	1b

	mov	x7, SWAPPER_MM_MMUFLAGS

	/*
	 * Create the identity mapping.
	 */
	// create the identity mapping: virtual address == physical address
	adrp	x0, idmap_pg_dir	// base of the identity-mapping page global directory
	adrp	x3, __idmap_text_start		// start of the identity-mapped text section

#ifdef CONFIG_ARM64_VA_BITS_52	// not enabled here, can be skipped
	mrs_s	x6, SYS_ID_AA64MMFR2_EL1
	and	x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
	mov	x5, #52
	cbnz	x6, 1f
#endif
	mov	x5, #VA_BITS_MIN	// number of virtual address bits
1:
	adr_l	x6, vabits_actual	// x6 = address of the vabits_actual variable
	str	x5, [x6]		// record the number of VA bits in vabits_actual
	dmb	sy
	dc	ivac, x6		// invalidate the cache line holding vabits_actual

	/*
	 * VA_BITS may be too small to allow for an ID mapping to be created
	 * that covers system RAM if that is located sufficiently high in the
	 * physical address space. So for the ID map, use an extended virtual
	 * range in that case, and configure an additional translation level
	 * if needed.
	 *
	 * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
	 * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
	 * this number conveniently equals the number of leading zeroes in
	 * the physical address of __idmap_text_end.
	 */
	// T0SZ determines the size of the TTBR0 input address range; check whether
	// the default is large enough to cover the idmap's physical address
	adrp	x5, __idmap_text_end	// physical page address of __idmap_text_end
	clz	x5, x5	// count the leading zeros of x5
	cmp	x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough?
	b.ge	1f			// .. then skip VA range extension

	adr_l	x6, idmap_t0sz	// x6 = address of idmap_t0sz
	str	x5, [x6]		// store the computed T0SZ into idmap_t0sz
	dmb	sy
	dc	ivac, x6		// Invalidate potentially stale cache line

#if (VA_BITS < 48)
#define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
#define EXTRA_PTRS	(1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))

	/*
	 * If VA_BITS < 48, we have to configure an additional table level.
	 * First, we have to verify our assumption that the current value of
	 * VA_BITS was chosen such that all translation levels are fully
	 * utilised, and that lowering T0SZ will always result in an additional
	 * translation level to be configured.
	 */
#if VA_BITS != EXTRA_SHIFT
#error "Mismatch between VA_BITS and page size/number of translation levels"
#endif

	mov	x4, EXTRA_PTRS
	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6	// set up the extra translation level
#else
	/*
	 * If VA_BITS == 48, we don't have to configure an additional
	 * translation level, but the top-level table has more entries.
	 */
	mov	x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
	str_l	x4, idmap_ptrs_per_pgd, x5
#endif
1:
	ldr_l	x4, idmap_ptrs_per_pgd	// load the value of idmap_ptrs_per_pgd into x4
	mov	x5, x3				// __pa(__idmap_text_start)
	adr_l	x6, __idmap_text_end		// __pa(__idmap_text_end)

	// map the given virtual address range
	map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14	// create the identity-mapping entries

	/*
	 * Map the kernel image (starting with PHYS_OFFSET).
	 */
	// map the kernel image
	adrp	x0, init_pg_dir			// page table base
	mov_q	x5, KIMAGE_VADDR		// virtual address of the kernel image
	add	x5, x5, x23			// add KASLR displacement
	mov	x4, PTRS_PER_PGD	// number of PGD entries
	adrp	x6, _end			// physical end of the kernel image
	adrp	x3, _text			// physical start of the kernel image
	sub	x6, x6, x3			// image length
	add	x6, x6, x5			// virtual end of the kernel image

	map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14	// create the kernel image mapping

	/*
	 * Since the page tables have been populated with non-cacheable
	 * accesses (MMU disabled), invalidate those tables again to
	 * remove any speculatively loaded cache lines.
	 */
	dmb	sy

	adrp	x0, idmap_pg_dir
	adrp	x1, idmap_pg_end
	sub	x1, x1, x0
	bl	__inval_dcache_area	// invalidate the D-cache

	adrp	x0, init_pg_dir
	adrp	x1, init_pg_end
	sub	x1, x1, x0
	bl	__inval_dcache_area	// invalidate the D-cache

	ret	x28	// return
SYM_FUNC_END(__create_page_tables)

__create_page_tables performs the following steps:

  1. mov x28, lr saves the return address.
  2. Invalidate the D-cache over the init page tables.
  3. Zero the memory from init_pg_dir to init_pg_end with a stp loop.
  4. Create the identity mapping, where virtual address equals physical address.
  5. Create the kernel image mapping.
  6. Invalidate the D-cache over both page tables again.

Note:
The identity mapping in idmap_pg_dir covers the physical range from __idmap_text_start to __idmap_text_end, i.e. the idmap text section. The coarse kernel page table init_pg_dir, whose address is later loaded into ttbr1_el1, covers the physical range from _text to _end, i.e. the kernel image. init_pg_dir is discarded after paging_init, once swapper_pg_dir has taken over; idmap_pg_dir is kept, because later MMU-enable paths (for example secondary CPU boot) still go through it.
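
To make the two mappings concrete, here is a toy calculation of the virtual address each page table would use for the same physical address (KIMAGE_VADDR and the load address below are made-up example values):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* example numbers only */
	uint64_t kimage_vaddr = 0xffff800010000000ull;	/* hypothetical KIMAGE_VADDR */
	uint64_t text_phys    = 0x0000000080080000ull;	/* hypothetical __pa(_text) */
	uint64_t some_pa      = text_phys + 0x1234;

	/* idmap_pg_dir: identity mapping, VA == PA (used while the MMU is being turned on) */
	uint64_t va_idmap = some_pa;

	/* init_pg_dir: kernel image mapping, VA = KIMAGE_VADDR + (PA - __pa(_text)) */
	uint64_t va_kimg = kimage_vaddr + (some_pa - text_phys);

	printf("PA %#llx -> idmap VA %#llx, kernel-image VA %#llx\n",
	       (unsigned long long)some_pa,
	       (unsigned long long)va_idmap,
	       (unsigned long long)va_kimg);
	return 0;
}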

4.1 map_memory

Let's look at how map_memory fills in the page tables:

	.macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv
	sub \vend, \vend, #1	// make vend point at the last byte of the range
	add \rtbl, \tbl, #PAGE_SIZE	// the next-level table is the page right after the top-level table
	mov \sv, \rtbl
	mov \count, #0
	// compute_indices computes the indices of vstart and vend at this pgtable level; their difference is kept in count
	compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count
	// populate_entries writes entries pointing to the next level, or the final last-level mappings
	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
	mov \tbl, \sv
	mov \sv, \rtbl

#if SWAPPER_PGTABLE_LEVELS > 3
	compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count
	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
	mov \tbl, \sv
	mov \sv, \rtbl
#endif

#if SWAPPER_PGTABLE_LEVELS > 2
	compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count
	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
	mov \tbl, \sv
#endif

	compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count
	bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1
	populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
	.endm

There are two key helpers here (a C sketch of both follows this list):

  1. compute_indices computes the table indices of vstart and vend at a given page-table level; the difference between them is accumulated in count.
  2. populate_entries writes the entries: either pointers to the next-level table, or the final last-level mappings.
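
A C sketch of what the two macros do at a single level, under the usual 4K-granule assumption (9 index bits per level). The constants and the simplified populate_entries below are illustrative, not the kernel's implementation:

#include <stdint.h>
#include <stdio.h>

#define PTRS_PER_TABLE	512	/* 4K granule: 9 bits of index per level */

/* compute_indices: table index of an address at the level defined by 'shift' */
static unsigned table_index(uint64_t vaddr, unsigned shift)
{
	return (vaddr >> shift) & (PTRS_PER_TABLE - 1);
}

/* populate_entries: fill every entry from istart to iend with a descriptor,
 * either "next-level table at phys" or a final block/page mapping. */
static void populate_entries(uint64_t *tbl, unsigned istart, unsigned iend,
			     uint64_t phys, uint64_t step, uint64_t flags)
{
	for (unsigned i = istart; i <= iend; i++, phys += step)
		tbl[i] = phys | flags;
}

int main(void)
{
	static uint64_t pgd[PTRS_PER_TABLE];
	uint64_t vstart = 0xffff800010000000ull, vend = 0xffff800010ffffffull;
	unsigned pgdir_shift = 39;	/* 4K granule, 48-bit VA: PGDIR_SHIFT */

	unsigned istart = table_index(vstart, pgdir_shift);
	unsigned iend   = table_index(vend, pgdir_shift);
	populate_entries(pgd, istart, iend, 0x80090000ull /* next table (example) */,
			 0x1000 /* PAGE_SIZE */, 0x3 /* PMD_TYPE_TABLE */);
	printf("PGD indices %u..%u populated\n", istart, iend);
	return 0;
}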

5. __cpu_setup

SYM_FUNC_START(__cpu_setup)
	tlbi	vmalle1				// invalidate the local TLB
	dsb	nsh

	mov	x1, #3 << 20		// x1 = 0x300000
	msr	cpacr_el1, x1			// allow EL1 and EL0 to execute FP/ASIMD instructions
	mov	x1, #1 << 12			// Reset mdscr_el1 and disable
	msr	mdscr_el1, x1			// trap EL0 accesses to the AArch64 DCC registers
	isb					// Unmask debug exceptions now,
	enable_dbg				// since this is per-cpu
	reset_pmuserenr_el0 x1			// Disable PMU access from EL0
	reset_amuserenr_el0 x1			// Disable AMU access from EL0

	/*
	 * Memory region attributes
	 */
	mov_q	x5, MAIR_EL1_SET	// memory attribute values (nGnRnE etc.) for MAIR_EL1
#ifdef CONFIG_ARM64_MTE		// if Memory Tagging Extension support is enabled
	/*
	 * Update MAIR_EL1, GCR_EL1 and TFSR*_EL1 if MTE is supported
	 * (ID_AA64PFR1_EL1[11:8] > 1).
	 */
	mrs	x10, ID_AA64PFR1_EL1
	ubfx	x10, x10, #ID_AA64PFR1_MTE_SHIFT, #4
	cmp	x10, #ID_AA64PFR1_MTE
	b.lt	1f

	/* Normal Tagged memory type at the corresponding MAIR index */
	mov	x10, #MAIR_ATTR_NORMAL_TAGGED
	bfi	x5, x10, #(8 *  MT_NORMAL_TAGGED), #8

	/* initialize GCR_EL1: all non-zero tags excluded by default */
	mov	x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK)
	msr_s	SYS_GCR_EL1, x10

	/*
	 * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then
	 * RGSR_EL1.SEED must be non-zero for IRG to produce
	 * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we
	 * must initialize it.
	 */
	mrs	x10, CNTVCT_EL0
	ands	x10, x10, #SYS_RGSR_EL1_SEED_MASK
	csinc	x10, x10, xzr, ne
	lsl	x10, x10, #SYS_RGSR_EL1_SEED_SHIFT
	msr_s	SYS_RGSR_EL1, x10

	/* clear any pending tag check faults in TFSR*_EL1 */
	msr_s	SYS_TFSR_EL1, xzr
	msr_s	SYS_TFSRE0_EL1, xzr
1:
#endif
	msr	mair_el1, x5	// write the attributes for the 8 memory-type indices
	/*
	 * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for
	 * both user and kernel.
	 */
	// prepare the TCR value
	mov_q	x10, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
			TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
			TCR_TBI0 | TCR_A1 | TCR_KASAN_FLAGS
	tcr_clear_errata_bits x10, x9, x5	// clear TCR bits that trigger errata on this CPU

#ifdef CONFIG_ARM64_VA_BITS_52
	ldr_l		x9, vabits_actual
	sub		x9, xzr, x9
	add		x9, x9, #64
	tcr_set_t1sz	x10, x9
#else
	ldr_l		x9, idmap_t0sz	// load idmap_t0sz
#endif
	tcr_set_t0sz	x10, x9	// update T0SZ so the ID map can be addressed

	/*
	 * Set the IPS bits in TCR_EL1.
	 */
	tcr_compute_pa_size x10, #TCR_IPS_SHIFT, x5, x6		// set TCR.IPS to the maximum supported
#ifdef CONFIG_ARM64_HW_AFDBM	// if hardware update of the Access/Dirty flags is supported
	/*
	 * Enable hardware update of the Access Flags bit.
	 * Hardware dirty bit management is enabled later,
	 * via capabilities.
	 */
	mrs	x9, ID_AA64MMFR1_EL1
	and	x9, x9, #0xf
	cbz	x9, 1f		// if the CPU does not support hardware Access-flag updates, skip
	orr	x10, x10, #TCR_HA		// enable hardware Access-flag updates
1:
#endif	/* CONFIG_ARM64_HW_AFDBM */
	msr	tcr_el1, x10		// write tcr_el1
	/*
	 * Prepare SCTLR
	 */
	mov_q	x0, SCTLR_EL1_SET
	ret					// return to head.S
SYM_FUNC_END(__cpu_setup)

__cpu_setup performs the following steps:

  1. tlbi vmalle1 invalidates the local TLB.
  2. Allow EL1 and EL0 to execute FP/ASIMD instructions.
  3. Trap EL0 accesses to the AArch64 DCC registers.
  4. Disable PMU and AMU access from EL0.
  5. Program the MAIR_EL1 memory attributes (DEVICE_nGnRnE, DEVICE_nGnRE, DEVICE_GRE, NORMAL_NC, NORMAL, NORMAL_WT, NORMAL_TAGGED); see the sketch after this list.
  6. Clear the TCR bits that trigger errata on this CPU.
  7. Update T0SZ so the ID map can be addressed.
  8. Enable hardware updates of the Access flag.
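
MAIR_EL1 holds eight 8-bit attribute fields; a page-table entry only stores an index 0..7 into them. A sketch of how such a value is packed (the attribute encodings are the architectural ones; the index assignment here is illustrative):

#include <stdint.h>
#include <stdio.h>

/* MAIR_EL1 is 8 attribute bytes; the attribute for index i sits at bits [8*i+7 : 8*i]. */
static uint64_t mair_set(uint64_t mair, unsigned idx, uint8_t attr)
{
	mair &= ~(0xffull << (8 * idx));
	return mair | ((uint64_t)attr << (8 * idx));
}

int main(void)
{
	uint64_t mair = 0;
	/* standard ARMv8 attribute encodings */
	mair = mair_set(mair, 0, 0x00);	/* Device-nGnRnE */
	mair = mair_set(mair, 1, 0x04);	/* Device-nGnRE */
	mair = mair_set(mair, 2, 0x0c);	/* Device-GRE */
	mair = mair_set(mair, 3, 0x44);	/* Normal, Non-cacheable */
	mair = mair_set(mair, 4, 0xff);	/* Normal, Write-back */
	printf("MAIR_EL1 = %#llx\n", (unsigned long long)mair);
	return 0;
}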

6. __primary_switch

SYM_FUNC_START_LOCAL(__primary_switch)
#ifdef CONFIG_RANDOMIZE_BASE
	mov	x19, x0				// stash the new SCTLR_EL1 value
	mrs	x20, sctlr_el1			// stash the old SCTLR_EL1 value
#endif

	adrp	x1, init_pg_dir		// physical base of init_pg_dir
	bl	__enable_mmu			// turn on the MMU
#ifdef CONFIG_RELOCATABLE
#ifdef CONFIG_RELR
	mov	x24, #0				// no RELR displacement yet
#endif
	bl	__relocate_kernel
#ifdef CONFIG_RANDOMIZE_BASE	// not enabled in our config, can be skipped
	ldr	x8, =__primary_switched	// load the (link-time) address of __primary_switched into x8
	adrp	x0, __PHYS_OFFSET	// physical base address of the kernel image
	blr	x8	// call __primary_switched; it may return here

	/*
	 * If we return here, we have a KASLR displacement in x23 which we need
	 * to take into account by discarding the current kernel mapping and
	 * creating a new one.
	 */
	pre_disable_mmu_workaround
	msr	sctlr_el1, x20			// disable the MMU
	isb
	bl	__create_page_tables		// recreate kernel mapping

	tlbi	vmalle1				// Remove any stale TLB entries
	dsb	nsh
	isb

	msr	sctlr_el1, x19			// re-enable the MMU
	isb
	ic	iallu				// flush instructions fetched
	dsb	nsh				// via old mapping
	isb

	bl	__relocate_kernel
#endif
#endif
	ldr	x8, =__primary_switched	// load the address of __primary_switched into x8
	adrp	x0, __PHYS_OFFSET	// physical base address of the kernel image
	br	x8						// jump to __primary_switched, never to return
SYM_FUNC_END(__primary_switch)

__primary_switch performs the following steps:

  1. Get the physical base address of init_pg_dir.
  2. Call __enable_mmu to turn on the MMU.
  3. Jump to __primary_switched and never return.

6.1 __enable_mmu

SYM_FUNC_START(__enable_mmu)
	mrs	x2, ID_AA64MMFR0_EL1	// read the memory model feature register
	ubfx	x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4	// extract the supported-granule field (bits 28..31 for 4K)
	cmp     x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN	// check that the configured page size (4K here) is supported
	b.lt    __no_granule_support		// not supported: hang
	cmp     x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX	// range check the field
	b.gt    __no_granule_support		// not supported: hang
	// (on this kernel, 52-bit input/output addresses require the 64KB granule)
	update_early_cpu_boot_status 0, x2, x3	// update the early CPU boot status for this booting CPU
	adrp	x2, idmap_pg_dir	// physical address of the identity-map page table
	phys_to_ttbr x1, x1
	phys_to_ttbr x2, x2
	msr	ttbr0_el1, x2			// identity-map page table into ttbr0_el1
	offset_ttbr1 x1, x3
	msr	ttbr1_el1, x1			// init_pg_dir (kernel image mapping) into ttbr1_el1
	isb
	msr	sctlr_el1, x0	// write sctlr_el1 (this sets the M bit and turns the MMU on)
	isb
	/*
	 * Invalidate the local I-cache so that any instructions fetched
	 * speculatively from the PoC are discarded, since they may have
	 * been dynamically patched at the PoU.
	 */
	ic	iallu	// invalidate the I-cache
	dsb	nsh		// barrier
	isb
	ret
SYM_FUNC_END(__enable_mmu)

__enable_mmu performs the following steps:

  1. Read the memory model feature register and check that the page size the kernel was built for (4K here) is actually supported by this CPU.
  2. Update the early CPU boot status for the booting CPU.
  3. Set the ttbr0_el1 and ttbr1_el1 registers (see the sketch after this list) and write sctlr_el1 to turn on the MMU.
  4. Invalidate the I-cache and issue barriers.
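
Why two translation table base registers? Virtual addresses whose upper bits are all ones are translated through ttbr1_el1, and addresses whose upper bits are all zeros through ttbr0_el1; that is why the identity map (low addresses equal to physical ones) goes into ttbr0 and the kernel image map (high addresses) into ttbr1. A toy illustration assuming a 48-bit VA configuration:

#include <stdint.h>
#include <stdio.h>

/* With T0SZ = T1SZ = 16 (48-bit VAs), addresses 0x0000_xxxx_xxxx_xxxx use
 * TTBR0_EL1 and addresses 0xffff_xxxx_xxxx_xxxx use TTBR1_EL1. */
static const char *which_ttbr(uint64_t va)
{
	return (va >> 48) == 0xffff ? "TTBR1_EL1 (init_pg_dir)"
	     : (va >> 48) == 0x0000 ? "TTBR0_EL1 (idmap_pg_dir)"
	     : "neither: translation fault";
}

int main(void)
{
	printf("%#llx -> %s\n", 0x80080000ull, which_ttbr(0x80080000ull));
	printf("%#llx -> %s\n", 0xffff800010000000ull,
	       which_ttbr(0xffff800010000000ull));
	return 0;
}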

6.2 __primary_switched

SYM_FUNC_START_LOCAL(__primary_switched)
	adrp	x4, init_thread_union	// x4 = address of init_thread_union, which holds the init task's stack
	add	sp, x4, #THREAD_SIZE	// point sp at the top of that stack (init_thread_union + THREAD_SIZE)
	adr_l	x5, init_task		// x5 = address of init_task
	msr	sp_el0, x5			// keep the current task pointer (init_task) in sp_el0, as the arm64 kernel does

#ifdef CONFIG_ARM64_PTR_AUTH
	__ptrauth_keys_init_cpu	x5, x6, x7, x8
#endif

	adr_l	x8, vectors			// address of the exception vector table
	msr	vbar_el1, x8			// install the exception vector table
	isb

	stp	xzr, x30, [sp, #-16]!	// push xzr and the return address in x30 onto the stack
	mov	x29, sp		// set up the frame pointer

#ifdef CONFIG_SHADOW_CALL_STACK
	adr_l	scs_sp, init_shadow_call_stack	// Set shadow call stack
#endif

	str_l	x21, __fdt_pointer, x5		// save the FDT address into __fdt_pointer

	ldr_l	x4, kimage_vaddr		// Save the offset between
	sub	x4, x4, x0			// the kernel virtual and
	str_l	x4, kimage_voffset, x5		// save the offset between the kernel's virtual and physical addresses in kimage_voffset

	// Clear BSS
	adr_l	x0, __bss_start
	mov	x1, xzr
	adr_l	x2, __bss_stop
	sub	x2, x2, x0
	bl	__pi_memset		// zero the BSS
	dsb	ishst				// Make zero page visible to PTW

#ifdef CONFIG_KASAN
	bl	kasan_early_init
#endif
#ifdef CONFIG_RANDOMIZE_BASE
	tst	x23, ~(MIN_KIMG_ALIGN - 1)	// already running randomized?
	b.ne	0f
	mov	x0, x21				// pass FDT address in x0
	bl	kaslr_early_init		// parse FDT for KASLR options
	cbz	x0, 0f				// KASLR disabled? just proceed
	orr	x23, x23, x0			// record KASLR offset
	ldp	x29, x30, [sp], #16		// we must enable KASLR, return
	ret					// to __primary_switch()
0:
#endif
	add	sp, sp, #16		// pop the 16-byte frame pushed earlier
	mov	x29, #0
	mov	x30, #0
	b	start_kernel	// jump to start_kernel
SYM_FUNC_END(__primary_switched)

__primary_switched performs the following steps:

  1. Set up the init task's stack and task pointer.
  2. Install the exception vector table.
  3. Save the FDT address into __fdt_pointer.
  4. Save the offset between the kernel's virtual and physical addresses in kimage_voffset (see the sketch after this list).
  5. Zero the BSS.
  6. Jump to start_kernel.
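
kimage_voffset is simply the constant difference between where the kernel image lives virtually and where it was loaded physically; later __pa()/__va() style conversions for image symbols rely on it. A small numerical sketch (addresses are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* example values only */
	uint64_t kimage_vaddr = 0xffff800010000000ull;	/* link-time VA of the image */
	uint64_t phys_offset  = 0x0000000080080000ull;	/* where it was loaded (x0 = __PHYS_OFFSET) */

	uint64_t kimage_voffset = kimage_vaddr - phys_offset;

	/* converting a kernel-image physical address back to its virtual address */
	uint64_t some_pa = phys_offset + 0x2000;
	printf("kimage_voffset = %#llx, PA %#llx -> VA %#llx\n",
	       (unsigned long long)kimage_voffset,
	       (unsigned long long)some_pa,
	       (unsigned long long)(some_pa + kimage_voffset));
	return 0;
}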

That completes the head.S part of the boot.
