OPTEE学习笔记 - 启动流程（一）

最新推荐文章于 2025-04-08 10:37:41 发布

奥兰杜

最新推荐文章于 2025-04-08 10:37:41 发布

阅读量6.3k

点赞数 7

文章标签：安全

本文链接：https://blog.csdn.net/orlando19860122/article/details/111880726

版权

根据最近的学习心得，准备记录一下optee的启动流程分析。在第一部分里，我着重介绍BL31启动optee(BL32)的过程，以及主核从核的启动流程。下一部分，我着重介绍optee的启动流程。

我所参考的代码是optee-3.11.0，ATF-2.4，主要关注aarch64平台的启动流程。

ATF启动流程

大部分aarch64平台都会使用ATF(arm trusted firmware)固件进行启动，optee的启动属于框架中的BL32部分，是由BL31启动。ATF的启动流程可参考下图：

上图可以看到，BL1加载和启动了BL2，BL2加载了BL31, BL32, BL33，加载完成后跳转到BL31中执行。BL31会初始化BL32和BL33。

参考https://wiki.loliot.net/docs/linux/linux-uboot/embedded-linux-boot-process

BL31启动BL32

BL31启动BL32的代码在bl31_entrypoint.S中

/*
 * BL31 cold-boot entry point (excerpt; elided parts are marked "omitted").
 * Saves the arguments received in x0-x3, runs the common EL3 entry macro on
 * RESET_TO_BL31 builds, calls bl31_setup and bl31_main, and finally branches
 * to el3_exit.
 */
func bl31_entrypoint
	/* ---------------------------------------------------------------
	 * Stash the previous bootloader arguments x0 - x3 for later use.
	 * ---------------------------------------------------------------
	 */
	mov	x20, x0
	mov	x21, x1
	mov	x22, x2
	mov	x23, x3

#if !RESET_TO_BL31
... /* omitted */
#else
	/* ---------------------------------------------------------------------
	 * For RESET_TO_BL31 systems which have a programmable reset address,
	 * bl31_entrypoint() is executed only on the cold boot path so we can
	 * skip the warm boot mailbox mechanism.
	 * ---------------------------------------------------------------------
	 */
	el3_entrypoint_common					\
		_init_sctlr=1					\
		_warm_boot_mailbox=!PROGRAMMABLE_RESET_ADDRESS	\
		_secondary_cold_boot=!COLD_BOOT_SINGLE_CPU	\
		_init_memory=1					\
		_init_c_runtime=1				\
		_exception_vectors=runtime_exceptions		\
		_pie_fixup_size=BL31_LIMIT - BL31_BASE

	/* ---------------------------------------------------------------------
	 * For RESET_TO_BL31 systems, BL31 is the first bootloader to run so
	 * there's no argument to relay from a previous bootloader. Zero the
	 * arguments passed to the platform layer to reflect that.
	 * ---------------------------------------------------------------------
	 */
	mov	x20, 0
	mov	x21, 0
	mov	x22, 0
	mov	x23, 0
#endif /* RESET_TO_BL31 */

	/* --------------------------------------------------------------------
	 * Perform BL31 setup
	 * The stashed (or zeroed) arguments are handed over in x0 - x3.
	 * --------------------------------------------------------------------
	 */
	mov	x0, x20
	mov	x1, x21
	mov	x2, x22
	mov	x3, x23
	bl	bl31_setup

#if ENABLE_PAUTH
... /* omitted */
#endif /* ENABLE_PAUTH */

	/* --------------------------------------------------------------------
	 * Jump to main function
	 * --------------------------------------------------------------------
	 */
	bl	bl31_main

	/* --------------------------------------------------------------------
	 * Clean the .data & .bss sections to main memory. This ensures
	 * that any global data which was initialised by the primary CPU
	 * is visible to secondary CPUs before they enable their data
	 * caches and participate in coherency.
	 * --------------------------------------------------------------------
	 */
... /* omitted */

	b	el3_exit
endfunc bl31_entrypoint

又看到了熟悉的el3_entrypoint_common，具体的介绍可以参考前面的文章（BL1启动），在这个宏的定义中，做了以下事情需要我们关注：

设置EL3中断向量表，BL31中设置的中断向量表是runtime_exceptions
如果是从核，则跳转到plat_secondary_cold_boot_setup继续启动
初始化一些必要的寄存器

bl31_setup做了平台相关的设置，不在启动流程的参考范围内，bl31_main函数涉及到optee的启动，需要说明一下。

/*
 * BL31 main function on the primary CPU: performs platform and library
 * setup, initialises the runtime services, invokes the BL32 init hook when
 * one has been registered, then prepares entry into the next image.
 */
void bl31_main(void)
{
	NOTICE("BL31: %s\n", version_string);
	NOTICE("BL31: %s\n", build_message);

#ifdef SUPPORT_UNKNOWN_MPID
	if (unsupported_mpid_flag == 0) {
		NOTICE("Unsupported MPID detected!\n");
	}
#endif

	/* Perform platform setup in BL31 */
	bl31_platform_setup();

	/* Initialise helper libraries */
	bl31_lib_init();

#if EL3_EXCEPTION_HANDLING
	INFO("BL31: Initialising Exception Handling Framework\n");
	ehf_init();
#endif

	/* Initialize the runtime services e.g. psci. */
	INFO("BL31: Initializing runtime services\n");
	runtime_svc_init(); /* initialise the runtime services */

	/*
	 * All the cold boot actions on the primary cpu are done. We now need to
	 * decide which is the next image (BL32 or BL33) and how to execute it.
	 * If the SPD runtime service is present, it would want to pass control
	 * to BL32 first in S-EL1. In that case, SPD would have registered a
	 * function to initialize bl32 where it takes responsibility of entering
	 * S-EL1 and returning control back to bl31_main. Once this is done we
	 * can prepare entry into BL33 as normal.
	 */

	/*
	 * If SPD had registered an init hook, invoke it.
	 */
	if (bl32_init != NULL) { /* expected to have been set during runtime_svc_init() (not shown here) */
		INFO("BL31: Initializing BL32\n");

		int32_t rc = (*bl32_init)(); /* running the hook initialises BL32 (OP-TEE in this walkthrough) */

		/* The hook reports failure by returning 0. */
		if (rc == 0)
			WARN("BL31: BL32 initialization failed\n");
	}
	/*
	 * We are ready to enter the next EL. Prepare entry into the image
	 * corresponding to the desired security state after the next ERET.
	 */
	bl31_prepare_next_image_entry(); /* prepare the next image entry (BL33 in this walkthrough) */

	console_flush();

	/*
	 * Perform any platform specific runtime setup prior to cold boot exit
	 * from BL31
	 */
	bl31_plat_runtime_setup();
}

runtime_svc_init()函数初始化了一系列runtime service。这些runtime service是通过DECLARE_RT_SVC定义在rt_svc_descs section中的。在ATF中目前看到opteed_fast，opteed_std，arm_arch_svc，std_svc等service。在初始化opteed_fast service时，就对bl32_init进行了赋值，指向opteed_init()函数。

/*
 * BL32 init hook of the OP-TEE dispatcher: fetches the secure image entry
 * point, initialises this CPU's context from it and enters OP-TEE
 * synchronously.
 *
 * Returns the value produced by opteed_synchronous_sp_entry(), which is
 * asserted to be non-zero (the caller treats 0 as failure).
 */
static int32_t opteed_init(void)
{
	uint32_t linear_id = plat_my_core_pos();
	optee_context_t *optee_ctx = &opteed_sp_context[linear_id];
	entry_point_info_t *optee_entry_point;
	uint64_t rc;

	/*
	 * Get information about the OPTEE (BL32) image. Its
	 * absence is a critical failure.
	 */
	/*
	 * Entry point info of the BL32 image (pc and spsr are consumed later by
	 * cm_setup_context).
	 * NOTE(review): the original annotation said the spsr selects a return
	 * to EL1 "using the sp_el3 stack" - presumably the target EL's own stack
	 * pointer is meant; confirm against where the spsr is built.
	 */
	optee_entry_point = bl31_plat_get_next_image_ep_info(SECURE);
	assert(optee_entry_point);

	cm_init_my_context(optee_entry_point);  /* record the entry point in this CPU's context */

	/*
	 * Arrange for an entry into OPTEE. It will be returned via
	 * OPTEE_ENTRY_DONE case
	 */
	rc = opteed_synchronous_sp_entry(optee_ctx);
	assert(rc != 0);

	return rc;
}

在opteed_init函数中调用了cm_init_my_context，cm_init_my_context内容不多，又调用了cm_setup_context，我们需要关注一下这个函数，它确定了EL3返回时进入哪一个EL，以及返回地址：

/*
 * Initialise a cpu_context from an entry point description (excerpt; elided
 * parts are marked "omitted").
 *
 * ctx: context to fill in; it is zeroed first.
 * ep:  entry point info supplying the security state, pc, spsr and the
 *      argument registers.
 *
 * All values are written into the saved context structure, not into the
 * live system registers.
 */
void cm_setup_context(cpu_context_t *ctx, const entry_point_info_t *ep)
{
	unsigned int security_state;
	u_register_t scr_el3;
	el3_state_t *state;
	gp_regs_t *gp_regs;
	u_register_t sctlr_elx, actlr_elx;

	assert(ctx != NULL);

	security_state = GET_SECURITY_STATE(ep->h.attr);

	/* Clear any residual register values from the context */
	zeromem(ctx, sizeof(*ctx));

	/*
	 * SCR_EL3 was initialised during reset sequence in macro
	 * el3_arch_init_common. This code modifies the SCR_EL3 fields that
	 * affect the next EL.
	 *
	 * The following fields are initially set to zero and then updated to
	 * the required value depending on the state of the SPSR_EL3 and the
	 * Security state and entrypoint attributes of the next EL.
	 */
	... /* SCR_EL3 setup omitted */

	/*
	 * Initialise SCTLR_EL1 to the reset value corresponding to the target
	 * execution state setting all fields rather than relying of the hw.
	 * Some fields have architecturally UNKNOWN reset values and these are
	 * set to zero.
	 *
	 * SCTLR.EE: Endianness is taken from the entrypoint attributes.
	 *
	 * SCTLR.M, SCTLR.C and SCTLR.I: These fields must be zero (as
	 *  required by PSCI specification)
	 */
	... /* SCTLR_EL1 setup omitted */

	/* Enable WFE trap delay in SCR_EL3 if supported and configured */
	... /* omitted */

	/*
	 * Store the initialised SCTLR_EL1 value in the cpu_context - SCTLR_EL2
	 * and other EL2 registers are set up by cm_prepare_ns_entry() as they
	 * are not part of the stored cpu_context.
	 */
	write_ctx_reg(get_el1_sysregs_ctx(ctx), CTX_SCTLR_EL1, sctlr_elx); /* save the sctlr_elx value prepared above into the context */

	/*
	 * Base the context ACTLR_EL1 on the current value, as it is
	 * implementation defined. The context restore process will write
	 * the value from the context to the actual register and can cause
	 * problems for processor cores that don't expect certain bits to
	 * be zero.
	 */
	actlr_elx = read_actlr_el1();
	write_ctx_reg((get_el1_sysregs_ctx(ctx)), (CTX_ACTLR_EL1), (actlr_elx));

	/*
	 * Populate EL3 state so that we've the right context
	 * before doing ERET
	 */
	state = get_el3state_ctx(ctx);
	write_ctx_reg(state, CTX_SCR_EL3, scr_el3); /* save the scr_el3 value prepared above into the context */
	write_ctx_reg(state, CTX_ELR_EL3, ep->pc); /* image entry becomes the saved ELR_EL3, i.e. the ERET target */
	write_ctx_reg(state, CTX_SPSR_EL3, ep->spsr); /* saved SPSR_EL3; NOTE(review): original note said "return to EL1 using the sp_el3 stack" - confirm which SP the spsr selects */

	/*
	 * Store the X0-X7 value from the entrypoint into the context
	 * Use memcpy as we are in control of the layout of the structures
	 */
	gp_regs = get_gpregs_ctx(ctx);
	memcpy(gp_regs, (void *)&ep->args, sizeof(aapcs64_params_t));
}

cm_setup_context函数把EL3和EL1相关的寄存器值都保存在了一个context中，这个context有EL3寄存器的值也有EL1寄存器的值。

opteed_synchronous_sp_entry(optee_context_t *optee_ctx)函数定义如下：

/*
 * Enter OP-TEE synchronously: restores the secure EL1 system register
 * context, selects the secure context for the next ERET and calls
 * opteed_enter_sp().
 *
 * optee_ctx: per-CPU OP-TEE context; its c_rt_ctx slot must be 0 on entry.
 *
 * Returns the value obtained from opteed_enter_sp().
 */
uint64_t opteed_synchronous_sp_entry(optee_context_t *optee_ctx)
{
	uint64_t rc;

	assert(optee_ctx != NULL);
	assert(optee_ctx->c_rt_ctx == 0);

	/* Apply the Secure EL1 system register context and switch to it */
	assert(cm_get_context(SECURE) == &optee_ctx->cpu_ctx);
	cm_el1_sysregs_context_restore(SECURE);
	cm_set_next_eret_context(SECURE);

	/* c_rt_ctx receives the C runtime stack pointer saved by opteed_enter_sp. */
	rc = opteed_enter_sp(&optee_ctx->c_rt_ctx);
#if ENABLE_ASSERTIONS
	/* Reset the slot so the entry assertion above holds on the next call. */
	optee_ctx->c_rt_ctx = 0;
#endif

	return rc;
}

cm_el1_sysregs_context_restore函数根据context变量里保存的各个EL1寄存器值对相应的寄存器赋值；cm_set_next_eret_context函数设置下一次ERET所使用的context，其实就是让SP_EL3指向这个context（后文异常向量和smc_handler64的注释中也提到SP_EL3指向cpu context），供退出EL3时恢复寄存器使用

opteed_enter_sp函数定义如下：

/*
 * Save the C runtime state and leave EL3 towards OP-TEE.
 * x0: address of the slot that receives the current stack pointer.
 * The callee-saved registers x19 - x30 are stored in a frame of
 * OPTEED_C_RT_CTX_SIZE bytes below that stack pointer.
 */
func opteed_enter_sp
	/* Make space for the registers that we're going to save */
	mov	x3, sp
	str	x3, [x0, #0] /* x0 is &optee_ctx->c_rt_ctx, so this does optee_ctx->c_rt_ctx = sp */
	sub	sp, sp, #OPTEED_C_RT_CTX_SIZE

	/* Save callee-saved registers on to the stack */
	stp	x19, x20, [sp, #OPTEED_C_RT_CTX_X19]
	stp	x21, x22, [sp, #OPTEED_C_RT_CTX_X21]
	stp	x23, x24, [sp, #OPTEED_C_RT_CTX_X23]
	stp	x25, x26, [sp, #OPTEED_C_RT_CTX_X25]
	stp	x27, x28, [sp, #OPTEED_C_RT_CTX_X27]
	stp	x29, x30, [sp, #OPTEED_C_RT_CTX_X29] /* lr (x30) is saved too; it points back into opteed_synchronous_sp_entry */

	/* ---------------------------------------------
	 * Everything is setup now. el3_exit() will
	 * use the secure context to restore to the
	 * general purpose and EL3 system registers to
	 * ERET into OPTEE.
	 * ---------------------------------------------
	 */
	b	el3_exit /* ELR_EL3/SPSR_EL3 were prepared earlier (OP-TEE's _start on cold boot), so leaving EL3 starts OP-TEE */
endfunc opteed_enter_sp

OPTEE的_start

 /*
  * OP-TEE core entry point on the primary CPU (excerpt; elided parts are
  * marked "omitted"). Saves the boot arguments, installs the reset vector
  * table, relocates the embedded boot data, clears .bss, sets up the stacks,
  * console and MMU, calls boot_init_primary and finally reports completion
  * to the secure monitor with an SMC.
  */
 FUNC _start , :
	mov	x19, x0		/* Save pagable part address */
#if defined(CFG_DT_ADDR)
	ldr     x20, =CFG_DT_ADDR
#else
	mov	x20, x2		/* Save DT address */
#endif

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

#ifdef CFG_WITH_PAGER
... /* omitted */
#else
	/*
	 * The binary is built as:
	 * [Core, rodata and data] : In correct location
	 * [struct boot_embdata + data] : Should be moved to __end, first
	 * uint32_t tells the length of the struct + data
	 */
	adr_l	x0, __end		/* dst */
	adr_l	x1, __data_end		/* src */
	ldr	w2, [x1]		/* struct boot_embdata::total_len */
	/* Copy backwards (as memmove) in case we're overlapping */
	add	x0, x0, x2
	add	x1, x1, x2
	adr	x3, cached_mem_end
	str	x0, [x3]		/* remember the end of the copied data */
	adr_l	x2, __end

copy_init:
	/* Copy 16 bytes per iteration, walking down until dst reaches __end. */
	ldp	x3, x4, [x1, #-16]!
	stp	x3, x4, [x0, #-16]!
	cmp	x0, x2
	b.gt	copy_init
#endif

	/*
	 * Clear .bss, this code obviously depends on the linker keeping
	 * start/end of .bss at least 8 byte aligned.
	 */
	adr_l	x0, __bss_start
	adr_l	x1, __bss_end
clear_bss:
	str	xzr, [x0], #8
	cmp	x0, x1
	b.lt	clear_bss

#ifdef CFG_VIRTUALIZATION
... /* omitted */
#endif

	/* Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	bl	thread_init_thread_core_local

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	/*
	 * Invalidate dcache for all memory used during initialization to
	 * avoid nasty surprices when the cache is turned on. We must not
	 * invalidate memory not used by OP-TEE since we may invalidate
	 * entries used by for instance ARM Trusted Firmware.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0		/* length = cached_mem_end - __text_start */
	bl	dcache_cleaninv_range

	/* Enable Console */
	bl	console_init

#ifdef CFG_CORE_ASLR
... /* omitted */
#else
	mov	x0, #0
#endif

	adr	x1, boot_mmu_config
	bl	core_init_mmu_map

#ifdef CFG_CORE_ASLR
... /* omitted */
#endif

	bl	__get_core_pos
	bl	enable_mmu
#ifdef CFG_CORE_ASLR
... /* omitted */
#endif

	mov	x0, x19		/* pagable part address */
	mov	x1, #-1
	mov	x2, x20		/* DT address */
	bl	boot_init_primary

	/*
	 * In case we've touched memory that secondary CPUs will use before
	 * they have turned on their D-cache, clean and invalidate the
	 * D-cache before exiting to normal world.
	 */
	adr_l	x0, __text_start
	ldr	x1, cached_mem_end
	sub	x1, x1, x0
	bl	dcache_cleaninv_range


	/*
	 * Clear current thread id now to allow the thread to be reused on
	 * next entry. Matches the thread_init_boot_thread in
	 * boot.c.
	 */
#ifndef CFG_VIRTUALIZATION
	bl 	thread_clr_boot_thread
#endif

#ifdef CFG_CORE_FFA
... /* omitted */
#else
	/*
	 * Pass the vector address returned from main_init
	 * Compensate for the load offset since cpu_on_handler() is
	 * called with MMU off.
	 */
	ldr	x0, boot_mmu_config + CORE_MMU_CONFIG_LOAD_OFFSET
	adr	x1, thread_vector_table
	sub	x1, x1, x0 /* second SMC argument: thread_vector_table address minus the load offset */
	mov	x0, #TEESMC_OPTEED_RETURN_ENTRY_DONE /* first SMC argument: the SMC function ID */
	smc	#0
	b	.	/* SMC should not return */
#endif
END_FUNC _start
DECLARE_KEEP_INIT _start

_start函数在一系列设置后又通过smc进入了EL3，由于EL3已经把异常向量表设置为runtime_exceptions，所以会转入对应的异常向量处理。smc指令产生的是同步异常（不是中断），并且来自AArch64的低异常等级，因此会进入sync_exception_aarch64向量，并在其中调用handle_sync_exception

/*
 * EL3 vector entry for synchronous exceptions (SMCs and unhandled traps,
 * per the comment below); hands over to the handle_sync_exception macro.
 */
vector_entry sync_exception_aarch64
	/*
	 * This exception vector will be the entry point for SMCs and traps
	 * that are unhandled at lower ELs most commonly. SP_EL3 should point
	 * to a valid cpu context where the general purpose and system register
	 * state can be saved.
	 */
	apply_at_speculative_wa
	check_and_unmask_ea
	handle_sync_exception
end_vector_entry sync_exception_aarch64


	/* ---------------------------------------------------------------------
	 * This macro handles Synchronous exceptions.
	 * Only SMC exceptions are supported.
	 * The exception class is extracted from ESR_EL3 into x30 and used to
	 * pick smc_handler32, smc_handler64 or the external-abort path.
	 * ---------------------------------------------------------------------
	 */
	.macro	handle_sync_exception
#if ENABLE_RUNTIME_INSTRUMENTATION
.../* omitted */
#endif

	mrs	x30, esr_el3
	ubfx	x30, x30, #ESR_EC_SHIFT, #ESR_EC_LENGTH

	/* Handle SMC exceptions separately from other synchronous exceptions */
	cmp	x30, #EC_AARCH32_SMC
	b.eq	smc_handler32

	cmp	x30, #EC_AARCH64_SMC
	b.eq	smc_handler64 /* an AArch64 SMC continues in smc_handler64 */

	/* Synchronous exceptions other than the above are assumed to be EA */
	/* x30 was used as scratch above; reload the saved LR from the context. */
	ldr	x30, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]
	b	enter_lower_el_sync_ea
	.endm

/*
 * AArch64 SMC handler (excerpt; elided parts are marked "omitted"). Saves the
 * caller's registers, switches to the EL3 runtime stack, looks up the
 * runtime service handler from the SMC function ID in x0, calls it and then
 * branches to el3_exit.
 */
smc_handler64:
	/* NOTE: The code below must preserve x0-x4 */

	/*
	 * Save general purpose and ARMv8.3-PAuth registers (if enabled).
	 * If Secure Cycle Counter is not disabled in MDCR_EL3 when
	 * ARMv8.5-PMU is implemented, save PMCR_EL0 and disable Cycle Counter.
	 */
	bl	save_gp_pmcr_pauth_regs

#if ENABLE_PAUTH
... /* omitted */
#endif

	/*
	 * Populate the parameters for the SMC handler.
	 * We already have x0-x4 in place. x5 will point to a cookie (not used
	 * now). x6 will point to the context structure (SP_EL3) and x7 will
	 * contain flags we need to pass to the handler.
	 */
	mov	x5, xzr
	mov	x6, sp

	/*
	 * Restore the saved C runtime stack value which will become the new
	 * SP_EL0 i.e. EL3 runtime stack. It was saved in the 'cpu_context'
	 * structure prior to the last ERET from EL3.
	 */
	ldr	x12, [x6, #CTX_EL3STATE_OFFSET + CTX_RUNTIME_SP]

	/* Switch to SP_EL0 */
	msr	spsel, #MODE_SP_EL0

	/*
	 * Save the SPSR_EL3, ELR_EL3, & SCR_EL3 in case there is a world
	 * switch during SMC handling.
	 * TODO: Revisit if all system registers can be saved later.
	 */
	mrs	x16, spsr_el3
	mrs	x17, elr_el3
	mrs	x18, scr_el3
	stp	x16, x17, [x6, #CTX_EL3STATE_OFFSET + CTX_SPSR_EL3]
	str	x18, [x6, #CTX_EL3STATE_OFFSET + CTX_SCR_EL3]

	/* Copy SCR_EL3.NS bit to the flag to indicate caller's security */
	bfi	x7, x18, #0, #1

	mov	sp, x12

	/* Get the unique owning entity number */ /* x0 holds the SMC function ID */
	ubfx	x16, x0, #FUNCID_OEN_SHIFT, #FUNCID_OEN_WIDTH
	ubfx	x15, x0, #FUNCID_TYPE_SHIFT, #FUNCID_TYPE_WIDTH
	orr	x16, x16, x15, lsl #FUNCID_OEN_WIDTH /* OEN combined with the call type selects the service (opteed_fast in this walkthrough) */

	/* Load descriptor index from array of indices */
	adrp	x14, rt_svc_descs_indices
	add	x14, x14, :lo12:rt_svc_descs_indices
	ldrb	w15, [x14, x16]

	/* Any index greater than 127 is invalid. Check bit 7. */
	tbnz	w15, 7, smc_unknown

	/*
	 * Get the descriptor using the index
	 * x11 = (base + off), w15 = index
	 *
	 * handler = (base + off) + (index << log2(size))
	 */
	adr	x11, (__RT_SVC_DESCS_START__ + RT_SVC_DESC_HANDLE)
	lsl	w10, w15, #RT_SVC_SIZE_LOG2
	ldr	x15, [x11, w10, uxtw] /* x15 = that service's handler (opteed_smc_handler in this walkthrough) */

	/*
	 * Call the Secure Monitor Call handler and then drop directly into
	 * el3_exit() which will program any remaining architectural state
	 * prior to issuing the ERET to the desired lower EL.
	 */
#if DEBUG
	cbz	x15, rt_svc_fw_critical_error
#endif
	blr	x15

	b	el3_exit

x15是opteed_smc_handler的地址，即跳转到opteed_smc_handler。

/*
 * SMC handler of the OP-TEE dispatcher (excerpt; elided parts are marked
 * "omitted"). Only the TEESMC_OPTEED_RETURN_ENTRY_DONE case is shown: it
 * records the vector table passed in x1, registers the power management
 * hooks and the S-EL1 interrupt handler, and returns to the C runtime
 * context that originally entered OP-TEE.
 */
static uintptr_t opteed_smc_handler(uint32_t smc_fid,
			 u_register_t x1,
			 u_register_t x2,
			 u_register_t x3,
			 u_register_t x4,
			 void *cookie,
			 void *handle,
			 u_register_t flags)
{
... /* omitted */

	/*
	 * Returning from OPTEE
	 */

	switch (smc_fid) {
	/*
	 * OPTEE has finished initialising itself after a cold boot
	 */
	case TEESMC_OPTEED_RETURN_ENTRY_DONE:
		/*
		 * Stash the OPTEE entry points information. This is done
		 * only once on the primary cpu
		 */
		assert(optee_vector_table == NULL);
		optee_vector_table = (optee_vectors_t *) x1;

		/* A NULL table skips the registrations below. */
		if (optee_vector_table) {
			set_optee_pstate(optee_ctx->state, OPTEE_PSTATE_ON);

			/*
			 * OPTEE has been successfully initialized.
			 * Register power management hooks with PSCI
			 */
			psci_register_spd_pm_hook(&opteed_pm);

			/*
			 * Register an interrupt handler for S-EL1 interrupts
			 * when generated during code executing in the
			 * non-secure state.
			 */
			flags = 0;
			set_interrupt_rm_flag(flags, NON_SECURE);
			rc = register_interrupt_type_handler(INTR_TYPE_S_EL1,
						opteed_sel1_interrupt_handler,
						flags);
			if (rc)
				panic();
		}

		/*
		 * OPTEE reports completion. The OPTEED must have initiated
		 * the original request through a synchronous entry into
		 * OPTEE. Jump back to the original C runtime context.
		 */
		opteed_synchronous_sp_exit(optee_ctx, x1);
		break;

... /* omitted */
}

opteed_smc_handler函数需要留意psci_register_spd_pm_hook(&opteed_pm)，向PSCI注册了一个opteed_pm，在PSCI启动从核的时候会通过这个callback进入从核的OPTEE。

opteed_smc_handler调用了opteed_synchronous_sp_exit。opteed_synchronous_sp_exit调用了opteed_exit_sp函数：

/*
 * Counterpart of opteed_enter_sp: switches back to the saved C runtime stack
 * and returns to the caller of opteed_enter_sp.
 * x0: stack pointer value saved by opteed_enter_sp.
 * x1: value handed back to that caller as the return value.
 */
func opteed_exit_sp
	/* Restore the previous stack */
	mov	sp, x0

	/* Restore callee-saved registers from the frame saved below that stack pointer */
	ldp	x19, x20, [x0, #(OPTEED_C_RT_CTX_X19 - OPTEED_C_RT_CTX_SIZE)]
	ldp	x21, x22, [x0, #(OPTEED_C_RT_CTX_X21 - OPTEED_C_RT_CTX_SIZE)]
	ldp	x23, x24, [x0, #(OPTEED_C_RT_CTX_X23 - OPTEED_C_RT_CTX_SIZE)]
	ldp	x25, x26, [x0, #(OPTEED_C_RT_CTX_X25 - OPTEED_C_RT_CTX_SIZE)]
	ldp	x27, x28, [x0, #(OPTEED_C_RT_CTX_X27 - OPTEED_C_RT_CTX_SIZE)]
	ldp	x29, x30, [x0, #(OPTEED_C_RT_CTX_X29 - OPTEED_C_RT_CTX_SIZE)] /* lr (x30) is restored here */

	/* ---------------------------------------------
	 * This should take us back to the instruction
	 * after the call to the last opteed_enter_sp().
	 * Place the second parameter to x0 so that the
	 * caller will see it as a return value from the
	 * original entry call
	 * ---------------------------------------------
	 */
	mov	x0, x1 /* on the cold-boot path described here, x1 is the thread_vector_table address */
	ret
endfunc opteed_exit_sp

由于恢复了lr寄存器，因此不会再return回smc_handler64函数，而是opteed_synchronous_sp_entry调用opteed_enter_sp的地方。因此return了以后，就是在opteed_synchronous_sp_entry继续执行。

opteed_synchronous_sp_entry返回后回到opteed_init()，opteed_init（bl32_init）返回后回到bl31_main函数。

bl31_main会继续执行bl31_prepare_next_image_entry然后去启动BL33。这部分就不在本文讨论范围内了。

启动从核

后面准备描述一下ATF如何在从核启动OPTEE。从我阅读OPTEE源码来看，官方没有给出common的流程，我理解这部分需要厂商在port的时候根据平台实现关键函数。我这里以qemu的实现来做说明。

前面提到ATF中有一些runtime service在启动的时候会被初始化，其中有一个service叫做std_svc，在这里需要拿出来说一下。这个service的setup函数（即初始化函数）如下


/*
 * Setup Standard Services (excerpt; elided parts are marked "omitted").
 * Returns 0 on success and 1 when psci_setup() does not report
 * PSCI_E_SUCCESS.
 */
static int32_t std_svc_setup(void)
{
	uintptr_t svc_arg;
	int ret = 0;

	svc_arg = get_arm_std_svc_args(PSCI_FID_MASK); /* PSCI library arguments; they carry the secondary-core entry address */
	assert(svc_arg);

	/*
	 * PSCI is one of the specifications implemented as a Standard Service.
	 * The `psci_setup()` also does EL3 architectural setup.
	 */
	if (psci_setup((const psci_lib_args_t *)svc_arg) != PSCI_E_SUCCESS) {
		ret = 1;
	}

... /* omitted */

	return ret;
}

其中有一个比较重要的函数是psci_setup，这个函数初始化了很多关于PSCI的功能，为Linux通过PSCI启动从核做好了准备。这里需要关注一个函数的调用get_arm_std_svc_args(PSCI_FID_MASK)。这个函数获取到了从核启动的地址。内容如下：

/*
 * Return the address of the argument block handed to the PSCI library.
 *
 * svc_mask: must be PSCI_FID_MASK, since PSCI is the only ARM Standard
 *           Service implemented.
 */
uintptr_t get_arm_std_svc_args(unsigned int svc_mask)
{
	/* Static PSCI argument block, built with bl31_warm_entrypoint as its entry point. */
	DEFINE_STATIC_PSCI_LIB_ARGS_V1(psci_args, bl31_warm_entrypoint);
	uintptr_t args_addr = (uintptr_t)&psci_args;

	assert(svc_mask == PSCI_FID_MASK);

	return args_addr;
}

然后 std_svc_setup把svc_arg做为参数传递给了psci_setup

/*
 * PSCI library setup (excerpt; elided parts are marked "omitted").
 * lib_args: PSCI library arguments; its mailbox_ep member is passed to the
 *           platform as the secure entry point.
 * The platform fills in psci_plat_pm_ops, which must be non-NULL afterwards.
 */
int __init psci_setup(const psci_lib_args_t *lib_args)
{
... /* omitted */

	(void) plat_setup_psci_ops((uintptr_t)lib_args->mailbox_ep,
				   &psci_plat_pm_ops);
	assert(psci_plat_pm_ops != NULL);

... /* omitted */

	return 0;
}

在qemu平台上，plat_setup_psci_ops的实现如下：

/*
 * QEMU platform hook called during PSCI setup.
 *
 * sec_entrypoint: secure entry address; written to the trusted mailbox and
 *                 remembered in secure_entrypoint.
 * psci_ops:       receives a pointer to the QEMU PSCI platform operations.
 *
 * Always returns 0.
 */
int plat_setup_psci_ops(uintptr_t sec_entrypoint,
			const plat_psci_ops_t **psci_ops)
{
	uintptr_t *mailbox_slot = (void *) PLAT_QEMU_TRUSTED_MAILBOX_BASE;

	/* Publish the entry address in the mailbox word. */
	mailbox_slot[0] = sec_entrypoint;

	*psci_ops = &plat_qemu_psci_pm_ops;
	secure_entrypoint = (unsigned long) sec_entrypoint;

	return 0;
}

上面的函数做了2个事情，一个是初始化了mailbox base地址上的值，也就是从核的启动地址，即bl31_warm_entrypoint函数；还有一个是初始化了psci_plat_pm_ops，使其指向plat_qemu_psci_pm_ops

/*
 * PSCI platform operations for the QEMU platform. plat_setup_psci_ops()
 * hands a pointer to this table back to the PSCI library.
 */
static const plat_psci_ops_t plat_qemu_psci_pm_ops = {
	.cpu_standby = qemu_cpu_standby,
	.pwr_domain_on = qemu_pwr_domain_on,
	.pwr_domain_off = qemu_pwr_domain_off,
	.pwr_domain_pwr_down_wfi = qemu_pwr_domain_pwr_down_wfi,
	.pwr_domain_suspend = qemu_pwr_domain_suspend,
	.pwr_domain_on_finish = qemu_pwr_domain_on_finish,
	.pwr_domain_suspend_finish = qemu_pwr_domain_suspend_finish,
	.system_off = qemu_system_off,
	.system_reset = qemu_system_reset,
	.validate_power_state = qemu_validate_power_state,
	.validate_ns_entrypoint = qemu_validate_ns_entrypoint
};

到此为止，psci的初始化就完成了，后面就等Linux通过PSCI启动从核了。

Linux通过PSCI启动从核可以参考https://blog.csdn.net/tiantao2012/article/details/72622103。根据文中描述，Linux启动从核，最后会通过SMC调用到BL31来启动从核。由前文分析可知，BL31的异常向量表是runtime_exceptions，SMC会经过smc_handler64分发到对应的service，这里选中的service就是std_svc，处理函数是std_svc_smc_handler。std_svc_smc_handler调用了psci_smc_handler，在aarch64平台psci_smc_handler又调用了psci_cpu_on，psci_cpu_on又调用了psci_cpu_on_start，psci_cpu_on_start又通过psci_plat_pm_ops变量调用了它的pwr_domain_on函数，就是qemu_pwr_domain_on

/*
 * Power on the core identified by mpidr on the QEMU platform.
 *
 * Marks the core's entry in the hold area with PLAT_QEMU_HOLD_STATE_GO and
 * issues sev(). Always returns PSCI_E_SUCCESS.
 */
static int qemu_pwr_domain_on(u_register_t mpidr)
{
	uint64_t *hold_slots = (uint64_t *)PLAT_QEMU_HOLD_BASE;
	unsigned core_idx = plat_core_pos_by_mpidr(mpidr);

	/* Release the target core's hold entry, then signal an event. */
	*(hold_slots + core_idx) = PLAT_QEMU_HOLD_STATE_GO;
	sev();

	return PSCI_E_SUCCESS;
}

可以看到，qemu中调用了sev唤醒了从核，并且从核从bl31_warm_entrypoint函数启动了。

/*
 * BL31 warm-boot entry point (excerpt; elided parts are marked "omitted").
 * Runs the common EL3 entry macro with most initialisations disabled, calls
 * psci_warmboot_entrypoint and leaves EL3 through el3_exit.
 */
func bl31_warm_entrypoint
#if ENABLE_RUNTIME_INSTRUMENTATION

	/*
	 * This timestamp update happens with cache off.  The next
	 * timestamp collection will need to do cache maintenance prior
	 * to timestamp update.
	 */
	pmf_calc_timestamp_addr rt_instr_svc, RT_INSTR_EXIT_HW_LOW_PWR
	mrs	x1, cntpct_el0
	str	x1, [x0]
#endif

	/*
	 * On the warm boot path, most of the EL3 initialisations performed by
	 * 'el3_entrypoint_common' must be skipped:
	 *
	 *  - Only when the platform bypasses the BL1/BL31 entrypoint by
	 *    programming the reset address do we need to initialise SCTLR_EL3.
	 *    In other cases, we assume this has been taken care by the
	 *    entrypoint code.
	 *
	 *  - No need to determine the type of boot, we know it is a warm boot.
	 *
	 *  - Do not try to distinguish between primary and secondary CPUs, this
	 *    notion only exists for a cold boot.
	 *
	 *  - No need to initialise the memory or the C runtime environment,
	 *    it has been done once and for all on the cold boot path.
	 */
	el3_entrypoint_common					\
		_init_sctlr=PROGRAMMABLE_RESET_ADDRESS		\
		_warm_boot_mailbox=0				\
		_secondary_cold_boot=0				\
		_init_memory=0					\
		_init_c_runtime=0				\
		_exception_vectors=runtime_exceptions		\
		_pie_fixup_size=0

... /* omitted */

    bl	psci_warmboot_entrypoint

... /* omitted */

	b	el3_exit
endfunc bl31_warm_entrypoint

可以看到调用的还是el3_entrypoint_common，只不过_warm_boot_mailbox、_secondary_cold_boot、_init_memory、_init_c_runtime都设为了0：热启动路径不需要再判断启动类型，也不再区分主核和从核，内存和C运行环境已经在冷启动时初始化过，不需要重复初始化。psci_warmboot_entrypoint函数会导致OPTEE的初始化，需要关注

/*
 * PSCI warm-boot handler (excerpt; elided parts are marked "omitted").
 * Chooses between the "CPU on" and "resume from suspend" finishers based on
 * this CPU's affinity state.
 */
void psci_warmboot_entrypoint(void)
{

... /* omitted */

	/*
	 * This CPU could be resuming from suspend or it could have just been
	 * turned on. To distinguish between these 2 cases, we examine the
	 * affinity state of the CPU:
	 *  - If the affinity state is ON_PENDING then it has just been
	 *    turned on.
	 *  - Else it is resuming from suspend.
	 *
	 * Depending on the type of warm reset identified, choose the right set
	 * of power management handler and perform the generic, architecture
	 * and platform specific handling.
	 */
	if (psci_get_aff_info_state() == AFF_STATE_ON_PENDING)
		psci_cpu_on_finish(cpu_idx, &state_info);
	else
		psci_cpu_suspend_finish(cpu_idx, &state_info);

... /* omitted */
}

从注释中可以看到，刚启动的从核，应该是走AFF_STATE_ON_PENDING分支，调用了psci_cpu_on_finish函数

/*
 * Finish powering on a CPU (excerpt; elided parts are marked "omitted").
 * The part shown calls the svc_on_finish hook of the registered Secure
 * Payload Dispatcher, if there is one, with argument 0.
 */
void psci_cpu_on_finish(unsigned int cpu_idx, const psci_power_state_t *state_info)
{

... /* omitted */

	/*
	 * Call the cpu on finish handler registered by the Secure Payload
	 * Dispatcher to let it do any bookeeping. If the handler encounters an
	 * error, it's expected to assert within
	 */
	if ((psci_spd_pm != NULL) && (psci_spd_pm->svc_on_finish != NULL))
		psci_spd_pm->svc_on_finish(0);

... /* omitted */
}

在前面曾提到，主核的初始化中，定义了psci_spd_pm的值，是opteed_pm，这边就是调用了opteed_pm->svc_on_finish，其实就是opteed_cpu_on_finish_handler

/*
 * "CPU on finish" hook of the OP-TEE dispatcher.
 *
 * Builds an entry point description targeting the cpu_on_entry slot of
 * OP-TEE's vector table, initialises this CPU's secure context from it and
 * enters OP-TEE synchronously. Panics if OP-TEE reports a non-zero result;
 * otherwise marks OP-TEE as ON for this CPU.
 *
 * unused: ignored.
 */
static void opteed_cpu_on_finish_handler(u_register_t unused)
{
	uint32_t core_idx = plat_my_core_pos();
	optee_context_t *optee_ctx = &opteed_sp_context[core_idx];
	entry_point_info_t cpu_on_ep;
	int32_t status = 0;

	/* The vector table must be known and OP-TEE must be OFF on this CPU. */
	assert(optee_vector_table);
	assert(get_optee_pstate(optee_ctx->state) == OPTEE_PSTATE_OFF);

	opteed_init_optee_ep_state(&cpu_on_ep, opteed_rw,
				(uint64_t)&optee_vector_table->cpu_on_entry,
				0, 0, 0, optee_ctx);

	/* Set up this CPU's secure context, then enter OP-TEE. */
	cm_init_my_context(&cpu_on_ep);
	status = opteed_synchronous_sp_entry(optee_ctx);

	/* A non-zero response means something went wrong inside OP-TEE. */
	if (status != 0) {
		panic();
	}

	/* Record that OP-TEE is now running on this CPU. */
	set_optee_pstate(optee_ctx->state, OPTEE_PSTATE_ON);
}

可以发现，这个函数的实现和主核进入OPTEE的函数实现类似，并且注释中可以看到opteed_synchronous_sp_entry进入了OPTEE。后面的流程和主核相同。其中需要注意的是跳转到OPTEE的entrypoint不是_start了，而是optee_vector_table->cpu_on_entry。cpu_on_entry的指针指向的是vector_cpu_on_entry函数

/*
 * "CPU on" entry of OP-TEE: runs cpu_on_handler, then reports its return
 * value (in x1) to the secure monitor with TEESMC_OPTEED_RETURN_ON_DONE.
 */
LOCAL_FUNC vector_cpu_on_entry , : , .identity_map
	bl	cpu_on_handler
	mov	x1, x0		/* handler result becomes the second SMC argument */
	ldr	x0, =TEESMC_OPTEED_RETURN_ON_DONE
	smc	#0
	b	.	/* SMC should not return */
END_FUNC vector_cpu_on_entry

主要是执行了cpu_on_handler函数，后面的逻辑是返回处理

/*
 * Per-CPU bring-up on "CPU on": installs the reset vector table, sets
 * SCTLR_EL1, enables aborts and the MMU, sets up the stacks and then hands
 * the original x0/x1 arguments to boot_cpu_on_handler.
 */
FUNC cpu_on_handler , :
	/* Preserve the two arguments and the return address across the calls below. */
	mov	x19, x0
	mov	x20, x1
	mov	x21, x30

	adr	x0, reset_vect_table
	msr	vbar_el1, x0
	isb

	set_sctlr_el1
	isb

	/* Enable aborts now that we can receive exceptions */
	msr	daifclr, #DAIFBIT_ABT

	bl	__get_core_pos
	bl	enable_mmu

	/* Setup SP_EL0 and SP_EL1, SP will be set to SP_EL0 */
	set_sp

	mov	x0, x19
	mov	x1, x20
#ifdef CFG_CORE_FFA
	bl	boot_cpu_on_handler
	b	thread_ffa_msg_wait
#else
	/* Tail call: boot_cpu_on_handler returns directly to our caller. */
	mov	x30, x21
	b	boot_cpu_on_handler
#endif
END_FUNC cpu_on_handler
DECLARE_KEEP_PAGER cpu_on_handler

cpu_on_handler在做过和primary core类似的处理后跳转到boot_cpu_on_handler函数。boot_cpu_on_handler函数调用的是init_secondary_helper函数

/*
 * Initialise OP-TEE on a secondary CPU.
 *
 * nsec_entry: passed on to init_sec_mon().
 *
 * Masks all exceptions, then runs the per-CPU initialisation steps in order
 * and logs the start and end of the sequence.
 */
static void init_secondary_helper(unsigned long nsec_entry)
{
	IMSG("Secondary CPU %zu initializing", get_core_pos());

	/*
	 * Mask asynchronous exceptions before switch to the thread vector
	 * as the thread handler requires those to be masked while
	 * executing with the temporary stack. The thread subsystem also
	 * asserts that the foreign interrupts are blocked when using most of
	 * its functions.
	 */
	thread_set_exceptions(THREAD_EXCP_ALL);

	secondary_init_cntfrq();
	thread_init_per_cpu();
	init_sec_mon(nsec_entry);
	main_secondary_init_gic();
	init_vfp_sec();
	init_vfp_nsec();

	IMSG("Secondary CPU %zu switching to normal world boot", get_core_pos());
}

init_secondary_helper调用结束后，OPTEE从核启动就结束了