目录
2.1.虚拟机指令翻译为中间码 - translator_loop()
2.1.1.生成固定的首条指令 - gen_tb_start()
2.1.2.插入一条空指令 - tricore_tr_insn_start()
2.1.3.指令翻译为中间码 - tricore_tr_translate_insn()
2.1.3.1.指令查找 - decode_32Bit_opc()
2.1.3.2.指令生成 - gen_compute_branch()
2.1.4.停止翻译 - tricore_tr_tb_stop()
2.1.5.插入一条 TB 块退出指令 - gen_tb_end()
2.2.中间码翻译为宿主机指令 - tcg_gen_code()
4.1.1.计算地址偏移 - tcg_splitwx_diff
4.1.2.指令执行 - TCG 对应的 tcg_qemu_tb_exec
4.2.TCI 执行 - TCI 对应的 tcg_qemu_tb_exec
1.流程概览
cpu_exec_loop() 模拟 CPU 循环执行指令的流程,每次查找并执行 TB 块之前 (本例中每个 TB 块仅包含一条指令),均会检查异常和中断请求
本篇以 TriCore 指令集的翻译及执行流程为例,关注以下三点:
-
指令翻译;
-
TB 块查找;
-
指令执行;
1.1.主循环 - cpu_exec_loop()
// accel/tcg/cpu-exec.c
static int __attribute__((noinline))
cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
{
...
while (!cpu_handle_exception(cpu, &ret)) { // 检查异常请求
...
while (!cpu_handle_interrupt(cpu, &last_tb)) { // 检查中断请求
...
cpu_get_tb_cpu_state(cpu_env(cpu), &pc, &cs_base, &flags); // 获取 env->PC
...
tb = tb_lookup(cpu, pc, cs_base, flags, cflags); // 查找对应的 TB 块
if (tb == NULL) { // 未找到对应 TB --> 生成新的 TB
...
tb = tb_gen_code(cpu, pc, cs_base, flags, cflags); // 生成 TB
...
}
...
cpu_loop_exec_tb(cpu, tb, pc, &last_tb, &tb_exit); // 运行 TB 块中的指令
指令翻译执行的大致流程如下图所示:
1.2.TriCore 指令翻译 OP
指令翻译过程中,需要读取并解析 ELF 中的指令,各平台需要定义相关的 OP,以便于将指令翻译为中间码 IR (Intermediate Representation),TriCore 架构的指令翻译 OP 如下:
// target/tricore/translate.c
static const TranslatorOps tricore_tr_ops = {
.init_disas_context = tricore_tr_init_disas_context,
.tb_start = tricore_tr_tb_start,
.insn_start = tricore_tr_insn_start,
.translate_insn = tricore_tr_translate_insn,
.tb_stop = tricore_tr_tb_stop,
};
2.指令翻译 - tb_gen_code()
以下面的 TriCore 用户程序生成的汇编指令 JGE 为例:
该指令为 JGE,其地址 (PC 指针的值) 为 80001386,具体含义为:判断 D[2] 寄存器中的值是否大于等于 1,如果是则跳转至 80001316 处执行
QEMU 翻译虚拟机指令的方法为 tb_gen_code(),其为指令分配 TB 块,并将其翻译为宿主机指令
// CallStack
cpu_exec_loop()
|--> tb_gen_code()
-------------------------------------------------------------------------------------------------------
// accel/tcg/translate-all.c
TranslationBlock *tb_gen_code(CPUState *cpu, vaddr pc, uint64_t cs_base, uint32_t flags, int cflags)
{
...
phys_pc = get_page_addr_code_hostp(env, pc, &host_pc); // phys_pc = 0x601386
...
tb = tcg_tb_alloc(tcg_ctx); // 生成一个空的TB,其中的值全为0 --> tb = (TranslationBlock *) 0x7fffb015b700
...
gen_code_buf = tcg_ctx->code_gen_ptr; // 这里初始的buffer值全部为空 --> "0"
tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf); // 0x7fff7015b7c0 <code_gen_buffer+1423251> 该内存目前为空
...
tb->pc = pc; // tb->pc = 0x80001386
tb->cs_base = cs_base; // tb->cs_base = 0x0
tb->flags = flags; // tb->flags = 0x0
tb->cflags = cflags; // tb->cflags = 0xff000201
tb_set_page_addr0(tb, phys_pc);
|--> tb->page_addr[0] = addr; // tb->page_addr[0] = 0x601386
tb_set_page_addr1(tb, -1);
|--> tb->page_addr[1] = addr; // tb->page_addr[1] = 0xffffffffffffffff
// tb->page_addr = {0x601386, 0xffffffffffffffff}
...
tcg_ctx->gen_tb = tb; // tcg_ctx->gen_tb = (TranslationBlock *) 0x7fffb015b700
tcg_ctx->addr_type = TARGET_LONG_BITS == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64; // tcg_ctx->addr_type = TCG_TYPE_I32
...
gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti); // ... tb=0x7fffb015b700, pc=0x80001386 ...
|--> gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
|--> translator_loop(cs, tb, max_insns, pc, host_pc, &tricore_tr_ops, &ctx.base); // 生成中间码!
|--> return tcg_gen_code(tcg_ctx, tb, pc);
2.1.虚拟机指令翻译为中间码 - translator_loop()
// CallStack
tb_gen_code()
|--> setjmp_gen_code()
|--> gen_intermediate_code()
|--> translator_loop()
-------------------------------------------------------------------------------------
// accel/tcg/translator.c
void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns, vaddr pc,
void *host_pc, const TranslatorOps *ops, DisasContextBase *db)
{
...
/* Initialize DisasContext */
db->tb = tb; // db->tb = (TranslationBlock *) 0x7fffb015b700
db->pc_first = pc; // db->pc_first = 0x80001386
db->pc_next = pc; // db->pc_next = 0x80001386
db->is_jmp = DISAS_NEXT;
db->num_insns = 0;
db->max_insns = *max_insns; // db->max_insns = 0x1
db->singlestep_enabled = cflags & CF_SINGLE_STEP; // db->singlestep_enabled = 0x0
...
db->host_addr[0] = host_pc; // db->host_addr[0] = (void *) 0x7ffff4e01386
db->host_addr[1] = NULL;
ops->init_disas_context(db, cpu); // 注意!此时 DisasContext ctx 中的 pc_succ_insn 和 opcode 尚未设置
|--> tricore_tr_init_disas_context() // target/tricore/translate.c
|--> DisasContext *ctx = container_of(dcbase, DisasContext, base);
|--> ctx->mem_idx = cpu_mmu_index(env, false); // ctx->mem_idx = 0x0
|--> ctx->priv = FIELD_EX32(tb_flags, TB_FLAGS, PRIV); // ctx->priv = 0x2
|--> ctx->features = env->features; // ctx->features = 0x1f
...
/* Start translating. */
icount_start_insn = gen_tb_start(db, cflags);
ops->tb_start(db, cpu); // Tricore 该 op 为空,不执行任何操作
...
while (true) {
*max_insns = ++db->num_insns; // *max_insns = 0x1
ops->insn_start(db, cpu);
|--> tricore_tr_insn_start() // 生成一条空指令,通过该空指令可以看到虚拟机当前正在翻译的指令地址
...
ops->translate_insn(db, cpu);
|--> tricore_tr_translate_insn() // 生成中间码
...
}
ops->tb_stop(db, cpu);
|--> tricore_tr_tb_stop()
gen_tb_end(tb, cflags, icount_start_insn, db->num_insns); // 生成 TCG 终章
2.1.1.生成固定的首条指令 - gen_tb_start()
生成 TCG 序言,包含两条指令:
-
tcg_gen_ld_i32: INDEX_op_ld_i32 - 从 ArchCPU 中读取 icount_decr 至 count,icount_decr 为 -1 时,强制停止 TCG 执行与该 CPU 链接的 TB,并返回顶层循环;
-
tcg_gen_brcondi_i32:INDEX_op_brcond_i32 - 当 count 小于 0 时跳转至 tcg_ctx->exitreq_label;
// CallStack
gen_intermediate_code()
|--> translator_loop()
|--> gen_tb_start()
--------------------------------------------------------------------------------------------------
// accel/tcg/translator.c
static TCGOp *gen_tb_start(DisasContextBase *db, uint32_t cflags)
{
TCGv_i32 count = NULL;
TCGOp *icount_start_insn = NULL; // TCGOp 保存该条指令的操作码与操作数
if ((cflags & CF_USE_ICOUNT) || !(cflags & CF_NOIRQ)) { // cflags = 0xff000201
count = tcg_temp_new_i32(); // 在 tcg_ctx 中分配一个 TCGv_i32 类型的变量 count
tcg_gen_ld_i32(count, tcg_env, offsetof(ArchCPU, parent_obj.neg.icount_decr.u32) - offsetof(ArchCPU, env));
|--> tcg_gen_ldst_op_i32(INDEX_op_ld_i32, ret, arg2, offset); // ret = count = 0xc10, arg2 = tcg_env = 0x2a8, offset = 0xfffffffffffffff0
|--> tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_ptr_arg(base), offset); // 生成带 3 个参数的中间码
|--> TCGOp *op = tcg_emit_op(opc, 3); // 创建中间指令
|--> TCGOp *op = tcg_op_alloc(opc, nargs); // opc = INDEX_op_ld_i32
|--> QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); // tcg_gen_ld_i32() 等一系列 tcg_gen_ 方法生成的指令会直接存放在 tcg_ctx 的 ops 链表中
|--> op->args[0] = a1; // op->args[0] = 0x7fff58001780
|--> op->args[1] = a2; // op->args[1] = 0x7fff58000e18
|--> op->args[2] = a3; // op->args[2] = 0xfffffffffffffff0
}
...
if (cflags & CF_NOIRQ) { // 设置了 CF_NOIRQ 时不生成退出检查
tcg_ctx->exitreq_label = NULL;
} else {
tcg_ctx->exitreq_label = gen_new_label();
|--> TCGContext *s = tcg_ctx;
|--> TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
|--> memset(l, 0, sizeof(TCGLabel));
|--> l->id = s->nb_labels++;
tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, tcg_ctx->exitreq_label); // cond=TCG_COND_LT (值为2), arg1=0xc10, arg2=0x0, l=0x7fff5800cd00
|--> tcg_gen_brcond_i32(cond, arg1, tcg_constant_i32(arg2), l); // cond=TCG_COND_LT, arg1=0xc10, arg2=0xc48, l=0x7fff5800cd00
|--> tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l)); // opc=INDEX_op_brcond_i32, a1=0xc10, a2=0xc48, a3=0x2, a4=0x7fff5800cd00
|--> tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4);
|--> TCGOp *op = tcg_emit_op(opc, 4); // opc = INDEX_op_brcond_i32
|--> op->args[0] = a1; // op->args[0] = 0x7fff58001780
|--> op->args[1] = a2; // op->args[1] = 0x7fff580017b8
|--> op->args[2] = a3; // op->args[2] = 0x2
|--> op->args[3] = a4; // op->args[3] = 0x7fff5800cd00
|--> add_last_as_label_use(l);
|--> TCGLabelUse *u = tcg_malloc(sizeof(TCGLabelUse));
|--> u->op = tcg_last_op();
|--> QSIMPLEQ_INSERT_TAIL(&l->branches, u, next);
2.1.2.插入一条空指令 - tricore_tr_insn_start()
生成一条空指令,INDEX_op_insn_start 的标志位为 TCG_OPF_NOT_PRESENT,宿主机不会执行该指令
// CallStack
gen_intermediate_code()
|--> translator_loop()
|--> gen_tb_start()
|--> ops->insn_start()
|--> tricore_tr_insn_start()
--------------------------------------------------------------------------------------------------
// target/tricore/translate.c
static void tricore_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
DisasContext *ctx = container_of(dcbase, DisasContext, base);
tcg_gen_insn_start(ctx->base.pc_next);
|--> TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 64 / TCG_TARGET_REG_BITS); // TCG_TARGET_REG_BITS 是宿主机 (TCG 后端) 的寄存器位宽,与客户机 TriCore 无关;64 位宿主机上为 64,因此 nargs = 1
|--> TCGOp *op = tcg_op_alloc(opc, nargs); // opc=INDEX_op_insn_start, nargs=0x1
|--> QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
|--> tcg_set_insn_start_param(op, 0, pc);
|--> tcg_set_insn_param(op, arg, v);
|--> op->args[arg] = v; // arg=0x0, v=0x80001386
2.1.3.指令翻译为中间码 - tricore_tr_translate_insn()
获取 ctx->opcode,以用户程序的 80001386 这条指令为例:
对应的 ctx->opcode = 0x7fc812ff,其二进制表示为:
0111 1111 1100 1000 // 7F C8
0001 0010 1111 1111 // 12 FF
参照 TriCore 指令集手册,这些位的定义如下:
-
ff:指令代码 (op1),第 0~7 位;
-
12:第 8~11 位表示源寄存器 s1 编号,这里为 d2 寄存器 (0x2),第 12~15 位 const4 为 0x1(0001),共同构成00010010,即 0x12;
-
7fc8:第 16~30 位为 disp15 (0x7fc8,符号扩展后为 0xffffffc8,即 -56),第 31 位为 op2 (这里为 0,对应 JGE);
完整的指令表示,当 d2 寄存器中的值大于等于 1 时跳转,跳转地址为 PC = PC + sign_ext(disp15) * 2
// CallStack
gen_intermediate_code()
|--> translator_loop()
|--> gen_tb_start()
|--> ops->insn_start()
|--> tricore_tr_insn_start()
|--> ops->translate_insn()
|--> tricore_tr_translate_insn()
--------------------------------------------------------------------------------------------------
// target/tricore/translate.c
static void tricore_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
DisasContext *ctx = container_of(dcbase, DisasContext, base);
...
insn_lo = translator_lduw(env, &ctx->base, ctx->base.pc_next); // env=0x555557504d90, db=0x7fff605ff540, pc=0x80001386
|--> void *p = translator_access(env, db, pc, sizeof(ret));
|--> host = db->host_addr[0]; // host = (void *) 0x7ffff4e01386
|--> base = db->pc_first; // base = 0x80001386
|--> return host + (pc - base); // pc = 0x80001386,返回值为 0x7ffff4e01386
|--> plugin_insn_append(pc, p, sizeof(ret));
|--> struct qemu_plugin_insn *insn = tcg_ctx->plugin_insn; // insn 为 NULL,接下来会直接返回
|--> return lduw_p(p); // 返回值为 0x12ff,即 insn_lo = 0x12ff
|--> lduw_le_p(p)
|--> return (uint16_t)le_bswap(lduw_he_p(ptr), 16);
...
uint32_t insn_hi = translator_lduw(env, &ctx->base, ctx->base.pc_next + 2); // insn_hi = 0x7fc8
ctx->opcode = insn_hi << 16 | insn_lo; // ctx->opcode = 0x7fc812ff
ctx->pc_succ_insn = ctx->base.pc_next + 4; // ctx->pc_succ_insn = 0x8000138a,即 0x80001386 处这条 32 位指令之后紧邻的下一条指令地址 (跳转条件不成立时从此处继续执行)
decode_32Bit_opc(ctx);
...
ctx->base.pc_next = ctx->pc_succ_insn;
2.1.3.1.指令查找 - decode_32Bit_opc()
解析 OP Code 并查找对应的指令
// CallStack
|--> translator_loop()
|--> gen_tb_start()
|--> ops->insn_start()
|--> tricore_tr_insn_start()
|--> ops->translate_insn()
|--> tricore_tr_translate_insn()
|--> decode_32Bit_opc()
-----------------------------------------------------------------------------------------------------
// target/tricore/tricore-opcodes.h
#define MASK_BITS_SHIFT(op, start, end) (extract32(op, (start), (end) - (start) + 1))
#define MASK_OP_MAJOR(op) MASK_BITS_SHIFT(op, 0, 7)
// 宏展开:
MASK_OP_MAJOR(op) = extract32(op, 0, 8) // 获取 32-Bit 输入的低 8 位
// target/tricore/tricore-opcodes.h
#define MASK_BITS_SHIFT_SEXT(op, start, end) (sextract32(op, (start), (end) - (start) + 1))
#define MASK_OP_BRC_CONST4_SEXT(op) MASK_BITS_SHIFT_SEXT(op, 12, 15)
// 宏展开:
MASK_OP_BRC_CONST4_SEXT(op) = sextract32(op, 12, 4) // 获取 32-Bit 输入的第 12~15 位
// target/tricore/tricore-opcodes.h
#define MASK_OP_BRC_S1(op) MASK_BITS_SHIFT(op, 8, 11)
// 宏展开:
MASK_OP_BRC_S1(op) = extract32(op, 8, 4) // 获取 32-Bit 输入的第 8~11 位 (起始位 8,长度 4)
-----------------------------------------------------------------------------------------------------
// target/tricore/translate.c
static void decode_32Bit_opc(DisasContext *ctx)
{
...
op1 = MASK_OP_MAJOR(ctx->opcode); // op1 = 0xff,opcode 的低 8 位为 OP1, ctx->opcode = 0x7fc812ff
...
switch (op1) {
...
/* BRC Format */
case OPCM_32_BRC_EQ_NEQ:
case OPCM_32_BRC_GE:
case OPCM_32_BRC_JLT:
case OPCM_32_BRC_JNE:
const4 = MASK_OP_BRC_CONST4_SEXT(ctx->opcode); // const4 = 0x1
address = MASK_OP_BRC_DISP15_SEXT(ctx->opcode); // address = 0xffffffc8
r1 = MASK_OP_BRC_S1(ctx->opcode); // r1 = 0x2
gen_compute_branch(ctx, op1, r1, 0, const4, address);
2.1.3.2.指令生成 - gen_compute_branch()
TCG 会对虚拟机寄存器重新映射,其对应关系为:
cpu_gpr_a = {0x318, 0x350, 0x388, 0x3c0, 0x3f8, 0x430, 0x468, 0x4a0, 0x4d8, 0x510, 0x548, 0x580, 0x5b8, 0x5f0, 0x628, 0x660}
cpu_gpr_d = {0x698, 0x6d0, 0x708, 0x740, 0x778, 0x7b0, 0x7e8, 0x820, 0x858, 0x890, 0x8c8, 0x900, 0x938, 0x970, 0x9a8, 0x9e0}
这里生成条件跳转的流程如下:
-
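新建跳转标签 Label (gen_new_label);
-
设置跳转条件:tcg_gen_brcond_tl(TCG_COND_XXX, arg1, arg2, label) --> if (arg1 <condition> arg2) goto label
-
条件不成立时,跳转至下一条指令 (ctx->pc_succ_insn);
-
放置跳转标签 (gen_set_label);
-
条件成立时,跳转至目标地址 (ctx->base.pc_next + address * 2);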
// CallStack
|--> translator_loop()
|--> gen_tb_start()
|--> ops->insn_start()
|--> tricore_tr_insn_start()
|--> ops->translate_insn()
|--> tricore_tr_translate_insn()
|--> decode_32Bit_opc()
|--> gen_compute_branch()
|--> gen_branch_condi()
|--> gen_branch_cond()
-------------------------------------------------------------------------------------------------------------------------------------------------
// include/exec/helper-proto.h.inc
#define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \
dh_ctype(ret) HELPER(name) (dh_ctype(t1)) DEF_HELPER_ATTR;
// accel/tcg/tcg-runtime.h
DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, cptr, env)
// 宏展开:
const void *helper_lookup_tb_ptr(CPUArchState *env) __attribute__((noinline)); // 实际调用:const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
-------------------------------------------------------------------------------------------------------------------------------------------------
// target/tricore/translate.c
static void gen_compute_branch(DisasContext *ctx, uint32_t opc, int r1,
int r2 , int32_t constant , int32_t offset) // ctx=0x7fff605ff540, opc=0xff, r1=0x2, r2=0x0, constant=0x1, offset=0xffffffc8
{
...
switch (opc) {
...
case OPCM_32_BRC_GE:
if (MASK_OP_BRC_OP2(ctx->opcode) == OP2_32_BRC_JGE) {
gen_branch_condi(ctx, TCG_COND_GE, cpu_gpr_d[r1], constant, offset); // ctx=0x7fff605ff540, cond=TCG_COND_GE, r1=0x708 (d2), r2=0x1, address=0xffc8
|--> TCGv temp = tcg_constant_i32(r2); // temp = (TCGv) 0xc80
|--> gen_branch_cond(ctx, cond, r1, temp, address); // ctx=0x7fff605ff540, cond=TCG_COND_GE, r1=0x708, r2=0xc80, address=0xffc8
-------------------------------------------------------------------------------------------------------------------------------------------------
// target/tricore/translate.c
static inline void gen_branch_cond(DisasContext *ctx, TCGCond cond, TCGv r1, TCGv r2, int16_t address)
{
target_ulong target_address = 0;
TCGLabel *jumpLabel = gen_new_label();
tcg_gen_brcond_tl(cond, r1, r2, jumpLabel); // 跳转条件
|--> tcg_gen_brcond_i32() // cond=TCG_COND_GE, arg1=0x708, arg2=0xc80, l=0x7fff5800ce08
|--> tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l)); // opc=INDEX_op_brcond_i32, a1=0x708, a2=0xc80, a3=0x3, a4=0x7fff5800ce08
|--> tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4);
|--> TCGOp *op = tcg_emit_op(opc, 4); // opc = INDEX_op_brcond_i32
|--> op->args[0] = a1; // op->args[0] = 0x7fff58001278
|--> op->args[1] = a2; // op->args[1] = 0x7fff580017f0
|--> op->args[2] = a3; // op->args[2] = 0x3
|--> op->args[3] = a4; // op->args[3] = 0x7fff5800ce08
|--> add_last_as_label_use(l);
|--> TCGLabelUse *u = tcg_malloc(sizeof(TCGLabelUse));
|--> u->op = tcg_last_op(); // u->op.opc = INDEX_op_brcond_i32
|--> QSIMPLEQ_INSERT_TAIL(&l->branches, u, next);
gen_goto_tb(ctx, 1, ctx->pc_succ_insn); // ctx->pc_succ_insn = 0x8000138a
|--> gen_save_pc(dest);
|--> tcg_gen_movi_tl(cpu_PC, pc); // 将 pc 的值赋给 cpu_PC
|--> tcg_gen_mov_i32(ret, tcg_constant_i32(arg));
|--> tcg_gen_lookup_and_goto_ptr();
|--> ptr = tcg_temp_ebb_new_ptr();
|--> gen_helper_lookup_tb_ptr(ptr, tcg_env);
|--> tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
|--> tcg_temp_free_ptr(ptr);
|--> ctx->base.is_jmp = DISAS_NORETURN;
gen_set_label(jumpLabel); // 跳转标记
|--> l->present = 1;
|--> tcg_gen_op1(INDEX_op_set_label, label_arg(l));
gen_goto_tb(ctx, 0, ctx->base.pc_next + address * 2); // ctx->base.pc_next + address * 2 = 0x80001316
|--> gen_save_pc(dest);
|--> tcg_gen_movi_tl(cpu_PC, pc);
|--> tcg_gen_mov_i32(ret, tcg_constant_i32(arg));
|--> tcg_gen_lookup_and_goto_ptr();
2.1.4.停止翻译 - tricore_tr_tb_stop()
// CallStack
|--> translator_loop()
|--> gen_tb_start()
|--> ops->insn_start()
|--> tricore_tr_insn_start()
|--> ops->translate_insn()
|--> tricore_tr_translate_insn()
|--> ops->tb_stop()
|--> tricore_tr_tb_stop()
-------------------------------------------------------------------------------------------------------------------------------------------------
// target/tricore/translate.c
static void tricore_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
DisasContext *ctx = container_of(dcbase, DisasContext, base);
switch (ctx->base.is_jmp) { // ctx->base.is_jmp = DISAS_NORETURN
case DISAS_TOO_MANY:
gen_goto_tb(ctx, 0, ctx->base.pc_next);
break;
case DISAS_EXIT_UPDATE:
gen_save_pc(ctx->base.pc_next);
/* fall through */
case DISAS_EXIT:
tcg_gen_exit_tb(NULL, 0);
break;
case DISAS_JUMP:
tcg_gen_lookup_and_goto_ptr();
break;
case DISAS_NORETURN:
break;
default:
g_assert_not_reached();
}
}
2.1.5.插入一条 TB 块退出指令 - gen_tb_end()
生成一条退出当前 TB 的指令
// CallStack
|--> translator_loop()
|--> gen_tb_start()
|--> ops->insn_start()
|--> ops->translate_insn()
|--> ops->tb_stop()
|--> gen_tb_end()
-------------------------------------------------------------------------------------------------------------------------------------------------
// accel/tcg/translator.c
static void gen_tb_end(const TranslationBlock *tb, uint32_t cflags,
TCGOp *icount_start_insn, int num_insns)
{
...
if (tcg_ctx->exitreq_label) {
gen_set_label(tcg_ctx->exitreq_label);
|--> l->present = 1;
|--> tcg_gen_op1(INDEX_op_set_label, label_arg(l));
tcg_gen_exit_tb(tb, TB_EXIT_REQUESTED);
|--> uintptr_t val = (uintptr_t)tcg_splitwx_to_rx((void *)tb) + idx; // idx = 0x3, val = 0x7fffb0142683
|--> rw += tcg_splitwx_diff; // rw = 0x7fffb0142680, tcg_splitwx_diff = 0x0
|--> tcg_gen_op1i(INDEX_op_exit_tb, val); // val = 0x7fffb0142683
|--> tcg_gen_op1(opc, a1);
2.2.中间码翻译为宿主机指令 - tcg_gen_code()
// CallStack
cpu_exec_loop()
|--> tb_gen_code()
|--> setjmp_gen_code()
|--> gen_intermediate_code()
|--> tcg_gen_code()
-------------------------------------------------------------------------------------------------------
// tcg/tcg.c
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
int i, start_words, num_insns;
TCGOp *op;
...
tcg_optimize(s); // 指令优化
reachable_code_pass(s); // 删除不可达的指令
liveness_pass_0(s); // 指令生命周期分析
liveness_pass_1(s);
...
/* Initialize goto_tb jump offsets. */
tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
tcg_reg_alloc_start(s); // 为 TCG 临时变量分配内存
/*
* Reset the buffer pointers when restarting after overflow.
* TODO: Move this into translate-all.c with the rest of the
* buffer management. Having only this done here is confusing.
*/
s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
s->code_ptr = s->code_buf;
s->data_gen_ptr = NULL;
...
start_words = s->insn_start_words;
s->gen_insn_data =
tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
tcg_out_tb_start(s);
num_insns = -1;
QTAILQ_FOREACH(op, &s->ops, link) { // 将中间码翻译为宿主机指令
TCGOpcode opc = op->opc;
switch (opc) {
case INDEX_op_mov_i32:
case INDEX_op_mov_i64:
case INDEX_op_mov_vec:
...
}
...
s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
/* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
i = tcg_out_ldst_finalize(s);
if (i < 0) {
return i;
}
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
i = tcg_out_pool_finalize(s);
if (i < 0) {
return i;
}
#endif
if (!tcg_resolve_relocs(s)) {
return -2;
}
...
return tcg_current_code_size(s); // 中间码翻译为宿主机指令结束
}
3.TB 查找 - tb_lookup()
检查 TB 块是否已经翻译过,如果是则直接调用缓存中已翻译的 TB 块,避免重复翻译,提升运行速度
// accel/tcg/cpu-exec.c
static inline TranslationBlock *tb_lookup(CPUState *cpu, vaddr pc, uint64_t cs_base,
uint32_t flags, uint32_t cflags)
{
...
hash = tb_jmp_cache_hash_func(pc);
jc = cpu->tb_jmp_cache;
if (cflags & CF_PCREL) {
...
} else {
/* Use rcu_read to ensure current load of pc from *tb. */
tb = qatomic_rcu_read(&jc->array[hash].tb);
if (likely(tb &&
tb->pc == pc &&
tb->cs_base == cs_base &&
tb->flags == flags &&
tb_cflags(tb) == cflags)) {
return tb;
}
tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
...
/* Use the pc value already stored in tb->pc. */
qatomic_set(&jc->array[hash].tb, tb);
}
return tb;
}
-------------------------------------------------------------------------
// accel/tcg/cpu-exec.c
static TranslationBlock *tb_htable_lookup(CPUState *cpu, vaddr pc,
uint64_t cs_base, uint32_t flags,
uint32_t cflags)
{
tb_page_addr_t phys_pc;
struct tb_desc desc;
uint32_t h;
desc.env = cpu_env(cpu);
desc.cs_base = cs_base;
desc.flags = flags;
desc.cflags = cflags;
desc.pc = pc;
phys_pc = get_page_addr_code(desc.env, pc);
...
desc.page_addr0 = phys_pc;
h = tb_hash_func(phys_pc, pc,
flags, cs_base, cflags);
return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
}
4.指令执行 - tcg_qemu_tb_exec()
4.1.TCG 执行
4.1.1.计算地址偏移 - tcg_splitwx_diff
buf_rx 和 buf_rw 这两块 Buffer 在 TCG 加速器创建时分配,用于存储翻译后的代码
这两块 Buffer 使用 mmap(... MAP_SHARED ...) 映射至同一共享内存 mfd,因此 buf_rx 和 buf_rw 中的内容保持一致
tcg_init() // tcg/tcg.c
|--> tcg_context_init(max_cpus);
|--> tcg_region_init(tb_size, splitwx, max_cpus); // tcg/region.c
|--> have_prot = alloc_code_gen_buffer(tb_size, splitwx, &error_fatal); // tcg/region.c
|--> prot = alloc_code_gen_buffer_splitwx(size, errp); // tcg/region.c
|--> return alloc_code_gen_buffer_splitwx_memfd(size, errp);
------------------------------------------------------------------------------------------------------
// tcg/region.c
static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
{
void *buf_rw = NULL, *buf_rx = MAP_FAILED;
int fd = -1;
buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
|--> int mfd = qemu_memfd_create(name, size, false, 0, seals, NULL);
|--> mfd = memfd_create(name, flags);
|--> ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0); // buf_rw = ptr,权限为 PROT_READ、PROT_WRITE,大小 size = 0x40000000(1GB)
|--> *fd = mfd;
...
buf_rx = mmap(NULL, size, host_prot_read_exec(), MAP_SHARED, fd, 0); // 使用 MAP_SHARED 映射同一个 fd,通过 buf_rw 写入的内容在 buf_rx 中同步可见
|--> host_prot_read_exec() --> return PROT_READ | PROT_EXEC;
...
tcg_splitwx_diff = buf_rx - buf_rw;
4.1.2.指令执行 - TCG 对应的 tcg_qemu_tb_exec
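tcg_qemu_tb_exec 是一个函数指针,在 tcg_prologue_init() 中被赋值为 tcg_splitwx_to_rx(s->code_ptr) 的返回值,即 code buffer 起始处代码在可执行映射中的地址
tcg_splitwx_to_rx() 将传入的 Buffer 指针加上 tcg_splitwx_diff,从而将该指针由指向可读可写的内存 buf_rw,变为指向可读可执行的内存 buf_rx
由于 buf_rx 具有可执行权限 x,cpu_tb_exec() 通过该函数指针调用 tcg_qemu_tb_exec(env, tb_ptr) 时,宿主机 CPU 会跳转至该地址并开始执行其中已生成的宿主机指令 (内存并不会被"自动执行")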
cpu_exec_loop() // accel/tcg/cpu-exec.c
|--> cpu_loop_exec_tb(cpu, tb, pc, &last_tb, &tb_exit) // accel/tcg/cpu-exec.c
|--> cpu_tb_exec(cpu, tb, tb_exit) // accel/tcg/cpu-exec.c
|--> const void *tb_ptr = itb->tc.ptr;
|--> ret = tcg_qemu_tb_exec(env, tb_ptr);
------------------------------------------------------------------------------------------------------
// tcg/tcg.c
tcg_prologue_fn *tcg_qemu_tb_exec;
------------------------------------------------------------------------------------------------------
// include/tcg/tcg.h
typedef uintptr_t tcg_prologue_fn(CPUArchState *env, const void *tb_ptr); // tcg_prologue_fn 是一个函数类型,下面的 tcg_qemu_tb_exec 才是指向该类型的函数指针
extern tcg_prologue_fn *tcg_qemu_tb_exec;
------------------------------------------------------------------------------------------------------
// tcg/tcg.c
void tcg_prologue_init(void)
{
TCGContext *s = tcg_ctx;
...
s->code_ptr = s->code_gen_ptr; // s->code_ptr = 0x7fffb0000000
s->code_buf = s->code_gen_ptr;
s->data_gen_ptr = NULL;
tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
------------------------------------------------------------------------------------------------------
// tcg/region.c
const void *tcg_splitwx_to_rx(void *rw)
{
/* Pass NULL pointers unchanged. */
if (rw) {
g_assert(in_code_gen_buffer(rw));
rw += tcg_splitwx_diff; // tcg_splitwx_diff = 0xffffffffc0000000
}
return rw; // rw = 0x7fff70000000
}
4.2.TCI 执行 - TCI 对应的 tcg_qemu_tb_exec
编译时选择 --enable-tcg-interpreter 会打开 TCG 解释器 (TCI),此时 tcg_qemu_tb_exec 不再是函数指针,而是如下逐条解释执行 TCI 字节码的函数:
// tcg/tci.c
uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
const void *v_tb_ptr)
{
const uint32_t *tb_ptr = v_tb_ptr; // 0x7fffb0142740
...
for (;;) {
...
insn = *tb_ptr++;
opc = extract32(insn, 0, 8);
switch (opc) {
...
下面为 80001386 这条指令,经过翻译得到的 TB 块的执行流程
insn = 0xfff0e40d
opc = INDEX_op_ld_i32
...
case INDEX_op_ld_i32:
CASE_64(ld32u)
tci_args_rrs(insn, &r0, &r1, &ofs); // r0 = TCG_REG_R4 = 4,r1 = TCG_REG_R14 = 14,ofs = 0xfffffff0
ptr = (void *)(regs[r1] + ofs);
regs[r0] = *(uint32_t *)ptr;
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0x5c2
opc = INDEX_op_tci_movi
...
case INDEX_op_tci_movi:
tci_args_ri(insn, &r0, &t1); // r0 = TCG_REG_R5, t1 = 0x0
regs[r0] = t1;
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0x254d06
opc = INDEX_op_setcond_i32
...
case INDEX_op_setcond_i32:
tci_args_rrrc(insn, &r0, &r1, &r2, &condition); // r0 = TCG_REG_R13, r1 = TCG_REG_R4, r2 = TCG_REG_R5, condition = TCG_COND_LT
regs[r0] = tci_compare32(regs[r1], regs[r2], condition);
|--> int32_t i0 = u0; // i0 = 0x0
|--> int32_t i1 = u1; // i1 = 0x0
|--> result = (i0 < i1); // result = 0x0
|--> return result;
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0x3cd26
opc = INDEX_op_brcond_i32
...
case INDEX_op_brcond_i32:
tci_args_rl(insn, tb_ptr, &r0, &ptr); // tb_ptr = 0x7fffb0142750, r0 = TCG_REG_R13, ptr = 0x7fffb014278c
if ((uint32_t)regs[r0]) {
tb_ptr = ptr;
}
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0x14c2
opc = INDEX_op_tci_movi
...
case INDEX_op_tci_movi:
tci_args_ri(insn, &r0, &t1); // r0 = TCG_REG_R4, t1 = 0x1
regs[r0] = t1;
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0xfff4e40e
opc = INDEX_op_st8_i32
...
CASE_32_64(st8)
tci_args_rrs(insn, &r0, &r1, &ofs); // r0 = TCG_REG_R4, r1 = TCG_REG_R14, ofs = 0xfffffff4
ptr = (void *)(regs[r1] + ofs); // regs[14] = 0x5555574ec430, env = 0x5555574ec430, ptr = 0x5555574ec424,
*(uint8_t *)ptr = regs[r0];
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0x48e50d
opc = INDEX_op_ld_i32
...
CASE_64(ld32u)
tci_args_rrs(insn, &r0, &r1, &ofs); // r0 = TCG_REG_R5, r1 = TCG_REG_R14, ofs = 0x48
ptr = (void *)(regs[r1] + ofs); // regs[r1] = 0x5555574ec430, env = 0x5555574ec430,regs[r1] + ofs = &env->gpr_d[2] = 0x5555574ec478
regs[r0] = *(uint32_t *)ptr; // regs[r0] = 0x5, 即从d[2]寄存器中读取数据 --> 0x5
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0x345d06
opc = INDEX_op_setcond_i32
...
case INDEX_op_setcond_i32:
tci_args_rrrc(insn, &r0, &r1, &r2, &condition); // r0 = TCG_REG_R13, r1 = TCG_REG_R5, r2 = TCG_REG_R4, condition = TCG_COND_GE
regs[r0] = tci_compare32(regs[r1], regs[r2], condition); // regs[r1] = 0x5, regs[r2] = 0x1, condition = TCG_COND_GE --> regs[r0] = 0x1
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0x14d26
opc = INDEX_op_brcond_i32
...
case INDEX_op_brcond_i32:
tci_args_rl(insn, tb_ptr, &r0, &ptr); // tb_ptr = 0x7fffb0142764, r0 = TCG_REG_R13, ptr = 0x7fffb0142778
if ((uint32_t)regs[r0]) { // regs[r0] = 0x1
tb_ptr = ptr;
}
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0x2c4c3
opc = INDEX_op_tci_movl
...
case INDEX_op_tci_movl:
tci_args_rl(insn, tb_ptr, &r0, &ptr); // tb_ptr = 0x7fffb014277c, r0 = TCG_REG_R4, ptr = 0x7fffb01427a8
regs[r0] = *(tcg_target_ulong *)ptr; // regs[r0] = 0xffffffff80001316
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0x9ce410
opc = INDEX_op_st_i32
...
CASE_64(st32)
tci_args_rrs(insn, &r0, &r1, &ofs); // r0 = TCG_REG_R4, r1 = TCG_REG_R14, ofs = 0x9c
ptr = (void *)(regs[r1] + ofs); // regs[r1] = env = 0x5555574ec430, regs[r1] + ofs = &env->PC = 0x5555574ec4cc
*(uint32_t *)ptr = regs[r0]; // regs[r0] = 0xffffffff80001316, *(uint32_t *)ptr = 0x80001316
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0xfe4d
opc = INDEX_op_st_i64
...
case INDEX_op_st_i64:
tci_args_rrs(insn, &r0, &r1, &ofs); // r0 = TCG_REG_R14, r1 = TCG_REG_R15, ofs = 0x0
ptr = (void *)(regs[r1] + ofs); // regs[r1] = 0x7fffa05ff150, ofs = 0x0
*(uint64_t *)ptr = regs[r0]; // regs[r0] = env = 0x5555574ec430, ptr = 0x7fffa05ff150
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0x8202
opc = INDEX_op_call
...
case INDEX_op_call:
{
...
tci_args_nl(insn, tb_ptr, &len, &ptr); // tb_ptr = 0x7fffb0142788, len = 0x2, ptr = 0x7fffb0142790
func = ((void **)ptr)[0]; // func = (void *) 0x5555557ea6dc <helper_lookup_tb_ptr>
cif = ((void **)ptr)[1]; // cif = (ffi_cif *) 0x7fff980163d0
n = cif->nargs; // n = 0x1
for (i = s = 0; i < n; ++i) {
ffi_type *t = cif->arg_types[i];
call_slots[i] = &stack[s];
s += DIV_ROUND_UP(t->size, 8); // s = 0x1
}
/* Helper functions may need to access the "return address" */
tci_tb_ptr = (uintptr_t)tb_ptr; // tb_ptr = 0x7fffb0142788, tci_tb_ptr = 0x7fffb0142788
ffi_call(cif, func, stack, call_slots);
}
switch (len) {
...
case 2: /* uint64_t */
memcpy(&regs[TCG_REG_R0], stack, 8);
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0x84
opc = INDEX_op_goto_ptr
...
case INDEX_op_goto_ptr:
tci_args_r(insn, &r0); // r0 = TCG_REG_R0
ptr = (void *)regs[r0]; // regs[TCG_REG_R0] = 0x7fffb0140200, ptr = 0x7fffb0140200, *(uint32_t *)ptr = 0xfff0e40d
...
tb_ptr = ptr; // tb_ptr = 0x7fffb0140200
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0xfff0e40d
opc = INDEX_op_ld_i32
...
CASE_64(ld32u)
tci_args_rrs(insn, &r0, &r1, &ofs); // r0 = TCG_REG_R4, r1 = TCG_REG_R14, ofs = 0xfffffff0
ptr = (void *)(regs[r1] + ofs); // regs[r1] = env = 0x5555574ec430, ptr = 0x5555574ec420
regs[r0] = *(uint32_t *)ptr; // regs[r0]: 0xffffffff80001316 --> 0x0
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0x5c2
opc = INDEX_op_tci_movi
...
case INDEX_op_tci_movi:
tci_args_ri(insn, &r0, &t1); // r0 = TCG_REG_R5, t1 = 0x0
regs[r0] = t1; // regs[r0] = 0x0
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0x254d06
opc = INDEX_op_setcond_i32
...
case INDEX_op_setcond_i32:
tci_args_rrrc(insn, &r0, &r1, &r2, &condition); // r0 = TCG_REG_R13, r1 = TCG_REG_R4, r2 = TCG_REG_R5, condition = TCG_COND_LT
regs[r0] = tci_compare32(regs[r1], regs[r2], condition); // regs[r1] = 0x0, regs[r2] = 0x0, regs[r0] = 0x0
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0x34d26
opc = INDEX_op_brcond_i32
...
case INDEX_op_brcond_i32:
tci_args_rl(insn, tb_ptr, &r0, &ptr); // tb_ptr = 0x7fffb0140210, r0 = TCG_REG_R13, ptr = 0x7fffb0140244
if ((uint32_t)regs[r0]) { // regs[r0] = 0x0
tb_ptr = ptr;
}
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0x14c2
opc = INDEX_op_tci_movi
...
case INDEX_op_tci_movi:
tci_args_ri(insn, &r0, &t1); // r0 = TCG_REG_R4, t1 = 0x1
regs[r0] = t1; // regs[r0] = 0x1
break;
--------------------------------------------------------------------------------------------------------------------
insn = 0xfff4e40e
opc = INDEX_op_st8_i32
...
CASE_32_64(st8)
tci_args_rrs(insn, &r0, &r1, &ofs); // r0 = TCG_REG_R4, r1 = TCG_REG_R14, ofs = 0xfffffff4
ptr = (void *)(regs[r1] + ofs); // regs[r1] = env = 0x5555574ec430, regs[r1] + ofs = 0x5555574ec424
*(uint8_t *)ptr = regs[r0]; // regs[r0] = 0x1
break;
--------------------------------------------------------------------------------------------------------------------
...