[TCG] 02.指令翻译及执行流程

目录

1.流程概览

1.1.主循环 - cpu_exec_loop()

1.2.TriCore 指令翻译 OP

2.指令翻译 - tb_gen_code()

2.1.虚拟机指令翻译为中间码 - translator_loop()

2.1.1.生成固定的首条指令 - gen_tb_start()

2.1.2.插入一条空指令 - tricore_tr_insn_start()

2.1.3.指令翻译为中间码 - tricore_tr_translate_insn()

2.1.3.1.指令查找 - decode_32Bit_opc()

2.1.3.2.指令生成 - gen_compute_branch()

2.1.4.停止翻译 - tricore_tr_tb_stop()

2.1.5.插入一条 TB 块退出指令 - gen_tb_end()

2.2.中间码翻译为宿主机指令 - tcg_gen_code()

3.TB 查找 - tb_lookup()

4.指令执行 - tcg_qemu_tb_exec()

4.1.TCG 执行

4.1.1.计算地址偏移 - tcg_splitwx_diff

4.1.2.指令执行 - TCG 对应的 tcg_qemu_tb_exec

4.2.TCI 执行 - TCI 对应的 tcg_qemu_tb_exec


1.流程概览

cpu_exec_loop() 模拟 CPU 循环执行指令的流程,每次查找并执行 TB 块之前 (而非每条指令执行前),均会先检查异常和中断请求

本篇以 TriCore 指令集的翻译及执行流程为例,关注以下三点:

  • 指令翻译;

  • TB 块查找;

  • 指令执行;

1.1.主循环 - cpu_exec_loop()

// accel/tcg/cpu-exec.c
static int __attribute__((noinline))
cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
{
    ...
    while (!cpu_handle_exception(cpu, &ret)) { // 检查异常请求
        ...
        while (!cpu_handle_interrupt(cpu, &last_tb)) { // 检查中断请求
            ...
            cpu_get_tb_cpu_state(cpu_env(cpu), &pc, &cs_base, &flags); // 获取 env->PC
            ...

            tb = tb_lookup(cpu, pc, cs_base, flags, cflags); // 查找对应的 TB 块

            if (tb == NULL) { // 未找到对应 TB --> 生成新的 TB
                ...
                tb = tb_gen_code(cpu, pc, cs_base, flags, cflags); // 生成 TB
                ...
            }
            ...

            cpu_loop_exec_tb(cpu, tb, pc, &last_tb, &tb_exit); // 运行 TB 块中的指令

指令翻译执行的大致流程如下图所示:

1.2.TriCore 指令翻译 OP

指令翻译过程中,需要读取并解析 ELF 中的指令,各平台需要定义相关的 OP,以便于将指令翻译为中间码 IR (Intermediate Representation),TriCore 架构的指令翻译 OP 如下:

// target/tricore/translate.c
static const TranslatorOps tricore_tr_ops = {
    .init_disas_context = tricore_tr_init_disas_context,
    .tb_start           = tricore_tr_tb_start,
    .insn_start         = tricore_tr_insn_start,
    .translate_insn     = tricore_tr_translate_insn,
    .tb_stop            = tricore_tr_tb_stop,
};

2.指令翻译 - tb_gen_code()

以下面的 TriCore 用户程序生成的汇编指令 JGE 为例:

该指令为 JGE,其地址 (PC 指针的值) 为 80001386,具体含义为:判断 D[2] 寄存器中的值是否大于等于 1,如果是则跳转至 80001316 处执行

QEMU 翻译虚拟机指令的方法为 tb_gen_code(),其为指令分配 TB 块,并将其翻译为宿主机指令

// CallStack
cpu_exec_loop()
    |--> tb_gen_code()
-------------------------------------------------------------------------------------------------------

// accel/tcg/translate-all.c
TranslationBlock *tb_gen_code(CPUState *cpu, vaddr pc, uint64_t cs_base, uint32_t flags, int cflags)
{
    ...
    phys_pc = get_page_addr_code_hostp(env, pc, &host_pc); // phys_pc = 0x601386
    ...
    tb = tcg_tb_alloc(tcg_ctx); // 生成一个空的TB,其中的值全为0 --> tb = (TranslationBlock *) 0x7fffb015b700
    ...
    gen_code_buf = tcg_ctx->code_gen_ptr; // 这里初始的buffer值全部为空 --> "0"
    tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf); // 0x7fff7015b7c0 <code_gen_buffer+1423251> 该内存目前为空
    ...
    tb->pc = pc;           // tb->pc = 0x80001386
    tb->cs_base = cs_base; // tb->cs_base = 0x0
    tb->flags = flags;     // tb->flags = 0x0
    tb->cflags = cflags;   // tb->cflags = 0xff000201
    tb_set_page_addr0(tb, phys_pc);
        |--> tb->page_addr[0] = addr; // tb->page_addr[0] = 0x601386
    tb_set_page_addr1(tb, -1);
        |--> tb->page_addr[1] = addr; // tb->page_addr[1] = 0xffffffffffffffff
    // tb->page_addr = {0x601386, 0xffffffffffffffff}
    ...
    tcg_ctx->gen_tb = tb; // tcg_ctx->gen_tb = (TranslationBlock *) 0x7fffb015b700
    tcg_ctx->addr_type = TARGET_LONG_BITS == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64; // tcg_ctx->addr_type = TCG_TYPE_I32
    ...
    gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti); // ... tb=0x7fffb015b700, pc=0x80001386 ...
        |--> gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
            |--> translator_loop(cs, tb, max_insns, pc, host_pc, &tricore_tr_ops, &ctx.base); // 生成中间码!
        |--> return tcg_gen_code(tcg_ctx, tb, pc);

2.1.虚拟机指令翻译为中间码 - translator_loop()

// CallStack
tb_gen_code()
    |--> setjmp_gen_code()
        |--> gen_intermediate_code()
            |--> translator_loop()
-------------------------------------------------------------------------------------

// accel/tcg/translator.c
void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns, vaddr pc, 
                    void *host_pc, const TranslatorOps *ops, DisasContextBase *db)
{
    ...
    /* Initialize DisasContext */
    db->tb = tb;       // db->tb = (TranslationBlock *) 0x7fffb015b700
    db->pc_first = pc; // db->pc_first = 0x80001386
    db->pc_next = pc;  // db->pc_next = 0x80001386
    db->is_jmp = DISAS_NEXT;
    db->num_insns = 0;
    db->max_insns = *max_insns; // db->max_insns = 0x1
    db->singlestep_enabled = cflags & CF_SINGLE_STEP; // db->singlestep_enabled = 0x0
    ...
    db->host_addr[0] = host_pc; // db->host_addr[0] = (void *) 0x7ffff4e01386
    db->host_addr[1] = NULL;
    
    ops->init_disas_context(db, cpu); // 注意!此时 DisasContext ctx 中的 pc_succ_insn 和 opcode 尚未设置
        |--> tricore_tr_init_disas_context() // target/tricore/translate.c
            |--> DisasContext *ctx = container_of(dcbase, DisasContext, base);
            |--> ctx->mem_idx = cpu_mmu_index(env, false); // ctx->mem_idx = 0x0
            |--> ctx->priv = FIELD_EX32(tb_flags, TB_FLAGS, PRIV); // ctx->priv = 0x2
            |--> ctx->features = env->features; // ctx->features = 0x1f
    ...
    /* Start translating.  */
    icount_start_insn = gen_tb_start(db, cflags);
    ops->tb_start(db, cpu); // Tricore 该 op 为空,不执行任何操作
    ...

    while (true) {
        *max_insns = ++db->num_insns; // *max_insns = 0x1
        ops->insn_start(db, cpu);
            |--> tricore_tr_insn_start() // 生成一条空指令,该空指令记录了虚拟机当前正在翻译的指令的地址 (pc_next)
        ...
        
        ops->translate_insn(db, cpu);
            |--> tricore_tr_translate_insn() // 生成中间码
     ...
     }
     ops->tb_stop(db, cpu);
         |--> tricore_tr_tb_stop()
     
     gen_tb_end(tb, cflags, icount_start_insn, db->num_insns); // 生成 TCG 终章

2.1.1.生成固定的首条指令 - gen_tb_start()

生成 TCG 序言,包含两条指令:

  • tcg_gen_ld_i32: INDEX_op_ld_i32 - 从 ArchCPU 中读取 icount_decr 至 count,icount_decr 为 -1 时,强制停止 TCG 执行与该 CPU 链接的 TB,并返回顶层循环;

  • tcg_gen_brcondi_i32:INDEX_op_brcond_i32 - 当 count 小于 0 时跳转至 tcg_ctx->exitreq_label;

// CallStack
gen_intermediate_code()
    |--> translator_loop()
        |--> gen_tb_start()
--------------------------------------------------------------------------------------------------

// accel/tcg/translator.c
static TCGOp *gen_tb_start(DisasContextBase *db, uint32_t cflags)
{
    TCGv_i32 count = NULL;
    TCGOp *icount_start_insn = NULL; // TCGOp 保存该条指令的操作码与操作数

    if ((cflags & CF_USE_ICOUNT) || !(cflags & CF_NOIRQ)) { // cflags = 0xff000201
        count = tcg_temp_new_i32(); // 在 tcg_ctx 中分配一个 TCGv_i32 类型的变量 count

        tcg_gen_ld_i32(count, tcg_env, offsetof(ArchCPU, parent_obj.neg.icount_decr.u32) - offsetof(ArchCPU, env));
            |--> tcg_gen_ldst_op_i32(INDEX_op_ld_i32, ret, arg2, offset); // ret = count = 0xc10,  arg2 = tcg_env = 0x2a8, offset = 0xfffffffffffffff0
                |--> tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_ptr_arg(base), offset); // 生成带 3 个参数的中间码
                    |--> TCGOp *op = tcg_emit_op(opc, 3); // 创建中间指令
                        |--> TCGOp *op = tcg_op_alloc(opc, nargs); // opc = INDEX_op_ld_i32
                        |--> QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link); // tcg_gen_ld_i32() 等一系列 tcg_gen_ 方法生成的指令会直接存放在 tcg_ctx 的 ops 链表中
                    |--> op->args[0] = a1; // op->args[0] = 0x7fff58001780
                    |--> op->args[1] = a2; // op->args[1] = 0x7fff58000e18
                    |--> op->args[2] = a3; // op->args[2] = 0xfffffffffffffff0
    ...

    } else {
        tcg_ctx->exitreq_label = gen_new_label();
            |--> TCGContext *s = tcg_ctx;
            |--> TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
            |--> memset(l, 0, sizeof(TCGLabel));
            |--> l->id = s->nb_labels++;

        tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, tcg_ctx->exitreq_label); // cond=TCG_COND_LT (值为2), arg1=0xc10, arg2=0x0, l=0x7fff5800cd00
            |--> tcg_gen_brcond_i32(cond, arg1, tcg_constant_i32(arg2), l); // cond=TCG_COND_LT, arg1=0xc10, arg2=0xc48, l=0x7fff5800cd00
                |--> tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l)); // opc=INDEX_op_brcond_i32, a1=0xc10, a2=0xc48, a3=0x2, a4=0x7fff5800cd00
                    |--> tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4);
                        |--> TCGOp *op = tcg_emit_op(opc, 4); // opc = INDEX_op_brcond_i32
                        |--> op->args[0] = a1; // op->args[0] = 0x7fff58001780
                        |--> op->args[1] = a2; // op->args[1] = 0x7fff580017b8
                        |--> op->args[2] = a3; // op->args[2] = 0x2
                        |--> op->args[3] = a4; // op->args[3] = 0x7fff5800cd00
                |--> add_last_as_label_use(l);
                    |--> TCGLabelUse *u = tcg_malloc(sizeof(TCGLabelUse));
                    |--> u->op = tcg_last_op();
                    |--> QSIMPLEQ_INSERT_TAIL(&l->branches, u, next);

2.1.2.插入一条空指令 - tricore_tr_insn_start()

生成一条空指令,INDEX_op_insn_start 的标志位为 TCG_OPF_NOT_PRESENT,宿主机不会执行该指令

// CallStack
gen_intermediate_code()
    |--> translator_loop()
        |--> gen_tb_start()
        |--> ops->insn_start()
            |--> tricore_tr_insn_start()
--------------------------------------------------------------------------------------------------

// target/tricore/translate.c
static void tricore_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *ctx = container_of(dcbase, DisasContext, base);

    tcg_gen_insn_start(ctx->base.pc_next);
        |--> TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 64 / TCG_TARGET_REG_BITS); // TCG_TARGET_REG_BITS 为宿主机 (TCG 后端) 的寄存器位宽,与虚拟机架构 TriCore 无关;这里宿主机为 64 位,故 TCG_TARGET_REG_BITS = 64,nargs = 1
            |--> TCGOp *op = tcg_op_alloc(opc, nargs); // opc=INDEX_op_insn_start, nargs=0x1
            |--> QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
        |--> tcg_set_insn_start_param(op, 0, pc);
            |--> tcg_set_insn_param(op, arg, v);
                |--> op->args[arg] = v; // arg=0x0, v=0x80001386

2.1.3.指令翻译为中间码 - tricore_tr_translate_insn()

获取 ctx->opcode,以用户程序的 80001386 这条指令为例:

对应的 ctx->opcode = 0x7fc812ff,其二进制表示为:

0111 1111 1100 1000 // 7F C8

0001 0010 1111 1111 // 12 FF

参照 TriCore 指令集手册,这些位的定义如下:

  • ff:指令代码 (op1),第 0~7 位;

  • 12:第 8~11 位表示源寄存器 s1 编号,这里为 d2 寄存器 (0x2),第 12~15 位 const4 为 0x1(0001),共同构成00010010,即 0x12;

  • 7fc8:第 16~30 位为 disp15 (0x7fc8,符号扩展后为 0xffffffc8),第 31 位为 op2 (这里为 0,即 JGE);

完整的指令表示,当 d2 寄存器中的值大于等于 1 时跳转,跳转地址为 PC = PC + sign_ext(disp15) * 2

// CallStack
gen_intermediate_code()
    |--> translator_loop()
        |--> gen_tb_start()
        |--> ops->insn_start()
            |--> tricore_tr_insn_start()
        |--> ops->translate_insn()
            |--> tricore_tr_translate_insn()
--------------------------------------------------------------------------------------------------

// target/tricore/translate.c
static void tricore_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *ctx = container_of(dcbase, DisasContext, base);
    ...
    
    insn_lo = translator_lduw(env, &ctx->base, ctx->base.pc_next); // env=0x555557504d90, db=0x7fff605ff540, pc=0x80001386
        |--> void *p = translator_access(env, db, pc, sizeof(ret));
            |--> host = db->host_addr[0]; // host = (void *) 0x7ffff4e01386
            |--> base = db->pc_first; // base = 0x80001386
            |--> return host + (pc - base); // pc = 0x80001386,返回值为 0x7ffff4e01386
        |--> plugin_insn_append(pc, p, sizeof(ret));
            |--> struct qemu_plugin_insn *insn = tcg_ctx->plugin_insn; // insn 为 NULL,接下来会直接返回 
        |--> return lduw_p(p); // 返回值为 0x12ff,即 insn_lo = 0x12ff
            |--> lduw_le_p(p)
                |--> return (uint16_t)le_bswap(lduw_he_p(ptr), 16);
    ...
        uint32_t insn_hi = translator_lduw(env, &ctx->base, ctx->base.pc_next + 2); // insn_hi = 0x7fc8
        ctx->opcode = insn_hi << 16 | insn_lo; // ctx->opcode = 0x7fc812ff
        ctx->pc_succ_insn = ctx->base.pc_next + 4; // ctx->pc_succ_insn = 0x8000138a,即 TriCore 用户程序中 0x80001386 处指令的后继指令地址 (0x80001386 处的指令未发生跳转时,顺序执行的下一条指令)
        decode_32Bit_opc(ctx);
    ...
    ctx->base.pc_next = ctx->pc_succ_insn;
2.1.3.1.指令查找 - decode_32Bit_opc()

解析 OP Code 并查找对应的指令

// CallStack
|--> translator_loop()
    |--> gen_tb_start()
    |--> ops->insn_start()
        |--> tricore_tr_insn_start()
    |--> ops->translate_insn()
        |--> tricore_tr_translate_insn()
            |--> decode_32Bit_opc()
-----------------------------------------------------------------------------------------------------

// target/tricore/tricore-opcodes.h
#define MASK_BITS_SHIFT(op, start, end) (extract32(op, (start), (end) - (start) + 1))
#define MASK_OP_MAJOR(op)      MASK_BITS_SHIFT(op, 0, 7)
// 宏展开:
MASK_OP_MAJOR(op) = extract32(op, 0, 8) // 获取 32-Bit 输入的低 8 位


// target/tricore/tricore-opcodes.h
#define MASK_BITS_SHIFT_SEXT(op, start, end) (sextract32(op, (start), (end) - (start) + 1))
#define MASK_OP_BRC_CONST4_SEXT(op) MASK_BITS_SHIFT_SEXT(op, 12, 15)
// 宏展开:
MASK_OP_BRC_CONST4_SEXT(op) = sextract32(op, 12, 4) // 获取 32-Bit 输入的第 12~15 位


// target/tricore/tricore-opcodes.h
#define MASK_OP_BRC_S1(op)     MASK_BITS_SHIFT(op, 8, 11)
// 宏展开:
MASK_OP_BRC_S1(op) = extract32(op, 8, 4) // 获取 32-Bit 输入的第 8~11 位 (起始位为 8,长度为 11 - 8 + 1 = 4)
-----------------------------------------------------------------------------------------------------

// target/tricore/translate.c
static void decode_32Bit_opc(DisasContext *ctx)
{
    ...
    op1 = MASK_OP_MAJOR(ctx->opcode); // op1 = 0xff,opcode 的低 8 位为 OP1, ctx->opcode = 0x7fc812ff
    ...
    switch (op1) {
    ...
/* BRC Format */
    case OPCM_32_BRC_EQ_NEQ:
    case OPCM_32_BRC_GE:
    case OPCM_32_BRC_JLT:
    case OPCM_32_BRC_JNE:
        const4 = MASK_OP_BRC_CONST4_SEXT(ctx->opcode);  // const4 = 0x1
        address = MASK_OP_BRC_DISP15_SEXT(ctx->opcode); // address = 0xffffffc8
        r1 = MASK_OP_BRC_S1(ctx->opcode); // r1 = 0x2
        gen_compute_branch(ctx, op1, r1, 0, const4, address);
2.1.3.2.指令生成 - gen_compute_branch()

TCG 会对虚拟机寄存器重新映射,其对应关系为:

cpu_gpr_a = {0x318, 0x350, 0x388, 0x3c0, 0x3f8, 0x430, 0x468, 0x4a0, 0x4d8, 0x510, 0x548, 0x580, 0x5b8, 0x5f0, 0x628, 0x660}

cpu_gpr_d = {0x698, 0x6d0, 0x708, 0x740, 0x778, 0x7b0, 0x7e8, 0x820, 0x858, 0x890, 0x8c8, 0x900, 0x938, 0x970, 0x9a8, 0x9e0}

这里生成条件跳转的流程如下:

  • 新建跳转标签 Label;

  • 设置跳转条件:tcg_gen_brcond_tl(TCG_COND_XXX, arg1, arg2, label) --> if (arg1 <condition> arg2) goto label

  • 下一条指令;

  • 设置跳转标签 (gen_set_label);

  • 跳转分支;

// CallStack
|--> translator_loop()
    |--> gen_tb_start()
    |--> ops->insn_start()
        |--> tricore_tr_insn_start()
    |--> ops->translate_insn()
        |--> tricore_tr_translate_insn()
            |--> decode_32Bit_opc()
                |--> gen_compute_branch()
                    |--> gen_branch_condi()
                        |--> gen_branch_cond()
-------------------------------------------------------------------------------------------------------------------------------------------------

// include/exec/helper-proto.h.inc
#define DEF_HELPER_FLAGS_1(name, flags, ret, t1) \
dh_ctype(ret) HELPER(name) (dh_ctype(t1)) DEF_HELPER_ATTR;

// accel/tcg/tcg-runtime.h
DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, cptr, env)
// 宏展开:
const void *helper_lookup_tb_ptr(CPUArchState *env) __attribute__((noinline)); // 实际调用:const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
-------------------------------------------------------------------------------------------------------------------------------------------------

// target/tricore/translate.c
static void gen_compute_branch(DisasContext *ctx, uint32_t opc, int r1,
                               int r2 , int32_t constant , int32_t offset) // ctx=0x7fff605ff540, opc=0xff, r1=0x2, r2=0x0, constant=0x1, offset=0xffffffc8
{
    ...
    switch (opc) {
    ...
    case OPCM_32_BRC_GE:
         if (MASK_OP_BRC_OP2(ctx->opcode) == OP2_32_BRC_JGE) {
            gen_branch_condi(ctx, TCG_COND_GE, cpu_gpr_d[r1], constant, offset); // ctx=0x7fff605ff540, cond=TCG_COND_GE, r1=0x708 (d2), r2=0x1, address=0xffc8
                |--> TCGv temp = tcg_constant_i32(r2); // temp = (TCGv) 0xc80
                |--> gen_branch_cond(ctx, cond, r1, temp, address); // ctx=0x7fff605ff540, cond=TCG_COND_GE, r1=0x708, r2=0xc80, address=0xffc8
-------------------------------------------------------------------------------------------------------------------------------------------------

// target/tricore/translate.c
static inline void gen_branch_cond(DisasContext *ctx, TCGCond cond, TCGv r1, TCGv r2, int16_t address)
{
    target_ulong target_address = 0;
    TCGLabel *jumpLabel = gen_new_label();

    tcg_gen_brcond_tl(cond, r1, r2, jumpLabel); // 跳转条件
        |--> tcg_gen_brcond_i32() // cond=TCG_COND_GE, arg1=0x708, arg2=0xc80, l=0x7fff5800ce08
            |--> tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l)); // opc=INDEX_op_brcond_i32, a1=0x708, a2=0xc80, a3=0x3, a4=0x7fff5800ce08
                |--> tcg_gen_op4(opc, tcgv_i32_arg(a1), tcgv_i32_arg(a2), a3, a4);
                    |--> TCGOp *op = tcg_emit_op(opc, 4); // opc = INDEX_op_brcond_i32
                    |--> op->args[0] = a1; // op->args[0] = 0x7fff58001278
                    |--> op->args[1] = a2; // op->args[1] = 0x7fff580017f0
                    |--> op->args[2] = a3; // op->args[2] = 0x3
                    |--> op->args[3] = a4; // op->args[3] = 0x7fff5800ce08
            |--> add_last_as_label_use(l);
                |--> TCGLabelUse *u = tcg_malloc(sizeof(TCGLabelUse));
                |--> u->op = tcg_last_op(); // u->op.opc = INDEX_op_brcond_i32
                |--> QSIMPLEQ_INSERT_TAIL(&l->branches, u, next);

    gen_goto_tb(ctx, 1, ctx->pc_succ_insn); // ctx->pc_succ_insn = 0x8000138a
        |--> gen_save_pc(dest);
            |--> tcg_gen_movi_tl(cpu_PC, pc); // 将 pc 的值赋给 cpu_PC
                |--> tcg_gen_mov_i32(ret, tcg_constant_i32(arg));
        |--> tcg_gen_lookup_and_goto_ptr();
            |--> ptr = tcg_temp_ebb_new_ptr();
            |--> gen_helper_lookup_tb_ptr(ptr, tcg_env);
            |--> tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
            |--> tcg_temp_free_ptr(ptr);
        |--> ctx->base.is_jmp = DISAS_NORETURN;

    gen_set_label(jumpLabel); // 跳转标记
        |--> l->present = 1;
        |--> tcg_gen_op1(INDEX_op_set_label, label_arg(l));

    gen_goto_tb(ctx, 0, ctx->base.pc_next + address * 2); // ctx->base.pc_next + address * 2 = 0x80001316
        |--> gen_save_pc(dest);
            |--> tcg_gen_movi_tl(cpu_PC, pc);
                |--> tcg_gen_mov_i32(ret, tcg_constant_i32(arg));
        |--> tcg_gen_lookup_and_goto_ptr();

2.1.4.停止翻译 - tricore_tr_tb_stop()

// CallStack
|--> translator_loop()
    |--> gen_tb_start()
    |--> ops->insn_start()
        |--> tricore_tr_insn_start()
    |--> ops->translate_insn()
        |--> tricore_tr_translate_insn()
    |--> ops->tb_stop()
        |--> tricore_tr_tb_stop()
-------------------------------------------------------------------------------------------------------------------------------------------------

// target/tricore/translate.c
static void tricore_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *ctx = container_of(dcbase, DisasContext, base);

    switch (ctx->base.is_jmp) { // ctx->base.is_jmp = DISAS_NORETURN
    case DISAS_TOO_MANY:
        gen_goto_tb(ctx, 0, ctx->base.pc_next);
        break;
    case DISAS_EXIT_UPDATE:
        gen_save_pc(ctx->base.pc_next);
        /* fall through */
    case DISAS_EXIT:
        tcg_gen_exit_tb(NULL, 0);
        break;
    case DISAS_JUMP:
        tcg_gen_lookup_and_goto_ptr();
        break;
    case DISAS_NORETURN:
        break;
    default:
        g_assert_not_reached();
    }
}

2.1.5.插入一条 TB 块退出指令 - gen_tb_end()

生成一条退出当前 TB 的指令

// CallStack
|--> translator_loop()
    |--> gen_tb_start()
    |--> ops->insn_start()
    |--> ops->translate_insn()
    |--> ops->tb_stop()
    |--> gen_tb_end()
-------------------------------------------------------------------------------------------------------------------------------------------------

// accel/tcg/translator.c
static void gen_tb_end(const TranslationBlock *tb, uint32_t cflags,
                       TCGOp *icount_start_insn, int num_insns)
{
    ...
    if (tcg_ctx->exitreq_label) {
        gen_set_label(tcg_ctx->exitreq_label);
            |--> l->present = 1;
            |--> tcg_gen_op1(INDEX_op_set_label, label_arg(l));

        tcg_gen_exit_tb(tb, TB_EXIT_REQUESTED);
            |--> uintptr_t val = (uintptr_t)tcg_splitwx_to_rx((void *)tb) + idx; // idx = 0x3, val = 0x7fffb0142683
                |--> rw += tcg_splitwx_diff; // rw = 0x7fffb0142680, tcg_splitwx_diff = 0x0
            |--> tcg_gen_op1i(INDEX_op_exit_tb, val); // val = 0x7fffb0142683
                |--> tcg_gen_op1(opc, a1);

2.2.中间码翻译为宿主机指令 - tcg_gen_code()

// CallStack
cpu_exec_loop()
    |--> tb_gen_code()
        |--> setjmp_gen_code()
            |--> gen_intermediate_code()
            |--> tcg_gen_code()
-------------------------------------------------------------------------------------------------------

// tcg/tcg.c
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;
    ...

    tcg_optimize(s); // 指令优化

    reachable_code_pass(s); // 删除不可达的指令
    liveness_pass_0(s); // 指令生命周期调整
    liveness_pass_1(s);
    ...

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s); // 为 TCG 临时变量分配内存

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;
    s->data_gen_ptr = NULL;
    ...

    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    tcg_out_tb_start(s);

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) { // 将中间码翻译为宿主机指令
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            ...
    }
    ...
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }
    ...

    return tcg_current_code_size(s); // 中间码翻译为宿主机指令结束
}

3.TB 查找 - tb_lookup()

检查 TB 块是否已经翻译过,如果是则直接调用缓存中已翻译的 TB 块,避免重复翻译,提升运行速度

// accel/tcg/cpu-exec.c
static inline TranslationBlock *tb_lookup(CPUState *cpu, vaddr pc, uint64_t cs_base,
                                        uint32_t flags, uint32_t cflags)
{
    ...

    hash = tb_jmp_cache_hash_func(pc);
    jc = cpu->tb_jmp_cache;

    if (cflags & CF_PCREL) {
        ...
    } else {
        /* Use rcu_read to ensure current load of pc from *tb. */
        tb = qatomic_rcu_read(&jc->array[hash].tb);

        if (likely(tb &&
                   tb->pc == pc &&
                   tb->cs_base == cs_base &&
                   tb->flags == flags &&
                   tb_cflags(tb) == cflags)) {
            return tb;
        }
        tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
        ...
        /* Use the pc value already stored in tb->pc. */
        qatomic_set(&jc->array[hash].tb, tb);
    }

    return tb;
}
-------------------------------------------------------------------------

// accel/tcg/cpu-exec.c
static TranslationBlock *tb_htable_lookup(CPUState *cpu, vaddr pc,
                                          uint64_t cs_base, uint32_t flags,
                                          uint32_t cflags)
{
    tb_page_addr_t phys_pc;
    struct tb_desc desc;
    uint32_t h;

    desc.env = cpu_env(cpu);
    desc.cs_base = cs_base;
    desc.flags = flags;
    desc.cflags = cflags;
    desc.pc = pc;
    phys_pc = get_page_addr_code(desc.env, pc);
    ...
    desc.page_addr0 = phys_pc;
    h = tb_hash_func(phys_pc, pc,
                     flags, cs_base, cflags);
    return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
}

4.指令执行 - tcg_qemu_tb_exec()

4.1.TCG 执行

4.1.1.计算地址偏移 - tcg_splitwx_diff

buf_rx 和 buf_rw 这两块 Buffer 在 TCG 加速器创建时分配,用于存储翻译后的代码

这两块 Buffer 使用 mmap(... MAP_SHARED ...) 映射至同一共享内存 mfd,因此 buf_rx 和 buf_rw 中的内容保持一致

tcg_init() // tcg/tcg.c
    |--> tcg_context_init(max_cpus);
    |--> tcg_region_init(tb_size, splitwx, max_cpus); // tcg/region.c
        |--> have_prot = alloc_code_gen_buffer(tb_size, splitwx, &error_fatal); // tcg/region.c
            |--> prot = alloc_code_gen_buffer_splitwx(size, errp); // tcg/region.c
                |--> return alloc_code_gen_buffer_splitwx_memfd(size, errp);
------------------------------------------------------------------------------------------------------

// tcg/region.c
static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
{
    void *buf_rw = NULL, *buf_rx = MAP_FAILED;
    int fd = -1;

    buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
        |--> int mfd = qemu_memfd_create(name, size, false, 0, seals, NULL);
            |--> mfd = memfd_create(name, flags);
        |--> ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0); // buf_rw = ptr,权限为 PROT_READ、PROT_WRITE,大小 size = 0x40000000(1GB)
        |--> *fd = mfd;
    ...
    buf_rx = mmap(NULL, size, host_prot_read_exec(), MAP_SHARED, fd, 0); // 使用 MAP_SHARED 映射同一个 fd,通过 buf_rw 写入的内容,在 buf_rx 中同步可见
        |--> host_prot_read_exec() --> return PROT_READ | PROT_EXEC;
    ...
    tcg_splitwx_diff = buf_rx - buf_rw;

4.1.2.指令执行 - TCG 对应的 tcg_qemu_tb_exec

tcg_qemu_tb_exec 是一个函数指针,在 tcg_prologue_init() 中将其赋值为 tcg_splitwx_to_rx(s->code_ptr) 的返回值,即 code_ptr 在可读可执行内存 buf_rx 中对应的地址

tcg_splitwx_to_rx() 将传入的 Buffer 指针加上 tcg_splitwx_diff,从而将该指针由指向可读可写的内存 buf_rw,变为指向可读可执行的内存 buf_rx

由于 buf_rx 具有可执行权限 x,因此通过函数指针 tcg_qemu_tb_exec 发起调用时,宿主机 CPU 会执行该指针指向内存中的内容(从新指针指向的位置处开始执行)

cpu_exec_loop() // accel/tcg/cpu-exec.c
    |--> cpu_loop_exec_tb(cpu, tb, pc, &last_tb, &tb_exit) // accel/tcg/cpu-exec.c
        |--> cpu_tb_exec(cpu, tb, tb_exit) // accel/tcg/cpu-exec.c
            |--> const void *tb_ptr = itb->tc.ptr;
            |--> ret = tcg_qemu_tb_exec(env, tb_ptr);
------------------------------------------------------------------------------------------------------

// tcg/tcg.c
tcg_prologue_fn *tcg_qemu_tb_exec;
------------------------------------------------------------------------------------------------------

// include/tcg/tcg.h
typedef uintptr_t tcg_prologue_fn(CPUArchState *env, const void *tb_ptr); // tcg_prologue_fn 是一个函数类型,下面的 tcg_qemu_tb_exec 才是指向该类型函数的指针
extern tcg_prologue_fn *tcg_qemu_tb_exec;
------------------------------------------------------------------------------------------------------

// tcg/tcg.c
void tcg_prologue_init(void)
{
    TCGContext *s = tcg_ctx;
    ...
    
    s->code_ptr = s->code_gen_ptr; // s->code_ptr = 0x7fffb0000000
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
------------------------------------------------------------------------------------------------------

// tcg/region.c
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff; // tcg_splitwx_diff = 0xffffffffc0000000
    }
    return rw; // rw = 0x7fff70000000
}

4.2.TCI 执行 - TCI 对应的 tcg_qemu_tb_exec

编译时选择 --enable-tcg-interpreter 会打开 TCG 解释器,此时 tcg_qemu_tb_exec 调用如下函数:

// tcg/tci.c
uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
                                            const void *v_tb_ptr)
{
    const uint32_t *tb_ptr = v_tb_ptr; // 0x7fffb0142740
    ...
    for (;;) {
        ...
        insn = *tb_ptr++;
        opc = extract32(insn, 0, 8);

        switch (opc) {
        ...

下面为 80001386 这条指令,经过翻译得到的 TB 块的执行流程

insn = 0xfff0e40d
opc = INDEX_op_ld_i32
...
        case INDEX_op_ld_i32:
        CASE_64(ld32u)
            tci_args_rrs(insn, &r0, &r1, &ofs); // r0 = TCG_REG_R4 = 4,r1 = TCG_REG_R14 = 14,ofs = 0xfffffff0
            ptr = (void *)(regs[r1] + ofs);
            regs[r0] = *(uint32_t *)ptr;
            break;
--------------------------------------------------------------------------------------------------------------------

insn = 0x5c2
opc = INDEX_op_tci_movi
...
        case INDEX_op_tci_movi:
            tci_args_ri(insn, &r0, &t1); // r0 = TCG_REG_R5, t1 = 0x0
            regs[r0] = t1;
            break;
--------------------------------------------------------------------------------------------------------------------

insn = 0x254d06
opc = INDEX_op_setcond_i32
...
        case INDEX_op_setcond_i32:
            tci_args_rrrc(insn, &r0, &r1, &r2, &condition); // r0 = TCG_REG_R13, r1 = TCG_REG_R4, r2 = TCG_REG_R5, condition = TCG_COND_LT
            regs[r0] = tci_compare32(regs[r1], regs[r2], condition);
                |--> int32_t i0 = u0; // i0 = 0x0
                |--> int32_t i1 = u1; // i1 = 0x0
                |--> result = (i0 < i1); // result = 0x0
                |--> return result;
            break;
--------------------------------------------------------------------------------------------------------------------

insn = 0x3cd26
opc = INDEX_op_brcond_i32
...
        case INDEX_op_brcond_i32:
            tci_args_rl(insn, tb_ptr, &r0, &ptr); // tb_ptr = 0x7fffb0142750, r0 = TCG_REG_R13, ptr = 0x7fffb014278c
            if ((uint32_t)regs[r0]) {
                tb_ptr = ptr;
            }
            break;
--------------------------------------------------------------------------------------------------------------------

insn = 0x14c2
opc = INDEX_op_tci_movi
...
        case INDEX_op_tci_movi:
            tci_args_ri(insn, &r0, &t1); // r0 = TCG_REG_R4, t1 = 0x1
            regs[r0] = t1;
            break;
--------------------------------------------------------------------------------------------------------------------

insn = 0xfff4e40e
opc = INDEX_op_st8_i32
...
        CASE_32_64(st8)
            tci_args_rrs(insn, &r0, &r1, &ofs); // r0 = TCG_REG_R4, r1 = TCG_REG_R14, ofs = 0xfffffff4
            ptr = (void *)(regs[r1] + ofs); // regs[r1] = env = 0x5555574ec430, regs[r1] + ofs = 0x5555574ec424
            *(uint8_t *)ptr = regs[r0];
            break;
--------------------------------------------------------------------------------------------------------------------

insn = 0x48e50d
opc = INDEX_op_ld_i32
...
        CASE_64(ld32u)
            tci_args_rrs(insn, &r0, &r1, &ofs); // r0 = TCG_REG_R5, r1 = TCG_REG_R14, ofs = 0x48
            ptr = (void *)(regs[r1] + ofs); // regs[r1] = 0x5555574ec430, env = 0x5555574ec430,regs[r1] + ofs = &env->gpr_d[2] = 0x5555574ec478
            regs[r0] = *(uint32_t *)ptr; // regs[r0] = 0x5, 即从d[2]寄存器中读取数据 --> 0x5
            break;
--------------------------------------------------------------------------------------------------------------------

insn = 0x345d06
opc = INDEX_op_setcond_i32
...
        case INDEX_op_setcond_i32:
            tci_args_rrrc(insn, &r0, &r1, &r2, &condition); // r0 = TCG_REG_R13, r1 = TCG_REG_R5, r2 = TCG_REG_R4, condition = TCG_COND_GE
            regs[r0] = tci_compare32(regs[r1], regs[r2], condition); // regs[r1] = 0x5, regs[r2] = 0x1, condition = TCG_COND_GE --> regs[r0] = 0x1
            break;
--------------------------------------------------------------------------------------------------------------------

insn = 0x14d26
opc = INDEX_op_brcond_i32
...
        case INDEX_op_brcond_i32:
            tci_args_rl(insn, tb_ptr, &r0, &ptr); // tb_ptr = 0x7fffb0142764, r0 = TCG_REG_R13, ptr = 0x7fffb0142778
            if ((uint32_t)regs[r0]) { // regs[r0] = 0x1
                tb_ptr = ptr;
            }
            break;
--------------------------------------------------------------------------------------------------------------------

insn = 0x2c4c3
opc = INDEX_op_tci_movl
...
        case INDEX_op_tci_movl:
            tci_args_rl(insn, tb_ptr, &r0, &ptr); // tb_ptr = 0x7fffb014277c, r0 = TCG_REG_R4, ptr = 0x7fffb01427a8
            regs[r0] = *(tcg_target_ulong *)ptr; // regs[r0] = 0xffffffff80001316
            break;
--------------------------------------------------------------------------------------------------------------------

insn = 0x9ce410
opc = INDEX_op_st_i32
...
        CASE_64(st32)
            tci_args_rrs(insn, &r0, &r1, &ofs); // r0 = TCG_REG_R4, r1 = TCG_REG_R14, ofs = 0x9c
            ptr = (void *)(regs[r1] + ofs); // regs[r1] = env = 0x5555574ec430, regs[r1] + ofs = &env->PC = 0x5555574ec4cc
            *(uint32_t *)ptr = regs[r0]; // regs[r0] = 0xffffffff80001316, *(uint32_t *)ptr = 0x80001316
            break;
--------------------------------------------------------------------------------------------------------------------

insn = 0xfe4d
opc = INDEX_op_st_i64
...
        case INDEX_op_st_i64:
            tci_args_rrs(insn, &r0, &r1, &ofs); // r0 = TCG_REG_R14, r1 = TCG_REG_R15, ofs = 0x0
            ptr = (void *)(regs[r1] + ofs); // regs[r1] = 0x7fffa05ff150, ofs = 0x0
            *(uint64_t *)ptr = regs[r0]; // regs[r0] = env =  0x5555574ec430, ptr = 0x7fffa05ff150
            break;
--------------------------------------------------------------------------------------------------------------------

insn = 0x8202
opc = INDEX_op_call
...
        case INDEX_op_call:
            {
                ...
                tci_args_nl(insn, tb_ptr, &len, &ptr); // tb_ptr = 0x7fffb0142788, len = 0x2, ptr = 0x7fffb0142790
                func = ((void **)ptr)[0]; // func = (void *) 0x5555557ea6dc <helper_lookup_tb_ptr>
                cif = ((void **)ptr)[1]; // cif = (ffi_cif *) 0x7fff980163d0

                n = cif->nargs; // n = 0x1
                for (i = s = 0; i < n; ++i) {
                    ffi_type *t = cif->arg_types[i];
                    call_slots[i] = &stack[s];
                    s += DIV_ROUND_UP(t->size, 8); // s = 0x1
                }

                /* Helper functions may need to access the "return address" */
                tci_tb_ptr = (uintptr_t)tb_ptr; // tb_ptr = 0x7fffb0142788, tci_tb_ptr = 0x7fffb0142788
                ffi_call(cif, func, stack, call_slots);
            }

            switch (len) {
            ...
            case 2: /* uint64_t */
                memcpy(&regs[TCG_REG_R0], stack, 8);
                break;
--------------------------------------------------------------------------------------------------------------------

insn = 0x84
opc = INDEX_op_goto_ptr
...
        case INDEX_op_goto_ptr:
            tci_args_r(insn, &r0); // r0 = TCG_REG_R0
            ptr = (void *)regs[r0]; // regs[TCG_REG_R0] = 0x7fffb0140200, ptr = 0x7fffb0140200, *(uint32_t *)ptr = 0xfff0e40d
            ...
            tb_ptr = ptr; // tb_ptr = 0x7fffb0140200
            break;
--------------------------------------------------------------------------------------------------------------------

insn = 0xfff0e40d
opc = INDEX_op_ld_i32
...
        CASE_64(ld32u)
            tci_args_rrs(insn, &r0, &r1, &ofs); // r0 = TCG_REG_R4, r1 = TCG_REG_R14, ofs = 0xfffffff0
            ptr = (void *)(regs[r1] + ofs); // regs[r1] = env = 0x5555574ec430, ptr = 0x5555574ec420
            regs[r0] = *(uint32_t *)ptr; // regs[r0]: 0xffffffff80001316 --> 0x0
            break;
--------------------------------------------------------------------------------------------------------------------

insn = 0x5c2
opc = INDEX_op_tci_movi
...
        case INDEX_op_tci_movi:
            tci_args_ri(insn, &r0, &t1); // r0 = TCG_REG_R5, t1 = 0x0
            regs[r0] = t1; // regs[r0] = 0x0
            break;
--------------------------------------------------------------------------------------------------------------------

insn = 0x254d06
opc = INDEX_op_setcond_i32
...
        case INDEX_op_setcond_i32:
            tci_args_rrrc(insn, &r0, &r1, &r2, &condition); // r0 = TCG_REG_R13, r1 = TCG_REG_R4, r2 = TCG_REG_R5, condition = TCG_COND_LT
            regs[r0] = tci_compare32(regs[r1], regs[r2], condition); // regs[r1] = 0x0, regs[r2] = 0x0, regs[r0] = 0x0
            break;
--------------------------------------------------------------------------------------------------------------------

insn = 0x34d26
opc = INDEX_op_brcond_i32
...
        case INDEX_op_brcond_i32:
            tci_args_rl(insn, tb_ptr, &r0, &ptr); // tb_ptr = 0x7fffb0140210, r0 = TCG_REG_R13, ptr = 0x7fffb0140244
            if ((uint32_t)regs[r0]) { // regs[r0] = 0x0
                tb_ptr = ptr;
            }
            break;
--------------------------------------------------------------------------------------------------------------------

insn = 0x14c2
opc = INDEX_op_tci_movi
...
        case INDEX_op_tci_movi:
            tci_args_ri(insn, &r0, &t1); // r0 = TCG_REG_R4, t1 = 0x1
            regs[r0] = t1; // regs[r0] = 0x1
            break;
--------------------------------------------------------------------------------------------------------------------

insn = 0xfff4e40e
opc = INDEX_op_st8_i32
...
        CASE_32_64(st8)
            tci_args_rrs(insn, &r0, &r1, &ofs); // r0 = TCG_REG_R4, r1 = TCG_REG_R14, ofs = 0xfffffff4
            ptr = (void *)(regs[r1] + ofs); // regs[r1] = env = 0x5555574ec430, regs[r1] + ofs = 0x5555574ec424
            *(uint8_t *)ptr = regs[r0]; // regs[r0] = 0x1
            break;
--------------------------------------------------------------------------------------------------------------------

...

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值