GXemul 运行指令原理简述

1. 执行指令:

“运行”章节中已经提到指令是在machine_run()中的cpus[i]->run_instr执行,而cpus[i]->run_instr又是在cpu_new的时候就注册为mips32_run_instr,因此实际执行指令的是mips32_run_instr。mips32_run_instr是在tmp_mips_tail.cc中结合cpu_dyntrans.cc定义的, tmp_mips_tail.cc是由generate_tail.cc自动生成的:

tmp_mips_tail.cc
#define DYNTRANS_RUN_INSTR_DEF mips_run_instr
#include "cpu_dyntrans.cc"
#undef DYNTRANS_RUN_INSTR_DEF
 
cpu_dyntrans.cc
int DYNTRANS_RUN_INSTR_DEF(struct cpu *cpu)
{
    转换pc
    DYNTRANS_PC_TO_POINTERS32(cpu);
                                          
    执行指令
    for (;;) {
        struct DYNTRANS_IC *ic;
 
         I; I; I; I; I;   I; I; I; I; I;
         I; I; I; I; I;   I; I; I; I; I;
         I; I; I; I; I;   I; I; I; I; I;
         I; I; I; I; I;   I; I; I; I; I;
         I; I; I; I; I;   I; I; I; I; I;
 
         I; I; I; I; I;   I; I; I; I; I;
 
         I; I; I; I; I;   I; I; I; I; I;
         I; I; I; I; I;   I; I; I; I; I;
         I; I; I; I; I;   I; I; I; I; I;
         I; I; I; I; I;   I; I; I; I; I;
         I; I; I; I; I;   I; I; I; I; I;
              
         I; I; I; I; I;   I; I; I; I; I;
 
         cpu->n_translated_instrs += 120;
        if (cpu->n_translated_instrs >= N_SAFE_DYNTRANS_LIMIT)
            break;
    }
}

执行指令使用宏I,定义在cpu_dyntrans.cc:

#define I       ic = cpu->cd.DYNTRANS_ARCH.next_ic ++; ic->f(cpu, ic);

其中DYNTRANS_ARCH根据不同的cpu架构而不同,对于mips,定义在tmp_mips_head.cc中

#define DYNTRANS_ARCH mips

mips的结构体类型是mips_cpu定义在cpu_mips.h中,里面保存CPU的各种信息包括通用寄存器, cache line信息等:

struct mips_cpu {
              struct mips_cpu_type_def cpu_type;
 
              /*  General purpose registers:  */
              uint64_t        gpr[N_MIPS_GPRS];
 
              /*  Dummy destination register when writing to the zero register:  */
              uint64_t        scratch;
 
              /*  Special purpose registers:  */
              uint64_t        hi;
              uint64_t        lo;
 
              /*  Coprocessors:  */
              struct mips_coproc *coproc[N_MIPS_COPROCS];
              uint64_t        cop0_config_select1;
 
              int                      last_written_tlb_index;
 
              /*  Count/compare timer:  */
              int                      compare_register_set;
              int                      compare_interrupts_pending;
              struct interrupt irq_compare;
              struct timer              *timer;
 
              int                      rmw;                /*  Read-Modify-Write  */
              uint64_t        rmw_len;      /*  Length of rmw modification  */
              uint64_t        rmw_addr;   /*  Address of rmw modification  */
 
              /*
               *  NOTE:  The R5900 has 128-bit registers. I'm not really sure
               *  whether they are used a lot or not, at least with code produced
               *  with gcc they are not. An important case however is lq and sq
               *  (load and store of 128-bit values). These "upper halves" of R5900
               *  quadwords can be used in those cases.
               *
               *  hi1 and lo1 are the high 64-bit parts of the hi and lo registers.
               *  sa is a 32-bit "shift amount" register.
               *
               *  TODO:  Generalize this.
               */
              uint64_t        gpr_quadhi[N_MIPS_GPRS];
              uint64_t        hi1;
              uint64_t        lo1;
              uint32_t        r5900_sa;
 
 
              /*
               *  Data and Instruction caches:
               */
 
              /*  Cache sizes: (1 << x) x=0 for default values  */
              /*  This is legacy stuff. TODO: Clean up!  */
              int                      cache_picache;
              int                      cache_pdcache;
              int                      cache_secondary;
              int                      cache_picache_linesize;
              int                      cache_pdcache_linesize;
              int                      cache_secondary_linesize;
 
              unsigned char          *cache[2];
              void                  *cache_tags[2];
              uint64_t        cache_last_paddr[2];
              int                      cache_size[2];
              int                      cache_linesize[2];
              int                      cache_mask[2];
 
              /*
               *  Instruction translation cache and Virtual->Physical->Host
               *  address translation:
               */
              DYNTRANS_ITC(mips)
              VPH_TLBS(mips,MIPS)
              VPH32(mips,MIPS)
              VPH64(mips,MIPS)
};

next_ic由宏DYNTRANS_ITC(Dyntrans "Instruction Translation Cache")产生,定义在cpu.h内:

#define DYNTRANS_ITC(arch)    struct arch ## _tc_physpage *cur_physpage;  \
                                                        struct arch ## _instr_call  *cur_ic_page;   \
                                                        struct arch ## _instr_call  *next_ic;       \
                                                        struct arch ## _tc_physpage *physpage_template;\
                                                        void (*combination_check)(struct cpu *,     \
                                                            struct arch ## _instr_call *, int low_addr);

2.指令load并执行的基本流程:

2.1 cpu_new的时候调用mips_cpu_init_tables 分配physpage_template, 并将其指令函数初始化为TO_BE_TRANSLATED

cpu.cc cpu_new()
{
    fp->init_tables(cpu);
}

init_tables在CPU_FAMILY_INIT的时候已经注册为mips_cpu_init_tables, mips_cpu_init_tables由tmp_mips_tail.cc和cpu_dyntrans.cc共同产生:

tmp_mips_tail.cc
                            #define DYNTRANS_INIT_TABLES mips_cpu_init_tables
#include "cpu_dyntrans.cc"
#undef DYNTRANS_INIT_TABLES
              void DYNTRANS_INIT_TABLES(struct cpu *cpu)
              {
                            struct DYNTRANS_TC_PHYSPAGE *ppp;
                            分配template page
                            CHECK_ALLOCATION(ppp =
                                    (struct DYNTRANS_TC_PHYSPAGE *) malloc(sizeof(struct DYNTRANS_TC_PHYSPAGE)));
                            //将指令函数全部指向TO_BE_TRANSLATED
                            for (i=0; i<DYNTRANS_IC_ENTRIES_PER_PAGE; i++)
                                          ppp->ics[i].f = TO_BE_TRANSLATED;
 
                            cpu->cd.DYNTRANS_ARCH.physpage_template = ppp;
}

tmp_mips_head.cc中定义了:
              #define DYNTRANS_TC_PHYSPAGE mips_tc_physpage
              
cpu.h的宏DYNTRANS_MISC_DECLARATIONS在cpu_mips.h中定义了mips_tc_physpage结构体
              cpu_mips.h
                  DYNTRANS_MISC_DECLARATIONS(mips,MIPS,uint64_t)
              cpu.h
              #define DYNTRANS_MISC_DECLARATIONS(arch,ARCH,addrtype)  struct \
                  arch ## _instr_call {                                                                  \
                            void    (*f)(struct cpu *, struct arch ## _instr_call *); \
                            size_t               arg[ARCH ## _N_IC_ARGS];                                    \
                  };                                                                                                             \
                                                                                                                              \
                  /*  Translation cache struct for each physical page:  */           \
                  struct arch ## _tc_physpage {                                                         \
                            struct arch ## _instr_call ics[ARCH ## _IC_ENTRIES_PER_PAGE+2];\
                            uint32_t        next_ofs;       /*  (0 for end of chain)  */ \
                            uint32_t        translations_bitmap;                                    \
                            uint32_t        translation_ranges_ofs;                              \
                            addrtype       physaddr;                                                \
                  };                                                                                                             \
                                                                                                                              \
                  struct arch ## _vpg_tlb_entry {                                                                   \
                            uint8_t                         valid;                                                          \
                            uint8_t                         writeflag;                                                \
                            addrtype       vaddr_page;                                                        \
                            addrtype       paddr_page;                                                       \
                            unsigned char          *host_page;                                                        \
                  };
 
展开得到
              struct mips_tc_physpage {                                                                 
                            struct mips_instr_call ics[MIPS_IC_ENTRIES_PER_PAGE+2];
                            uint32_t        next_ofs;       /*  (0 for end of chain)  */ 
                            uint32_t        translations_bitmap;                                    
                            uint32_t        translation_ranges_ofs;                              
                            uint64_t       physaddr;
              };


2.2 mips_run_instr的时候调用mips32_pc_to_pointers 会根据pc ,查看是否存在对应的phys_page,如果不存在,会alloc,并以physpage_template的值填充phys_page,同时通过mips32_update_translation_table把指令码load到host_load内,最后将next_ic和cur_ic_page指向对应的phys_page

在tmp_mips_tail.cc中有定义:

#define DYNTRANS_PC_TO_POINTERS_FUNC mips32_pc_to_pointers

结合cpu_dyntrans.cc中的定义可以看到pc to pointers的流程

void DYNTRANS_PC_TO_POINTERS_FUNC(struct cpu *cpu)
{
    //获取PC所在的phys page
    cached_pc = cpu->pc;
    index = DYNTRANS_ADDR_TO_PAGENR(cached_pc);
    ppp = cpu->cd.DYNTRANS_ARCH.phys_page[index];
    phys_page存在,直接赋给next_ic
    if (ppp != NULL)
        goto have_it;
 
    //phys_page不存在时,重新load
    DYNTRANS_PC_TO_POINTERS_GENERIC(cpu);
 
    将next_ic指向phys page
    have_it:
       cpu->cd.DYNTRANS_ARCH.cur_ic_page = &ppp->ics[0];
       cpu->cd.DYNTRANS_ARCH.next_ic = cpu->cd.DYNTRANS_ARCH.cur_ic_page +
       DYNTRANS_PC_TO_IC_ENTRY(cached_pc);
}
 
void DYNTRANS_PC_TO_POINTERS_GENERIC(struct cpu *cpu)
{
    //转换为物理地址
    cached_pc = cpu->pc;
    vaddr = cached_pc;
    ok = cpu->translate_v2p( cpu, vaddr, &paddr, FLAG_INSTR);

    转换为emul host memory地址
    physaddr = paddr;
    unsigned char *host_page = memory_paddr_to_hostaddr(cpu->mem, physaddr, MEM_READ);
 
    如果不存在,分配host memory block,并将指令码load到host_page内
    if (host_page != NULL) {
         cpu->update_translation_table(cpu, cached_pc & ~q, host_page, 0, physaddr);
    }
              
    寻找是否有可用phys_page
    physpage_entryp = &(((uint32_t *)cpu->translation_cache)[table_index]);
    physpage_ofs = *physpage_entryp;
    ppp = NULL;
    /*  Traverse the physical page chain:  */
    while (physpage_ofs != 0) {
                            
      }
      
    没有找到,alloc phys_page
    if (physpage_ofs == 0) {
        DYNTRANS_TC_ALLOCATE(cpu, physaddr);
    }
}

从tmp_mips_head.cc和tmp_mips_tail.cc 中可以看到DYNTRANS_TC_ALLOCATE和DYNTRANS_TC_ALLOCATE_DEFAULT_PAGE_DEF都被定义为mips_tc_allocate_default_page

static void DYNTRANS_TC_ALLOCATE_DEFAULT_PAGE_DEF(struct cpu *cpu,
              uint64_t physaddr)
{ 
              struct DYNTRANS_TC_PHYSPAGE *ppp;
              分配phys_page
              ppp = (struct DYNTRANS_TC_PHYSPAGE *)(cpu->translation_cache
                  + cpu->translation_cache_cur_ofs);
              
              将physpage_template的内容(TO_BE_TRANSLATED)赋给新的page,因此一开始访问所有next_ic都是TO_BE_TRANSLATED
              /*  Copy the entire template page first:  */
              memcpy(ppp, cpu->cd.DYNTRANS_ARCH.physpage_template, sizeof(
                  struct DYNTRANS_TC_PHYSPAGE));
 
              ppp->physaddr = physaddr & ~(DYNTRANS_PAGESIZE - 1);
 
              cpu->translation_cache_cur_ofs += sizeof(struct DYNTRANS_TC_PHYSPAGE);
 
              cpu->translation_cache_cur_ofs --;
              cpu->translation_cache_cur_ofs |= 63;
              cpu->translation_cache_cur_ofs ++;
}


2.3 执行指令的时候呼叫到I,因此会执行next_ic 内的f,也就是TO_BE_TRANSLATED == instr(to_be_translated)

ic = cpu->cd.DYNTRANS_ARCH.next_ic ++; ic->f(cpu, ic);

从上面的分析可以知道一开始的ic->f都是instr(to_be_translated)

2.4 执行instr(to_be_translated), 会根据pc将指令码从host_load中取出来,并分析指令码,翻译为对应的指令函数,并执行指令函数

在tmp_mips_head.cc中分别定义了:

#define instr(n) mips_instr_ ## n
#define X(n) void mips_instr_ ## n(struct cpu *cpu, struct mips_instr_call *ic)

因此instr(to_be_translated) 在cpu_mips_instr.cc中定义

X(to_be_translated)
{
    从pc转换为页内地址addr
    addr = cpu->pc & ~((MIPS_IC_ENTRIES_PER_PAGE-1) << MIPS_INSTR_ALIGNMENT_SHIFT);
    addr += (low_pc << MIPS_INSTR_ALIGNMENT_SHIFT);
    cpu->pc = (MODE_int_t)addr;
    addr &= ~((1 << MIPS_INSTR_ALIGNMENT_SHIFT) - 1);
                                                        
    从页内addr读出指令码
    page = cpu->cd.mips.host_load[(uint32_t)addr >> 12];
    memcpy(ib, page + (addr & 0xffc), sizeof(ib));
    uint32_t *p = (uint32_t *) ib;
    iword = *p;
 
    分析指令码并转化为对应的指令函数
    main_opcode = iword >> 26;
    rs = (iword >> 21) & 31;
    rt = (iword >> 16) & 31;
    rd = (iword >> 11) & 31;
    sa = (iword >>  6) & 31;
    imm = (int16_t)iword;
    s6 = iword & 63;
    s10 = (rs << 5) | sa;
 
    switch (main_opcode){
        将转换的指令函数替换to_be_translated
        case …: ic->f = instr(sll);
                                                                                    
    }
 
    连续转换128条指令码为指令函数
    cpu->translation_readahead = MAX_DYNTRANS_READAHEAD;
 
    while (DYNTRANS_ADDR_TO_PAGENR(baseaddr +(i << DYNTRANS_INSTR_ALIGNMENT_SHIFT)) == pagenr && cpu->translation_readahead > 0) {
         void (*old_f)(struct cpu *,
         struct DYNTRANS_IC *) = ic[i].f;
 
         /*  Already translated? Then abort:  */
         if (old_f != TO_BE_TRANSLATED)
              break;
 
         /*  Translate the instruction:  */
         ic[i].f(cpu, ic+i);
 
         /*  Translation failed? Then abort.  */
         if (ic[i].f == old_f)
             break;
 
         cpu->translation_readahead --;
         ++i;
    }
 
    cpu->translation_readahead = 0;
    最后执行一条指令
    ic->f(cpu, ic);
}
从上面可见执行一个I就会连续转换最多128条指令码才会执行下一个I,此时下一个I已经是指令函数,不用再转换,直到被转换的指令用完,或者是发生跳转到尚未转换的指令上,才会又发生指令码到指令函数的转换

转载于:https://my.oschina.net/lgl88911/blog/291467

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值