1. 执行指令:
在“运行”章节中已经提到指令是在machine_run()中的cpus[i]->run_instr执行,而cpus[i]->run_instr又是在cpu_new的时候就注册为mips32_run_instr,因此实际执行指令的是mips32_run_instr。mips32_run_instr是在tmp_mips_tail.cc中结合cpu_dyntrans.cc定义的, tmp_mips_tail.cc是由generate_tail.cc自动生成的:
tmp_mips_tail.cc
#define DYNTRANS_RUN_INSTR_DEF mips_run_instr
#include "cpu_dyntrans.cc"
#undef DYNTRANS_RUN_INSTR_DEF
cpu_dyntrans.cc
int DYNTRANS_RUN_INSTR_DEF(struct cpu *cpu)
{
转换pc
DYNTRANS_PC_TO_POINTERS32(cpu);
执行指令
for (;;) {
struct DYNTRANS_IC *ic;
I; I; I; I; I; I; I; I; I; I;
I; I; I; I; I; I; I; I; I; I;
I; I; I; I; I; I; I; I; I; I;
I; I; I; I; I; I; I; I; I; I;
I; I; I; I; I; I; I; I; I; I;
I; I; I; I; I; I; I; I; I; I;
I; I; I; I; I; I; I; I; I; I;
I; I; I; I; I; I; I; I; I; I;
I; I; I; I; I; I; I; I; I; I;
I; I; I; I; I; I; I; I; I; I;
I; I; I; I; I; I; I; I; I; I;
I; I; I; I; I; I; I; I; I; I;
cpu->n_translated_instrs += 120;
if (cpu->n_translated_instrs >= N_SAFE_DYNTRANS_LIMIT)
break;
}
}
执行指令使用宏I,定义在cpu_dyntrans.cc:
#define I ic = cpu->cd.DYNTRANS_ARCH.next_ic ++; ic->f(cpu, ic);
其中DYNTRANS_ARCH根据不同的cpu架构而不同,对于mips,定义在tmp_mips_head.cc中
#define DYNTRANS_ARCH mips
mips的结构体类型是mips_cpu,定义在cpu_mips.h中,里面保存CPU的各种信息,包括通用寄存器、cache line信息等:
struct mips_cpu {
struct mips_cpu_type_def cpu_type;
/* General purpose registers: */
uint64_t gpr[N_MIPS_GPRS];
/* Dummy destination register when writing to the zero register: */
uint64_t scratch;
/* Special purpose registers: */
uint64_t hi;
uint64_t lo;
/* Coprocessors: */
struct mips_coproc *coproc[N_MIPS_COPROCS];
uint64_t cop0_config_select1;
int last_written_tlb_index;
/* Count/compare timer: */
int compare_register_set;
int compare_interrupts_pending;
struct interrupt irq_compare;
struct timer *timer;
int rmw; /* Read-Modify-Write */
uint64_t rmw_len; /* Length of rmw modification */
uint64_t rmw_addr; /* Address of rmw modification */
/*
* NOTE: The R5900 has 128-bit registers. I'm not really sure
* whether they are used a lot or not, at least with code produced
* with gcc they are not. An important case however is lq and sq
* (load and store of 128-bit values). These "upper halves" of R5900
* quadwords can be used in those cases.
*
* hi1 and lo1 are the high 64-bit parts of the hi and lo registers.
* sa is a 32-bit "shift amount" register.
*
* TODO: Generalize this.
*/
uint64_t gpr_quadhi[N_MIPS_GPRS];
uint64_t hi1;
uint64_t lo1;
uint32_t r5900_sa;
/*
* Data and Instruction caches:
*/
/* Cache sizes: (1 << x) x=0 for default values */
/* This is legacy stuff. TODO: Clean up! */
int cache_picache;
int cache_pdcache;
int cache_secondary;
int cache_picache_linesize;
int cache_pdcache_linesize;
int cache_secondary_linesize;
unsigned char *cache[2];
void *cache_tags[2];
uint64_t cache_last_paddr[2];
int cache_size[2];
int cache_linesize[2];
int cache_mask[2];
/*
* Instruction translation cache and Virtual->Physical->Host
* address translation:
*/
DYNTRANS_ITC(mips)
VPH_TLBS(mips,MIPS)
VPH32(mips,MIPS)
VPH64(mips,MIPS)
};
next_ic由宏DYNTRANS_ITC(Dyntrans "Instruction Translation Cache")产生,定义在cpu.h内:
#define DYNTRANS_ITC(arch) struct arch ## _tc_physpage *cur_physpage; \
struct arch ## _instr_call *cur_ic_page; \
struct arch ## _instr_call *next_ic; \
struct arch ## _tc_physpage *physpage_template;\
void (*combination_check)(struct cpu *, \
struct arch ## _instr_call *, int low_addr);
2.指令load并执行的基本流程:
2.1 cpu_new的时候调用mips_cpu_init_tables 分配physpage_template, 并将其指令函数初始化为TO_BE_TRANSLATED
cpu.cc cpu_new()
{
fp->init_tables(cpu);
}
init_tables在CPU_FAMILY_INIT的时候已经注册为mips_cpu_init_tables, mips_cpu_init_tables由tmp_mips_tail.cc和cpu_dyntrans.cc共同产生:
tmp_mips_tail.cc
#define DYNTRANS_INIT_TABLES mips_cpu_init_tables
#include "cpu_dyntrans.cc"
#undef DYNTRANS_INIT_TABLES
void DYNTRANS_INIT_TABLES(struct cpu *cpu)
{
struct DYNTRANS_TC_PHYSPAGE *ppp;
分配template page
CHECK_ALLOCATION(ppp =
(struct DYNTRANS_TC_PHYSPAGE *) malloc(sizeof(struct DYNTRANS_TC_PHYSPAGE)));
//将指令函数全部指向TO_BE_TRANSLATED
for (i=0; i<DYNTRANS_IC_ENTRIES_PER_PAGE; i++)
ppp->ics[i].f = TO_BE_TRANSLATED;
cpu->cd.DYNTRANS_ARCH.physpage_template = ppp;
}
tmp_mips_head.cc中定义了:
#define DYNTRANS_TC_PHYSPAGE mips_tc_physpage
cpu.h的宏DYNTRANS_MISC_DECLARATIONS在cpu_mips.h中定义了mips_tc_physpage结构体
cpu_mips.h
DYNTRANS_MISC_DECLARATIONS(mips,MIPS,uint64_t)
cpu.h
#define DYNTRANS_MISC_DECLARATIONS(arch,ARCH,addrtype) struct \
arch ## _instr_call { \
void (*f)(struct cpu *, struct arch ## _instr_call *); \
size_t arg[ARCH ## _N_IC_ARGS]; \
}; \
\
/* Translation cache struct for each physical page: */ \
struct arch ## _tc_physpage { \
struct arch ## _instr_call ics[ARCH ## _IC_ENTRIES_PER_PAGE+2];\
uint32_t next_ofs; /* (0 for end of chain) */ \
uint32_t translations_bitmap; \
uint32_t translation_ranges_ofs; \
addrtype physaddr; \
}; \
\
struct arch ## _vpg_tlb_entry { \
uint8_t valid; \
uint8_t writeflag; \
addrtype vaddr_page; \
addrtype paddr_page; \
unsigned char *host_page; \
};
展开得到
struct mips_tc_physpage {
struct mips_instr_call ics[MIPS_IC_ENTRIES_PER_PAGE+2];
uint32_t next_ofs; /* (0 for end of chain) */
uint32_t translations_bitmap;
uint32_t translation_ranges_ofs;
uint64_t physaddr;
};
2.2 mips_run_instr的时候调用mips32_pc_to_pointers,它会根据pc查看是否存在对应的phys_page;如果不存在,会alloc,并以physpage_template的值填充phys_page,同时通过mips32_update_translation_table把指令码load到host_load内;最后将next_ic和cur_ic_page指向对应的phys_page
在tmp_mips_tail.cc中有定义:
#define DYNTRANS_PC_TO_POINTERS_FUNC mips32_pc_to_pointers
结合cpu_dyntrans.cc中的定义可以看到pc to pointers的流程
void DYNTRANS_PC_TO_POINTERS_FUNC(struct cpu *cpu)
{
//获取PC所在的phys page
cached_pc = cpu->pc;
index = DYNTRANS_ADDR_TO_PAGENR(cached_pc);
ppp = cpu->cd.DYNTRANS_ARCH.phys_page[index];
phys_page存在,直接赋给next_ic
if (ppp != NULL)
goto have_it;
//phys_page不存在时,重新load
DYNTRANS_PC_TO_POINTERS_GENERIC(cpu);
将next_ic指向phys page
have_it:
cpu->cd.DYNTRANS_ARCH.cur_ic_page = &ppp->ics[0];
cpu->cd.DYNTRANS_ARCH.next_ic = cpu->cd.DYNTRANS_ARCH.cur_ic_page +
DYNTRANS_PC_TO_IC_ENTRY(cached_pc);
}
void DYNTRANS_PC_TO_POINTERS_GENERIC(struct cpu *cpu)
{
//转换为物理地址
cached_pc = cpu->pc;
vaddr = cached_pc;
ok = cpu->translate_v2p( cpu, vaddr, &paddr, FLAG_INSTR);
转换为emul host memory地址
physaddr = paddr;
unsigned char *host_page = memory_paddr_to_hostaddr(cpu->mem, physaddr, MEM_READ);
如果host_page存在(不为NULL),调用update_translation_table,将指令码所在的host_page登记到host_load内
if (host_page != NULL) {
cpu->update_translation_table(cpu, cached_pc & ~q, host_page, 0, physaddr);
}
寻找是否有可用phys_page
physpage_entryp = &(((uint32_t *)cpu->translation_cache)[table_index]);
physpage_ofs = *physpage_entryp;
ppp = NULL;
/* Traverse the physical page chain: */
while (physpage_ofs != 0) {
}
没有找到,alloc phys_page
if (physpage_ofs == 0) {
DYNTRANS_TC_ALLOCATE(cpu, physaddr);
}
}
从tmp_mips_head.cc和tmp_mips_tail.cc 中可以看到DYNTRANS_TC_ALLOCATE和DYNTRANS_TC_ALLOCATE_DEFAULT_PAGE_DEF都被定义为mips_tc_allocate_default_page
static void DYNTRANS_TC_ALLOCATE_DEFAULT_PAGE_DEF(struct cpu *cpu,
uint64_t physaddr)
{
struct DYNTRANS_TC_PHYSPAGE *ppp;
分配phys_page
ppp = (struct DYNTRANS_TC_PHYSPAGE *)(cpu->translation_cache
+ cpu->translation_cache_cur_ofs);
将physpage_template的内容(TO_BE_TRANSLATED)赋给新的page,因此一开始访问所有next_ic都是TO_BE_TRANSLATED
/* Copy the entire template page first: */
memcpy(ppp, cpu->cd.DYNTRANS_ARCH.physpage_template, sizeof(
struct DYNTRANS_TC_PHYSPAGE));
ppp->physaddr = physaddr & ~(DYNTRANS_PAGESIZE - 1);
cpu->translation_cache_cur_ofs += sizeof(struct DYNTRANS_TC_PHYSPAGE);
cpu->translation_cache_cur_ofs --;
cpu->translation_cache_cur_ofs |= 63;
cpu->translation_cache_cur_ofs ++;
}
2.3 执行指令的时候呼叫到I,因此会执行next_ic 内的f,也就是TO_BE_TRANSLATED == instr(to_be_translated)
ic = cpu->cd.DYNTRANS_ARCH.next_ic ++; ic->f(cpu, ic);
从上面的分析可以知道一开始的ic->f都是instr(to_be_translated)
2.4 执行instr(to_be_translated), 会根据pc将指令码从host_load中取出来,分析指令码,翻译为对应的指令函数,并执行该指令函数
在tmp_mips_head.cc中分别定义了:
#define instr(n) mips_instr_ ## n
#define X(n) void mips_instr_ ## n(struct cpu *cpu, struct mips_instr_call *ic)
因此instr(to_be_translated) 在cpu_mips_instr.cc中定义
X(to_be_translated)
{
从pc转换为页内地址addr
addr = cpu->pc & ~((MIPS_IC_ENTRIES_PER_PAGE-1) << MIPS_INSTR_ALIGNMENT_SHIFT);
addr += (low_pc << MIPS_INSTR_ALIGNMENT_SHIFT);
cpu->pc = (MODE_int_t)addr;
addr &= ~((1 << MIPS_INSTR_ALIGNMENT_SHIFT) - 1);
从页内addr读出指令码
page = cpu->cd.mips.host_load[(uint32_t)addr >> 12];
memcpy(ib, page + (addr & 0xffc), sizeof(ib));
uint32_t *p = (uint32_t *) ib;
iword = *p;
分析指令码并转化为对应的指令函数
main_opcode = iword >> 26;
rs = (iword >> 21) & 31;
rt = (iword >> 16) & 31;
rd = (iword >> 11) & 31;
sa = (iword >> 6) & 31;
imm = (int16_t)iword;
s6 = iword & 63;
s10 = (rs << 5) | sa;
switch (main_opcode){
将转换的指令函数替换to_be_translated
case …: ic->f = instr(sll);
}
连续转换128条指令码为指令函数
cpu->translation_readahead = MAX_DYNTRANS_READAHEAD;
while (DYNTRANS_ADDR_TO_PAGENR(baseaddr +(i << DYNTRANS_INSTR_ALIGNMENT_SHIFT)) == pagenr && cpu->translation_readahead > 0) {
void (*old_f)(struct cpu *,
struct DYNTRANS_IC *) = ic[i].f;
/* Already translated? Then abort: */
if (old_f != TO_BE_TRANSLATED)
break;
/* Translate the instruction: */
ic[i].f(cpu, ic+i);
/* Translation failed? Then abort. */
if (ic[i].f == old_f)
break;
cpu->translation_readahead --;
++i;
}
cpu->translation_readahead = 0;
最后执行一条指令
ic->f(cpu, ic);
}
从上面可见,执行一个I时会连续转换最多128条指令码,然后才会执行下一个I;此时下一个I已经是指令函数,不用再转换。直到被转换的指令用完,或者是发生跳转到尚未转换的指令上,才会又发生指令码到指令函数的转换