qemu TLB 表项与内存访问流程概述:qemu 访问内存时首先查询 TLB 是否命中,如果命中则直接通过 hva 地址访问并返回;如果没有命中,则通过 mmu 获取该虚拟地址所对应的物理地址,并对 tlb 进行填充;如果该虚拟地址没有对应的物理地址或者权限不够,cpu 就会产生 page_fault 异常。由于将内存访问完全翻译成 IR 会增加翻译难度并占用大量 code_buffer,qemu 在翻译内存访问操作时生成调用外部 helper 函数的中间码,由 qemu_ld/st_helpers 中的函数完成 target 虚拟地址到物理地址的转换。下面按照"TLB 查询 → mmu 填充 → 异常处理 → 中间码生成"的顺序逐段分析。
qemu 内存访问过程:guest virtual addr (GVA) → guest physical addr (GPA) → host virtual addr (HVA)。其中 GVA→HVA 由 qemu 负责完成,HVA→HPA 由 host 操作系统完成。tlb 的结构如下,addr_xxx 表示 GVA 地址,同时也表示了对应的访问权限;addend = hva_base − gva_base(即 hva = gva + addend);
typedef struct CPUTLBEntry {
target_ulong addr_read; // 可读
target_ulong addr_write; // 可写
target_ulong addr_code; // 可执行
unsigned long addend;
} CPUTLBEntry;
1.get_page_addr_code 会首先查看 tlb 是否命中,如果没有命中就走 ldub_code 这个经由 mmu 翻译的分支,否则直接获取 hva。
313 /* NOTE: this function can trigger an exception */
314 /* NOTE2: the returned address is not exactly the physical address: it
315 is the offset relative to phys_ram_base */
316 tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
317 {
318 int mmu_idx, page_index, pd;
319 void *p;
320 MemoryRegion *mr;
321
322 page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
323 mmu_idx = cpu_mmu_index(env1);
324 if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
325 (addr & TARGET_PAGE_MASK))) {
326 #ifdef CONFIG_TCG_PASS_AREG0
327 cpu_ldub_code(env1, addr);
328 #else
329 ldub_code(addr);
330 #endif
331 }
332 pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK;
333 mr = iotlb_to_region(pd);
334 if (memory_region_is_unassigned(mr)) {
335 #if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SPARC)
336 cpu_unassigned_access(env1, addr, 0, 1, 0, 4);
337 #else
338 cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x"
339 TARGET_FMT_lx "\n", addr);
340 #endif
341 }
342 p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
343 return qemu_ram_addr_from_host_nofail(p);
344 }
2.TLB 没有命中时,会调用 ldub_code,这个函数由下面的宏产生。这个宏首先会在 tlb 中检查是
否命中,如果命中直接从 hva 地址中返回,否则还是通过 mmu 来获取。
95 static inline RES_TYPE
96 glue(glue(glue(CPU_PREFIX, ld), USUFFIX), MEMSUFFIX)(ENV_PARAM
97 target_ulong ptr)
98 {
99 int page_index;
100 RES_TYPE res;
101 target_ulong addr;
102 int mmu_idx;
103
104 addr = ptr;
105 page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
106 mmu_idx = CPU_MMU_INDEX;
107 if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
108 (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
109 res = glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_VAR
110 addr,
111 mmu_idx);
112 } else {
113 uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
114 res = glue(glue(ld, USUFFIX), _raw)(hostaddr);
115 }
116 return res;
117 }
3.在这个模板中会对 tlb 进行查询,看是否命中,如果命中,还要根据是 io 还是 ram 进行分别处理;
如果没有命中,则需要通过 mmu 获取该虚拟地址所对应的物理地址,对 tlb 进行填充。
106 DATA_TYPE
107 glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
108 target_ulong addr,
109 int mmu_idx)
110 {
111 DATA_TYPE res;
112 int index;
113 target_ulong tlb_addr;
114 target_phys_addr_t ioaddr;
115 uintptr_t retaddr;
116
117 /* test if there is match for unaligned or IO access */
118 /* XXX: could done more in memory macro in a non portable way */
119 index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
120 redo:
121 tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
122 if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
123 if (tlb_addr & ~TARGET_PAGE_MASK) {
124 /* IO access */
125 if ((addr & (DATA_SIZE - 1)) != 0)
126 goto do_unaligned_access;
127 retaddr = GETPC();
128 ioaddr = env->iotlb[mmu_idx][index];
129 res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
130 } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
131 /* slow unaligned access (it spans two pages or IO) */
132 do_unaligned_access:
133 retaddr = GETPC();
134 #ifdef ALIGNED_ONLY
135 do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
136 #endif
137 res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr,
138 mmu_idx, retaddr);
139 } else {
140 /* unaligned/aligned access in the same page */
141 uintptr_t addend;
142 #ifdef ALIGNED_ONLY
143 if ((addr & (DATA_SIZE - 1)) != 0) {
144 retaddr = GETPC();
145 do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
146 }
147 #endif
148 addend = env->tlb_table[mmu_idx][index].addend;
149 res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(intptr_t)
150 (addr + addend));
151 }
152 } else {
153 /* the page is not in the TLB : fill it */
154 retaddr = GETPC();
155 #ifdef ALIGNED_ONLY
156 if ((addr & (DATA_SIZE - 1)) != 0)
157 do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
158 #endif
159 tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
160 goto redo;
161 }
162 return res;
163 }
4.以 arm 为例,tlb_fill 会通过 cpu_arm_handle_mmu_fault 对虚实地址转换进行处理,如果该虚拟
地址没有对应的物理地址或者权限不够等情况,cpu 就会出现 page_fault 异常。
72 /* try to fill the TLB and return an exception if error. If retaddr is
73 NULL, it means that the function was called in C code (i.e. not
74 from generated code or from helper.c) */
75 /* XXX: fix it to restore all registers */
76 void tlb_fill(CPUARMState *env1, target_ulong addr, int is_write, int mmu_idx,
77 uintptr_t retaddr)
78 {
79 TranslationBlock *tb;
80 CPUARMState *saved_env;
81 int ret;
82
83 saved_env = env;
84 env = env1;
85 ret = cpu_arm_handle_mmu_fault(env, addr, is_write, mmu_idx);
86 if (unlikely(ret)) {
87 if (retaddr) {
88 /* now we have a real cpu fault */
89 tb = tb_find_pc(retaddr);
90 if (tb) {
91 /* the PC is inside the translated code. It means that we have
92 a virtual CPU fault */
93 cpu_restore_state(tb, env, retaddr);
94 }
95 }
96 raise_exception(env->exception_index);
97 }
98 env = saved_env;
99 }
5. cpu_arm_handle_mmu_fault 里面主要是 page_walk,检查是否存在对应的物理地址和权限。如
果有,更新 tlb;否则,保存出错信息。
2122 int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address,
2123 int access_type, int mmu_idx)
2124 {
2125 uint32_t phys_addr;
2126 target_ulong page_size;
2127 int prot;
2128 int ret, is_user;
2129
2130 is_user = mmu_idx == MMU_USER_IDX;
2131 ret = get_phys_addr(env, address, access_type, is_user, &phys_addr, &prot,
2132 &page_size);
2133 if (ret == 0) {
2134 /* Map a single [sub]page. */
2135 phys_addr &= ~(uint32_t)0x3ff;
2136 address &= ~(uint32_t)0x3ff;
2137 tlb_set_page (env, address, phys_addr, prot, mmu_idx, page_size);
2138 return 0;
2139 }
2140
2141 if (access_type == 2) {
2142 env->cp15.c5_insn = ret;
2143 env->cp15.c6_insn = address;
2144 env->exception_index = EXCP_PREFETCH_ABORT;
2145 } else {
2146 env->cp15.c5_data = ret;
2147 if (access_type == 1 && arm_feature(env, ARM_FEATURE_V6))
2148 env->cp15.c5_data |= (1 << 11);
2149 env->cp15.c6_data = address;
2150 env->exception_index = EXCP_DATA_ABORT;
2151 }
2152 return 1;
2153 }
qemu 的内存访问模拟:qemu 并没有将内存访问完全用 IR 表示,因为那样一方面会增加翻译难度,同时也占
用大量的 code_buffer。Qemu 使用了外调函数来实现这个功能。在对 target 翻译过程中,对于内存访问操
作会生成如下形式的中间码:
tmp = gen_ld16s(addr, IS_USER(s));
783 static inline TCGv gen_ld16s(TCGv addr, int index)
784 {
785 TCGv tmp = tcg_temp_new_i32();
786 tcg_gen_qemu_ld16s(tmp, addr, index);
787 return tmp;
788 }
接下来在 tcg 翻译将中间码翻译成 host 机器码时,会首先查询 tlb,如果命中就直接返回结果;否则就调用
以下 qemu_ld/st_helpers 中的函数进行 target 的虚拟地址和物理地址转换的工作。
928 #ifdef CONFIG_SOFTMMU
929
930 #include "../../softmmu_defs.h"
931
932 #ifdef CONFIG_TCG_PASS_AREG0
933 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
934 int mmu_idx) */
935 static const void * const qemu_ld_helpers[4] = {
936 helper_ldb_mmu,
937 helper_ldw_mmu,
938 helper_ldl_mmu,
939 helper_ldq_mmu,
940 };
941
942 /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
943 uintxx_t val, int mmu_idx) */
944 static const void * const qemu_st_helpers[4] = {
945 helper_stb_mmu,
946 helper_stw_mmu,
947 helper_stl_mmu,
948 helper_stq_mmu,
949 };
950 #else
951 /* legacy helper signature: __ld_mmu(target_ulong addr, int
952 mmu_idx) */
953 static void *qemu_ld_helpers[4] = {
954 __ldb_mmu,
955 __ldw_mmu,
956 __ldl_mmu,
957 __ldq_mmu,
958 };
959
960 /* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
961 int mmu_idx) */
962 static void *qemu_st_helpers[4] = {
963 __stb_mmu,
964 __stw_mmu,
965 __stl_mmu,
966 __stq_mmu,
967 };
968 #endif
969 #endif
971 #define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS) 972 973 static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) 974 { 975 int addr_reg, data_reg, data_reg2, bswap; 976 #ifdef CONFIG_SOFTMMU 977 int mem_index, s_bits; 978 # if TARGET_LONG_BITS == 64 979 int addr_reg2; 980 # endif 981 uint32_t *label_ptr; 982 #endif 983 984 #ifdef TARGET_WORDS_BIGENDIAN 985 bswap = 1; 986 #else 987 bswap = 0; 988 #endif 989 data_reg = *args++; 990 if (opc == 3) 991 data_reg2 = *args++; 992 else 993 data_reg2 = 0; /* suppress warning */ 994 addr_reg = *args++; 995 #ifdef CONFIG_SOFTMMU 996 # if TARGET_LONG_BITS == 64 997 addr_reg2 = *args++; 998 # endif 999 mem_index = *args; 1000 s_bits = opc & 3; 1001 1002 /* Should generate something like the following: 1003 * shr r8, addr_reg, #TARGET_PAGE_BITS 1004 * and r0, r8, #(CPU_TLB_SIZE - 1) @ Assumption: CPU_TLB_BITS <= 8 1005 * add r0, env, r0 lsl #CPU_TLB_ENTRY_BITS 1006 */ 1007 # if CPU_TLB_BITS > 8 1008 # error 1009 # endif 1010 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_R8, 1011 0, addr_reg, SHIFT_IMM_LSR(TARGET_PAGE_BITS)); 1012 tcg_out_dat_imm(s, COND_AL, ARITH_AND, 1013 TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
上面的内存访问中将查找tlb部分直接翻译成了host指令,而对于target 的mmu转换则使用了外调相送的函数来实现的。这样对提升速度是有好处的。1014 tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0, 1015 TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS)); 1016 /* In the 1017 * ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_read))] 1018 * below, the offset is likely to exceed 12 bits if mem_index != 0 and 1019 * not exceed otherwise, so use an 1020 * add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table) 1021 * before. 1022 */ 1023 if (mem_index) 1024 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0, 1025 (mem_index << (TLB_SHIFT & 1)) | 1026 ((16 - (TLB_SHIFT >> 1)) << 8)); 1027 tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0, 1028 offsetof(CPUArchState, tlb_table[0][0].addr_read)); 1029 tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1, 1030 TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); 1031 /* Check alignment. */ 1032 if (s_bits) 1033 tcg_out_dat_imm(s, COND_EQ, ARITH_TST, 1034 0, addr_reg, (1 << s_bits) - 1); 1035 # if TARGET_LONG_BITS == 64 1036 /* XXX: possibly we could use a block data load or writeback in 1037 * the first access. 
*/ 1038 tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 1039 offsetof(CPUArchState, tlb_table[0][0].addr_read) + 4); 1040 tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, 1041 TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0)); 1042 # endif 1043 tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 1044 offsetof(CPUArchState, tlb_table[0][0].addend)); 1045 1046 switch (opc) { 1047 case 0: 1048 tcg_out_ld8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); 1049 break; 1050 case 0 | 4: 1051 tcg_out_ld8s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); 1052 break; 1053 case 1: 1054 tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); 1055 if (bswap) { 1056 tcg_out_bswap16(s, COND_EQ, data_reg, data_reg); 1057 } 1058 break; 1059 case 1 | 4: 1060 if (bswap) { 1061 tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); 1062 tcg_out_bswap16s(s, COND_EQ, data_reg, data_reg); 1063 } else { 1064 tcg_out_ld16s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); 1065 } 1066 break; 1067 case 2: 1068 default: 1069 tcg_out_ld32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); 1070 if (bswap) { 1071 tcg_out_bswap32(s, COND_EQ, data_reg, data_reg); 1072 } 1073 break; 1074 case 3: 1075 if (bswap) { 1076 tcg_out_ld32_rwb(s, COND_EQ, data_reg2, TCG_REG_R1, addr_reg); 1077 tcg_out_ld32_12(s, COND_EQ, data_reg, TCG_REG_R1, 4); 1078 tcg_out_bswap32(s, COND_EQ, data_reg2, data_reg2); 1079 tcg_out_bswap32(s, COND_EQ, data_reg, data_reg); 1080 } else { 1081 tcg_out_ld32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg); 1082 tcg_out_ld32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4); 1083 } 1084 break; 1085 } 1086 1087 label_ptr = (void *) s->code_ptr; 1088 tcg_out_b_noaddr(s, COND_EQ); 1089 1090 /* TODO: move this code to where the constants pool will be */ 1091 if (addr_reg != TCG_REG_R0) { 1092 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 1093 TCG_REG_R0, 0, addr_reg, SHIFT_IMM_LSL(0)); 1094 } 1095 # if TARGET_LONG_BITS == 32 1096 tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R1, 0, mem_index); 1097 # else 1098 
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 1099 TCG_REG_R1, 0, addr_reg2, SHIFT_IMM_LSL(0)); 1100 tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index); 1101 # endif 1102 #ifdef CONFIG_TCG_PASS_AREG0 1103 /* XXX/FIXME: suboptimal and incorrect for 64 bit */ 1104 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 1105 tcg_target_call_iarg_regs[2], 0, 1106 tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0)); 1107 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 1108 tcg_target_call_iarg_regs[1], 0, 1109 tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0)); 1110 1111 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 1112 tcg_target_call_iarg_regs[0], 0, TCG_AREG0, 1113 SHIFT_IMM_LSL(0)); 1114 #endif 1115 tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]); 1116 1117 switch (opc) { 1118 case 0 | 4: 1119 tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0); 1120 break; 1121 case 1 | 4: 1122 tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0); 1123 break; 1124 case 0: 1125 case 1: 1126 case 2: 1127 default: 1128 if (data_reg != TCG_REG_R0) { 1129 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 1130 data_reg, 0, TCG_REG_R0, SHIFT_IMM_LSL(0)); 1131 } 1132 break; 1133 case 3: 1134 if (data_reg != TCG_REG_R0) { 1135 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 1136 data_reg, 0, TCG_REG_R0, SHIFT_IMM_LSL(0)); 1137 } 1138 if (data_reg2 != TCG_REG_R1) { 1139 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 1140 data_reg2, 0, TCG_REG_R1, SHIFT_IMM_LSL(0)); 1141 } 1142 break; 1143 } 1144 1145 reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr); 1146 #else /* !CONFIG_SOFTMMU */ 1147 if (GUEST_BASE) { 1148 uint32_t offset = GUEST_BASE; 1149 int i; 1150 int rot; 1151 1152 while (offset) { 1153 i = ctz32(offset) & ~1; 1154 rot = ((32 - i) << 7) & 0xf00; 1155 1156 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R8, addr_reg, 1157 ((offset >> i) & 0xff) | rot); 1158 addr_reg = TCG_REG_R8; 1159 offset &= ~(0xff << i); 1160 } 1161 } 1162 switch (opc) { 1163 case 0: 1164 tcg_out_ld8_12(s, COND_AL, data_reg, addr_reg, 0); 1165 break; 1166 case 0 | 4: 
1167 tcg_out_ld8s_8(s, COND_AL, data_reg, addr_reg, 0); 1168 break; 1169 case 1: 1170 tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0); 1171 if (bswap) { 1172 tcg_out_bswap16(s, COND_AL, data_reg, data_reg); 1173 } 1174 break; 1175 case 1 | 4: 1176 if (bswap) { 1177 tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0); 1178 tcg_out_bswap16s(s, COND_AL, data_reg, data_reg); 1179 } else { 1180 tcg_out_ld16s_8(s, COND_AL, data_reg, addr_reg, 0); 1181 } 1182 break; 1183 case 2: 1184 default: 1185 tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, 0); 1186 if (bswap) { 1187 tcg_out_bswap32(s, COND_AL, data_reg, data_reg); 1188 } 1189 break; 1190 case 3: 1191 /* TODO: use block load - 1192 * check that data_reg2 > data_reg or the other way */ 1193 if (data_reg == addr_reg) { 1194 tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4); 1195 tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0); 1196 } else { 1197 tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0); 1198 tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4); 1199 } 1200 if (bswap) { 1201 tcg_out_bswap32(s, COND_AL, data_reg, data_reg); 1202 tcg_out_bswap32(s, COND_AL, data_reg2, data_reg2); 1203 } 1204 break; 1205 } 1206 #endif 1207 }