分析代码
/*
* Create the identity mapping.
*/
adrp x0, idmap_pg_dir
adrp x3, __idmap_text_start // __pa(__idmap_text_start)
#ifdef CONFIG_ARM64_VA_BITS_52
mrs_s x6, SYS_ID_AA64MMFR2_EL1
and x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
mov x5, #52
cbnz x6, 1f
#endif
mov x5, #VA_BITS_MIN
1:
adr_l x6, vabits_actual
str x5, [x6]
dmb sy
dc ivac, x6 // Invalidate potentially stale cache line
/*
* VA_BITS may be too small to allow for an ID mapping to be created
* that covers system RAM if that is located sufficiently high in the
* physical address space. So for the ID map, use an extended virtual
* range in that case, and configure an additional translation level
* if needed.
*
* Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
* entire ID map region can be mapped. As T0SZ == (64 - #bits used),
* this number conveniently equals the number of leading zeroes in
* the physical address of __idmap_text_end.
*/
adrp x5, __idmap_text_end
clz x5, x5
cmp x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough?
b.ge 1f // .. then skip VA range extension
adr_l x6, idmap_t0sz
str x5, [x6]
dmb sy
dc ivac, x6 // Invalidate potentially stale cache line
#if (VA_BITS < 48)
#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
#define EXTRA_PTRS (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
/*
* If VA_BITS < 48, we have to configure an additional table level.
* First, we have to verify our assumption that the current value of
* VA_BITS was chosen such that all translation levels are fully
* utilised, and that lowering T0SZ will always result in an additional
* translation level to be configured.
*/
#if VA_BITS != EXTRA_SHIFT
#error "Mismatch between VA_BITS and page size/number of translation levels"
#endif
mov x4, EXTRA_PTRS
create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
#else
/*
* If VA_BITS == 48, we don't have to configure an additional
* translation level, but the top-level table has more entries.
*/
mov x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
str_l x4, idmap_ptrs_per_pgd, x5
#endif
1:
第1行到第3行的注释表明,开始建立页表的恒等映射
第4行,将 idmap_pg_dir 的物理地址存储在x0寄存器,idmap_pg_dir 在上一节讲过
_rodata = .;
idmap_pg_dir = .;
. += ((((((48)) - 4) / (12 - 3)) - 1) * (1 << 12));
idmap_pg_end = .;
第5行,将 __idmap_text_start 的物理地址赋值给x3寄存器, __idmap_text_start 的定义也在链接脚本里面
/*
 * IDMAP_TEXT - linker-script fragment that collects every .idmap.text
 * input section into one contiguous region. The location counter is
 * first aligned to SZ_4K; __idmap_text_start and __idmap_text_end are
 * defined at the start and end of that region.
 */
#define IDMAP_TEXT \
. = ALIGN(SZ_4K); \
__idmap_text_start = .; \
*(.idmap.text) \
__idmap_text_end = .;
由此可见,它是 .idmap.text 的起始地址,而 .idmap.text 则在 head.S 里面定义,其打头函数便是 el2_setup
.section ".idmap.text","awx"
/*
* If we're fortunate enough to boot at EL2, ensure that the world is
* sane before dropping to EL1.
*
* Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
* booted in EL1 or EL2 respectively.
*/
SYM_FUNC_START(el2_setup)
第7行到第12行的执行依赖于条件编译 CONFIG_ARM64_VA_BITS_52,我的内核只使能了 CONFIG_ARM64_VA_BITS_48,所以我们跳过
cat config | grep CONFIG_ARM64_VA_BITS
# CONFIG_ARM64_VA_BITS_39 is not set
CONFIG_ARM64_VA_BITS_48=y
CONFIG_ARM64_VA_BITS=48
第13行将 VA_BITS_MIN 的值赋值给了x5,我们在内核源代码搜索一下 VA_BITS_MIN 的定义
/*
 * VA_BITS_MIN is VA_BITS capped at 48: it is 48 when VA_BITS is larger
 * than 48, and equal to VA_BITS otherwise.
 */
#if VA_BITS > 48
#define VA_BITS_MIN (48)
#else
#define VA_BITS_MIN (VA_BITS)
#endif
VA_BITS_MIN 的值依赖于 VA_BITS
#define VA_BITS (CONFIG_ARM64_VA_BITS)
CONFIG_ARM64_VA_BITS 的值我们前面找到过,值为48,所以 VA_BITS 的值为48,VA_BITS_MIN 的值也为48,x5的值也为48
第15行将 vabits_actual 的地址赋值给x6,vabits_actual 是一个全局变量,看名称是虚拟地址的真实位数
u64 __section(".mmuoff.data.write") vabits_actual;
EXPORT_SYMBOL(vabits_actual);
adr_l 不是标准的arm64指令或者伪指令,在linux kernel的定义如下
/*
 * adr_l - load the address of a symbol into a register using a
 * PC-relative adrp/add instruction pair.
 *
 * @dst: destination register (64 bit wide)
 * @sym: name of the symbol
 */
.macro adr_l, dst, sym
/* dst = base address of the 4 KB page containing sym (PC-relative) */
adrp \dst, \sym
/* add the low 12 bits of sym, i.e. its offset within that page */
add \dst, \dst, :lo12:\sym
.endm
其实就是把一个符号的地址存放到某个寄存器中:adrp 先取该符号所在 4KB 页的基地址(PC 相对寻址),add 再加上符号地址的低12位(页内偏移)。结合前面代码中 adrp 后的注释 __pa(...) 可知,在这个阶段得到的是物理地址
第16行,将x5的值存放到 vabits_actual 里面,也就是说,虚拟地址的真实位宽为48
第20行到第31行的注释说明的是:当 VA_BITS(这里是48位)不足以为位于物理地址空间高处的内存建立恒等映射时,需要扩展虚拟地址范围,必要时再增加一级页表。而现实中48位的地址空间已经非常大了,我们不考虑不能覆盖的情况,以简化分析
第32行表示,将 __idmap_text_end 所在物理页的基地址(也就是把低12bit清零后的 4KB 对齐地址)存放到x5寄存器里
第33行的含义是用 clz 指令统计x5中这个地址的前导零个数(从最高位开始连续为0的位数),并把结果存回x5里
第34行涉及到一个宏定义 TCR_T0SZ
$ grep -rnw TCR_T0SZ arch/arm64/
arch/arm64/include/asm/pgtable-hwdef.h:182:#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET)
宏定义里面又涉及到另外一个宏定义 TCR_T0SZ_OFFSET
$ grep -rnw TCR_T0SZ_OFFSET arch/arm64/
arch/arm64/include/asm/pgtable-hwdef.h:180:#define TCR_T0SZ_OFFSET 0
所以 TCR_T0SZ(VA_BITS_MIN) 的值为 64-48==16
该行语句的含义是 __idmap_text_end 的物理地址前面的0的数目和16相比,哪个更多
第35行的含义是:如果 __idmap_text_end 的物理地址前面的0的数目大于或等于16(b.ge 是"大于等于"条件),换个说法,__idmap_text_end 的物理地址没有超出48bit所能表示的范围,那么就跳过虚拟地址范围的扩展,直接跳转到第67行的标号1处,这也是符合现实预期的。