kernel : linux 4.4
参考文章: 内存初始化代码分析(一):identity mapping和kernel image mapping (wowotech.net)
先给出映射图:
代码在head.S中__create_page_tables:
/*
 * Call sequence for the identity map: link the next-level table for the
 * address in x3, then fill block entries covering
 * __idmap_text_start..__idmap_text_end.
 */
create_pgd_entry x0, x3, x5, x6
mov x5, x3 // __pa(__idmap_text_start)
adr_l x6, __idmap_text_end // __pa(__idmap_text_end)
/* arguments: x0 = tbl, x7 = flags, x3 = phys, x5 = start, x6 = end */
create_block_map x0, x7, x3, x5, x6
此时的
x0:idmap_pg_dir 0x1462000
x3:__pa(__idmap_text_start) //0xC17000
x5:0x28
x6:swapper_pg_dir+#SWAPPER_DIR_SIZE=0x1467000
寄存器内容可以参考我之前文章在arm64 head.S中用汇编实现打印64位寄存器和字符的宏,打印出来。
create_pgd_entry代码如下,
/*
 * Macro to populate the PGD (and possibly PUD) for the corresponding
 * block entry in the next level (tbl) for the given virtual address.
 * It expands to one create_table_entry invocation per table level that
 * sits above the block-level table.
 *
 * tbl:  page table address; every create_table_entry invocation advances it
 *       by PAGE_SIZE, so on exit it is the address of the last table linked
 * virt: virtual address to be mapped
 *
 * Preserves: virt
 * Corrupts: tmp1, tmp2
 * Returns: tbl -> next level table page address
 */
.macro create_pgd_entry, tbl, virt, tmp1, tmp2
/* top-level entry: always emitted */
create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
#if SWAPPER_PGTABLE_LEVELS > 3
/* extra PUD-level entry, only when more than 3 swapper levels are configured */
create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2
#endif
#if SWAPPER_PGTABLE_LEVELS > 2
/* extra entry at SWAPPER_TABLE_SHIFT, only when more than 2 swapper levels are configured */
create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
#endif
.endm
SWAPPER_PGTABLE_LEVELS为2,所以两个if分支都不执行。只执行create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
这里是在level 1 table中填充某个entry。注意这里用的是PGDIR_SHIFT(值为30),所以在此配置下level 1 table就是PGD(PUD被折叠进了PGD,二者是同一张表)。
create_table_entry代码如下:
/*
 * Macro to create a table entry to the next page.
 *
 * Writes one table descriptor into the table at tbl, at the index derived
 * from virt, and then steps tbl forward to the page that descriptor points
 * at. The next-level table is always taken to be the page immediately
 * following tbl (tbl + PAGE_SIZE).
 *
 * tbl: page table address
 * virt: virtual address
 * shift: #imm page table shift
 * ptrs: #imm pointers per table page
 *
 * Preserves: virt
 * Corrupts: tmp1, tmp2
 * Returns: tbl -> next level table page address
 */
.macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
lsr \tmp1, \virt, #\shift
and \tmp1, \tmp1, #\ptrs - 1 // table index = (virt >> shift) & (ptrs - 1)
add \tmp2, \tbl, #PAGE_SIZE // the next-level table is the page right after tbl
orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type
str \tmp2, [\tbl, \tmp1, lsl #3] // each entry is 8 bytes, hence index << 3
add \tbl, \tbl, #PAGE_SIZE // next level table page
.endm
代入实参,代码如下
lsr x5, x3, #30 // x5 = x3 >> 30; x3 is the VA (0xC17000) shown in the figure below; VA bits [38:30] are the index into the level 1 table, so x5 = 0
and x5, x5, #512 - 1 // table index: x5 = x5 & 511 = x5 & 0x1ff, 512 indexes in total
add x6, x0, #4096 // x6 = x0 + 4096; x0 is the physical address of the page table (0x1462000), x6 is the page after it, 0x1463000
orr x6, x6, #(3<<0) // address of next table and entry type: x6 = x6 | 3 marks a table entry, giving 0x1463003
str x6, [x0, x5, lsl #3] // [x0 + x5 * (1 << 3)] = x6, i.e. the entry at 0x1462000 + 0 * 8 now holds 0x1463003
add x0, x0, #4096 // next level table page: x0 = 0x1462000 + 4096 = 0x1463000
虚拟地址的某几位对应某表index的关系如下
根据虚拟地址(VA)的bits[38:30]算出level 1 table中的index
然后按下面的table描述定义,用物理地址bits[47:12]填充该index对应地址内容(是一个table描述符)bits[47:12],bit[1:0]为3表示是table描述符
即level 1 table地址0x1462000,其index 0, 表项内容为0x1463003(下一级table地址0x1463000,加上bits[1:0]=3)
伪代码如下
.macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
伪代码:
{
tmp1 = virt >> shift
tmp1 = tmp1 & (ptrs-1)
tmp2 = tbl + 4096
tmp2 = tmp2 | #PMD_TYPE_TABLE
uint64_t *p = (uint64_t *)(tbl + tmp1*8) //8表示表中每项占据64位(8字节),所以要用64位指针写入
*p = tmp2
tbl += 4096
}
这里的tbl是0x1462000,是level1 table,
shift是30
虚拟地址virt是0xC17000,表示__pa(__idmap_text_start),查看vmlinux.lds中
. = ALIGN(0x00001000); __idmap_text_start = .; *(.idmap.text) __idmap_text_end = .;
表示段.idmap.text的首地址, 查看system.map该段函数有
其bits[38:30]表示level 1 table中的index,值为0,每个表项占据8字节
PMD_TYPE_TABLE是3,表示该表项是table描述符(bits[1:0]为1则表示block描述符,例如SWAPPER_MM_MMUFLAGS 0xf11的低两位就是0b01),table entry格式如下
这里页大小是4KB,因此m是12
即 需要将level 2 table地址的[47:12], 填入上述table描述符的bits[47:12]
level 1 table地址0x1462000, index 0,表项内容0x1463003
create_table_entry创建完level 1 table的第0表项后,继续填充level 2 table。level 2 table每个表项是block 描述符,每个block地址是2M对齐,
/* Set up the start/end arguments, then fill the block entries of the table at x0. */
mov x5, x3 // __pa(__idmap_text_start)
adr_l x6, __idmap_text_end // __pa(__idmap_text_end)
/* arguments: x0 = tbl, x7 = flags, x3 = phys, x5 = start, x6 = end */
create_block_map x0, x7, x3, x5, x6
此时参数
x0=0x1463000
x3:__pa(__idmap_text_start) //0xC17000 .idmap.text代码段的地址
x5:x3
x6:0xC17448
x7:SWAPPER_MM_MMUFLAGS 0xf11
create_block_map代码如下
/*
 * Macro to populate block entries in the page table for the start..end
 * virtual range (inclusive).
 *
 * tbl:   address of the table that receives the block entries
 * flags: attribute bits OR-ed into every entry
 * phys:  physical address to map; its low SWAPPER_BLOCK_SHIFT bits are
 *        dropped, so the first entry points at a block-aligned address
 * start: first virtual address of the range
 * end:   last virtual address of the range
 *
 * The store is executed before the loop condition is tested, so at least
 * one entry is always written.
 *
 * Preserves: tbl, flags
 * Corrupts: phys, start, end, pstate
 */
.macro create_block_map, tbl, flags, phys, start, end
lsr \phys, \phys, #SWAPPER_BLOCK_SHIFT
lsr \start, \start, #SWAPPER_BLOCK_SHIFT
and \start, \start, #PTRS_PER_PTE - 1 // table index
orr \phys, \flags, \phys, lsl #SWAPPER_BLOCK_SHIFT // table entry
lsr \end, \end, #SWAPPER_BLOCK_SHIFT
and \end, \end, #PTRS_PER_PTE - 1 // table end index
9999: str \phys, [\tbl, \start, lsl #3] // store the entry
add \start, \start, #1 // next entry
add \phys, \phys, #SWAPPER_BLOCK_SIZE // next block
cmp \start, \end // sets the condition flags (the "pstate" listed under Corrupts)
b.ls 9999b // unsigned lower-or-same: repeat while start index <= end index
.endm
SWAPPER_BLOCK_SHIFT:0x15 =21 表示VA bits[29:21]
SWAPPER_BLOCK_SIZE:0x200000 2M大小
PTRS_PER_PTE:0x200 = 512
代入实际参数
lsr x3, x3, #21 // x3 = x3 >> 21 keeps bits [63:21] of x3; what matters is output-address bits [47:21], value 6
lsr x5, x5, #21 // VA bits [63:21]
and x5, x5, #512 - 1 // table index: VA bits [29:21], i.e. the level 2 table index; the start index is 6
orr x3, x7, x3, lsl #21 // table entry: x3 = x7 | (x3 << 21); x7 carries the attributes 0xf11 in bits [11:0]; the result is the block entry for this level 2 index
lsr x6, x6, #21
and x6, x6, #512 - 1 // table end index: the end index in the level 2 table, also 6
9999: str x3, [x0, x5, lsl #3] // store the entry: x0 is the level 2 table address; block entries (descriptors) are filled from index x5 through x6
add x5, x5, #1 // next entry
add x3, x3, #0x200000 // next block
cmp x5, x6
b.ls 9999b // loop while x5 <= x6 (unsigned)
伪代码如下:
.macro create_block_map, tbl, flags, phys, start, end
伪代码:
{
phys = phys >> 21
start = start >> 21
start = start & (512-1) //获得start的bits[29:21]
phys = flags | (phys<<21)
end = end >>21
end = end & (512-1) //获得end的bits[29:21]
int i;
uint64_t *p; //每个表项是64位
for (i=start; i<= end; i++) {
p = (uint64_t *)(tbl + i*8); //第i个表项,每项占8字节
*p = phys;
phys += 0x200000; //一个block有2M大小
}
}
block描述符
这里n取21,Output address即映射后的物理地址,所以应该将物理地址的bits[47:21],填入该描述符的bits[47:21], 其他字段意思如下
这里需要映射的物理地址是__idmap_text_start到__idmap_text_end,即物理地址0xC17000到0xC17448,作为物理地址需要算出bits[47:21]填充到level 2 table的某个表项中
因为是identity 映射,即物理地址和虚拟地址一样,所以0xC17000到0xC17448,也是虚拟地址,作为虚拟地址,需要算出level 2 table 的index
0xC17000的bits[47:21]是6,0xC17448的bits[47:21]也是6,所以只需要一个表项6就够了
此时的level 2 table地址0x1463000,index 6的表项内容:bits[47:21]是0x6, 对应block地址0xC00000, bits[11:0]是0xf11
将level 1 table地址,level 1 table的0表项,level 2 table的第6表项打印出来,打印代码
/*
 * Debug snippet: build the mapping, then print the level 1 table address,
 * its entry 0, and entry 6 of the level 2 table.
 * print_char / print_reg64 are the author's own macros and are not shown
 * here; x25 is taken to hold the level 1 table address, as the comment on
 * the first print_reg64 says.
 */
create_pgd_entry x0, x3, x5, x6
mov x5, x3 // __pa(__idmap_text_start)
adr_l x6, __idmap_text_end // __pa(__idmap_text_end)
create_block_map x0, x7, x3, x5, x6
print_char x8, x1,x2,#0x3a // ':'
print_reg64 x25,x8 //address of the level 1 table
ldr x9, [x25, #0] //content of the (level 1 table + index 0)
print_char x10, x1,x2,#0x3a // ':'
print_reg64 x9,x10 //print the content of the (level 1 table + index 0)
lsr x9, x9, #12 //level2 table address[47:12]
lsl x9,x9,#12 //level2 table address (low 12 bits cleared)
ldr x10, [x9, #48] //content of the (level 2 table + index 6); byte offset 48 = 6 * 8
print_char x11, x1,x2,#0x3a // ':'
print_reg64 x10,x11 //print the content of the (level 2 table + index 6)
测试如下:
即
level 1 table地址0x1462000, 第一表项内容0x1463003,
获取level 2 table地址: 因为每个table占据一页,所以4K对齐后地址0x1463000
__idmap_text_start值是0xC17000, 因为是identity映射,它既是物理地址也是虚拟地址。作为虚拟地址时,其bits[29:21]=6, 即占据level 2 table第6个表项, 地址为0x1463000+6*8,
该地址对应的是block描述符
__idmap_text_start又是物理地址,需要将bits[47:21](值为0b110)存入描述符的bits[47:21]
打印出的block描述符为0xC00F11, bits[47:21]为0b110, 测试正确