环境: linux kernel 5.10 arm64
/*
* The p*d_populate functions call virt_to_phys implicitly so they can't be used
* directly on kernel symbols (bm_p*d). This function is called too early to use
* lm_alias so __p*d_populate functions must be used to populate with the
* physical address from __pa_symbol.
*
* early_fixmap_init() wires the statically allocated fixmap tables (bm_pud,
* bm_pmd, bm_pte) into the kernel page table for the address FIXADDR_START:
* the p4d/pgd entry is pointed at bm_pud, the pud entry at bm_pmd and the pmd
* entry at bm_pte, each only if it is still empty (the pmd entry is written
* unconditionally).
*
* Debug instrumentation (added by xiawei): besides the upstream logic, this
* copy prints the virtual memory layout constants, the pointers and values
* met while populating, and then re-walks the freshly populated entries by
* hand starting from init_pg_dir to cross-check them against
* __pa_symbol(bm_pud/bm_pmd/bm_pte). The hand-written walk hard-codes 9-bit
* indices at shifts 39/30/21/12, i.e. the 4KB-page, 4-level layout.
*/
void __init early_fixmap_init(void)
{
pgd_t *pgdp;
p4d_t *p4dp, p4d;
pud_t *pudp;
pmd_t *pmdp;
unsigned long addr = FIXADDR_START;
// debug (xiawei): copy the layout constants into locals so they can be printed with %lx
unsigned long vmemmap_size = VMEMMAP_SIZE;
unsigned long page_offset = PAGE_OFFSET;
unsigned long modules_end = MODULES_END;
unsigned long bpf_jit_region_start = BPF_JIT_REGION_START;
unsigned long bpf_jit_region_end = BPF_JIT_REGION_END;
unsigned long modules_vaddr = MODULES_VADDR;
unsigned long vmemmap_start = VMEMMAP_START;
unsigned long vmemmap_end = VMEMMAP_END;
unsigned long pci_io_end = PCI_IO_END;
unsigned long pci_io_start = PCI_IO_START;
unsigned long fixaddr_top = FIXADDR_TOP;
unsigned long fixaddr_start = FIXADDR_START;
unsigned long fixaddr_size = FIXADDR_SIZE;
// debug (xiawei): per-level table indices of addr (9 bits each) used by the manual walk below
int pgdt_index = (addr >> 39) & 511;
int pudt_index = (addr >> 30) & 511;
int pmdt_index = (addr >> 21) & 511;
int ptet_index = (addr >> 12) & 511;
// raw descriptor read at each level (x_p) and the kernel-image virtual
// address of the table that descriptor points to (vx_p)
unsigned long pgd_p, vpgd_p;
unsigned long pud_p, vpud_p;
unsigned long pmd_p, vpmd_p;
unsigned long pte_p;
pr_notice("addr is 0x%lx\n", addr); // debug (xiawei): prints 0xfffffdfffe5f9000 in the captured log
pr_notice("VMEMMAP_SIZE :0x%lx, PAGE_OFFSET :0x%lx, KIMAGE_VADDR(MODULES_END):0x%lx, BPF_JIT_REGION_START:0x%lx,"
"BPF_JIT_REGION_END:0x%lx, MODULES_VADDR:0x%lx, VMEMMAP_START:0x%lx, VMEMMAP_END:0x%lx,"
"PCI_IO_END:0x%lx, PCI_IO_START:0x%lx, FIXADDR_TOP:0x%lx, FIXADDR_START:0x%lx, fixaddr_size:0x%lx\n", vmemmap_size, page_offset, modules_end,
bpf_jit_region_start, bpf_jit_region_end, modules_vaddr, vmemmap_start, vmemmap_end, pci_io_end,
pci_io_start, fixaddr_top, fixaddr_start, fixaddr_size);
// Locate the top-level entry covering addr and take a snapshot of it.
pgdp = pgd_offset_k(addr);
p4dp = p4d_offset(pgdp, addr);
p4d = READ_ONCE(*p4dp);
pr_notice("swapper_pg_dir is %llx, init_mm.pgd :%llx, pgdp:%llx, p4dp:%llx, p4d:%llx\n", (u64)swapper_pg_dir, (u64)init_mm.pgd, (u64)pgdp, (u64)p4dp, (u64)p4d.pgd.pgd); // debug (xiawei)
if (CONFIG_PGTABLE_LEVELS > 3 &&
!(p4d_none(p4d) || p4d_page_paddr(p4d) == __pa_symbol(bm_pud))) {
/*
* We only end up here if the kernel mapping and the fixmap
* share the top level pgd entry, which should only happen on
* 16k/4 levels configurations.
*/
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
pudp = pud_offset_kimg(p4dp, addr);
} else {
// Entry is empty or already points at bm_pud: install bm_pud only when empty.
if (p4d_none(p4d)) {
pr_notice("p4d_none(p4d)\n");// debug (xiawei): this branch is taken in the captured log
__p4d_populate(p4dp, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
}
pudp = fixmap_pud(addr);
pr_notice("pudp:%llx, __pa_symbol(bm_pud):%llx, bm_pud:%llx, PAGE_END:%llx, kimage_voffset:%llx, &kimage_voffset:%llx\n",
(u64)pudp, __pa_symbol(bm_pud), (u64)bm_pud, (u64)PAGE_END, kimage_voffset, (u64)&kimage_voffset); // debug (xiawei)
pr_notice("vabits_actual :0x%llx, &vabits_actual :%llx\n", vabits_actual, (u64)&vabits_actual);// debug (xiawei)
}
// Hook bm_pmd under the pud entry if nothing is there yet.
if (pud_none(READ_ONCE(*pudp))) {
pr_notice("pud_none(READ_ONCE(*pudp))\n");// debug (xiawei)
__pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
}
// The pmd entry is always (re)written to point at bm_pte.
pmdp = fixmap_pmd(addr);
__pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
// debug (xiawei): manual walk of the entries just populated, starting at
// init_pg_dir. At each level: read the raw descriptor, print it next to the
// expected __pa_symbol() value, clear the low 12 bits to get the next
// table's physical address, then convert it with __phys_to_kimg so the
// next table can be read.
pgd_p = (unsigned long)(*((unsigned long*)init_pg_dir + pgdt_index)); // raw descriptor; holds a physical address plus attribute bits
pr_notice("addr is :%lx, pgdt_index(0x%x):%lx, __pa_symbol(bm_pud):%llx\n", addr, pgdt_index, pgd_p, __pa_symbol(bm_pud));
pgd_p = (pgd_p>>12) << 12;
vpgd_p = __phys_to_kimg(pgd_p);
pr_notice("vpgd_p is :%lx, bm_pud:%llx\n", vpgd_p, (u64)bm_pud);
pud_p = (unsigned long)(*((unsigned long*)vpgd_p + pudt_index));
pr_notice("addr is :%lx, pudt_index(0x%x):%lx, __pa_symbol(bm_pmd):%llx\n", addr, pudt_index, pud_p, __pa_symbol(bm_pmd));
pud_p = (pud_p>>12) << 12;
vpud_p = __phys_to_kimg(pud_p);
pr_notice("vpud_p is :%lx, bm_pmd:%llx\n", vpud_p, (u64)bm_pmd);
pmd_p = (unsigned long)(*((unsigned long*)vpud_p + pmdt_index));
pr_notice("addr is :%lx, pmdt_index(0x%x):%lx, __pa_symbol(bm_pte):%llx\n", addr, pmdt_index, pmd_p, __pa_symbol(bm_pte));
pmd_p = (pmd_p>>12) << 12;
vpmd_p = __phys_to_kimg(pmd_p);
pr_notice("vpmd_p is :%lx, bm_pte:%llx\n", vpmd_p, (u64)bm_pte);
pte_p = (unsigned long)(*((unsigned long*)vpmd_p + ptet_index)); // reads back 0 in the captured log; presumably no page has been mapped at this fixmap slot yet
pr_notice("addr is :%lx, ptet_index(0x%x):%lx\n", addr, ptet_index, pte_p);
// debug (xiawei): same kind of walk for _text and _end.
// NOTE(review): print_kimage_start() is defined after this function in this
// listing; a prototype must be visible before this call - confirm one exists.
print_kimage_start();
/*
* The boot-ioremap range spans multiple pmds, for which
* we are not prepared:
*/
BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
!= (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
// Runtime counterpart of the build-time check: both ends of the boot-ioremap
// range must resolve to the pmd entry that was populated above.
if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
|| pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
WARN_ON(1);
pr_warn("pmdp %p != %p, %p\n",
pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
fix_to_virt(FIX_BTMAP_BEGIN));
pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
fix_to_virt(FIX_BTMAP_END));
pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
}
}
/*
 * Debug helpers (test code by xiawei).
 *
 * The constants below hard-code the 4KB-granule, 4-level (48-bit VA) layout
 * that the rest of this walk assumes: 9 index bits per level at shifts
 * 39/30/21 and a 2MB block at level 2.
 */

/* Next-level table address field of a table descriptor: bits [47:12]. */
#define KIMG_WALK_TABLE_ADDR	(((1UL << 36) - 1) << 12)
/* Output address field of a level-2 (2MB) block descriptor: bits [47:21]. */
#define KIMG_WALK_BLOCK_ADDR	(((1UL << 27) - 1) << 21)
/* Offset of an address inside its 2MB block: bits [20:0]. */
#define KIMG_WALK_BLOCK_OFFSET	((1UL << 21) - 1)
/* Descriptor type lives in bits [1:0]: 3 = table, 1 = block, bit 0 clear = invalid. */
#define KIMG_WALK_TYPE_MASK	3UL
#define KIMG_WALK_TYPE_TABLE	3UL
#define KIMG_WALK_TYPE_BLOCK	1UL

/*
 * Walk the page table rooted at init_pg_dir for @addr and print the raw
 * descriptor found at every level, the kernel-image virtual address of the
 * table/block it points to, and finally the virtual address rebuilt from the
 * level-2 block base plus the in-block offset (expected to equal @addr).
 *
 * @name: label used in the log lines ("_text", "_end").
 * @addr: kernel-image virtual address to look up.
 *
 * Compared with a plain "(desc >> 12) << 12", the address fields are masked
 * to their architectural width so upper attribute bits can never leak into
 * the physical address, and the walk stops with a message instead of
 * dereferencing a bogus pointer when a descriptor is not of the expected
 * type.
 *
 * NOTE(review): reads init_pg_dir directly, so this is presumably only
 * meaningful during early boot while that table is still in use - confirm.
 */
static void print_kimage_walk(const char *name, unsigned long addr)
{
	const unsigned int pgdt_index = (addr >> 39) & 511;
	const unsigned int pudt_index = (addr >> 30) & 511;
	const unsigned int pmdt_index = (addr >> 21) & 511;
	unsigned long desc;	/* raw descriptor at the current level */
	unsigned long vtable;	/* kernel-image VA of what desc points to */

	pr_notice("%s result:\n", name);

	/* Level 0: the root table is init_pg_dir itself. */
	desc = *((unsigned long *)init_pg_dir + pgdt_index);
	pr_notice("addr is :%lx, pgdt_index(0x%x):%lx\n", addr, pgdt_index, desc);
	if ((desc & KIMG_WALK_TYPE_MASK) != KIMG_WALK_TYPE_TABLE) {
		pr_notice("%s: pgd entry is not a table descriptor, walk stopped\n", name);
		return;
	}
	vtable = __phys_to_kimg(desc & KIMG_WALK_TABLE_ADDR);
	pr_notice("vpgd_p is :%lx\n", vtable);

	/* Level 1. */
	desc = *((unsigned long *)vtable + pudt_index);
	pr_notice("addr is :%lx, pudt_index(0x%x):%lx\n", addr, pudt_index, desc);
	if ((desc & KIMG_WALK_TYPE_MASK) != KIMG_WALK_TYPE_TABLE) {
		pr_notice("%s: pud entry is not a table descriptor, walk stopped\n", name);
		return;
	}
	vtable = __phys_to_kimg(desc & KIMG_WALK_TABLE_ADDR);
	pr_notice("vpud_p is :%lx\n", vtable);

	/* Level 2: expected to be a 2MB block mapping of the kernel image. */
	desc = *((unsigned long *)vtable + pmdt_index);
	pr_notice("addr is :%lx, pmdt_index(0x%x):%lx\n", addr, pmdt_index, desc);
	if ((desc & KIMG_WALK_TYPE_MASK) != KIMG_WALK_TYPE_BLOCK) {
		pr_notice("%s: pmd entry is not a 2MB block descriptor, walk stopped\n", name);
		return;
	}
	vtable = __phys_to_kimg(desc & KIMG_WALK_BLOCK_ADDR);
	pr_notice("vpmd_p is :%lx\n", vtable);

	/* Block base (as a kernel-image VA) plus the offset inside the block. */
	pr_notice("virt %s is :%lx\n", name,
		  vtable | (addr & KIMG_WALK_BLOCK_OFFSET));
}

/*
 * Print the init_pg_dir translation of the first (_text) and the end (_end)
 * address of the kernel image. Takes no arguments and returns nothing; the
 * only effect is the log output.
 */
void print_kimage_start(void)
{
	print_kimage_walk("_text", (unsigned long)_text);
	print_kimage_walk("_end", (unsigned long)_end);
}
打印结果
[ 0.692482] addr is 0xfffffdfffe5f9000
[ 0.692497] VMEMMAP_SIZE :0x20000000000, PAGE_OFFSET :0xffff000000000000, KIMAGE_VADDR(MODULES_END):0xffff800010000000, BPF_JIT_REGION_START:0xffff800000000000,BPF_JIT_REGION_END:0xffff800008000000, MODULES_VADDR:0xffff800008000000, VMEMMAP_START:0xfffffdffffe00000, VMEMMAP_END:0xffffffffffe00000,PCI_IO_END:0xfffffdffffc00000, PCI_IO_START:0xfffffdfffec00000, FIXADDR_TOP:0xfffffdfffea00000, FIXADDR_START:0xfffffdfffe5f9000, fixaddr_size:0x407000
[ 0.692502] swapper_pg_dir is ffff800011675000, init_mm.pgd :ffff800012129000, pgdp:ffff800012129fd8, p4dp:ffff800012129fd8, p4d:0
[ 0.692504] p4d_none(p4d)
[ 0.692510] pudp:ffff80001208bff8, __pa_symbol(bm_pud):248b000, bm_pud:ffff80001208b000, PAGE_END:ffff800000000000, kimage_voffset:ffff80000fc00000, &kimage_voffset:ffff8000115dd580
[ 0.692513] vabits_actual :0x30, &vabits_actual :ffff800012087810
[ 0.692515] pud_none(READ_ONCE(*pudp))
[ 0.692519] addr is :fffffdfffe5f9000, pgdt_index(0x1fb):248b003, __pa_symbol(bm_pud):248b000
[ 0.692522] vpgd_p is :ffff80001208b000, bm_pud:ffff80001208b000
[ 0.692526] addr is :fffffdfffe5f9000, pudt_index(0x1ff):248c003, __pa_symbol(bm_pmd):248c000
[ 0.692529] vpud_p is :ffff80001208c000, bm_pmd:ffff80001208c000
[ 0.692533] addr is :fffffdfffe5f9000, pmdt_index(0x1f2):248d003, __pa_symbol(bm_pte):248d000
[ 0.692536] vpmd_p is :ffff80001208d000, bm_pte:ffff80001208d000
[ 0.692539] addr is :fffffdfffe5f9000, ptet_index(0x1f9):0
[ 0.692542] _text result:
[ 0.692545] addr is :ffff800010000000, pgdt_index(0x100):252a003
[ 0.692548] vpgd_p is :ffff80001212a000
[ 0.692551] addr is :ffff800010000000, pudt_index(0x0):252b003
[ 0.692554] vpud_p is :ffff80001212b000
[ 0.692557] addr is :ffff800010000000, pmdt_index(0x80):400701
[ 0.692560] vpmd_p is :ffff800010000000
[ 0.692562] virt _text is :ffff800010000000
[ 0.692564] _end result:
[ 0.692567] addr is :ffff800012130000, pgdt_index(0x100):252a003
[ 0.692570] vpgd_p is :ffff80001212a000
[ 0.692573] addr is :ffff800012130000, pudt_index(0x0):252b003
[ 0.692575] vpud_p is :ffff80001212b000
[ 0.692578] addr is :ffff800012130000, pmdt_index(0x90):2400701
[ 0.692581] vpmd_p is :ffff800012000000
[ 0.692583] virt _end is :ffff800012130000
1.
SYM_FUNC_START_LOCAL(__create_page_tables)
//test by xiawei to print _text phy address
print_char x0, x1,x2,#0x3a // ':'
adr_l x1, _text
print_reg64 x1,x6
print_char x0, x1,x2,#0x3a // ':'
adr_l x1, swapper_pg_dir
print_reg64 x1,x6
说明_text物理地址为0x0000000000400000, swapper_pg_dir物理地址为0x0000000001A75000
2.
#define CONFIG_ARM64_VA_BITS 48
#define VA_BITS (CONFIG_ARM64_VA_BITS)
#define _PAGE_OFFSET(va) (-(UL(1) << (va)))
#define PAGE_OFFSET (_PAGE_OFFSET(VA_BITS))
因为0 = 0xffffffff ffffffff + 1,
所以PAGE_OFFSET = -(1ul<<48) = -(1ul<<48) + 0 = -(1ul<<48) + 0xffffffff ffffffff + 1
= 0xfffeffff ffffffff + 1 = 0xffff0000 00000000
验证代码如下:
#include <stdio.h>

/* User-space copies of the kernel macros, so the arithmetic can be checked on a host. */
#define VA_BITS (48)
#define _PAGE_OFFSET(va) (-(1UL << (va)))
#define PAGE_OFFSET (_PAGE_OFFSET(VA_BITS))

/*
 * Print the constants derived by hand in these notes so they can be compared
 * with the kernel log. The expected values below assume a 64-bit
 * unsigned long (LP64 host).
 *
 * Returns 0.
 */
int main(void)
{
	/* PAGE_OFFSET = -(1UL << 48); expected 0xffff000000000000 */
	unsigned long aa = PAGE_OFFSET;
	printf("aa:0x%lx\n", aa);

	/* Size covered by one level-0 entry (1UL << 39); expected 0x8000000000 */
	unsigned long bb = 1UL << 39;
	printf("bb:0x%lx\n", bb);

	/*
	 * KIMAGE_VADDR as expanded in vmlinux.lds:
	 * -(1UL << 47) + 128MB + 128MB; expected 0xffff800010000000
	 */
	unsigned long cc = (((((((-(((1UL)) << ((((48))) - 1))))) + (0x08000000))) + (0x08000000)));
	printf("cc:0x%lx\n", cc);

	/* Negation wraps modulo 2^32 once converted to unsigned int; expected 0xfffffffe */
	unsigned int dd = -(1<<1);
	printf("dd:0x%x\n", dd);

	return 0;
}
3.
vmlinux.lds.S中
在vmlinux.lds中KIMAGE_VADDR被扩展为
即KIMAGE_VADDR = (((((((-(((1)) << ((((48))) - 1))))) + (0x08000000))) + (0x08000000)))
= -(1 << (48 - 1)) + (0x08000000) + (0x08000000)
= -(1<<47) + 0 + 0x10000000
= -(1<<47) + 0xffffffff ffffffff + 1 + 0x10000000
= 0xffff7fff ffffffff + 1 + 0x10000000
= 0xffff8000 00000000 + 0x10000000
= 0xffff8000 10000000
对应System.map中的_text虚拟地址
同时在arch/arm64/include/asm/memory.h中KIMAGE_VADDR定义
#define KIMAGE_VADDR (MODULES_END)
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
#define MODULES_VSIZE (SZ_128M) //即2^27 = 1<<27 = 0x8000000
#define MODULES_VADDR (BPF_JIT_REGION_END)
#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
#define BPF_JIT_REGION_SIZE (SZ_128M) //即0x8000000
#define BPF_JIT_REGION_START (KASAN_SHADOW_END)
#define KASAN_SHADOW_END (_PAGE_END(VA_BITS_MIN))
分析KASAN_SHADOW_END
#define VA_BITS_MIN (VA_BITS) //即48
#define _PAGE_END(va) (-(UL(1) << ((va) - 1)))
即_PAGE_END(48) = (-(UL(1) << ((48) - 1))) = -(1<<47) = -(1<<47) + 0
= -(1<<47) + 0xffffffff ffffffff + 1
= 0xffff8000 00000000
所以回溯
#define KASAN_SHADOW_END 0xffff8000 00000000
#define BPF_JIT_REGION_START 0xffff8000 00000000
#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
= 0xffff8000 00000000 + 0x8000000 = 0xffff8000 08000000
#define MODULES_VADDR (BPF_JIT_REGION_END)
= 0xffff8000 08000000
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
= 0xffff8000 08000000 + 0x8000000
= 0xffff8000 10000000
#define KIMAGE_VADDR (MODULES_END)
= 0xffff8000 10000000
4.
orangepi@orangepi5plus:~$ dmesg
[ 0.693878] addr is 0xfffffdfffe5f9000
[ 0.693894] VMEMMAP_SIZE :0x20000000000, PAGE_OFFSET :0xffff000000000000, KIMAGE_VADDR(MODULES_END):0xffff800010000000, BPF_JIT_REGION_START:0xffff800000000000,BPF_JIT_REGION_END:0xffff800008000000, MODULES_VADDR:0xffff800008000000, VMEMMAP_START:0xfffffdffffe00000, VMEMMAP_END:0xffffffffffe00000,PCI_IO_END:0xfffffdffffc00000, PCI_IO_START:0xfffffdfffec00000, FIXADDR_TOP:0xfffffdfffea00000, FIXADDR_START:0xfffffdfffe5f9000, fixaddr_size:0x407000
(注:没有使能CONFIG_KASAN_GENERIC,所以没有kasan shadow region)
即
bpf jit region 0xffff8000 00000000 -- 0xffff8000 08000000-1
modules 0xffff8000 08000000 -- 0xffff8000 10000000-1
kimage 0xffff8000 10000000
fixed mappings 0xfffffdfffe5f9000 -- 0xfffffdfffea00000-1
PCI I/O space 0xfffffdfffec00000 -- 0xfffffdffffc00000-1
vmemmap 0xfffffdffffe00000 -- 0xffffffffffe00000-1
5.
参考Documentation\arm64\memory.rst (注含kasan shadow region)
AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
Start End Size Use
-----------------------------------------------------------------------
0000000000000000 0000ffffffffffff 256TB user
ffff000000000000 ffff7fffffffffff 128TB kernel logical memory map
ffff800000000000 ffff9fffffffffff 32TB kasan shadow region
ffffa00000000000 ffffa00007ffffff 128MB bpf jit region
ffffa00008000000 ffffa0000fffffff 128MB modules
ffffa00010000000 fffffdffbffeffff ~93TB vmalloc
fffffdffbfff0000 fffffdfffe5f8fff ~998MB [guard region]
fffffdfffe5f9000 fffffdfffe9fffff 4124KB fixed mappings
fffffdfffea00000 fffffdfffebfffff 2MB [guard region]
fffffdfffec00000 fffffdffffbfffff 16MB PCI I/O space
fffffdffffc00000 fffffdffffdfffff 2MB [guard region]
fffffdffffe00000 ffffffffffdfffff 2TB vmemmap
ffffffffffe00000 ffffffffffffffff 2MB [guard region]
6.
early_fixmap_init函数
unsigned long addr = FIXADDR_START;
pgdp = pgd_offset_k(addr);
p4dp = p4d_offset(pgdp, addr);
p4d = READ_ONCE(*p4dp);
/*
* a shortcut which implies the use of the kernel's pgd, instead
* of a process's
*/
#ifndef pgd_offset_k
#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
#endif
/*
* a shortcut to get a pgd_t in a given mm
*/
#ifndef pgd_offset
#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
#endif
/*
 * Static definition of init_mm, the mm_struct whose pgd is used by
 * pgd_offset_k().
 *
 * Note that .pgd is initialised twice: once explicitly with swapper_pg_dir
 * and once more through INIT_MM_CONTEXT(init_mm), which is quoted right after
 * this definition as expanding to ".pgd = init_pg_dir,". In C, a later
 * designated initializer for the same member overrides the earlier one, so
 * the initial value of init_mm.pgd is init_pg_dir.
 */
struct mm_struct init_mm = {
.mm_rb = RB_ROOT,
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
.mm_rb_lock = __RW_LOCK_UNLOCKED(init_mm.mm_rb_lock),
#endif
.pgd = swapper_pg_dir, /* overridden by INIT_MM_CONTEXT() below */
.mm_users = ATOMIC_INIT(2),
.mm_count = ATOMIC_INIT(1),
.write_protect_seq = SEQCNT_ZERO(init_mm.write_protect_seq),
MMAP_LOCK_INITIALIZER(init_mm)
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
.arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
.user_ns = &init_user_ns,
.cpu_bitmap = CPU_BITS_NONE,
INIT_MM_CONTEXT(init_mm) /* supplies the second, winning .pgd initializer */
};
#define INIT_MM_CONTEXT(name) \
.pgd = init_pg_dir,
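INIT_MM_CONTEXT(init_mm)展开为“.pgd = init_pg_dir,”,它位于“.pgd = swapper_pg_dir,”之后;C语言中对同一成员的后一个指定初始化器会覆盖前一个,所以init_mm.pgd的初值最终是init_pg_dir(打印结果中init_mm.pgd为ffff800012129000,与swapper_pg_dir的ffff800011675000不同,也印证了这一点)。可在System.map中查看init_pg_dir的地址。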
init_pg_dir对应的物理内存处存储L0页表,存储kernel image虚拟地址到物理地址的线性映射关系。kernel image 起始地址ffff800010000000,物理地址0x0000000000400000,见代码:
/*
* Map the kernel image (starting with PHYS_OFFSET).
*/
adrp x0, init_pg_dir
mov_q x5, KIMAGE_VADDR // compile time __va(_text)
add x5, x5, x23 // add KASLR displacement
mov x4, PTRS_PER_PGD
adrp x6, _end // runtime __pa(_end)
adrp x3, _text // runtime __pa(_text)
sub x6, x6, x3 // _end - _text
add x6, x6, x5 // runtime __va(_end)
map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14
加打印
pr_notice("swapper_pg_dir is %llx, init_mm.pgd :%llx, pgdp:%llx, p4dp:%llx,
p4d:%llx\n", (u64)swapper_pg_dir, (u64)init_mm.pgd, (u64)pgdp, (u64)p4dp,
(u64)p4d.pgd.pgd);
后结果 : addr is 0xfffffdfffe5f9000
swapper_pg_dir is ffff800011675000, init_mm.pgd :ffff800012129000, pgdp:ffff800012129fd8, p4dp:ffff800012129fd8, p4d:248b003
分析:
#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
/*
 * Return a pointer to the entry for 'address' inside the top-level table
 * 'pgd': the table base advanced by pgd_index(address) entries (pointer
 * arithmetic, so the byte offset is pgd_index(address) * sizeof(pgd_t)).
 */
static inline pgd_t *pgd_offset_pgd(pgd_t *pgd, unsigned long address)
{
return (pgd + pgd_index(address)); // via pgd_offset_k() the base passed in is init_mm.pgd
};
#define pgd_index(a) (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
#define CONFIG_PGTABLE_LEVELS 4
#define ARM64_HW_PGTABLE_LEVEL_SHIFT(0) ((PAGE_SHIFT - 3) * (4 - (0)) + 3)
#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT //12
ARM64_HW_PGTABLE_LEVEL_SHIFT(0) = ((12 - 3) * (4 - (0)) + 3)
= 9*4 +3 = 39
PGDIR_SHIFT = 39
#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT))
= (1<<(48-39))
= 1<<9 = 512
pgd_index(a) = (((a) >> 39) & (512 - 1))
Translation table lookup with 4KB pages::
+--------+--------+--------+--------+--------+--------+--------+--------+
|63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0|
+--------+--------+--------+--------+--------+--------+--------+--------+
| | | | | |
| | | | | v
| | | | | [11:0] in-page offset
| | | | +-> [20:12] L3 index
| | | +-----------> [29:21] L2 index
| | +---------------------> [38:30] L1 index
| +-------------------------------> [47:39] L0 index
+-------------------------------------------------> [63] TTBR0/1
pgd_offset_k(0xfffffdfffe5f9000) = init_mm.pgd + pgd_index(0xfffffdfffe5f9000)
= (pgd_t *)ffff800012129000 + ((0xfffffdfffe5f9000 >> 39) & (512 - 1))
= ffff800012129000 + sizeof(pgd_t) * ((0xfffffdfffe5f9000 >> 39) & (512 - 1)) //指针加法按所指类型pgd_t的大小(8字节)缩放
= ffff800012129000 + 8*(0x1FFFFFB & (512-1))
= ffff800012129000 + 8 * 0x1fb //L0(pgd)表项索引为0x1fb
= ffff800012129000 + 0xfd8
= ffff800012129fd8
所以pgdp = pgd_offset_k(addr) = 0xffff800012129fd8
/*
 * Return the p4d entry for 'address' given its pgd entry. In this variant
 * there is no separate p4d table: the pgd entry pointer is simply
 * reinterpreted as a p4d entry pointer, and 'address' is not used.
 */
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
return (p4d_t *)pgd;
}
所以p4dp = p4d_offset(pgdp, addr) = (p4d_t *)pgdp = 0xffff800012129fd8; //没有p4d表,将p4d表地址等同于pgd表
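p4d = READ_ONCE(*p4dp)=0x248b003 //L0表中index为0x1fb表项的内容,对应L1表的物理地址为0x248b000,(3表示表项的属性),那么对应哪块内存呢?
(注:按前面完整的打印结果,进入函数后第一次READ_ONCE读到的p4d为0,并打印了p4d_none(p4d);0x248b003是随后__p4d_populate写入之后该表项的内容,与打印pgdt_index(0x1fb):248b003一致。)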
因为_text物理地址为0x0000000000400000, 虚拟地址为ffff800010000000
那么 物理地址 0x248b000, 对应的虚拟地址是0x248b000 - 0x400000 + ffff800010000000
= 0xffff80001208B000
查找System.map
即对应变量bm_pud
else {
if (p4d_none(p4d))
__p4d_populate(p4dp, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
pudp = fixmap_pud(addr);
}
添加打印
pr_notice("pudp:%llx, __pa_symbol(bm_pud):%llx, bm_pud:%llx, PAGE_END:%llx, kimage_voffset:%llx\n",
(u64)pudp, __pa_symbol(bm_pud), (u64)bm_pud, (u64)PAGE_END, kimage_voffset);
打印结果
[ 0.692457] pudp:ffff80001208bff8, __pa_symbol(bm_pud):248b000, bm_pud:ffff80001208b000, PAGE_END:ffff800000000000, kimage_voffset:ffff80000fc00000
可知bm_pud的虚拟地址和物理地址和上面一样。
插入一个遗漏的代码,需要满足
p4d_page_paddr(p4d) == __pa_symbol(bm_pud)
/*
 * Return the physical address stored in a p4d entry, i.e. the entry value
 * with its non-address bits masked off by __p4d_to_phys().
 */
static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
{
return __p4d_to_phys(p4d);
}
#define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d))
分析
#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK)
#define pte_val(x) ((x).pte)
#define PTE_ADDR_MASK PTE_ADDR_LOW
#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
#define _AT(T,X) ((T)(X))
PAGE_SHIFT为12
/*
 * Re-wrap the raw value of a p4d entry as a pte_t so that the generic
 * pte helpers (such as __pte_to_phys) can be applied to it.
 */
static inline pte_t p4d_pte(p4d_t p4d)
{
return __pte(p4d_val(p4d));
}
#define __pte(x) ((pte_t) { (x) } )
#define p4d_val(x) (pgd_val((x).pgd))
p4d_page_paddr(p4d) = __p4d_to_phys(p4d) = __pte_to_phys(p4d_pte(p4d))
= (pte_val(p4d_pte(p4d)) & PTE_ADDR_MASK)
= p4d_pte(p4d).pte & (((1UL << (48 - 12)) - 1) << 12)
= __pte(p4d_val(p4d)).pte & (((1UL << 36) - 1) << 12)
= p4d_val(p4d) & (((1UL << 36) - 1) << 12)
= p4d.pgd.pgd & (((1UL << 36) - 1) << 12)
= 0x248b003 & (((1UL << 36) - 1) << 12)
注:(((1UL << 36) - 1) << 12) 即0x0000fffffffff000,表示bit[47:12]全为1,其余位为0。0x248b003 与之相与表示低12位(属性位)清0,只保留bit[47:12]的物理地址
= 0x248b000
接着分析
__pa_symbol(bm_pud)
#define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0))
#define __pa(x) __virt_to_phys((unsigned long)(x))
#define __virt_to_phys(x) __virt_to_phys_nodebug(x)
#define __virt_to_phys_nodebug(x) ({ \
phys_addr_t __x = (phys_addr_t)(__tag_reset(x)); \
__is_lm_address(__x) ? __lm_to_phys(__x) : __kimg_to_phys(__x); \
})
#define __tag_reset(addr) (addr)
/*
* Check whether an arbitrary address is within the linear map, which
* lives in the [PAGE_OFFSET, PAGE_END) interval at the bottom of the
* kernel's TTBR1 address range.
*/
#define __is_lm_address(addr) (((u64)(addr) ^ PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET))
#define __lm_to_phys(addr) (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET)
#define __kimg_to_phys(addr) ((addr) - kimage_voffset)
#define RELOC_HIDE(ptr, off) \
({ \
unsigned long __ptr; \
__asm__ ("" : "=r"(__ptr) : "0"(ptr)); \
(typeof(ptr)) (__ptr + (off)); \
})
即 (typeof(ptr)) (__ptr + (off));
代入参数
__pa_symbol(bm_pud) = __pa(RELOC_HIDE((unsigned long)(bm_pud), 0))
= __virt_to_phys((unsigned long)(bm_pud))
= __virt_to_phys_nodebug((phys_addr_t)(bm_pud))
__is_lm_address(bm_pud) = (((u64)(bm_pud) ^ PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET))
= (((u64)(bm_pud) ^ 0xffff000000000000) < (ffff800000000000- 0xffff000000000000))
= (ffff80001208b000 ^ 0xffff000000000000) < 0x0000800000000000
= 0x000080001208b000 < 0x0000800000000000 ?
= 为假 (因为0x000080001208b000比0x0000800000000000大)
(注1: lm_address指 区间:ffff000000000000 ffff7fffffffffff 128TB kernel logical memory map即[ffff0000 00000000, ffff8000 00000000) 其中ffff0000 00000000表示PAGE_OFFSET,
ffff8000 00000000表示PAGE_END。
注2: kernel image镜像起始地址为ffff8000 10000000,所以整个kernel image中的符号都不是lm_address 参考 arm64架构linux内核地址转换__pa(x)与__va(x)分析 - 温暖的电波 - 博客园 (cnblogs.com)
注3: PAGE_END数值计算过程
extern u64 vabits_actual;
#define PAGE_END (_PAGE_END(vabits_actual))
#define _PAGE_END(vabits_actual) (-(UL(1) << ((vabits_actual) - 1)))
= -(UL(1) << (48- 1))
= -(1<<47) + 0
= 0xffffffff ffffffff + 1 - 1<<47
= 0xffffffff ffffffff + 1 - 0x00008000 00000000
= 0xffff7fff ffffffff + 1
= 0xffff8000 00000000
) //注结束
因为__is_lm_address(bm_pud)为假,所以执行__kimg_to_phys(bm_pud)
__kimg_to_phys(bm_pud) = ((bm_pud) - kimage_voffset)
注:由打印可知kimage_voffset为ffff80000fc00000 (_text虚拟地址 - _text物理地址)
pr_notice("pudp:%llx, __pa_symbol(bm_pud):%llx, bm_pud:%llx, PAGE_END:%llx, kimage_voffset:%llx, &kimage_voffset:%llx\n",
(u64)pudp, __pa_symbol(bm_pud), (u64)bm_pud, (u64)PAGE_END, kimage_voffset, (u64)&kimage_voffset);
pr_notice("vabits_actual :0x%llx, &vabits_actual :%llx\n", vabits_actual, (u64)&vabits_actual);
[ 0.690824] pudp:ffff80001208bff8, __pa_symbol(bm_pud):248b000,
bm_pud:ffff80001208b000, PAGE_END:ffff800000000000,
kimage_voffset:ffff80000fc00000, &kimage_voffset:ffff8000115dd300
[ 0.690827] vabits_actual :0x30, &vabits_actual :ffff800012087810
= ffff80001208b000 - ffff80000fc00000
= 248b000
另一个方法计算bm_pud的物理地址
因为_text物理地址为0x0000000000400000, 虚拟地址为ffff800010000000
那么 bm_pud虚拟地址为ffff80001208b000,
则物理地址 = ffff80001208b000 - ffff800010000000 + 0x0000000000400000
= 208b000 + 0x0000000000400000
= 248b000
注:ffff800010000000 - 0x0000000000400000 即kimage_voffset