arm64为打开MMU而进行的CPU初始化
ARM64的启动过程之(三):为打开MMU而进行的CPU初始化
CPU 通过 TCR 寄存器确定当前系统的 VA_BITS 和 PAGE_SIZE
aarch64的TCR寄存器介绍_arm tcr 寄存器-CSDN博客
类型定义
arm64设置64K的页表,这种情况下PGTABLE_LEVELS = 3
//arch/arm64/include/asm/pgtable-types.h
/* Raw entry value types: one u64 typedef per page-table level. */
typedef u64 pteval_t;
typedef u64 pmdval_t;
typedef u64 pudval_t;
typedef u64 p4dval_t;
typedef u64 pgdval_t;
/* Each level's entry type is a single-member struct wrapping the raw value. */
typedef struct { pteval_t pte; } pte_t;
#if CONFIG_PGTABLE_LEVELS > 2 // pmd_t is defined here only with more than 2 levels
typedef struct { pmdval_t pmd; } pmd_t;
#define pmd_val(x) ((x).pmd)
#endif
#if CONFIG_PGTABLE_LEVELS > 3 // pud_t is defined here only with more than 3 levels
typedef struct { pudval_t pud; } pud_t;
#define pud_val(x) ((x).pud)
#endif
typedef struct { pgdval_t pgd; } pgd_t; // pgd_t always exists
#define pgd_val(x) ((x).pgd)
/*
 * Level folding: depending on CONFIG_PGTABLE_LEVELS, the missing levels are
 * defined as wrappers around the next level up, and their *_offset() helpers
 * return the incoming pointer unchanged (only the pointer type changes).
 */
#if CONFIG_PGTABLE_LEVELS == 2 // 2 levels: there is no pmd, pud or p4d
#include <asm-generic/pgtable-nopmd.h>
#include <asm-generic/pgtable-nopud.h>
#define __PAGETABLE_PMD_FOLDED 1
typedef struct { pud_t pud; } pmd_t; // following the include chain: pmd_t wraps pud_t, pud_t wraps p4d_t, and p4d_t wraps pgd_t
#define set_pud(pudptr, pudval) set_pmd((pmd_t *)(pudptr), (pmd_t) { pudval })
/* Folded PMD: returns the pud pointer itself viewed as pmd_t *; address is not used. */
static inline pmd_t * pmd_offset(pud_t * pud, unsigned long address)
{
return (pmd_t *)pud;
}
#elif CONFIG_PGTABLE_LEVELS == 3 // 3 levels: there is no pud or p4d
#include <asm-generic/pgtable-nopud.h>
#include <asm-generic/pgtable-nop4d.h>
#define __PAGETABLE_PUD_FOLDED 1
typedef struct { p4d_t p4d; } pud_t;
#define pud_val(x) (p4d_val((x).p4d))
#define set_p4d(p4dptr, p4dval) set_pud((pud_t *)(p4dptr), (pud_t) { p4dval })
/* Folded PUD: returns the p4d pointer itself viewed as pud_t *; address is not used. */
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
return (pud_t *)p4d;
}
#elif CONFIG_PGTABLE_LEVELS == 4 // 4 levels: there is no p4d
#include <asm-generic/pgtable-nop4d.h>
#define __PAGETABLE_P4D_FOLDED 1
typedef struct { pgd_t pgd; } p4d_t;
#define p4d_val(x) (pgd_val((x).pgd))
#define set_pgd(pgdptr, pgdval) set_p4d((p4d_t *)(pgdptr), (p4d_t) { pgdval })
/* Folded P4D: returns the pgd pointer itself viewed as p4d_t *; address is not used. */
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
return (p4d_t *)pgd;
}
#endif
头文件关系:
include/linux/pgtable.h
#include <asm/pgtable.h> //arch/arm64/include/asm/pgtable.h
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable-prot.h>
[root@localhost linux-5.10]# grep -R "pgtable-types.h" arch/arm64/include/
arch/arm64/include/asm/kasan.h:#include <asm/pgtable-types.h>
arch/arm64/include/asm/pgtable-prot.h:#include <asm/pgtable-types.h>
arch/arm64/include/asm/page.h:#include <asm/pgtable-types.h>
arch/arm64/include/asm/mte.h:#include <asm/pgtable-types.h>
内核头文件使用架构相关的arch/arm64/include/asm/pgtable-types.h来根据PGTABLE_LEVELS来包含include/asm-generic/pgtable-noXXX.h
//include/asm-generic/pgtable-nopud.h
/*
 * Folded-PUD version of pud_offset(): no table lookup is done, the p4d
 * pointer is returned as-is with its type changed to pud_t *. The address
 * argument is not used.
 */
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
return (pud_t *)p4d;
}
/* Self-named macro so that an "#ifndef pud_offset" check sees this definition. */
#define pud_offset pud_offset
如果架构没有定义pud_offset则使用通用的:
//include/linux/pgtable.h
/* Generic fallback: only compiled when no pud_offset macro has been defined. */
#ifndef pud_offset
/*
 * Returns a pointer to the PUD entry for address: the table located at
 * p4d_page_vaddr(*p4d), indexed by pud_index(address).
 */
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
}
#define pud_offset pud_offset
#endif
只有三级levels时 各级PxD_SHIFT的大小:
include/asm-generic/pgtable-nop4d.h:#define P4D_SHIFT PGDIR_SHIFT
include/asm-generic/pgtable-nopud.h:#define PUD_SHIFT P4D_SHIFT
//arch/arm64/include/asm/pgtable-hwdef.h
/* Shift for a level-n entry: (PAGE_SHIFT - 3) bits per remaining level, plus 3. */
#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3)
/* Entries per PTE table: 2^(PAGE_SHIFT - 3), i.e. PAGE_SIZE / 8. */
#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3))
/*
* PMD_SHIFT determines the size a level 2 page table entry can map.
* Only defined here when there are more than 2 levels.
*/
#if CONFIG_PGTABLE_LEVELS > 2
#define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
#define PTRS_PER_PMD PTRS_PER_PTE
#endif
/*
* PUD_SHIFT determines the size a level 1 page table entry can map.
* Only defined here when there are more than 3 levels.
*/
#if CONFIG_PGTABLE_LEVELS > 3
#define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE-1))
#define PTRS_PER_PUD PTRS_PER_PTE
#endif
ARM64_HW_PGTABLE_LEVEL_SHIFT 宏的作用:
/*
* Size mapped by an entry at level n ( 0 <= n <= 3)
* We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
* in the final page. The maximum number of translation levels supported by
* the architecture is 4. Hence, starting at level n, we have further
* ((4 - n) - 1) levels of translation excluding the offset within the page.
* So, the total number of bits mapped by an entry at level n is :
*
* ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT
*
* Rearranging it a bit we get :
* (4 - n) * (PAGE_SHIFT - 3) + 3
*/
#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3)
该宏的注释翻译一下,大概的意思是:
在每一级“translation levels”中,也就是PGD, PUD, PMD 中,
都是使用 (PAGE_SHIFT - 3) 个bits作为map。且当前架构arm64 支持的最大的页表级数为4。
因此,除了PAGE_SHIFT,Ln 级索引(L0 索引即PGD, L1 索引即PUD,以此类推)的偏移还有
((4 - n) - 1) 个 (PAGE_SHIFT - 3) 。因此计算公式就是
((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT
对照下图,4K页表L0-L3 索引所在的偏移量分别是
L3 索引所在的偏移量: PAGE_SHIFT = 12
L2 索引所在的偏移量:1 * (PAGE_SHIFT-3) + PAGE_SHIFT = 21
L1 索引所在的偏移量:2 * (PAGE_SHIFT-3) + PAGE_SHIFT = 30
L0 索引所在的偏移量:3 * (PAGE_SHIFT-3) + PAGE_SHIFT = 39
验证一下,Ln 索引对应的偏移为 ARM64_HW_PGTABLE_LEVEL_SHIFT(n),计算结果如下
/*
* PGDIR_SHIFT = ARM64_HW_PGTABLE_LEVEL_SHIFT(0) = 39
* PUD_SHIFT = ARM64_HW_PGTABLE_LEVEL_SHIFT(1) = 30
* PMD_SHIFT = ARM64_HW_PGTABLE_LEVEL_SHIFT(2) = 21
*/
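如果是64K(PAGE_SHIFT = 16):只有3级页表,此时PGD对应硬件的L1,即 PGDIR_SHIFT = ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS) = ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
/*
* PGDIR_SHIFT = ARM64_HW_PGTABLE_LEVEL_SHIFT(1) = 13 * 3 + 3 = 42
* PMD_SHIFT = ARM64_HW_PGTABLE_LEVEL_SHIFT(2) = 13 * 2 + 3 = 29
*/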
宏:
/*
 * Debug helper: prints the shift of every page-table level, the number of
 * entries per table at every level, and PAGE_MASK, one printk per value.
 */
static void show_macros(void)
{
printk("PGDIR_SHIFT = %d\n", PGDIR_SHIFT); // a *_SHIFT value is log2 of the region size one entry at that level maps
printk("P4D_SHIFT = %d\n", P4D_SHIFT);
printk("PUD_SHIFT = %d\n", PUD_SHIFT);
printk("PMD_SHIFT = %d\n", PMD_SHIFT);
printk("PAGE_SHIFT = %d\n", PAGE_SHIFT);
printk("PTRS_PER_PGD = %d\n", PTRS_PER_PGD);
printk("PTRS_PER_P4D = %d\n", PTRS_PER_P4D);
printk("PTRS_PER_PUD = %d\n", PTRS_PER_PUD);
printk("PTRS_PER_PMD = %d\n", PTRS_PER_PMD);
printk("PTRS_PER_PTE = %d\n", PTRS_PER_PTE);
printk("PAGE_MASK = 0x%lx\n", PAGE_MASK);
}
pmd_alloc:
include/linux/mm.h
#if defined(CONFIG_MMU)
/*
 * The three helpers below share one shape: if the upper-level entry is
 * "none", call the matching __pXd_alloc(); if that call returns nonzero the
 * helper returns NULL, otherwise it returns pXd_offset() for address.
 */

/* Returns the p4d entry for address under *pgd, or NULL if __p4d_alloc() fails. */
static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
unsigned long address)
{
return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ?
NULL : p4d_offset(pgd, address);
}
/* Returns the pud entry for address under *p4d, or NULL if __pud_alloc() fails. */
static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d,
unsigned long address)
{
return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ?
NULL : pud_offset(p4d, address);
}
/* Returns the pmd entry for address under *pud, or NULL if __pmd_alloc() fails. */
static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))? // __pmd_alloc allocates one physical page
NULL: pmd_offset(pud, address); // returns the address of an entry in the pmd table
}
#endif /* CONFIG_MMU */
pte_alloc不需要返回pte_t:
/*
 * Evaluates to nonzero only when the pmd entry is none and __pte_alloc()
 * returns nonzero. Unlike the pXd_alloc() helpers, it yields no pointer.
 */
#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
__pmd_alloc分配pmd表物理页:
分析:
#ifndef __PAGETABLE_PMD_FOLDED // note the guard: only built when the PMD is not folded (pgtable-nopmd.h not in use)
/*
 * Allocates a PMD table page and installs it in *pud.
 *
 * Returns -ENOMEM when pmd_alloc_one() fails, 0 otherwise. Under the pud
 * lock, the new table is installed only if *pud is not present; if it is
 * already present, the new page is freed again.
 */
int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
spinlock_t *ptl;
pmd_t *new = pmd_alloc_one(mm, address); // allocate one physical page; its address is returned cast to pmd_t *
if (!new)
return -ENOMEM;
smp_wmb(); /* See comment in __pte_alloc */
ptl = pud_lock(mm, pud);
if (!pud_present(*pud)) {
mm_inc_nr_pmds(mm);
pud_populate(mm, pud, new); // fill this pud entry: the pud value inside pud_t is set from the address of new
} else /* Another has populated it */
pmd_free(mm, new);
spin_unlock(ptl);
return 0;
}
#endif /* __PAGETABLE_PMD_FOLDED */
/*
 * Allocates a single page (order 0) for use as a PMD table.
 *
 * Uses GFP_PGTABLE_USER, or GFP_PGTABLE_KERNEL when mm is init_mm.
 * Returns the page's address as pmd_t *, or NULL if the allocation or
 * pgtable_pmd_page_ctor() fails (the page is freed in the latter case).
 * The addr argument is not used.
 */
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
struct page *page;
gfp_t gfp = GFP_PGTABLE_USER;
if (mm == &init_mm)
gfp = GFP_PGTABLE_KERNEL;
page = alloc_pages(gfp, 0);
if (!page)
return NULL;
if (!pgtable_pmd_page_ctor(page)) {
__free_pages(page, 0);
return NULL;
}
return (pmd_t *)page_address(page);
}
pud_populate设置pud表项
//arch/arm64/include/asm/pgalloc.h
/*
 * Writes a PUD entry at pudp that combines the physical address pmdp
 * (converted by __phys_to_pud_val) with the attribute bits in prot.
 */
static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
{
set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot)); // pmd table address | table attribute prot: builds a pud_t value and stores it where pudp points
}
/*
 * Points the PUD entry at pudp to the PMD table pmdp, passing
 * PMD_TYPE_TABLE as the attribute bits. The mm argument is not used.
 */
static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
{
__pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE); // __pa(pmdp) is the physical address of the pmd table
}
//include/linux/pgtable.h
/*
 * Returns a pointer to the PMD entry for address: the table located at
 * pud_page_vaddr(*pud), indexed by pmd_index(address).
 */
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
{
return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
}
/*
 * Index of address within a PMD table: address shifted right by PMD_SHIFT,
 * masked with PTRS_PER_PMD - 1.
 */
static inline unsigned long pmd_index(unsigned long address)
{
return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
}
//arch/arm64/include/asm/pgtable.h
/* Returns the physical address stored in a PUD entry, via __pud_to_phys(). */
static inline phys_addr_t pud_page_paddr(pud_t pud)
{
return __pud_to_phys(pud); // the value in pud->pud is the physical address of the pmd table it points to
}
/* Returns, as unsigned long, the virtual address (__va) of the physical address held in a PUD entry. */
static inline unsigned long pud_page_vaddr(pud_t pud)
{
return (unsigned long)__va(pud_page_paddr(pud)); // convert the address held in pud->pud to a virtual address
}
流程图:
分析__handle_mm_fault在页错误(缺页)处理中如何建立页表
//mm/memory.c
//arm64 /64K /levels=3
/*
 * Excerpt of the page-fault page-table walk; the "...." line marks code the
 * note's author left out.
 *
 * Builds a struct vm_fault for the page-aligned fault address, then walks
 * pgd -> p4d -> pud -> pmd, allocating each level on demand, and finally
 * hands off to handle_pte_fault(). Returns VM_FAULT_OOM when p4d_alloc() or
 * pud_alloc() returns NULL. The inline notes describe the arm64 / 64K page /
 * 3-level configuration.
 */
static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
{
struct vm_fault vmf = {
.vma = vma,
.address = address & PAGE_MASK,
.flags = flags,
.pgoff = linear_page_index(vma, address),
.gfp_mask = __get_fault_gfp_mask(vma),
};
unsigned int dirty = flags & FAULT_FLAG_WRITE;
struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd;
p4d_t *p4d;
vm_fault_t ret;
pgd = pgd_offset(mm, address); // #define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
// pgd_offset always exists (include/linux/pgtable.h)
p4d = p4d_alloc(mm, pgd, address); // with the p4d folded, the p4d returned here is just pgd
if (!p4d)
return VM_FAULT_OOM;
vmf.pud = pud_alloc(mm, p4d, address); // the pud does not exist either, so this returns p4d = pgd
if (!vmf.pud)
return VM_FAULT_OOM;
retry_pud:
if (pud_none(*vmf.pud) && __transparent_hugepage_enabled(vma)) {
ret = create_huge_pud(&vmf);
if (!(ret & VM_FAULT_FALLBACK))
return ret;
} else {
pud_t orig_pud = *vmf.pud;
barrier();
if (pud_trans_huge(orig_pud) || pud_devmap(orig_pud)) {
/* NUMA case for anonymous PUDs would go here */
if (dirty && !pud_write(orig_pud)) {
ret = wp_huge_pud(&vmf, orig_pud);
if (!(ret & VM_FAULT_FALLBACK))
return ret;
} else {
huge_pud_set_accessed(&vmf, orig_pud);
return 0;
}
}
}
vmf.pmd = pmd_alloc(mm, vmf.pud, address); // the pmd level is real: one page is allocated, and __pmd_alloc -> pud_populate makes the vmf.pud entry point at the pmd table covering address
// at this point the pgd_t entry ends up holding the pmd table, although the flow still walks pgd, p4d and pud as if they all existed
....
return handle_pte_fault(&vmf); // the rest of the page-table setup continues from here
}
====================================================================
pgd_t * pgd;
p4d_t * p4d;
pud_t * pud;
pmd_t * pmd;
pgd = pgd_offset(mm,address) //根据address,获取pgd_t
p4d = p4d_alloc(mm,pgd,address)
如果p4d存在(5level),则分配物理页,并pgd_populate,返回address对应的p4d_t表项
否则返回pgd
pud = pud_alloc(mm,p4d,address)
如果pud存在(4level),则分配物理页,并p4d_populate,返回address对应的pud_t表项
否则返回p4d = pgd
pmd = pmd_alloc(mm,pud,address)
如果pmd存在(3level),则分配物理页,并pud_populate,返回address对应的pmd_t表项
否则返回pud = p4d = pgd
handle_pte_fault
do_anonymous_page:
分配pte
#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
//pmd_none检查pmd表项是否为空;level3下pmd是真实存在的一级(被折叠成pgd_t的是pud/p4d),只有level2下pmd才等同于pgd_t
//__pte_alloc
// pte_alloc_one //分配pte页表
// pmd_populate //设置pmd表项为pte
pte_t * pte ;
pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
&vmf->ptl);
->pte_offset_map(dir, address)
pte_offset_kernel((dir), (address))
return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
p4d_alloc分析:
如果levels=3
nopud/nop4d/
p4d_none 总返回 0 ;认为p4d不为空,存在
pgd_none 总返回 0 ;认为pgd不为空,存在
/*
 * p4d_alloc() annotated for the 3-level case, where (per the notes above)
 * pgd_none() always returns 0, so the call always reaches p4d_offset().
 */
static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
unsigned long address)
{
return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ? // the folded __p4d_alloc returns 0
NULL : p4d_offset(pgd, address); // so the function ends up returning p4d_offset()
// in other words: with levels=3 there is no p4d, pgd is always returned, and nothing is allocated
}
//include/linux/mm.h
#ifdef __PAGETABLE_P4D_FOLDED
static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
unsigned long address)
{
return 0;
}
#else
int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
#endif
//mm/memory.c
#ifndef __PAGETABLE_P4D_FOLDED
/*
* Allocate p4d page table.
* We've already handled the fast-path in-line.
*
* Returns -ENOMEM when p4d_alloc_one() yields NULL, 0 otherwise. Under
* mm->page_table_lock the new table is installed with pgd_populate() unless
* the pgd entry is already present, in which case the new table is freed.
*/
int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
{
p4d_t *new = p4d_alloc_one(mm, address); // author's note: only x86 provides this; otherwise include/asm-generic/pgtable-nop4d.h has
if (!new) // #define p4d_alloc_one(mm, address) NULL
return -ENOMEM;
smp_wmb(); /* See comment in __pte_alloc */
spin_lock(&mm->page_table_lock);
if (pgd_present(*pgd)) /* Another has populated it */
p4d_free(mm, new);
else
pgd_populate(mm, pgd, new); // point the pgd entry at the new p4d table
spin_unlock(&mm->page_table_lock);
return 0;
}
#endif /* __PAGETABLE_P4D_FOLDED */
其它pud、pmd页表也类似
/*
 * Same shape as p4d_alloc(): if *p4d is none, call __pud_alloc(); a nonzero
 * result from it yields NULL, otherwise pud_offset(p4d, address) is returned.
 */
static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d,
unsigned long address)
{
return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ?
NULL : pud_offset(p4d, address);
}
/*
 * Same shape as p4d_alloc(): if *pud is none, call __pmd_alloc(); a nonzero
 * result from it yields NULL, otherwise pmd_offset(pud, address) is returned.
 */
static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
NULL: pmd_offset(pud, address);
}
缺页过程的页表创建设置流程.drawio: