pmd_alloc/pte_alloc/pud_alloc设置页表

arm64为打开MMU而进行的CPU初始化

ARM64的启动过程之(三):为打开MMU而进行的CPU初始化

CPU通过TCR寄存器确定当前系统的VA_BITS和PAGE_SIZE

aarch64的TCR寄存器介绍_arm tcr 寄存器-CSDN博客

类型定义

arm64使用64K页大小时(VA_BITS为48或52),PGTABLE_LEVELS = 3
//arch/arm64/include/asm/pgtable-types.h

// Raw 64-bit value types, one per page-table level.
typedef u64 pteval_t;
typedef u64 pmdval_t;
typedef u64 pudval_t;
typedef u64 p4dval_t;
typedef u64 pgdval_t;
 
// Each entry type below is a single-member struct wrapping the raw value.
typedef struct { pteval_t pte; } pte_t;

#if CONFIG_PGTABLE_LEVELS > 2           //pmd_t is only defined here when there are more than 2 levels
typedef struct { pmdval_t pmd; } pmd_t;
#define pmd_val(x)      ((x).pmd)
#endif

#if CONFIG_PGTABLE_LEVELS > 3         //pud_t is only defined here when there are more than 3 levels
typedef struct { pudval_t pud; } pud_t;
#define pud_val(x)      ((x).pud)
#endif

typedef struct { pgdval_t pgd; } pgd_t;  //pgd_t is always defined
#define pgd_val(x)      ((x).pgd)


// Selects which levels are folded for 2, 3 and 4 page-table levels.
// NOTE(review): the indented lines under each #include look like an outline of
// what that generic header pulls in and defines, not literal file content.
#if CONFIG_PGTABLE_LEVELS == 2          //2 levels: pmd, pud and p4d are all folded
#include <asm-generic/pgtable-nopmd.h>
	#include <asm-generic/pgtable-nopud.h>
	#define __PAGETABLE_PMD_FOLDED 1
	typedef struct { pud_t pud; } pmd_t;   //following the include chain: pmd_t wraps pud_t, pud_t wraps p4d_t, and p4d_t wraps pgd_t
	#define set_pud(pudptr, pudval)                 set_pmd((pmd_t *)(pudptr), (pmd_t) { pudval })

	// Folded pmd: the pud pointer itself is returned as the pmd pointer.
	static inline pmd_t * pmd_offset(pud_t * pud, unsigned long address)
	{
			return (pmd_t *)pud;
	}
	
#elif CONFIG_PGTABLE_LEVELS == 3           //3 levels: pud and p4d are folded
#include <asm-generic/pgtable-nopud.h>
	#include <asm-generic/pgtable-nop4d.h>
	#define __PAGETABLE_PUD_FOLDED 1
	typedef struct { p4d_t p4d; } pud_t;
	#define pud_val(x)   (p4d_val((x).p4d))
	#define set_p4d(p4dptr, p4dval) set_pud((pud_t *)(p4dptr), (pud_t) { p4dval })

	// Folded pud: the p4d pointer itself is returned as the pud pointer.
	static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
	{
			return (pud_t *)p4d;
	}

	
	
#elif CONFIG_PGTABLE_LEVELS == 4       //4 levels: only p4d is folded
#include <asm-generic/pgtable-nop4d.h>
	#define __PAGETABLE_P4D_FOLDED 1
	typedef struct { pgd_t pgd; } p4d_t;
	#define p4d_val(x)   (pgd_val((x).pgd))
	#define set_pgd(pgdptr, pgdval) set_p4d((p4d_t *)(pgdptr), (p4d_t) { pgdval })

	// Folded p4d: the pgd pointer itself is returned as the p4d pointer.
	static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
	{   
			return (p4d_t *)pgd; 
	} 
#endif

头文件关系: 

include/linux/pgtable.h
	#include <asm/pgtable.h>  //arch/arm64/include/asm/pgtable.h
	#include <asm/pgtable-hwdef.h>
	#include <asm/pgtable-prot.h>
[root@localhost linux-5.10]# grep -R "pgtable-types.h" arch/arm64/include/

arch/arm64/include/asm/kasan.h:#include <asm/pgtable-types.h>
arch/arm64/include/asm/pgtable-prot.h:#include <asm/pgtable-types.h>
arch/arm64/include/asm/page.h:#include <asm/pgtable-types.h>
arch/arm64/include/asm/mte.h:#include <asm/pgtable-types.h>
		
内核头文件使用架构相关的arch/arm64/include/asm/pgtable-types.h来根据PGTABLE_LEVELS来包含include/asm-generic/pgtable-noXXX.h

//include/asm-generic/pgtable-nopud.h
// Folded-pud variant: performs no table lookup and ignores address; the p4d
// pointer is simply reinterpreted as a pud pointer.
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
	return (pud_t *)p4d;
}
// Defining the name as a macro lets a later "#ifndef pud_offset" see that a
// definition already exists.
#define pud_offset pud_offset


如果架构没有定义pud_offset则使用通用的:

//include/linux/pgtable.h 

// Generic fallback, compiled only if no pud_offset macro has been defined yet.
#ifndef pud_offset
// Returns the pud entry for address: the virtual address obtained from *p4d
// via p4d_page_vaddr(), indexed by pud_index(address).
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
	return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
}
#define pud_offset pud_offset
#endif





只有三级levels时 各级PxD_SHIFT的大小:

include/asm-generic/pgtable-nop4d.h:#define P4D_SHIFT        PGDIR_SHIFT
include/asm-generic/pgtable-nopud.h:#define PUD_SHIFT    P4D_SHIFT

//arch/arm64/include/asm/pgtable-hwdef.h

/* Number of address bits mapped by one entry at level n. */
#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3)

/* Entries per PTE table: 2^(PAGE_SHIFT - 3). */
#define PTRS_PER_PTE            (1 << (PAGE_SHIFT - 3))

/*
 * PMD_SHIFT determines the size a level 2 page table entry can map.
 * Only defined here when there are more than 2 levels.
 */
#if CONFIG_PGTABLE_LEVELS > 2
#define PMD_SHIFT               ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
#define PMD_SIZE                (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK                (~(PMD_SIZE-1))
#define PTRS_PER_PMD            PTRS_PER_PTE
#endif

/*
 * PUD_SHIFT determines the size a level 1 page table entry can map.
 * Only defined here when there are more than 3 levels.
 */
#if CONFIG_PGTABLE_LEVELS > 3
#define PUD_SHIFT               ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
#define PUD_SIZE                (_AC(1, UL) << PUD_SHIFT)
#define PUD_MASK                (~(PUD_SIZE-1))
#define PTRS_PER_PUD            PTRS_PER_PTE
#endif

ARM64_HW_PGTABLE_LEVEL_SHIFT 宏的作用:

/*
 * Size mapped by an entry at level n ( 0 <= n <= 3)
 * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
 * in the final page. The maximum number of translation levels supported by
 * the architecture is 4. Hence, starting at level n, we have further
 * ((4 - n) - 1) levels of translation excluding the offset within the page.
 * So, the total number of bits mapped by an entry at level n is :
 *
 *  ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT
 *
 * Rearranging it a bit we get :
 *   (4 - n) * (PAGE_SHIFT - 3) + 3
 */
#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3)

该宏的注释翻译一下,大概的意思是:

在每一级转换(translation level,即各级页表)中,都使用虚拟地址的 (PAGE_SHIFT - 3) 个bit作为该级页表的索引,
最后的 PAGE_SHIFT 个bit是页内偏移。arm64架构支持的最大页表级数为4。
因此从第n级(L0 即PGD, L1 即PUD,以此类推)开始,除去页内偏移,后面还有
((4 - n) - 1) 级转换,每级占 (PAGE_SHIFT - 3) 个bit。所以第n级的一个表项所映射的总bit数为
((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT,整理后即 (4 - n) * (PAGE_SHIFT - 3) + 3

对照下图,4K页表(PAGE_SHIFT = 12)L0-L3 索引所在的偏移量分别是
L3 索引所在的偏移量: PAGE_SHIFT = 12
L2 索引所在的偏移量:1 * (PAGE_SHIFT-3) + PAGE_SHIFT = 21
L1 索引所在的偏移量:2 * (PAGE_SHIFT-3) + PAGE_SHIFT = 30
L0 索引所在的偏移量:3 * (PAGE_SHIFT-3) + PAGE_SHIFT = 39

验证一下,Ln 索引对应的偏移为 ARM64_HW_PGTABLE_LEVEL_SHIFT(n),计算结果如下

 /* 
  *  PGDIR_SHIFT = ARM64_HW_PGTABLE_LEVEL_SHIFT(0) = 39
  *  PUD_SHIFT	 = ARM64_HW_PGTABLE_LEVEL_SHIFT(1) = 30
  *  PMD_SHIFT	 = ARM64_HW_PGTABLE_LEVEL_SHIFT(2) = 21
  */


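如果是64K(PAGE_SHIFT = 16):只有3级页表,此时PGDIR_SHIFT = ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS),即PGD对应硬件的L1级
 /* 
  *  PGDIR_SHIFT = ARM64_HW_PGTABLE_LEVEL_SHIFT(1) = 42
  *  PMD_SHIFT	 = ARM64_HW_PGTABLE_LEVEL_SHIFT(2) = 29
  */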

宏:

/*
 * show_macros() - dump the page-table layout constants to the kernel log.
 *
 * Prints the per-level shifts, the number of entries per table at each
 * level, and PAGE_MASK. Takes no arguments and returns nothing.
 *
 * pr_info() is used instead of a bare printk() so that every line carries an
 * explicit log level.
 */
static void show_macros(void)
{
	/* Per-level shifts: log2 of the region size one entry at that level maps. */
	pr_info("PGDIR_SHIFT = %d\n", PGDIR_SHIFT);
	pr_info("P4D_SHIFT = %d\n", P4D_SHIFT);
	pr_info("PUD_SHIFT = %d\n", PUD_SHIFT);
	pr_info("PMD_SHIFT = %d\n", PMD_SHIFT);
	pr_info("PAGE_SHIFT = %d\n", PAGE_SHIFT);

	/* Number of entries in one table at each level. */
	pr_info("PTRS_PER_PGD = %d\n", PTRS_PER_PGD);
	pr_info("PTRS_PER_P4D = %d\n", PTRS_PER_P4D);
	pr_info("PTRS_PER_PUD = %d\n", PTRS_PER_PUD);
	pr_info("PTRS_PER_PMD = %d\n", PTRS_PER_PMD);
	pr_info("PTRS_PER_PTE = %d\n", PTRS_PER_PTE);

	pr_info("PAGE_MASK = 0x%lx\n", PAGE_MASK);
}

pmd_alloc: 

include/linux/mm.h 

#if defined(CONFIG_MMU)

// Returns the p4d entry for address under pgd. __p4d_alloc() is only called
// when *pgd is empty (pgd_none); if it is called and returns non-zero, the
// result is NULL instead.
static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
		unsigned long address)
{
	return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ?
		NULL : p4d_offset(pgd, address);
}

// Same pattern one level down: __pud_alloc() runs only when *p4d is empty;
// a non-zero return from it yields NULL, otherwise pud_offset() is returned.
static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d,
		unsigned long address)
{
	return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ?
		NULL : pud_offset(p4d, address);
}

// Same pattern for the pmd level: __pmd_alloc() runs only when *pud is empty.
static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
	return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?  //__pmd_alloc allocates one page for the pmd table
		NULL: pmd_offset(pud, address); //returns the address of one entry in the pmd table
}
#endif /* CONFIG_MMU */


pte_alloc不需要返回pte_t:
// Evaluates to non-zero only when *pmd is empty and __pte_alloc() returns
// non-zero; unlike the p4d/pud/pmd helpers it does not yield an entry pointer.
#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))


 __pmd_alloc分配pmd表物理页:

分析:

#ifndef __PAGETABLE_PMD_FOLDED //note the guard: only compiled when the pmd level is not folded (pgtable-nopmd.h not in use)
/*
 * Allocate a pmd table and install it in *pud.
 * Returns 0 on success (including when someone else installed a table
 * first) and -ENOMEM when pmd_alloc_one() fails.
 */
int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
	spinlock_t *ptl;
	pmd_t *new = pmd_alloc_one(mm, address); //allocates one page and returns its address as a pmd_t pointer
	if (!new)
		return -ENOMEM;

	smp_wmb(); /* See comment in __pte_alloc */

	ptl = pud_lock(mm, pud);
	/* Re-check under the lock: only populate if *pud is still not present. */
	if (!pud_present(*pud)) { 
		mm_inc_nr_pmds(mm);
		pud_populate(mm, pud, new);   //fill this pud entry so that it refers to the newly allocated pmd table
	} else	/* Another has populated it */
		pmd_free(mm, new);
	spin_unlock(ptl);
	return 0;
}
#endif /* __PAGETABLE_PMD_FOLDED */

/*
 * Allocate one page to be used as a pmd table.
 * Returns the page's address as a pmd_t pointer, or NULL if either the page
 * allocation or pgtable_pmd_page_ctor() fails. addr is not used here.
 */
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
	struct page *page;
	gfp_t gfp = GFP_PGTABLE_USER;

	/* init_mm gets the kernel page-table GFP flags instead of the user ones. */
	if (mm == &init_mm)
		gfp = GFP_PGTABLE_KERNEL;
	page = alloc_pages(gfp, 0); /* order 0: a single page */
	if (!page)
		return NULL;
	/* Constructor failed: give the page back before reporting failure. */
	if (!pgtable_pmd_page_ctor(page)) {
		__free_pages(page, 0);
		return NULL;
	}
	return (pmd_t *)page_address(page);
}


 pud_populate设置pud表项


//arch/arm64/include/asm/pgalloc.h
// Builds a pud value from the physical address pmdp OR-ed with prot and
// stores it through pudp.
static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
{
	set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot)); //pmd table address | table attribute prot: build a pud_t value and write it to the slot pudp points to
}

// Points the pud entry *pudp at the pmd table pmdp, marked PMD_TYPE_TABLE.
// mm is not used here.
static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
{
	__pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE); //__pa() converts pmdp to a physical address, i.e. the physical address of the pmd table
}


//include/linux/pgtable.h
// Returns the pmd entry for address: the virtual address obtained from *pud
// via pud_page_vaddr(), indexed by pmd_index(address).
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
{
	return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);  
}

// Index of address within a pmd table: shift right by PMD_SHIFT, then mask
// with PTRS_PER_PMD - 1.
static inline unsigned long pmd_index(unsigned long address)
{
	return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
}

//arch/arm64/include/asm/pgtable.h
// Physical address stored in a pud entry, extracted with __pud_to_phys().
static inline phys_addr_t pud_page_paddr(pud_t pud)
{
	return __pud_to_phys(pud); //the pud value holds the physical address of the pmd table it points to
}

// Same address as pud_page_paddr(), converted with __va() and returned as an
// unsigned long.
static inline unsigned long pud_page_vaddr(pud_t pud)
{
	return (unsigned long)__va(pud_page_paddr(pud)); //convert the physical address held in the pud entry to a virtual address
}

 流程图:

分析__handle_mm_fault缺页错误处理中的页表设置


//mm/memory.c
//arm64 /64K /levels=3
/*
 * Excerpt of the page-fault path that walks/allocates the upper page-table
 * levels for address and then hands over to handle_pte_fault(). The "...."
 * line marks code omitted from this excerpt. Returns VM_FAULT_OOM when a
 * p4d or pud level cannot be obtained.
 */
static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
		unsigned long address, unsigned int flags)
{
	struct vm_fault vmf = {
		.vma = vma,
		.address = address & PAGE_MASK,
		.flags = flags,
		.pgoff = linear_page_index(vma, address),
		.gfp_mask = __get_fault_gfp_mask(vma),
	};
	unsigned int dirty = flags & FAULT_FLAG_WRITE;
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	p4d_t *p4d;
	vm_fault_t ret;

	pgd = pgd_offset(mm, address); //#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
								//pgd_offset is always available, from include/linux/pgtable.h
	p4d = p4d_alloc(mm, pgd, address); //with 3 levels the p4d returned here is just pgd
	if (!p4d)
		return VM_FAULT_OOM;

	vmf.pud = pud_alloc(mm, p4d, address); //pud does not exist either with 3 levels: returns p4d, which is pgd
	if (!vmf.pud)
		return VM_FAULT_OOM;
retry_pud:
	if (pud_none(*vmf.pud) && __transparent_hugepage_enabled(vma)) {
		ret = create_huge_pud(&vmf);
		if (!(ret & VM_FAULT_FALLBACK))
			return ret;
	} else {
		/* Work on a snapshot of the pud entry taken before the barrier. */
		pud_t orig_pud = *vmf.pud;

		barrier();
		if (pud_trans_huge(orig_pud) || pud_devmap(orig_pud)) {

			/* NUMA case for anonymous PUDs would go here */

			if (dirty && !pud_write(orig_pud)) {
				ret = wp_huge_pud(&vmf, orig_pud);
				if (!(ret & VM_FAULT_FALLBACK))
					return ret;
			} else {
				huge_pud_set_accessed(&vmf, orig_pud);
				return 0;
			}
		}
	}

	vmf.pmd = pmd_alloc(mm, vmf.pud, address);   //pmd is a real level here: if *vmf.pud is empty, __pmd_alloc allocates one page and pud_populate makes *vmf.pud refer to the new pmd table
        //with 3 levels this means the pgd_t entry now refers to the pmd table, although the code still walks pgd, p4d and pud as if every level existed
	
	....
	
	return handle_pte_fault(&vmf); //the rest of the page-table setup continues from here
	
	
}

====================================================================
pgd_t * pgd;
p4d_t * p4d;
pud_t * pud;
pmd_t * pmd;

pgd = pgd_offset(mm,address)  //根据address,获取pgd_t
p4d = p4d_alloc(mm,pgd,address)
	  如果p4d这一级真实存在(levels>=5)且pgd表项为空,则分配物理页并pgd_populate;最终返回address对应的p4d_t表项
	  如果p4d被折叠,则不分配,直接返回pgd
pud = pud_alloc(mm,p4d,address)
	  如果pud这一级真实存在(levels>=4)且p4d表项为空,则分配物理页并p4d_populate;最终返回address对应的pud_t表项
	  如果pud被折叠,则不分配,直接返回p4d = pgd
pmd = pmd_alloc(mm,pud,address)
	  如果pmd这一级真实存在(levels>=3)且pud表项为空,则分配物理页并pud_populate;最终返回address对应的pmd_t表项
	  如果pmd被折叠,则不分配,直接返回pud = p4d = pgd
	  
handle_pte_fault
	do_anonymous_page:
分配pte
#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd))
//pmd这一级一般真实存在:levels=3时pmd是独立的一级(它上面的pud/p4d折叠进pgd),只有levels=2时pmd才折叠为pgd_t
//__pte_alloc
//	pte_alloc_one  //分配pte页表
//	pmd_populate   //设置pmd表项为pte	

pte_t * pte ;
pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
			&vmf->ptl);
	->pte_offset_map(dir, address)	
		pte_offset_kernel((dir), (address))
			return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);

p4d_alloc分析: 

如果levels=3
nopud/nop4d/
p4d_none 总返回 0 ;认为p4d不为空,存在
pgd_none 总返回 0 ;认为pgd不为空,存在


// Returns the p4d entry for address. __p4d_alloc() is only evaluated when
// *pgd is empty; a non-zero return from it turns the result into NULL.
static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
		unsigned long address)
{
	return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ?        //the folded __p4d_alloc stub returns 0
		NULL : p4d_offset(pgd, address);                                       //so the function ends up returning p4d_offset()
																//i.e. with levels=3, where p4d does not exist, pgd is always returned and nothing is allocated
}


//include/linux/mm.h
// With a folded p4d, __p4d_alloc() is an inline stub that does nothing and
// returns 0; otherwise only the prototype is declared here.
#ifdef __PAGETABLE_P4D_FOLDED
static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
						unsigned long address)
{
	return 0;
}
#else
int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
#endif

//mm/memory.c
#ifndef __PAGETABLE_P4D_FOLDED
/*
 * Allocate p4d page table.
 * We've already handled the fast-path in-line.
 *
 * Returns 0 on success (including when the pgd entry was populated by
 * someone else in the meantime) and -ENOMEM when p4d_alloc_one() fails.
 */
int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
{
	p4d_t *new = p4d_alloc_one(mm, address);         //author's note: provided only by x86 (TODO confirm for other architectures); otherwise include/asm-generic/pgtable-nop4d.h has
	if (!new) 										 //#define p4d_alloc_one(mm, address)		NULL
		return -ENOMEM;

	smp_wmb(); /* See comment in __pte_alloc */

	spin_lock(&mm->page_table_lock);
	if (pgd_present(*pgd))		/* Another has populated it */
		p4d_free(mm, new);
	else
		pgd_populate(mm, pgd, new);         //make the pgd entry refer to the new p4d table
	spin_unlock(&mm->page_table_lock);
	return 0;
}
#endif /* __PAGETABLE_P4D_FOLDED */


其它pud、pmd页表也类似

// Returns the pud entry for address. __pud_alloc() is only evaluated when
// *p4d is empty; a non-zero return from it turns the result into NULL.
static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d,
		unsigned long address)
{
	return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ?
		NULL : pud_offset(p4d, address);
}

// Returns the pmd entry for address. __pmd_alloc() is only evaluated when
// *pud is empty; a non-zero return from it turns the result into NULL.
static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
	return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
		NULL: pmd_offset(pud, address);
}

 缺页过程的页表创建设置流程.drawio:

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

古井无波 2024

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值