ioremap执行过程

驱动程序映射bar物理地址到虚拟地址时,通常使用ioremap系列函数。接下来,分析相关实现方式。

early_ioremap_init

  early_ioremap_init在start_kernel -> setup_arch函数中调用:

void __init early_ioremap_init(void)
{
        pmd_t *pmd;

#ifdef CONFIG_X86_64
        BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1)); // 检查低PMD_SHIFT位(x86_64上为21位)是否全为0,即该地址是否按PMD大小(2MB)对齐;不对齐则编译失败
        | |
        \/
   		/*  #define __fix_to_virt(x)        (FIXADDR_TOP - ((x) << PAGE_SHIFT))  , FIXADDR_TOP :从 vsyscall空间的基址取整产生的地址,属于固定映射区域的最高地址*/
#else
        WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#endif

        early_ioremap_setup(); // 初期固定映射地址填充slot_virt 数组

		||
		\/

		void __init early_ioremap_setup(void)
		{
        int i;

        for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
                if (WARN_ON(prev_map[i])) // prev_map 数组记录早期 ioremap(early_ioremap)各个slot当前映射的地址,仅在正式的ioremap可用之前的启动阶段使用;数组共FIX_BTMAPS_SLOTS项,所谓512个是指所有slot合计的临时映射页数(x86上每个slot为NR_FIX_BTMAPS=64页)。此时数组尚未使用,各项应为0,否则告警并退出循环
                /* static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; */
                /* #define __initdata	__section(.init.data); __initdata数据存储在.init.data段,这些内存在内核初始化结束后释放掉 */
                /* #define FIX_BTMAPS_SLOTS        8 */
                        break;

        for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
                slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); // 固定映射区域的虚拟地址
		}

  继续往下看:

pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));

||
\/

static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
        /* Don't assume we're using swapper_pg_dir at this point */
        pgd_t *base = __va(read_cr3_pa()); // 从cr3寄存器读出顶级页表(pgd)的物理基地址,再通过__va转换为内核虚拟地址
        /* __read_cr3() & CR3_ADDR_MASK */
        /* 屏蔽地址空间ID和SME加密位 */
        
        pgd_t *pgd = &base[pgd_index(addr)]; // pgd页目录(page global directory)
        /* pgd_index(addr) -> #define pgd_index(a)  (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) */
        /* #define PGDIR_SHIFT     pgdir_shift */
        /* unsigned int pgdir_shift __ro_after_init = 39; */
		/*	#define PTRS_PER_PGD    512 */
		
		
        p4d_t *p4d = p4d_offset(pgd, addr);  // 4级页目录
		||
		\/
		static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
		{
	        if (!pgtable_l5_enabled()) // 如果.config中没有开启CONFIG_X86_5LEVEL
	                return (p4d_t *)pgd; 
	        return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address); // pgd + 4级页表偏移值(偏移值为0,目前的计算机内存4级表应该足够使用了)
	        /* (pgd.pgd & PGD_ALLOWED_BITS) + ((address >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) */
		}

		/* #define PGD_ALLOWED_BITS        (PGD_PAE_PHYS_MASK | _PAGE_PRESENT | \
                                 _PAGE_PWT | _PAGE_PCD | \
                                 _PAGE_SOFTW1 | _PAGE_SOFTW2 | _PAGE_SOFTW3) */
        /* #define PTRS_PER_P4D            ptrs_per_p4d */
        /* unsigned int ptrs_per_p4d __ro_after_init = 1; */
        /* #define P4D_SHIFT               39 */

		

        pud_t *pud = pud_offset(p4d, addr); // pud页目录(page upper directory)
        ||
        \/
        static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
		{
		        return p4d_pgtable(*p4d) + pud_index(address); // 偏移范围小于等于511
		        /* (pud_t *)__va(p4d_val(p4d) & p4d_pfn_mask(p4d)) +   pud_index(address) */
		}

		/* #define pud_index(x)    (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) */
		/* #define PUD_SHIFT       30 */
		/* #define PTRS_PER_PUD    512 */



        pmd_t *pmd = pmd_offset(pud, addr); // pmd页目录(page middle directory)
        ||
        \/
        static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
		{
		        return pud_pgtable(*pud) + pmd_index(address); 
		        /* (pmd_t *)__va(pud_val(pud) & pud_pfn_mask(pud)) +  pmd_index(address); */
		}
		||
		\/
		static inline pudval_t pud_pfn_mask(pud_t pud)
		{
		        if (native_pud_val(pud) & _PAGE_PSE) // _PAGE_PSE置位表示该项直接映射大页;在PUD这一级对应1GB大页(4MB/2MB是该标志位在pmd级的含义)
		                return PHYSICAL_PUD_PAGE_MASK; // 掩码的 bit 30 ~ bit 51 为1,其余位为0(按下文 __PHYSICAL_MASK_SHIFT 为52计算,即 0x000FFFFFC0000000)
		        else
		                return PTE_PFN_MASK; // 掩码的 bit 12 ~ bit 51 为1,其余位为0(按下文 __PHYSICAL_MASK_SHIFT 为52计算,即 0x000FFFFFFFFFF000)
		}
		
		/* #define PHYSICAL_PUD_PAGE_MASK  (((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK) */
		/* #define PUD_PAGE_MASK           (~(PUD_PAGE_SIZE-1)) */
			   /* #define PUD_PAGE_SIZE           (_AC(1, UL) << PUD_SHIFT) */
		/* #define __PHYSICAL_MASK         physical_mask */
		      /*  phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; */
		/* #else
		   #define __PHYSICAL_MASK         ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1))
		   #endif  */
		/* #define __PHYSICAL_MASK_SHIFT   52 */
		   
		/* Extracts the PFN from a (pte|pmd|pud|pgd)val_t of a 4KB page */
		/* #define PTE_PFN_MASK            ((pteval_t)PHYSICAL_PAGE_MASK) */
		/* #define PHYSICAL_PAGE_MASK      (((signed long)PAGE_MASK) & __PHYSICAL_MASK) */
		/* #define PAGE_MASK               (~(PAGE_SIZE-1)) */

        return pmd;
}

/*
#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4;
EXPORT_SYMBOL(page_offset_base);
unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE_L4;
EXPORT_SYMBOL(vmalloc_base);
unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4;
EXPORT_SYMBOL(vmemmap_base);
#endif

#define __PAGE_OFFSET_BASE_L4   _AC(0xffff888000000000, UL)
#define __VMALLOC_BASE_L4       0xffffc90000000000UL
#define __VMEMMAP_BASE_L4       0xffffea0000000000UL

*/

  继续往下看:

memset(bm_pte, 0, sizeof(bm_pte)); // bm_pte占一页大小
/* static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; */

pmd_populate_kernel(&init_mm, pmd, bm_pte); // 把bm_pte这张pte页表挂到pmd项上:将bm_pte的物理地址写入pmd,使早期固定映射区域对应的pmd指向bm_pte
||
\/
static inline void pmd_populate_kernel(struct mm_struct *mm,
                                       pmd_t *pmd, pte_t *pte)
{
        paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT);  // 半虚拟化钩子:告知(可能存在的)hypervisor,页帧号为pfn的页面将被用作pte页表;这里并不分配内存
        /* PVOP_VCALL2(mmu.alloc_pte, mm, pfn):即以(mm, pfn)为参数调用 mmu.alloc_pte 回调 */
        set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); // 将(pte页表的物理地址 | _PAGE_TABLE属性位)写入pmd项
        /* PVOP_VCALL2(mmu.set_pmd, pmdp, native_pmd_val(pmd)):即以(pmdp, native_pmd_val(pmd))为参数调用 mmu.set_pmd 回调 */
}

/* #define __pa(x)         __phys_addr((unsigned long)(x)) */
/* __phys_addr的核心计算:先令 y = x - __START_KERNEL_map; 然后 x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET)); */
/* #define __START_KERNEL_map      _AC(0xffffffff80000000, UL) */  




 if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { // 如果FIX_BTMAP_BEGIN与FIX_BTMAP_END对应的不是同一个pmd项(即临时映射区域跨越了pmd边界),则告警并打印调试信息
                WARN_ON(1);
                printk(KERN_WARNING "pmd %p != %p\n",
                       pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
                printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
                        fix_to_virt(FIX_BTMAP_BEGIN));
                printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END):   %08lx\n",
                        fix_to_virt(FIX_BTMAP_END));

                printk(KERN_WARNING "FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
                printk(KERN_WARNING "FIX_BTMAP_BEGIN:     %d\n",
                       FIX_BTMAP_BEGIN);
        }
ioremap

 

void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
{
	enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;

        return __ioremap_caller(phys_addr, size, pcm,
                                __builtin_return_address(0), false);
}
||
\/
static void __iomem *
__ioremap_caller(resource_size_t phys_addr, unsigned long size,
                 enum page_cache_mode pcm, void *caller, bool encrypted)
{
	...
	__ioremap_check_mem(phys_addr, size, &io_desc); // 检查该物理地址范围的属性并记录到io_desc(是否属于系统RAM、是否需要加密映射);资源遍历不涵盖EFI runtime services数据区域,但当SEV处于活动状态时该区域必须以加密方式映射,因此会另行检查

	if (io_desc.flags & IORES_MAP_SYSTEM_RAM) { // 不能映射RAM内存
                WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
                          &phys_addr, &last_addr);
                return NULL;
        }

		offset = phys_addr & ~PAGE_MASK; 
        phys_addr &= PHYSICAL_PAGE_MASK;
        size = PAGE_ALIGN(last_addr+1) - phys_addr;

        retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
                                                pcm, &new_pcm); // 在rbtrees(红黑树)中嵌入节点

		 prot = PAGE_KERNEL_IO;
        if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted) // 如果被映射的页面位于内存中且SEV处于活动状态,则确保在生成的映射中启用了内存加密属性。在TDX客户机中,内存默认被标记为私有;如果没有请求加密(即encrypted参数为false),则显式设置解密属性
                prot = pgprot_encrypted(prot);
        else
                prot = pgprot_decrypted(prot);

		area = get_vm_area_caller(size, VM_IOREMAP, caller); //执行vmap分配函数,设置虚拟内存区域
        if (!area)
                goto err_free_memtype;
        area->phys_addr = phys_addr;
        vaddr = (unsigned long) area->addr;

		if (memtype_kernel_map_sync(phys_addr, size, pcm))
                goto err_free_area;

		 if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) // 将物理地址映射到虚拟地址
                goto err_free_area;

        ret_addr = (void __iomem *) (vaddr + offset);
        mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); // 注册trace

		if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size)) // 检查请求的范围是否跨越了iomem资源树中的多个条目(例如同时覆盖多个BAR)
                pr_warn("caller %pS mapping multiple BARs\n", caller);

        return ret_addr;
err_free_area:
        free_vm_area(area);
err_free_memtype:
        memtype_free(phys_addr, phys_addr + size);
        return NULL;
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

坤昱

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值