驱动程序将BAR的物理地址映射到虚拟地址时,通常使用ioremap系列函数。下面分析其相关实现。
early_ioremap_init
early_ioremap_init在start_kernel -> setup_arch函数中调用:
void __init early_ioremap_init(void)
{
pmd_t *pmd;
#ifdef CONFIG_X86_64
BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1)); // 编译期检查:(fix_to_virt(0) + PAGE_SIZE)的低PMD_SHIFT位必须全为0(即按PMD大小对齐;x86_64上PMD_SHIFT为21,对应2MB),否则编译失败
| |
\/
/* #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) , FIXADDR_TOP :从 vsyscall空间的基址取整产生的地址,属于固定映射区域的最高地址*/
#else
WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
#endif
early_ioremap_setup(); // 初期固定映射地址填充slot_virt 数组
||
\/
/*
 * early_ioremap_setup() - prepare the boot-time ioremap slot table.
 *
 * First verifies that no early-ioremap slot is already in use (warns and
 * stops checking at the first non-zero prev_map[] entry), then fills
 * slot_virt[] with the fixmap virtual address belonging to each slot.
 */
void __init early_ioremap_setup(void)
{
int i;
for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
if (WARN_ON(prev_map[i])) // prev_map[] records the addresses of early ioremap regions (temporary boot-time mappings used before the regular ioremap is available); nothing has been mapped yet, so every entry is expected to be 0. NOTE(review): the original note mentions "512" — presumably the total number of boot-time fixmap pages (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS), not the number of prev_map[] entries, which is FIX_BTMAPS_SLOTS; confirm.
/* static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; */
/* #define __initdata __section(.init.data); __initdata objects live in the .init.data section, whose memory is released once kernel initialization has finished */
/* #define FIX_BTMAPS_SLOTS 8 */
break;
for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); // virtual address of slot i inside the fixed-mapping area; consecutive slots are NR_FIX_BTMAPS fixmap indices apart
}
继续往下看:
pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
||
\/
static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
/* Don't assume we're using swapper_pg_dir at this point */
pgd_t *base = __va(read_cr3_pa()); // 从cr3寄存器读出页表基地址(物理地址),并转换为内核虚拟地址
/* __read_cr3() & CR3_ADDR_MASK */
/* 屏蔽地址空间ID和SME加密位 */
pgd_t *pgd = &base[pgd_index(addr)]; // pgd页目录(page global directory)
/* pgd_index(addr) -> #define pgd_index(a) (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) */
/* #define PGDIR_SHIFT pgdir_shift */
/* unsigned int pgdir_shift __ro_after_init = 39; */
/* #define PTRS_PER_PGD 512 */
p4d_t *p4d = p4d_offset(pgd, addr); // 4级页目录
||
\/
/*
 * p4d_offset() - locate the P4D entry covering @address under the PGD entry @pgd.
 *
 * When 5-level paging is not enabled the P4D level is folded into the PGD,
 * so the PGD entry pointer itself is returned, reinterpreted as a p4d_t *.
 */
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
if (!pgtable_l5_enabled()) // 5-level paging is not in use (e.g. CONFIG_X86_5LEVEL is not enabled in .config)
return (p4d_t *)pgd;
return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address); // virtual address of the P4D table referenced by *pgd, plus the P4D index of the address (per the original note the index is 0 here, and 4-level tables should be enough for the memory of current machines)
/* original note's expansion: (pgd.pgd & PGD_ALLOWED_BITS) + ((address >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) — NOTE(review): confirm the mask actually applied by pgd_page_vaddr() against the kernel source */
}
/* #define PGD_ALLOWED_BITS (PGD_PAE_PHYS_MASK | _PAGE_PRESENT | \
_PAGE_PWT | _PAGE_PCD | \
_PAGE_SOFTW1 | _PAGE_SOFTW2 | _PAGE_SOFTW3) */
/* #define PTRS_PER_P4D ptrs_per_p4d */
/* unsigned int ptrs_per_p4d __ro_after_init = 1; */
/* #define P4D_SHIFT 39 */
pud_t *pud = pud_offset(p4d, addr); // pud页目录(page upper directory)
||
\/
/*
 * pud_offset() - return a pointer to the PUD entry covering @address inside
 * the PUD table referenced by the P4D entry *p4d.
 */
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
return p4d_pgtable(*p4d) + pud_index(address); // the index is at most 511, because pud_index() masks with (PTRS_PER_PUD - 1) and PTRS_PER_PUD is 512
/* expands to: (pud_t *)__va(p4d_val(p4d) & p4d_pfn_mask(p4d)) + pud_index(address) */
}
/* #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) */
/* #define PUD_SHIFT 30 */
/* #define PTRS_PER_PUD 512 */
pmd_t *pmd = pmd_offset(pud, addr); // pmd页目录(page middle directory)
||
\/
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
{
return pud_pgtable(*pud) + pmd_index(address);
/* (pmd_t *)__va(pud_val(pud) & pud_pfn_mask(pud)) + pmd_index(address); */
}
||
\/
/*
 * pud_pfn_mask() - choose the mask that extracts the physical-address bits
 * from a PUD entry, depending on whether the entry has _PAGE_PSE set.
 */
static inline pudval_t pud_pfn_mask(pud_t pud)
{
if (native_pud_val(pud) & _PAGE_PSE) // PSE bit set. The generic description of this bit is "4 MB or 2 MB page"; at PUD level the matching size is PUD_PAGE_SIZE = 1 << PUD_SHIFT (1 GiB with PUD_SHIFT = 30)
return PHYSICAL_PUD_PAGE_MASK; // PUD_PAGE_MASK & __PHYSICAL_MASK: with PUD_SHIFT = 30 and __PHYSICAL_MASK_SHIFT = 52 this keeps bits 30..51
else
return PTE_PFN_MASK; // PAGE_MASK & __PHYSICAL_MASK: keeps bits 12..51, assuming 4 KiB pages (PAGE_SHIFT = 12) and __PHYSICAL_MASK_SHIFT = 52
}
/* #define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK) */
/* #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) */
/* #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) */
/* #define __PHYSICAL_MASK physical_mask */
/* phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; */
/* #else
#define __PHYSICAL_MASK ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1))
#endif */
/* #define __PHYSICAL_MASK_SHIFT 52 */
/* Extracts the PFN from a (pte|pmd|pud|pgd)val_t of a 4KB page */
/* #define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK) */
/* #define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) */
/* #define PAGE_MASK (~(PAGE_SIZE-1)) */
return pmd;
}
/*
#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4;
EXPORT_SYMBOL(page_offset_base);
unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE_L4;
EXPORT_SYMBOL(vmalloc_base);
unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4;
EXPORT_SYMBOL(vmemmap_base);
#endif
#define __PAGE_OFFSET_BASE_L4 _AC(0xffff888000000000, UL)
#define __VMALLOC_BASE_L4 0xffffc90000000000UL
#define __VMEMMAP_BASE_L4 0xffffea0000000000UL
*/
继续往下看:
memset(bm_pte, 0, sizeof(bm_pte)); // bm_pte占一页大小
/* static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; */
pmd_populate_kernel(&init_mm, pmd, bm_pte); // 将静态页表页bm_pte挂接到该pmd项上(用bm_pte的物理地址填充pmd项);这里并不动态分配pte
||
\/
/*
 * pmd_populate_kernel() - make the PMD entry @pmd point at the PTE page @pte.
 *
 * @mm:  address space the page table belongs to (passed on to the paravirt hook)
 * @pmd: PMD entry to fill in
 * @pte: kernel virtual address of the PTE page to install
 */
static inline void pmd_populate_kernel(struct mm_struct *mm,
pmd_t *pmd, pte_t *pte)
{
paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); // hand the page frame number of the PTE page to the paravirt layer's alloc_pte hook
/* PVOP_VCALL2(mmu.alloc_pte, mm, pfn): a call to the mmu.alloc_pte hook with arguments (mm, pfn), not an assignment */
set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); // write the PMD entry: physical address of the PTE page combined with the _PAGE_TABLE flag bits
/* PVOP_VCALL2(mmu.set_pmd, pmdp, native_pmd_val(pmd)): a call to the mmu.set_pmd hook with the raw value of the new entry */
}
/* #define __pa(x) __phys_addr((unsigned long)(x)) */
/* x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET)); */
/* #define __START_KERNEL_map _AC(0xffffffff80000000, UL) */
if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { // 如果FIX_BTMAP_BEGIN与FIX_BTMAP_END不落在同一个pmd项内(即初期ioremap区域跨越了多个pmd,而这里只为一个pmd准备了bm_pte页表),则告警并打印相关信息
WARN_ON(1);
printk(KERN_WARNING "pmd %p != %p\n",
pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
fix_to_virt(FIX_BTMAP_BEGIN));
printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END): %08lx\n",
fix_to_virt(FIX_BTMAP_END));
printk(KERN_WARNING "FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
printk(KERN_WARNING "FIX_BTMAP_BEGIN: %d\n",
FIX_BTMAP_BEGIN);
}
ioremap
/*
 * ioremap() - map a physical address range into kernel virtual address space.
 *
 * @phys_addr: start of the physical range to map
 * @size:      length of the range in bytes
 *
 * Thin wrapper around __ioremap_caller(): requests the
 * _PAGE_CACHE_MODE_UC_MINUS cache mode, records the caller's return address
 * for the vm area, and passes encrypted = false.
 * Returns whatever __ioremap_caller() returns (NULL on its error paths).
 */
void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
{
enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;
return __ioremap_caller(phys_addr, size, pcm,
__builtin_return_address(0), false);
}
||
\/
static void __iomem *
__ioremap_caller(resource_size_t phys_addr, unsigned long size,
enum page_cache_mode pcm, void *caller, bool encrypted)
{
...
__ioremap_check_mem(phys_addr, size, &io_desc); // 检查该物理地址范围是否属于系统RAM、是否需要加密映射,并把结果记录在io_desc.flags中;EFI runtime services数据区域不在资源遍历的覆盖范围内,但当SEV处于活动状态时必须对其进行加密映射
if (io_desc.flags & IORES_MAP_SYSTEM_RAM) { // 不能映射RAM内存
WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
&phys_addr, &last_addr);
return NULL;
}
offset = phys_addr & ~PAGE_MASK;
phys_addr &= PHYSICAL_PAGE_MASK;
size = PAGE_ALIGN(last_addr+1) - phys_addr;
retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
pcm, &new_pcm); // 在rbtrees(红黑树)中嵌入节点
prot = PAGE_KERNEL_IO;
/* 如果被映射的页面位于内存中并且SEV处于活动状态,则确保在生成的映射中启用内存加密属性。在TDX客户机中,内存默认被标记为私有;如果没有请求加密(通过encrypted参数),则显式地为映射设置解密属性 */
if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted)
prot = pgprot_encrypted(prot);
else
prot = pgprot_decrypted(prot);
area = get_vm_area_caller(size, VM_IOREMAP, caller); //执行vmap分配函数,设置虚拟内存区域
if (!area)
goto err_free_memtype;
area->phys_addr = phys_addr;
vaddr = (unsigned long) area->addr;
if (memtype_kernel_map_sync(phys_addr, size, pcm))
goto err_free_area;
if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) // 将物理地址映射到虚拟地址
goto err_free_area;
ret_addr = (void __iomem *) (vaddr + offset);
mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); // 注册trace
if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size)) // 检查本次请求是否跨越了iomem资源树中的多个BAR
pr_warn("caller %pS mapping multiple BARs\n", caller);
return ret_addr;
err_free_area:
free_vm_area(area);
err_free_memtype:
memtype_free(phys_addr, phys_addr + size);
return NULL;
}