1 简介
在缺页中断处理中,匿名页面处理的核心函数是 do_anonymous_page(),其代码实现在 mm/memory.c 中。在 Linux 内核中,没有关联到文件映射的页面称为匿名页面(Anonymous Page),简称 anon page。
2 源码解析
/*
 * do_anonymous_page() - handle a page fault on an anonymous (not file-backed)
 * mapping.
 * @vmf: fault descriptor. This function reads vmf->vma, vmf->pmd,
 *       vmf->address and vmf->flags, and stores the mapped PTE pointer and
 *       its lock in vmf->pte / vmf->ptl.
 *
 * A read fault (FAULT_FLAG_WRITE clear) on an mm that does not forbid the
 * zero page gets a special PTE pointing at the zero page. Every other fault
 * gets a newly allocated page that is charged to the memory cgroup, added to
 * the anon rmap and the LRU, and then mapped.
 *
 * Return:
 *   VM_FAULT_SIGBUS - the VMA is VM_SHARED;
 *   VM_FAULT_OOM    - pte_alloc(), anon_vma_prepare(), the page allocation
 *                     or the memcg charge failed;
 *   the result of handle_userfault() when userfaultfd_missing(vma) is true;
 *   a non-zero result of check_stable_address_space();
 *   0 otherwise, including the cases where pmd_trans_unstable() is true or
 *   the PTE turned out to be populated already.
 */
static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct mem_cgroup *memcg;
struct page *page;
vm_fault_t ret = 0;
pte_t entry;
/* File mapping without ->vm_ops ? */
/*
 * A VM_SHARED VMA is not handled by this path; the fault is rejected
 * with SIGBUS instead of being populated here.
 */
if (vma->vm_flags & VM_SHARED)
return VM_FAULT_SIGBUS;
/*
* Use pte_alloc() instead of pte_alloc_map(). We can't run
* pte_offset_map() on pmds where a huge pmd might be created
* from a different thread.
*
* pte_alloc_map() is safe to use under down_write(mmap_sem) or when
* parallel threads are excluded by other means.
*
* Here we only have down_read(mmap_sem).
*/
/*
 * Make sure a PTE table exists under vmf->pmd (presumably allocating and
 * linking one if it is missing); failure is reported as OOM.
 */
if (pte_alloc(vma->vm_mm, vmf->pmd, vmf->address))
return VM_FAULT_OOM;
/* See the comment in pte_alloc_one_map() */
if (unlikely(pmd_trans_unstable(vmf->pmd)))
return 0;
/* Use the zero-page for reads */
/*
 * Read fault and the mm does not forbid the zero page: map the zero page
 * instead of allocating a new one.
 */
if (!(vmf->flags & FAULT_FLAG_WRITE) &&
!mm_forbids_zeropage(vma->vm_mm)) {
/* Build a special PTE for the zero page with the VMA's protection. */
entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address),
vma->vm_page_prot));
/* Map and lock the PTE slot for vmf->address; the lock lands in vmf->ptl. */
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
vmf->address, &vmf->ptl);
/*
 * The slot is already populated (presumably a concurrent fault got
 * here first): leave it alone and just unlock.
 */
if (!pte_none(*vmf->pte))
goto unlock;
/* A non-zero result is propagated to the caller via ret. */
ret = check_stable_address_space(vma->vm_mm);
if (ret)
goto unlock;
/* Deliver the page fault to userland, check inside PT lock */
if (userfaultfd_missing(vma)) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
return handle_userfault(vmf, VM_UFFD_MISSING);
}
goto setpte;
}
/* Allocate our own private page. */
/*
 * Presumably sets up the VMA's anon_vma so the page can be reverse-mapped
 * later; failure is treated as OOM.
 */
if (unlikely(anon_vma_prepare(vma)))
goto oom;
/*
 * Allocate one page for vmf->address; going by the helper's name it is
 * zero-filled, movable and may come from highmem.
 */
page = alloc_zeroed_user_highpage_movable(vma, vmf->address);
if (!page)
goto oom;
/*
 * Charge the new page to the memory cgroup. memcg receives the group so
 * the charge can be committed or cancelled below.
 */
if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, &memcg,
false))
goto oom_free_page;
/*
* The memory barrier inside __SetPageUptodate makes sure that
* preceding stores to the page contents become visible before
* the set_pte_at() write.
*/
__SetPageUptodate(page);
/* Build the PTE for the new page with the VMA's protection bits. */
entry = mk_pte(page, vma->vm_page_prot);
/* A writable VMA gets a PTE that is both dirty and writable. */
if (vma->vm_flags & VM_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry));
/* Map and lock the PTE slot so it cannot change while we install the entry. */
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
&vmf->ptl);
/* Slot already populated: drop our page and charge, then unlock. */
if (!pte_none(*vmf->pte))
goto release;
/* A non-zero result is propagated to the caller via ret. */
ret = check_stable_address_space(vma->vm_mm);
if (ret)
goto release;
/* Deliver the page fault to userland, check inside PT lock */
if (userfaultfd_missing(vma)) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
mem_cgroup_cancel_charge(page, memcg, false);
put_page(page);
return handle_userfault(vmf, VM_UFFD_MISSING);
}
/* Account one more anonymous page in the mm's MM_ANONPAGES counter. */
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
/* Register the new page in the anonymous reverse mapping (rmap). */
page_add_new_anon_rmap(page, vma, vmf->address, false);
/* Finalize the memcg charge taken above. */
mem_cgroup_commit_charge(page, memcg, false, false);
/*
 * Going by the helper's name: put the page on the active or unevictable
 * LRU list so reclaim can find it.
 */
lru_cache_add_active_or_unevictable(page, vma);
setpte:
/* Install the prepared entry into the page table. */
set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, vmf->address, vmf->pte);
unlock:
pte_unmap_unlock(vmf->pte, vmf->ptl);
return ret;
release:
/* Undo the memcg charge and drop the page we allocated, then unlock. */
mem_cgroup_cancel_charge(page, memcg, false);
put_page(page);
goto unlock;
oom_free_page:
/* The charge failed, so only the page reference needs dropping. */
put_page(page);
oom:
return VM_FAULT_OOM;
}
该函数的主要逻辑是根据 vmf->flags 中是否带有写标志 FAULT_FLAG_WRITE,分为读操作触发缺页和写操作触发缺页两部分:
- 读操作触发缺页:在 mm_forbids_zeropage() 没有禁止使用零页面的前提下,将该地址映射到一个内容全为 0 的零页面(ZERO_PAGE)。
- 写操作触发缺页(读操作但零页面被禁止时也走这条路径):通过 alloc_zeroed_user_highpage_movable() 函数分配一个可写的页面,其分配掩码是 GFP_HIGHUSER | __GFP_MOVABLE,意味着会优先从高端内存中分配。每次缺页只分配一页,如果需要多个页面,则会触发多次缺页中断。继续往下追踪,会看到最终调用伙伴系统的核心函数 alloc_pages() 来分配物理页面。然后通过 mk_pte()、pte_mkdirty() 和 pte_mkwrite() 等宏生成一个新的 pte entry,并通过 set_pte_at() 函数设置到硬件页表中。