在缺页中断处理中,匿名页面处理的核心函数是do_anonymous_page(),代码实现在mm/memory.c文件中。在linux内核中没有关联到文件映射的页面称为匿名页面(Anonymous Page,简称anon page)。
[handle_pte_fault()->do_anonymous_page()]
/*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
* We return with mmap_sem still held, but pte unmapped and unlocked.
*/
static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
unsigned int flags)
{
struct mem_cgroup *memcg;
struct page *page;
spinlock_t *ptl;
pte_t entry;
pte_unmap(page_table);
/* Check if we need to add a guard page to the stack */
/* Check whether this VMA needs one guard page (one page in size) as a
safety pad below the stack */
if (check_stack_guard_page(vma, address) < 0)
return VM_FAULT_SIGSEGV;
/* Depending on whether @flags requests write permission, the code below
splits into the two parts marked (1) and (2) */
/* Use the zero-page for reads */
/* (1) Read-only fault: when the faulting access only needs read
permission, the kernel maps the global all-zero page
empty_zero_page, known as the zero page (ZERO_PAGE); it is a
special-mapping page. Where does this zero page come from? See
the code quoted further below.
Why use a shared zero page: the mapping is read-only and all
zeros, so there is no point wasting real memory on it; a later
write triggers a copy-on-write page fault that allocates a fresh
page. */
if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm)) {
/* Build a new PTE entry from the zero page, then set the PTE_SPECIAL
bit in it with pte_mkspecial(); the 2-level ARM32 page-table
implementation has no PTE_SPECIAL bit, while ARM64 does */
entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
vma->vm_page_prot));
/* Map the PTE and take the page-table spinlock; the lock must be
released before returning — see the macro implementation below */
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
/* If the PTE is no longer empty, a concurrent fault already installed
a mapping: just unlock and return. Otherwise jump to the setpte
label to install the new PTE entry into the hardware page table */
if (!pte_none(*page_table))
goto unlock;
goto setpte;
}
/* (2) Writable fault: use alloc_zeroed_user_highpage_movable() to
allocate a writable anonymous page; its gfp mask is (__GFP_MOVABLE | __GFP_WAIT | __GFP_IO|__GFP_FS|
__GFP_HARDWALL|__GFP_HIGHMEM), and it ultimately calls the buddy-system core
alloc_pages(), so highmem is preferred here */
/* Allocate our own private page. */
if (unlikely(anon_vma_prepare(vma)))
goto oom;
page = alloc_zeroed_user_highpage_movable(vma, address);
if (!page)
goto oom;
/*
* The memory barrier inside __SetPageUptodate makes sure that
* preceding stores to the page contents become visible before
* the set_pte_at() write.
*/
__SetPageUptodate(page);
if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
goto oom_free_page;
/* Build a new PTE entry via the mk_pte, pte_mkdirty and pte_mkwrite
macros, then install it into the hardware page table with
set_pte_at(). */
entry = mk_pte(page, vma->vm_page_prot);
if (vma->vm_flags & VM_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry));
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
if (!pte_none(*page_table))
goto release;
/* Bump this mm's anonymous-page statistics counter (type MM_ANONPAGES) */
inc_mm_counter_fast(mm, MM_ANONPAGES);
/* Add the anonymous page to the RMAP reverse-mapping system */
page_add_new_anon_rmap(page, vma, address);
mem_cgroup_commit_charge(page, memcg, false);
/* Add the anonymous page to the LRU lists, which the kswapd reclaim
code scans */
lru_cache_add_active_or_unevictable(page, vma);
setpte:
set_pte_at(mm, address, page_table, entry);
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, address, page_table);
unlock:
pte_unmap_unlock(page_table, ptl);
return 0;
release:
mem_cgroup_cancel_charge(page, memcg);
page_cache_release(page);
goto unlock;
oom_free_page:
page_cache_release(page);
oom:
return VM_FAULT_OOM;
}
零页面的由来
[arch/arm/mm/mmu.c]
/*
* empty_zero_page is a special page that is used for
* zero-initialized data and COW.
* Allocated and published by paging_init() at boot (see below).
*/
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);
[arch/arm/include/asm/pgtable.h]
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
* The vaddr argument is ignored: ARM uses a single shared zero page.
*/
extern struct page *empty_zero_page;
#define ZERO_PAGE(vaddr) (empty_zero_page)
[include/asm-generic/pgtable.h]
/* Page frame number of the zero page used for a zero-mapped fault at @addr */
#define my_zero_pfn(addr) page_to_pfn(ZERO_PAGE(addr))
系统启动时,paging_init()函数分配一个页面用作零页面。
/*
* paging_init() sets up the page tables, initialises the zone memory
* maps, and sets up the zero page, bad page and bad page tables.
*/
void __init paging_init(const struct machine_desc *mdesc)
{
void *zero_page;
build_mem_type_table();
prepare_page_table();
map_lowmem();
dma_contiguous_remap();
devicemaps_init(mdesc);
kmap_init();
tcm_init();
/* NOTE(review): 0xffff0000 is presumably the ARM vectors base — confirm */
top_pmd = pmd_off_k(0xffff0000);
/* allocate the zero page. */
zero_page = early_alloc(PAGE_SIZE);
bootmem_init();
/* Publish the freshly allocated page as the global zero page and make
sure its (zeroed) contents are visible in memory */
empty_zero_page = virt_to_page(zero_page);
__flush_dcache_page(NULL, empty_zero_page);
}
回到do_anonymous_page()函数
pte_offset_map_lock()宏
/*
 * Map the PTE for @address under @pmd and acquire the matching
 * page-table spinlock.  The lock pointer is stored through @ptlp so the
 * caller can later release it with pte_unmap_unlock(); the macro
 * evaluates to the mapped pte_t pointer.
 */
#define pte_offset_map_lock(mm, pmd, address, ptlp) \
({ \
spinlock_t *__ptl = pte_lockptr(mm, pmd); \
pte_t *__pte = pte_offset_map(pmd, address); \
*(ptlp) = __ptl; \
spin_lock(__ptl); \
__pte; \
})
/*
 * Return the spinlock that protects the page tables of @mm.  In this
 * configuration a single per-mm lock (mm->page_table_lock) guards every
 * page-table page of the mm, so @pmd is not consulted.
 */
static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
{
	spinlock_t *ptl = &mm->page_table_lock;

	return ptl;
}
回到do_anonymous_page()函数