匿名页的反向映射
反向映射的解析流程(Kernel 3.10):
1. 根据struct page 确定其anon_vma 及index。
2. 根据index从anon_vma的rb tree中找出所有的avc。
3. 根据avc找出对应的vma。
先看看匿名页的回收:
mm/rmap.c
/*
 * try_to_unmap_anon - try to remove the mappings of an anonymous page
 * @page:  the page whose mappings should be removed
 * @flags: ttu_flags forwarded to try_to_unmap_one(); TTU_MIGRATION also
 *         changes which VMAs are visited (see below)
 *
 * Read-locks the anon_vma of @page, then walks every anon_vma_chain in its
 * interval tree that covers the page's offset and calls try_to_unmap_one()
 * on the VMA behind each entry.  The walk stops as soon as a call returns
 * something other than SWAP_AGAIN or the page is no longer mapped.
 *
 * Returns SWAP_AGAIN if the anon_vma could not be obtained or no VMA was
 * tried; otherwise the result of the last try_to_unmap_one() call made.
 */
static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
{
	struct anon_vma *av = page_lock_anon_vma_read(page);
	struct anon_vma_chain *link;
	pgoff_t offset;
	int status = SWAP_AGAIN;

	if (!av)
		return status;

	offset = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);

	anon_vma_interval_tree_foreach(link, &av->rb_root, offset, offset) {
		struct vm_area_struct *vma = link->vma;

		/*
		 * exec() sets up a temporary VMA and moves it later.  The
		 * move happens under the anon_vma lock, but the page tables
		 * are not covered by it, so migration could fail to find its
		 * migration ptes.  Instead of making exec() take more locks,
		 * migration simply skips temporary stack VMAs until exec()
		 * has finished.
		 */
		if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION)) {
			if (is_vma_temporary_stack(vma))
				continue;
		}

		status = try_to_unmap_one(page, vma,
					  vma_address(page, vma), flags);

		/* Anything other than SWAP_AGAIN ends the walk. */
		if (status != SWAP_AGAIN)
			break;

		/* No mappings left: nothing more to do. */
		if (!page_mapped(page))
			break;
	}

	page_unlock_anon_vma_read(av);
	return status;
}
那相关数据结构的关系是如何建立起来的?
先看fork时的相关点:
dup_mmap
对于父进程中的vma 一个一个clone。
/*
 * anon_vma_clone - attach dst to every anon_vma that src is attached to
 * @dst: the VMA that receives the new anon_vma_chain links
 * @src: the VMA whose anon_vma_chain list is walked
 *
 * For each anon_vma_chain entry on src (walked in reverse order), a new
 * entry is allocated and linked to dst and to the same anon_vma.
 *
 * Returns 0 on success.  If an entry cannot be allocated, the links already
 * created for dst are torn down with unlink_anon_vmas() and -ENOMEM is
 * returned.
 */
int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
{
struct anon_vma_chain *avc, *pavc;
/*
 * NOTE(review): appears to track the root anon_vma whose lock is currently
 * held (NULL = none) - confirm against lock_anon_vma_root().
 */
struct anon_vma *root = NULL;
list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
struct anon_vma *anon_vma;
/* First attempt: GFP_NOWAIT allocation with failure warnings suppressed. */
avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
if (unlikely(!avc)) {
/*
 * Fallback: release the root and retry with GFP_KERNEL.
 * Presumably the lock is dropped first because a GFP_KERNEL
 * allocation may sleep - TODO confirm.
 */
unlock_anon_vma_root(root);
root = NULL;
avc = anon_vma_chain_alloc(GFP_KERNEL);
if (!avc)
goto enomem_failure;
}
anon_vma = pavc->anon_vma;
/* (Re)take the root lock for this anon_vma before linking. */
root = lock_anon_vma_root(root, anon_vma);
/* Link dst to the same anon_vma that the source entry points to. */
anon_vma_chain_link(dst, avc, anon_vma);
}
unlock_anon_vma_root(root);
return 0;
enomem_failure:
/* Undo whatever links were already created for dst. */
unlink_anon_vmas(dst);
return -ENOMEM;
}
在clone parent的vma的同时,将对应的avc也都clone了一份。这些clone出来的avc通过其same_vma字段挂在新vma的anon_vma_chain链表上,同时插入到原av的rb tree中。接着加上自己新的avc及av。
这样,从av的rb tree中可以得知,对应的vma究竟fork了多少次。
/*
 * Excerpt only: the enclosing function header, local declarations and the
 * out_error* labels are not shown here (presumably anon_vma_fork() - confirm).
 *
 * Allocates a new anon_vma and a new anon_vma_chain for vma, makes the new
 * anon_vma share the root of the parent VMA's anon_vma, and links the two
 * together while holding the new anon_vma's write lock.
 */
anon_vma = anon_vma_alloc();
if (!anon_vma)
goto out_error;
avc = anon_vma_chain_alloc(GFP_KERNEL);
if (!avc)
goto out_error_free_anon_vma;
/*
 * The root anon_vma's spinlock is the lock actually used when we
 * lock any of the anon_vmas in this anon_vma tree.
 */
anon_vma->root = pvma->anon_vma->root;
/*
 * With refcounts, an anon_vma can stay around longer than the
 * process it belongs to. The root anon_vma needs to be pinned until
 * this anon_vma is freed, because the lock lives in the root.
 */
get_anon_vma(anon_vma->root);
/* Mark this anon_vma as the one where our new (COWed) pages go. */
vma->anon_vma = anon_vma;
/* Link vma <-> anon_vma through avc under the anon_vma write lock. */
anon_vma_lock_write(anon_vma);
anon_vma_chain_link(vma, avc, anon_vma);
anon_vma_unlock_write(anon_vma);
关于anon_vma_chain_link
/*
 * anon_vma_chain_link - connect a VMA and an anon_vma through a chain entry
 * @vma:      the VMA side of the link
 * @avc:      the chain entry that is filled in and inserted
 * @anon_vma: the anon_vma side of the link
 *
 * After this call @avc is on @vma's anon_vma_chain list (via its same_vma
 * member) and in @anon_vma's interval tree.
 */
static void anon_vma_chain_link(struct vm_area_struct *vma,
				struct anon_vma_chain *avc,
				struct anon_vma *anon_vma)
{
	/* Fill in both endpoints before the entry becomes reachable. */
	avc->anon_vma = anon_vma;
	avc->vma = vma;

	/* VMA side: add the entry to the VMA's list of chain entries. */
	list_add(&avc->same_vma, &vma->anon_vma_chain);

	/* anon_vma side: add the entry to the anon_vma's interval tree. */
	anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
}
再看page fault 时的扩展:
/*
 * Excerpt only: the enclosing function is not shown (presumably
 * __page_set_anon_rmap() - confirm).
 *
 * Stores the anon_vma pointer, offset by PAGE_MAPPING_ANON, in page->mapping
 * and records the page's linear index within the VMA in page->index.
 * NOTE(review): PAGE_MAPPING_ANON presumably acts as a tag marking the
 * mapping as anonymous - confirm against its definition.
 */
anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
page->mapping = (struct address_space *) anon_vma;
page->index = linear_page_index(vma, address);
新分配的page 其mapping 字段是指向anon_vma的。通过fork的方式来达到多个进程的vma 引用同一个page。
这个page的mapping 始终指向父进程创建的av;fork 子进程时,会将子进程对应的vma(通过avc)插入到该av的rb_root 上。这样就可以很容易地得知哪些进程的哪些vma可能引用了本page。
COW时:
当COW时候,修改者申请新的page来建立映射,同样需要为新page设置av。 旧的就不管了,因为反向映射的本质只是用来确定哪些进程来引用了本page。
/*
 * page_add_new_anon_rmap - set up reverse mapping for a new anonymous page
 * @page:    the newly allocated page
 * @vma:     the VMA the page is being mapped into
 * @address: the user virtual address of the mapping; must lie inside @vma
 *
 * Marks the page swap-backed, records its first mapping, accounts it in the
 * zone statistics, attaches it to the VMA's anon_vma and finally places it
 * on either the active anon LRU or the unevictable list.
 */
void page_add_new_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	/* The address must fall within [vm_start, vm_end). */
	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);

	SetPageSwapBacked(page);

	/* _mapcount starts at -1, so storing 0 counts the first mapping. */
	atomic_set(&page->_mapcount, 0);

	/* Account the page under the matching anonymous-page counter. */
	if (PageTransHuge(page))
		__inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
	else
		__inc_zone_page_state(page, NR_ANON_PAGES);

	/*
	 * NOTE(review): the trailing 1 presumably flags the mapping as
	 * exclusive to this VMA - confirm against __page_set_anon_rmap().
	 */
	__page_set_anon_rmap(page, vma, address, 1);

	/* Choose the list based on what mlocked_vma_newpage() reports. */
	if (mlocked_vma_newpage(vma, page))
		add_page_to_unevictable_list(page);
	else
		lru_cache_add_lru(page, LRU_ACTIVE_ANON);
}