Analysis of linux 2.4.6 mm/swap_state.c source code, notes on "Linux Kernel Source Code Scenario Analysis" --- compared against the 4.2.5 kernel (continued)...

/*
 * These routines also need to handle stuff like marking pages dirty
 * and/or accessed for architectures that don't do it in hardware (most
 * RISC architectures). The early dirtying is also good on the i386.
 *
 * There is also a hook called "update_mmu_cache()" that architectures
 * with external mmu caches can use to update those (ie the Sparc or
 * PowerPC hashed page tables that act as extended TLBs).
 *
 * We enter with non-exclusive mmap_sem (to exclude vma changes,
 * but allow concurrent faults), and pte mapped but not yet locked.
 * We return with pte unmapped and unlocked.
 *
 * The mmap_sem may have been released depending on flags and our
 * return value. See filemap_fault() and __lock_page_or_retry().
 */
static int handle_pte_fault(struct mm_struct *mm,
        struct vm_area_struct *vma, unsigned long address,
        pte_t *pte, pmd_t *pmd, unsigned int flags)
{
    pte_t entry;
    spinlock_t *ptl;

    /*
     * some architectures can have larger ptes than wordsize,
     * e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and CONFIG_32BIT=y,
     * so READ_ONCE or ACCESS_ONCE cannot guarantee atomic accesses.
     * The code below just needs a consistent view for the ifs and
     * we later double check anyway with the ptl lock held. So here
     * a barrier will do.
     */
    entry = *pte;
    barrier();
    if (!pte_present(entry)) {
        if (pte_none(entry)) {
            if (vma->vm_ops)
                return do_fault(mm, vma, address, pte, pmd,
                        flags, entry);

            return do_anonymous_page(mm, vma, address, pte, pmd,
                    flags);
        }

        return do_swap_page(mm, vma, address,
                    pte, pmd, flags, entry);
    }

    if (pte_protnone(entry))
        return do_numa_page(mm, vma, address, entry, pte, pmd);

    ptl = pte_lockptr(mm, pmd);
    spin_lock(ptl);
    if (unlikely(!pte_same(*pte, entry)))
        goto unlock;
    if (flags & FAULT_FLAG_WRITE) {
        if (!pte_write(entry))
            return do_wp_page(mm, vma, address,
                    pte, pmd, ptl, entry);
        entry = pte_mkdirty(entry);
    }
    entry = pte_mkyoung(entry);
    if (ptep_set_access_flags(vma, address, pte, entry, flags & FAULT_FLAG_WRITE)) {
        update_mmu_cache(vma, address, pte);
    } else {
        /*
         * This is needed only for protection faults but the arch code
         * is not yet telling us if this is a protection fault or not.
         * This still avoids useless tlb flushes for .text page faults
         * with threads.
         */
        if (flags & FAULT_FLAG_WRITE)
            flush_tlb_fix_spurious_fault(vma, address);
    }
unlock:
    pte_unmap_unlock(pte, ptl);
    return 0;
}
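
The dispatch above hinges first on pte_present()/pte_none() and then, for an empty pte, on vma->vm_ops: faults in file-backed mappings go to do_fault(), anonymous ones to do_anonymous_page(). As a quick user-space illustration (my own test program, not from the book or the kernel tree), the two mapping types can be created with mmap(); the kernel-side path cannot be observed directly from user space, this only creates both kinds of mappings and touches each once.

/* Illustration only: two mmap() calls whose first-touch faults are handled
 * by different branches of handle_pte_fault() (my reading of the code above). */
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    long len = sysconf(_SC_PAGESIZE);

    /* Anonymous private mapping: vma->vm_ops == NULL,
     * so the first touch goes through do_anonymous_page(). */
    char *anon = mmap(NULL, len, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (anon == MAP_FAILED) { perror("mmap anon"); return 1; }
    anon[0] = 'x';                          /* write fault on a none pte */

    /* File-backed mapping: the file system supplies vm_ops, so the first
     * touch goes through do_fault(). A throw-away temp file is created
     * just to have something to map. */
    char path[] = "/tmp/fault-demo-XXXXXX";
    int fd = mkstemp(path);
    if (fd < 0) { perror("mkstemp"); return 1; }
    unlink(path);
    if (ftruncate(fd, len) < 0) { perror("ftruncate"); return 1; }
    char *filemap = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
    if (filemap == MAP_FAILED) { perror("mmap file"); return 1; }
    printf("first byte of file mapping: %d\n", filemap[0]);  /* read fault */

    munmap(anon, len);
    munmap(filemap, len);
    close(fd);
    return 0;
}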

Compared with do_no_page() in 2.4, this has become much more complex, with many more cases to distinguish. /mm/memory.c

/*
 * We enter with non-exclusive mmap_sem (to exclude vma changes,
 * but allow concurrent faults), and pte mapped but not yet locked.
 * We return with mmap_sem still held, but pte unmapped and unlocked.
 */
static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
        unsigned long address, pte_t *page_table, pmd_t *pmd,
        unsigned int flags)
{
    struct mem_cgroup *memcg;
    struct page *page;
    spinlock_t *ptl;
    pte_t entry;

    pte_unmap(page_table);

    /* File mapping without ->vm_ops ? */
    if (vma->vm_flags & VM_SHARED)
        return VM_FAULT_SIGBUS;

    /* Check if we need to add a guard page to the stack */
    if (check_stack_guard_page(vma, address) < 0)
        return VM_FAULT_SIGSEGV;

    /* Use the zero-page for reads */
    if (!(flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(mm)) {
        entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
                        vma->vm_page_prot));
        page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
        if (!pte_none(*page_table))
            goto unlock;
        goto setpte;
    }

    /* Allocate our own private page. */
    if (unlikely(anon_vma_prepare(vma)))
        goto oom;
    page = alloc_zeroed_user_highpage_movable(vma, address);
    if (!page)
        goto oom;

    if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
        goto oom_free_page;

    /*
     * The memory barrier inside __SetPageUptodate makes sure that
     * preceeding stores to the page contents become visible before
     * the set_pte_at() write.
     */
    __SetPageUptodate(page);

    entry = mk_pte(page, vma->vm_page_prot);
    if (vma->vm_flags & VM_WRITE)
        entry = pte_mkwrite(pte_mkdirty(entry));

    page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
    if (!pte_none(*page_table))
        goto release;

    inc_mm_counter_fast(mm, MM_ANONPAGES);
    page_add_new_anon_rmap(page, vma, address);
    mem_cgroup_commit_charge(page, memcg, false);
    lru_cache_add_active_or_unevictable(page, vma);
setpte:
    set_pte_at(mm, address, page_table, entry);

    /* No need to invalidate - it was non-present before */
    update_mmu_cache(vma, address, page_table);
unlock:
    pte_unmap_unlock(page_table, ptl);
    return 0;
release:
    mem_cgroup_cancel_charge(page, memcg);
    page_cache_release(page);
    goto unlock;
oom_free_page:
    page_cache_release(page);
oom:
    return VM_FAULT_OOM;
}

do_anonymous_page()
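
Note that the read branch above allocates nothing at all: it installs a mapping of the shared zero page with pte_mkspecial() and skips the MM_ANONPAGES accounting, so only written pages consume real memory (the first write either takes the allocation branch here, or, for an existing zero-page mapping, goes through do_wp_page()). A small test of my own (not from the post) makes this visible through the resident-pages figure in /proc/self/statm:

/* Illustration: reading untouched anonymous memory only maps the shared zero
 * page, so resident memory barely changes; writing forces real pages to be
 * allocated (via do_anonymous_page()/do_wp_page(), per my reading above). */
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

static long resident_pages(void)
{
    long size = 0, resident = -1;
    FILE *f = fopen("/proc/self/statm", "r");
    if (f && fscanf(f, "%ld %ld", &size, &resident) != 2)
        resident = -1;
    if (f)
        fclose(f);
    return resident;
}

int main(void)
{
    long pagesz = sysconf(_SC_PAGESIZE);
    size_t npages = 1024;                   /* 4 MB with 4 KB pages */
    volatile char *p = mmap(NULL, npages * pagesz,
                            PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED)
        return 1;

    printf("before touch : %ld resident pages\n", resident_pages());

    for (size_t i = 0; i < npages; i++)     /* read faults: zero page only */
        (void)p[i * pagesz];
    printf("after reads  : %ld resident pages\n", resident_pages());

    for (size_t i = 0; i < npages; i++)     /* write faults: real pages */
        p[i * pagesz] = 1;
    printf("after writes : %ld resident pages\n", resident_pages());

    munmap((void *)p, npages * pagesz);
    return 0;
}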

/*
 * The in-memory structure used to track swap areas.
 */
struct swap_info_struct {
    unsigned long flags;            /* SWP_USED etc: see above */
    signed short prio;              /* swap priority of this type */
    struct plist_node list;         /* entry in swap_active_head */
    struct plist_node avail_list;   /* entry in swap_avail_head */
    signed char type;               /* strange name for an index */
    unsigned int max;               /* extent of the swap_map */
    unsigned char *swap_map;        /* vmalloc'ed array of usage counts */
    struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */
    struct swap_cluster_info free_cluster_head; /* free cluster list head */
    struct swap_cluster_info free_cluster_tail; /* free cluster list tail */
    unsigned int lowest_bit;        /* index of first free in swap_map */
    unsigned int highest_bit;       /* index of last free in swap_map */
    unsigned int pages;             /* total of usable pages of swap */
    unsigned int inuse_pages;       /* number of those currently in use */
    unsigned int cluster_next;      /* likely index for next allocation */
    unsigned int cluster_nr;        /* countdown to next cluster search */
    struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */
    struct swap_extent *curr_swap_extent;
    struct swap_extent first_swap_extent;
    struct block_device *bdev;      /* swap device or bdev of swap file */
    struct file *swap_file;         /* seldom referenced */
    unsigned int old_block_size;    /* seldom referenced */
#ifdef CONFIG_FRONTSWAP
    unsigned long *frontswap_map;   /* frontswap in-use, one bit per page */
    atomic_t frontswap_pages;       /* frontswap pages in-use counter */
#endif
    spinlock_t lock;                /*
                                     * protect map scan related fields like
                                     * swap_map, lowest_bit, highest_bit,
                                     * inuse_pages, cluster_next,
                                     * cluster_nr, lowest_alloc,
                                     * highest_alloc, free/discard cluster
                                     * list. other fields are only changed
                                     * at swapon/swapoff, so are protected
                                     * by swap_lock. changing flags need
                                     * hold this lock and swap_lock. If
                                     * both locks need hold, hold swap_lock
                                     * first.
                                     */
    struct work_struct discard_work; /* discard worker */
    struct swap_cluster_info discard_cluster_head; /* list head of discard clusters */
    struct swap_cluster_info discard_cluster_tail; /* list tail of discard clusters */
};

swap_info_struct include/linux/swap.h
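Most of these fields can be watched from user space: /proc/swaps prints one line per active swap area, i.e. per in-use swap_info_struct, and its Filename, Size, Used and Priority columns correspond roughly to swap_file, pages, inuse_pages and prio (my reading of the struct; sizes are reported in kB rather than pages). A trivial dump:

/* Print each active swap area from /proc/swaps (the mapping back to
 * swap_info_struct fields is my interpretation, not stated by the kernel). */
#include <stdio.h>

int main(void)
{
    FILE *f = fopen("/proc/swaps", "r");
    char line[512], path[256], type[32];
    unsigned long size_kb, used_kb;
    int prio;

    if (!f) {
        perror("/proc/swaps");
        return 1;
    }
    if (!fgets(line, sizeof(line), f)) {    /* skip the header line */
        fclose(f);
        return 1;
    }
    while (fgets(line, sizeof(line), f)) {
        if (sscanf(line, "%255s %31s %lu %lu %d",
                   path, type, &size_kb, &used_kb, &prio) == 5)
            printf("%s (%s): %lu kB total, %lu kB in use, prio %d\n",
                   path, type, size_kb, used_kb, prio);
    }
    fclose(f);
    return 0;
}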

int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
{
    int error;

    error = radix_tree_maybe_preload(gfp_mask);
    if (!error) {
        error = __add_to_swap_cache(page, entry);
        radix_tree_preload_end();
    }
    return error;
}

add_to_swap_cache /mm/swap_state.c


/*
 * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
 * but sets SwapCache flag and private instead of mapping and index.
 */
int __add_to_swap_cache(struct page *page, swp_entry_t entry)
{
    int error;
    struct address_space *address_space;

    VM_BUG_ON_PAGE(!PageLocked(page), page);
    VM_BUG_ON_PAGE(PageSwapCache(page), page);
    VM_BUG_ON_PAGE(!PageSwapBacked(page), page);

    page_cache_get(page);
    SetPageSwapCache(page);
    set_page_private(page, entry.val);

    address_space = swap_address_space(entry);
    spin_lock_irq(&address_space->tree_lock);
    error = radix_tree_insert(&address_space->page_tree,
                    entry.val, page);
    if (likely(!error)) {
        address_space->nrpages++;
        __inc_zone_page_state(page, NR_FILE_PAGES);
        INC_CACHE_INFO(add_total);
    }
    spin_unlock_irq(&address_space->tree_lock);

    if (unlikely(error)) {
        /*
         * Only the context which have set SWAP_HAS_CACHE flag
         * would call add_to_swap_cache().
         * So add_to_swap_cache() doesn't returns -EEXIST.
         */
        VM_BUG_ON(error == -EEXIST);
        set_page_private(page, 0UL);
        ClearPageSwapCache(page);
        page_cache_release(page);
    }

    return error;
}
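
__add_to_swap_cache /mm/swap_state.c

The reason add_to_swap_cache() is split from __add_to_swap_cache() is that inserting into the radix tree may need new tree nodes, and allocating them can sleep, while the insertion itself runs under spin_lock_irq(&address_space->tree_lock). radix_tree_maybe_preload() therefore reserves nodes beforehand so radix_tree_insert() does not have to allocate inside the critical section. Below is a user-space analogue of the same "allocate outside the lock, link inside the lock" pattern; it is my own illustration and has nothing to do with the kernel's radix tree implementation.

/* User-space analogue of the preload pattern used by add_to_swap_cache():
 * anything that might block (here: malloc) happens before the lock is taken,
 * so the critical section only links the node in and cannot fail. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
    unsigned long key;
    struct node *next;
};

static struct node *head;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static int insert_key(unsigned long key)
{
    /* Step 1: "preload" -- allocate outside the lock (may block). */
    struct node *n = malloc(sizeof(*n));
    if (!n)
        return -1;
    n->key = key;

    /* Step 2: the critical section only links the node in; no allocation,
     * no failure path that needs unwinding while the lock is held. */
    pthread_mutex_lock(&lock);
    n->next = head;
    head = n;
    pthread_mutex_unlock(&lock);
    return 0;
}

int main(void)
{
    for (unsigned long i = 0; i < 5; i++)
        if (insert_key(i))
            return 1;
    for (struct node *n = head; n; n = n->next)
        printf("%lu\n", n->key);
    return 0;
}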
