1.概述
本文主要解析struct page的flags成员中相关bit的含义以及其重要的标志位说明,简单说明一下Linux内核常见的PageXXX()/SetPageXXX()/ClearPageXXX()函数的作用和实现。
内核版本:linux-4.9
2.struct page结构
struct page {
/* 第一个双字大小的区块 */
unsigned long flags; /* Atomic flags, some possibly
* updated asynchronously */
union {
struct address_space *mapping; /* If low bit clear, points to
* inode address_space, or NULL.
* If page mapped as anonymous
* memory, low bit is set, and
* it points to anon_vma object:
* see PAGE_MAPPING_ANON below.
*/
void *s_mem; /* slab first object */
atomic_t compound_mapcount; /* first tail page */
/* page_deferred_list().next -- second tail page */
};
/* 第二个双字大小的区块 */
union {
pgoff_t index; /* Our offset within mapping. */
void *freelist; /* sl[aou]b first free object */
/* page_deferred_list().prev -- second tail page */
};
union {
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
/* Used for cmpxchg_double in slub */
unsigned long counters;
#else
/*
* Keep _refcount separate from slub cmpxchg_double data.
* As the rest of the double word is protected by slab_lock
* but _refcount is not.
*/
unsigned counters;
#endif
struct {
union {
/*
* Count of ptes mapped in mms, to show when
* page is mapped & limit reverse map searches.
*
* Extra information about page type may be
* stored here for pages that are never mapped,
* in which case the value MUST BE <= -2.
* See page-flags.h for more details.
*/
atomic_t _mapcount;
unsigned int active; /* SLAB */
struct { /* SLUB */
unsigned inuse:16;
unsigned objects:15;
unsigned frozen:1;
};
int units; /* SLOB */
};
/*
* Usage count, *USE WRAPPER FUNCTION* when manual
* accounting. See page_ref.h
*/
atomic_t _refcount;
};
};
/*
* 第三个双字大小的区块
*
* WARNING: bit 0 of the first word encode PageTail(). That means
* the rest users of the storage space MUST NOT use the bit to
* avoid collision and false-positive PageTail().
*/
union {
struct list_head lru; /* Pageout list, eg. active_list
* protected by zone_lru_lock !
* Can be used as a generic list
* by the page owner.
*/
struct dev_pagemap *pgmap; /* ZONE_DEVICE pages are never on an
* lru or handled by a slab
* allocator, this points to the
* hosting device page map.
*/
struct { /* slub per cpu partial pages */
struct page *next; /* Next partial slab */
#ifdef CONFIG_64BIT
int pages; /* Nr of partial slabs left */
int pobjects; /* Approximate # of objects */
#else
short int pages;
short int pobjects;
#endif
};
struct rcu_head rcu_head; /* Used by SLAB
* when destroying via RCU
*/
/* Tail pages of compound page */
struct {
unsigned long compound_head; /* If bit zero is set */
/* First tail page only */
#ifdef CONFIG_64BIT
/*
* On 64 bit system we have enough space in struct page
* to encode compound_dtor and compound_order with
* unsigned int. It can help compiler generate better or
* smaller code on some archtectures.
*/
unsigned int compound_dtor;
unsigned int compound_order;
#else
unsigned short int compound_dtor;
unsigned short int compound_order;
#endif
};
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
struct {
unsigned long __pad; /* do not overlay pmd_huge_pte
* with compound_head to avoid
* possible bit 0 collision.
*/
pgtable_t pmd_huge_pte; /* protected by page->ptl */
};
#endif
};
/* 剩余字节非双字对齐 */
union {
unsigned long private; /* Mapping-private opaque data:
* usually used for buffer_heads
* if PagePrivate set; used for
* swp_entry_t if PageSwapCache;
* indicates order in the buddy
* system if PG_buddy is set.
*/
#if USE_SPLIT_PTE_PTLOCKS
#if ALLOC_SPLIT_PTLOCKS
spinlock_t *ptl;
#else
spinlock_t ptl;
#endif
#endif
struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */
};
#ifdef CONFIG_MEMCG
struct mem_cgroup *mem_cgroup;
#endif
/*
* On machines where all RAM is mapped into kernel address space,
* we can simply calculate the virtual address. On machines with
* highmem some memory is mapped into kernel virtual memory
* dynamically, so we need a place to store that address.
* Note that this field could be 16 bits on x86 ... ;)
*
* Architectures with slow multiplication can define
* WANT_PAGE_VIRTUAL in asm/page.h
*/
#if defined(WANT_PAGE_VIRTUAL)
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
#ifdef CONFIG_KMEMCHECK
/*
* kmemcheck wants to track the status of each byte in a page; this
* is a pointer to such a status block. NULL if not tracked.
*/
void *shadow;
#endif
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
int _last_cpupid;
#endif
}
linux内核内存管理的核心数据结构是struct page,其他所有内存管理设施都是以它为中心展开的,如vma管理、缺页中断、页面分配与回收等,该数据结构定义在include/linux/mm_types.h头文件中,大量使用了c语言的union来优化其数据结构大小,因为每个物理页面(页帧)都需要一个struct page数据结构,因此会占用较多的空间。
3.flags成员介绍
struct page中的flags成员是页面的标志位集合,这些标志位是内存管理中非常重要的部分,具体定义在include/linux/page-flags.h文件中,其中重要的标志位如下:
enum pageflags {
PG_locked, /* 表示页面已上锁,不要访问 */
PG_error, /* 表示页面发生IO错误 */
PG_referenced, /* 用于RCU算法 */
PG_uptodate, /* 表示页面内容有效,当该页面上读操作完成后,设置该标志位 */
PG_dirty, /* 表示页面是脏页,内容被修改过 */
PG_lru, /* 表示该页面在lrc链表中 */
PG_active, /* 表示该页面在活跃lru链表中 */
PG_slab, /* 表示该页面是属于slab分配器创建的slab */
PG_owner_priv_1, /* 页面的所有者使用,如果是pagecache页面,文件系统可能使用*/
PG_arch_1, /* 与体系架构相关的页面状态位 */
PG_reserved, /* 表示该页面不可被换出,防止该page被交换到swap */
PG_private, /* 如果page中的private成员非空,则需要设置该标志,如果是pagecache, 包含fs-private data */
PG_private_2, /* 如果是pagecache, 包含fs aux data */
PG_writeback, /* 页面正在回写 */
PG_head, /* A head page */
PG_swapcache, /* 表示该page处于swap cache中 */
PG_mappedtodisk, /* 表示page中的数据在后备存储器中有对应 */
PG_reclaim, /* 表示该page要被回收,决定要回收某个page后,需要设置该标志 */
PG_swapbacked, /* 该page的后备存储器是swap/ram,一般匿名页才可以回写swap分区 */
PG_unevictable, /* 该page被锁住,不能回收,并会出现在LRU_UNEVICTABLE链表中,它包括的几种page:ramdisk或ramfs使用的页、shm_locked、mlock锁定的页 */
#ifdef CONFIG_MMU
PG_mlocked, /* 该page在vma中被锁定,一般是通过系统调用mlock()锁定了一段内存 */
#endif
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
PG_uncached, /* 该page是uncache的 */
#endif
#ifdef CONFIG_MEMORY_FAILURE
PG_hwpoison, /* hardware poisoned page. Don't touch */
#endif
#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
PG_young,
PG_idle,
#endif
__NR_PAGEFLAGS,
/* Filesystems */
PG_checked = PG_owner_priv_1,
/* Two page bits are conscripted by FS-Cache to maintain local caching
* state. These bits are set on pages belonging to the netfs's inodes
* when those inodes are being locally cached.
*/
PG_fscache = PG_private_2, /* page backed by cache */
/* XEN */
/* Pinned in Xen as a read-only pagetable page. */
PG_pinned = PG_owner_priv_1,
/* Pinned as part of domain save (see xen_mm_pin_all()). */
PG_savepinned = PG_dirty,
/* Has a grant mapping of another (foreign) domain's page. */
PG_foreign = PG_owner_priv_1,
/* SLOB */
PG_slob_free = PG_private,
/* Compound pages. Stored in first tail page's flags */
PG_double_map = PG_private_2,
/* non-lru isolated movable page */
PG_isolated = PG_reclaim,
};
内核定义了一些标准宏,用于检查页面是否设置了某个特定的标志位或者用于操作某些特定的标志位,这些宏的名称都有一定的规律,具体如下:
PageXXX():用于检查页面是否设置了PG_XXX标志位。如PageLRU(page)检查PG_lru标志位是否置位,PageSlab(page)检查PG_slab是否置位。
SetPageXXX():用于设置页中PG_XXX标志位,如SetPageDirty(page)设置页中PG_dirty标志位。
ClearPageXXX():用于无条件清除某个特定的标志位。
这些宏的实现都在include/linux/page-flags.h文件中定义。如下所示:
/*
* Macros to create function definitions for page flags
*/
#define TESTPAGEFLAG(uname, lname, policy) \
static __always_inline int Page##uname(struct page *page) \
{ return test_bit(PG_##lname, &policy(page, 0)->flags); }
#define SETPAGEFLAG(uname, lname, policy) \
static __always_inline void SetPage##uname(struct page *page) \
{ set_bit(PG_##lname, &policy(page, 1)->flags); }
#define CLEARPAGEFLAG(uname, lname, policy) \
static __always_inline void ClearPage##uname(struct page *page) \
{ clear_bit(PG_##lname, &policy(page, 1)->flags); }
#define __SETPAGEFLAG(uname, lname, policy) \
static __always_inline void __SetPage##uname(struct page *page) \
{ __set_bit(PG_##lname, &policy(page, 1)->flags); }
#define __CLEARPAGEFLAG(uname, lname, policy) \
static __always_inline void __ClearPage##uname(struct page *page) \
{ __clear_bit(PG_##lname, &policy(page, 1)->flags); }
#define TESTSETFLAG(uname, lname, policy) \
static __always_inline int TestSetPage##uname(struct page *page) \
{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); }
#define TESTCLEARFLAG(uname, lname, policy) \
static __always_inline int TestClearPage##uname(struct page *page) \
{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); }
#define PAGEFLAG(uname, lname, policy) \
TESTPAGEFLAG(uname, lname, policy) \
SETPAGEFLAG(uname, lname, policy) \
CLEARPAGEFLAG(uname, lname, policy)
#define __PAGEFLAG(uname, lname, policy) \
TESTPAGEFLAG(uname, lname, policy) \
__SETPAGEFLAG(uname, lname, policy) \
__CLEARPAGEFLAG(uname, lname, policy)
#define TESTSCFLAG(uname, lname, policy) \
TESTSETFLAG(uname, lname, policy) \
TESTCLEARFLAG(uname, lname, policy)
#define TESTPAGEFLAG_FALSE(uname) \
static inline int Page##uname(const struct page *page) { return 0; }
#define SETPAGEFLAG_NOOP(uname) \
static inline void SetPage##uname(struct page *page) { }
#define CLEARPAGEFLAG_NOOP(uname) \
static inline void ClearPage##uname(struct page *page) { }
#define __CLEARPAGEFLAG_NOOP(uname) \
static inline void __ClearPage##uname(struct page *page) { }
#define TESTSETFLAG_FALSE(uname) \
static inline int TestSetPage##uname(struct page *page) { return 0; }
#define TESTCLEARFLAG_FALSE(uname) \
static inline int TestClearPage##uname(struct page *page) { return 0; }
#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname) \
SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname)
#define TESTSCFLAG_FALSE(uname) \
TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
__PAGEFLAG(Locked, locked, PF_NO_TAIL)
PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND)
PAGEFLAG(Referenced, referenced, PF_HEAD)
TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
__SETPAGEFLAG(Referenced, referenced, PF_HEAD)
PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD)
__CLEARPAGEFLAG(Dirty, dirty, PF_HEAD)
PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD)
PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
TESTCLEARFLAG(Active, active, PF_HEAD)
__PAGEFLAG(Slab, slab, PF_NO_TAIL)
__PAGEFLAG(SlobFree, slob_free, PF_NO_TAIL)
PAGEFLAG(Checked, checked, PF_NO_COMPOUND) /* Used by some filesystems */
/* Xen */
PAGEFLAG(Pinned, pinned, PF_NO_COMPOUND)
TESTSCFLAG(Pinned, pinned, PF_NO_COMPOUND)
PAGEFLAG(SavePinned, savepinned, PF_NO_COMPOUND);
PAGEFLAG(Foreign, foreign, PF_NO_COMPOUND);
PAGEFLAG(Reserved, reserved, PF_NO_COMPOUND)
__CLEARPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND)
PAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL)
__CLEARPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL)
__SETPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL)
/*
* Private page markings that may be used by the filesystem that owns the page
* for its own purposes.
* - PG_private and PG_private_2 cause releasepage() and co to be invoked
*/
PAGEFLAG(Private, private, PF_ANY) __SETPAGEFLAG(Private, private, PF_ANY)
__CLEARPAGEFLAG(Private, private, PF_ANY)
PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY)
PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY)
TESTCLEARFLAG(OwnerPriv1, owner_priv_1, PF_ANY)
/*
* Only test-and-set exist for PG_writeback. The unconditional operators are
* risky: they bypass page accounting.
*/
TESTPAGEFLAG(Writeback, writeback, PF_NO_COMPOUND)
TESTSCFLAG(Writeback, writeback, PF_NO_COMPOUND)
PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL)
/* PG_readahead is only used for reads; PG_reclaim is only for writes */
PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL)
TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL)
PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND)
TESTCLEARFLAG(Readahead, reclaim, PF_NO_COMPOUND)
#ifdef CONFIG_HIGHMEM
/*
* Must use a macro here due to header dependency issues. page_zone() is not
* available at this point.
*/
#define PageHighMem(__p) is_highmem_idx(page_zonenum(__p))
#else
PAGEFLAG_FALSE(HighMem)
#endif
#ifdef CONFIG_SWAP
PAGEFLAG(SwapCache, swapcache, PF_NO_COMPOUND)
#else
PAGEFLAG_FALSE(SwapCache)
#endif
PAGEFLAG(Unevictable, unevictable, PF_HEAD)
__CLEARPAGEFLAG(Unevictable, unevictable, PF_HEAD)
TESTCLEARFLAG(Unevictable, unevictable, PF_HEAD)
#ifdef CONFIG_MMU
PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL)
__CLEARPAGEFLAG(Mlocked, mlocked, PF_NO_TAIL)
TESTSCFLAG(Mlocked, mlocked, PF_NO_TAIL)
#else
PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked)
TESTSCFLAG_FALSE(Mlocked)
#endif
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
PAGEFLAG(Uncached, uncached, PF_NO_COMPOUND)
#else
PAGEFLAG_FALSE(Uncached)
#endif
#ifdef CONFIG_MEMORY_FAILURE
PAGEFLAG(HWPoison, hwpoison, PF_ANY)
TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
#define __PG_HWPOISON (1UL << PG_hwpoison)
#else
PAGEFLAG_FALSE(HWPoison)
#define __PG_HWPOISON 0
#endif
#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
TESTPAGEFLAG(Young, young, PF_ANY)
SETPAGEFLAG(Young, young, PF_ANY)
TESTCLEARFLAG(Young, young, PF_ANY)
PAGEFLAG(Idle, idle, PF_ANY)
#endif