Linux源码剖析struct page结构体flags成员

概述

struct page是mm中最核心的结构体之一,可以说整个内存管理就是围绕page展开的。不同场景下page的状态各有不同,page->flags标志位是描述page状态的重要成员,各标志位定义在include/linux/page-flags.h:

/*
 * Bit indices used in page->flags to describe the state of a page
 * (abridged listing; see include/linux/page-flags.h for the full enum).
 */
enum pageflags {
    PG_locked,      /* Page is locked. Don't touch. */
    // Related to the second-chance algorithm used by page reclaim
    PG_referenced,
    // Page cache contents match the on-disk data, or hold the newest
    // data that still has to be written back to disk
    PG_uptodate,
    // Page is dirty
    PG_dirty,
    // Page is on an LRU list
    PG_lru,
    // Page is on the active LRU list
    PG_active,

    PG_workingset,
    PG_waiters,     /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */
    // I/O error
    PG_error,
    // Page belongs to slab memory
    PG_slab,
    PG_owner_priv_1,    /* Owner use. If pagecache, fs may use*/
    PG_arch_1,
    // Must not be swapped out
    PG_reserved,
    PG_private,     /* If pagecache, has fs-private data */
    PG_private_2,       /* If pagecache, has fs aux data */
    // Writeback in progress
    PG_writeback,       /* Page is under writeback */
    PG_head,        /* A head page */
    PG_mappedtodisk,    /* Has blocks allocated on-disk */
    // About to be reclaimed; set right before reclaim starts
    PG_reclaim,     /* To be reclaimed asap */
    // Set for anonymous pages and shmem pages
    PG_swapbacked,      /* Page is backed by RAM/swap */
    PG_unevictable,     /* Page is "unevictable"  */
#ifdef CONFIG_MMU
    // Page has been mlock()ed
    PG_mlocked,     /* Page is vma mlocked */
#endif

#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
    PG_young,
    PG_idle,
#endif
};
PG_locked

表示page被lock,上锁之后其他等待该lock的调用会睡眠等待,主要是用于page的竞态保护,内核提供lock_page/trylock_page/unlock_page相关函数。

trylock_page:检测PG_locked flag,如果已经设置返回false,否则设置返回true。

lock_page: 先检测是否上锁,如果trylock_page返回false代表已经设置过了,那么调用进__lock_page会睡眠等待,如果未设置过,那么trylock_page直接设置返回。

unlock_page:清除PG_locked标志,并唤醒等待该page的进程。

PG_dirty

到底哪些类型的页面、在什么场景下会被设置为脏页?

设置:凡是需要写回磁盘或者交换分区(包括zram压缩方式)的page都会设置PG_dirty,比如要回收的匿名页(在add_to_swap中设置PG_dirty)、shmem页面,或者file-backed的页面。因为如果不设置PG_dirty标志(真正clean的page除外),就无法通过pageout写回磁盘或者交换分区(包括zram)。

清除:通常准备写入磁盘或者交换分区前clear,见下面的pageout函数。


/*
 * shrink_page_list() returns the number of reclaimed pages
 *
 * NOTE: this is an abridged excerpt; the "..." lines stand for code that
 * has been left out, so the braces below are intentionally unbalanced.
 * Only the path relevant to dirty pages is shown.
 */
static unsigned int shrink_page_list(struct list_head *page_list,
				     struct pglist_data *pgdat,
				     struct scan_control *sc,
				     enum ttu_flags ttu_flags,
				     struct reclaim_stat *stat,
				     bool ignore_references)
{
	LIST_HEAD(ret_pages);
	LIST_HEAD(free_pages);
	unsigned int nr_reclaimed = 0;
	unsigned int pgactivate = 0;

	/* Start from clean statistics for this invocation. */
	memset(stat, 0, sizeof(*stat));
	cond_resched();

	/* Process pages until the caller-supplied list is drained. */
	while (!list_empty(page_list)) {
        ...

		/* A dirty page is handed to pageout() to be written back. */
		if (PageDirty(page)) {
            ...
			try_to_unmap_flush_dirty();
			switch (pageout(page, mapping)) {
            ...
		}

	return nr_reclaimed;
}
后台回写dirty page clear PG_dirty的调用栈:
#0  clear_page_dirty_for_io (page=0xffffea0000036a40) at mm/page-writeback.c:2663
#1  0xffffffff81578006 in mpage_submit_page (mpd=0xffff888004a3f7a0, page=0xffffea0000036a40) at fs/ext4/inode.c:2061
#2  0xffffffff81579c15 in mpage_map_and_submit_buffers (mpd=0xffff888004a3f7a0) at fs/ext4/inode.c:2329
#3  0xffffffff815837b3 in mpage_map_and_submit_extent (give_up_on_write=<optimized out>, mpd=<optimized out>, handle=<optimized out>) at fs/ext4/inode.c:2468
#4  ext4_writepages (mapping=<optimized out>, wbc=<optimized out>) at fs/ext4/inode.c:2781
#5  0xffffffff813433da in do_writepages (mapping=0xffff888000c827e0, wbc=0xffff888004a3fa60) at mm/page-writeback.c:2352
#6  0xffffffff8148d3f5 in __writeback_single_inode (inode=0xffff888000c82668, wbc=0xffff888004a3fa60) at fs/fs-writeback.c:1461
#7  0xffffffff8148dc9c in writeback_sb_inodes (sb=<optimized out>, wb=0xffff8880044a6060, work=0xffff888004a3fd50) at fs/fs-writeback.c:1721
#8  0xffffffff8148e12f in __writeback_inodes_wb (wb=0xffff8880044a6060, work=0xffff888004a3fd50) at fs/fs-writeback.c:1790
#9  0xffffffff8148e609 in wb_writeback (wb=0xffff8880044a6060, work=0xffff888004a3fd50) at fs/fs-writeback.c:1896
#10 0xffffffff81490495 in wb_check_background_flush (wb=<optimized out>) at fs/fs-writeback.c:1964
PG_writeback

表示page正在回写,向swap分区写入和文件系统中向磁盘写入都会设置该标志位。一般调用set_page_writeback函数设置。一般在向块设备层submit io前设置,io完成取消。

swap分区写入场景举例:

设置writeback:

/*
 * Abridged excerpt of the swap write path ("..." stands for omitted code).
 * Shows where the writeback flag is set for a page going to swap.
 */
int __swap_writepage(struct page *page, struct writeback_control *wbc,
        bio_end_io_t end_write_func)
{
    ...
    /* Mark the page as under writeback before the bio is submitted. */
    set_page_writeback(page);
    /* The page lock is dropped before the I/O is issued. */
    unlock_page(page);
    submit_bio(bio);
out:
    return ret;
}

IO完成取消writeback标志:IO完成的回调最终会调用end_page_writeback函数,在其中清除PG_writeback标志并唤醒等待回写完成的进程。

 

普通文件设置PG_writeback是fs/ext4/page-io.c中:ext4_bio_write_page

PG_reclaim

一般在写回磁盘或者交换分区之前设置,回写完成后清除。清除操作(ClearPageReclaim)位于end_page_writeback函数中:不论是普通文件回写磁盘,还是匿名页回写交换分区,IO完成时都会回调该函数;由于回写之前设置了PG_reclaim,回写完成时就会在这里把它清除掉。

设置代码:


/*
 * pageout is called by shrink_page_list() for each dirty page.
 * Calls ->writepage().
 *
 * Return values, as visible in the body below:
 *   PAGE_KEEP     - page is not freeable, has no mapping and could not be
 *                   cleaned, or the inode may not be written to
 *   PAGE_ACTIVATE - mapping has no ->writepage, or ->writepage returned
 *                   AOP_WRITEPAGE_ACTIVATE
 *   PAGE_SUCCESS  - ->writepage was called (including the res < 0 case,
 *                   after handle_write_error())
 *   PAGE_CLEAN    - the page turned out not to be dirty, or an orphaned
 *                   page had its buffers freed
 */
static pageout_t pageout(struct page *page, struct address_space *mapping)
{
	/*
	 * If the page is dirty, only perform writeback if that write
	 * will be non-blocking.  To prevent this allocation from being
	 * stalled by pagecache activity.  But note that there may be
	 * stalls if we need to run get_block().  We could test
	 * PagePrivate for that.
	 *
	 * If this process is currently in __generic_file_write_iter() against
	 * this page's queue, we can perform writeback even if that
	 * will block.
	 *
	 * If the page is swapcache, write it back even if that would
	 * block, for some throttling. This happens by accident, because
	 * swap_backing_dev_info is bust: it doesn't reflect the
	 * congestion state of the swapdevs.  Easy to fix, if needed.
	 */
	if (!is_page_cache_freeable(page))
		return PAGE_KEEP;
	if (!mapping) {
		/*
		 * Some data journaling orphaned pages can have
		 * page->mapping == NULL while being dirty with clean buffers.
		 */
		if (page_has_private(page)) {
			if (try_to_free_buffers(page)) {
				ClearPageDirty(page);
				pr_info("%s: orphaned page\n", __func__);
				return PAGE_CLEAN;
			}
		}
		return PAGE_KEEP;
	}
	if (mapping->a_ops->writepage == NULL)
		return PAGE_ACTIVATE;
	if (!may_write_to_inode(mapping->host))
		return PAGE_KEEP;

    /* About to write back to disk or swap, so clear the dirty flag first */
	if (clear_page_dirty_for_io(page)) {
		int res;
		struct writeback_control wbc = {
			.sync_mode = WB_SYNC_NONE,
			.nr_to_write = SWAP_CLUSTER_MAX,
			.range_start = 0,
			.range_end = LLONG_MAX,
			.for_reclaim = 1,
		};

        /* Set PG_reclaim to mark that reclaim of this page is about to start */
		SetPageReclaim(page);
		res = mapping->a_ops->writepage(page, &wbc);
		if (res < 0)
			handle_write_error(mapping, page, res);
		if (res == AOP_WRITEPAGE_ACTIVATE) {
			/* Writeback was refused: undo the reclaim mark. */
			ClearPageReclaim(page);
			return PAGE_ACTIVATE;
		}

		if (!PageWriteback(page)) {
			/* synchronous write or broken a_ops? */
			ClearPageReclaim(page);
		}
		trace_mm_vmscan_writepage(page);
		inc_node_page_state(page, NR_VMSCAN_WRITE);
		return PAGE_SUCCESS;
	}

	/* clear_page_dirty_for_io() returned false: nothing to write. */
	return PAGE_CLEAN;
}
PG_uptodate

内核的注释:PG_uptodate标识page的内容是否是“有效的”,一种情况就是read读取成功后会设置PG_uptodate:

* PG_uptodate tells whether the page's contents is valid.  When a read
 * completes, the page becomes uptodate, unless a disk I/O error happened.

关于read的情况参考:buffer_head数据结构_nginux的博客-CSDN博客

在文中最后分析read中断回调的时候,如果page中所有buffer_head都是uptodate的,那么就会调用SetPageUptodate设置page的PG_uptodate标志。

对于write的情况,只要数据写入pagecache就会设置uptodate,参考如下代码:

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值