page compaction 基础知识

一、 查看当前系统当前碎片化的情况

  • cat /proc/buddyinfo
    This file is used primarily for diagnosing memory fragmentation issues. **cat

  • /proc/pagetypeinfo
    The pagetypinfo begins with information on the size of a page block. It
    then gives the same type of information as buddyinfo except broken down
    by migrate-type and finishes with details on how many page blocks of each
    type exist.

二、内核将已分配页的3种不同类型

  • 不可移动页 : 内核分配的大多数内存属于该类别
  • 可移动页 : 用户态申请
  • 可回收页 : 文件系统的cache

分别介绍三种页的实现

  1. 不可移动页 — 在内存中有固定位置, 不能移动到其他地方.
  2. 可移动页 — 可以随意地移动.
  3. 可回收页 — 不能直接移动, 但可以删除, 其内容可以从某些源重新生成.

用户态使用malloc。在访问的时候会触发缺页终端,

/*
 * We enter with non-exclusive mmap_sem (to exclude vma changes,
 * but allow concurrent faults), and pte mapped but not yet locked.
 * We return with mmap_sem still held, but pte unmapped and unlocked.
 */
static int do_anonymous_page(struct fault_env *fe)
{
	struct vm_area_struct *vma = fe->vma;
	struct mem_cgroup *memcg;
	struct page *page;
	pte_t entry;
	page = alloc_zeroed_user_highpage_movable(vma, fe->address);
}

/**
 * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move
 * @vma: The VMA the page is to be allocated for
 * @vaddr: The virtual address the page will be inserted into
 *
 * This function will allocate a page for a VMA that the caller knows will
 * be able to migrate in the future using move_pages() or reclaimed
 */
static inline struct page *
alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
					unsigned long vaddr)
{
	return __alloc_zeroed_user_highpage(__GFP_MOVABLE, vma, vaddr);
}

可以发现,通过匿名页处理后得到的是 __GFP_MOVABLE ,是可以移动的页。

三、fallback机制 (内核申请的页也未必都是不可移动的)

/*
 * This array describes the order lists are fallen back to when
 * the free lists for the desirable migrate type are depleted
 */
static int fallbacks[MIGRATE_TYPES][4] = {
    //  分配不可移动页失败的备用列表
	[MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_TYPES },
	//  分配可回收页失败时的备用列表
	[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_TYPES },
	//  分配可移动页失败时的备用列表
	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES },
#ifdef CONFIG_CMA
	[MIGRATE_CMA]         = { MIGRATE_TYPES }, /* Never used */
#endif
#ifdef CONFIG_MEMORY_ISOLATION
	[MIGRATE_ISOLATE]     = { MIGRATE_TYPES }, /* Never used */
#endif
};

如果内核申请不可移动的页申请不到,就从可回收链表里拿,拿不到再从可移动链表里去拿。
用户空间申请的都是可移动的,直接从可移动的里面去拿,(有CMA的话,如果可移动页如果没有了,先从CMA里找,)如果找不到就从fallback链表找:先找可回收的,再找不可移动的。

四、 pageblock 作用
pageblock -migrate 是一起作用的。

/*
 * Isolate all pages that can be migrated from the first suitable block,
 * starting at the block pointed to by the migrate scanner pfn within
 * compact_control.
 */
static isolate_migrate_t isolate_migratepages(struct zone *zone,
					struct compact_control *cc)
{
	unsigned long block_start_pfn;
	unsigned long block_end_pfn;
	unsigned long low_pfn;
	struct page *page;
	const isolate_mode_t isolate_mode =
		(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
		(cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);

	/*
	 * Start at where we last stopped, or beginning of the zone as
	 * initialized by compact_zone()
	 */
	low_pfn = cc->migrate_pfn;
	block_start_pfn = pageblock_start_pfn(low_pfn);
	if (block_start_pfn < zone->zone_start_pfn)
		block_start_pfn = zone->zone_start_pfn;

	/* Only scan within a pageblock boundary */
	block_end_pfn = pageblock_end_pfn(low_pfn);

	/*
	 * Iterate over whole pageblocks until we find the first suitable.
	 * Do not cross the free scanner.
	 */
	for (; block_end_pfn <= cc->free_pfn;
			low_pfn = block_end_pfn,
			block_start_pfn = block_end_pfn,
			block_end_pfn += pageblock_nr_pages) {

		/*
		 * This can potentially iterate a massively long zone with
		 * many pageblocks unsuitable, so periodically check if we
		 * need to schedule, or even abort async compaction.
		 */
		if (!(low_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages))
						&& compact_should_abort(cc))
			break;

		page = pageblock_pfn_to_page(block_start_pfn, block_end_pfn,
									zone);
		if (!page)
			continue;

		/* If isolation recently failed, do not retry */
		if (!isolation_suitable(cc, page))
			continue;

		/*
		 * For async compaction, also only scan in MOVABLE blocks.
		 * Async compaction is optimistic to see if the minimum amount
		 * of work satisfies the allocation.
		 */
		if (!suitable_migration_source(cc, page))
			continue;

		/* Perform the isolation */
		low_pfn = isolate_migratepages_block(cc, low_pfn,
						block_end_pfn, isolate_mode);

		if (!low_pfn || cc->contended)
			return ISOLATE_ABORT;

		/*
		 * Either we isolated something and proceed with migration. Or
		 * we failed and compact_zone should decide if we should
		 * continue or not.
		 */
		break;
	}

	/* Record where migration scanner will be restarted. */
	cc->migrate_pfn = low_pfn;

	return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
}
for (; block_end_pfn <= cc->free_pfn;
		low_pfn = block_end_pfn,
		block_start_pfn = block_end_pfn,
		block_end_pfn += pageblock_nr_pages) 

可以发现以 pageblock 为单位,扫描可移动页,最终把可移动的页添加到struct compact_control结构中的migratepages链表中。

/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
#define pageblock_order     (MAX_ORDER-1)
#define pageblock_nr_pages  (1UL << pageblock_order)

由于每个迁移链表都应该有适当数量的内存,内核需要定义”适当”的概念. 这是通过两个全局变量pageblock_order和pageblock_nr_pages提供的. pageblock 就是为了这个而存在。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值