linux free_pages 伙伴系统释放内存过程

free_pages 是伙伴系统释放内存的入口,分两种情形:
一种是 order 为 0 时,把单个 page 释放到当前 CPU 的 per-cpu 页面缓存(pcp)中;
另一种是 order 大于 0 时,把整块内存直接释放回伙伴系统;


具体看下面代码:


/*
 * free_pages - free 2^order pages identified by a kernel virtual address
 * @addr:  virtual address of the first page; 0 is accepted and ignored
 * @order: order of the block being freed
 *
 * Converts the address to its struct page and hands off to __free_pages().
 */
void free_pages(unsigned long addr, unsigned int order)
{
	/* Nothing to do for a zero address. */
	if (addr == 0)
		return;

	VM_BUG_ON(!virt_addr_valid((void *)addr));
	__free_pages(virt_to_page((void *)addr), order);
}


/*
 * __free_pages - free a block of 2^order pages given its first struct page
 * @page:  first page of the block
 * @order: order of the block
 *
 * Order-0 pages go to the per-cpu lists as hot pages; larger blocks go
 * through __free_pages_ok().
 */
void __free_pages(struct page *page, unsigned int order)
{
	/*
	 * Check whether the page is still in use: nothing is freed unless
	 * put_page_testzero() returns true.
	 * NOTE(review): presumably this drops one reference and reports
	 * whether it was the last one - confirm against its definition.
	 */
	if (!put_page_testzero(page))
		return;

	if (order != 0) {
		__free_pages_ok(page, order);
		return;
	}

	/* cold == 0: free a single page as a hot page. */
	free_hot_cold_page(page, 0);
}


接下来先看释放单个 page 的情形。__free_pages 调用 free_hot_cold_page 时传入 cold = 0,即总是按 hot page 进行释放,

表示这个 page 极有可能很快会被再次使用;


/*
 * free_hot_cold_page - free a single (order-0) page to the per-cpu lists
 * @page: the page being freed
 * @cold: non-zero queues the page at the tail of the pcp list (cold page),
 *        zero queues it at the head (hot page) so it is reused first
 *
 * The page is put on the current CPU's pcp list for its migrate type.
 * When the list reaches pcp->high entries, one batch of pages is pushed
 * back to the buddy allocator via free_pcppages_bulk().  Isolated pages
 * bypass the pcp lists and are given to free_one_page() directly.
 */
void free_hot_cold_page(struct page *page, int cold)
{
	struct zone *zone = page_zone(page);
	struct per_cpu_pages *pcp;
	unsigned long flags;
	int migratetype;

	/* Pre-free preparation/checks; give up if the page may not be freed. */
	if (!free_pages_prepare(page, 0))
		return;

	/* Record the migrate type of the pageblock this page belongs to. */
	migratetype = get_pageblock_migratetype(page);
	set_freepage_migratetype(page, migratetype);
	local_irq_save(flags);
	__count_vm_event(PGFREE);

	/*
	 * We only track unmovable, reclaimable and movable on pcp lists.
	 * Free ISOLATE pages back to the allocator because they are being
	 * offlined but treat RESERVE as movable pages so we can get those
	 * areas back if necessary. Otherwise, we may have to free
	 * excessively into the page allocator
	 */
	if (migratetype >= MIGRATE_PCPTYPES) {
		if (unlikely(is_migrate_isolate(migratetype))) {
			free_one_page(zone, page, 0, migratetype);
			goto out;
		}
		migratetype = MIGRATE_MOVABLE;
	}

	/*
	 * Queue on this CPU's pcp list: cold pages at the tail, hot pages at
	 * the head so that they are handed out again first.
	 */
	pcp = &this_cpu_ptr(zone->pageset)->pcp;
	if (cold)
		list_add_tail(&page->lru, &pcp->lists[migratetype]);
	else
		list_add(&page->lru, &pcp->lists[migratetype]);

	/* One more cached page; drain a batch once the high mark is hit. */
	pcp->count++;
	if (pcp->count >= pcp->high) {
		/*
		 * Read pcp->batch once so the number of pages drained and the
		 * amount subtracted from pcp->count are guaranteed to be the
		 * same value even if the field is changed in between.
		 * NOTE(review): upstream wraps this read in ACCESS_ONCE();
		 * consider doing the same if pcp->batch can be updated
		 * concurrently in this tree.
		 */
		int batch = pcp->batch;

		free_pcppages_bulk(zone, batch, pcp);
		pcp->count -= batch;
	}

out:
	local_irq_restore(flags);
}


/*
 * free_pcppages_bulk - move pages from a per-cpu page list to the buddy lists
 * @zone:  zone the pages belong to; zone->lock is taken for the whole drain
 * @count: number of pages to free
 * @pcp:   per-cpu page set whose lists are drained
 *
 * Pages are removed from the tail of the pcp lists, visiting the migrate
 * types round-robin, and each one is handed to __free_one_page() as an
 * order-0 block.  pcp->count is not adjusted here; the caller visible in
 * this file (free_hot_cold_page) does that itself and calls this with local
 * interrupts disabled.
 */
static void free_pcppages_bulk(struct zone *zone, int count,
					struct per_cpu_pages *pcp)
{
	int migratetype = 0;
	int batch_free = 0;
	int to_free = count;

	spin_lock(&zone->lock);
	/* reset the zone's pages_scanned counter */
	zone->pages_scanned = 0;

	while (to_free) {
		struct page *page;
		struct list_head *list;

		/*
		 * Remove pages from lists in a round-robin fashion. A
		 * batch_free count is maintained that is incremented when an
		 * empty list is encountered.  This is so more pages are freed
		 * off fuller lists instead of spinning excessively around empty
		 * lists
		 */
		do {
			batch_free++;
			if (++migratetype == MIGRATE_PCPTYPES)
				migratetype = 0;
			list = &pcp->lists[migratetype];
		} while (list_empty(list));

		/* This is the only non-empty list. Free them all. */
		if (batch_free == MIGRATE_PCPTYPES)
			batch_free = to_free;

		do {
			int mt;	/* migratetype of the to-be-freed page */

			/* take the page at the tail of the list */
			page = list_entry(list->prev, struct page, lru);
			/* must delete as __free_one_page list manipulates */
			list_del(&page->lru);
			mt = get_freepage_migratetype(page);
			/* with isolated pageblocks in the zone, re-read the type from the pageblock */
			if (unlikely(has_isolate_pageblock(zone)))
				mt = get_pageblock_migratetype(page);
			/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
			/* release each page to the buddy system through the call below */
			__free_one_page(page, zone, 0, mt);
			trace_mm_page_pcpu_drain(page, 0, mt);
		} while (--to_free && --batch_free && !list_empty(list));
	}
	spin_unlock(&zone->lock);
}

order大于0的时候,通过下面过程进行内存回收:


/*
 * __free_pages_ok - free a block of 2^order pages straight to the buddy lists
 * @page:  first page of the block
 * @order: order of the block
 *
 * After the pre-free checks succeed, and with local interrupts disabled,
 * the PGFREE counter is bumped by the number of pages, the pageblock's
 * migrate type is recorded on the page, and the block is passed to
 * free_one_page().
 */
static void __free_pages_ok(struct page *page, unsigned int order)
{
	unsigned long irq_flags;
	int mt;

	/* Give up if the block fails the pre-free preparation. */
	if (!free_pages_prepare(page, order))
		return;

	local_irq_save(irq_flags);
	__count_vm_events(PGFREE, 1 << order);
	mt = get_pageblock_migratetype(page);
	set_freepage_migratetype(page, mt);
	free_one_page(page_zone(page), page, order, mt);
	local_irq_restore(irq_flags);
}


无论是前面的 free_pcppages_bulk,还是这里的 __free_pages_ok(经由 free_one_page),最终的核心函数都是 __free_one_page(page, zone, order, migratetype);
接下来重点介绍这个函数:


/*
 * __free_one_page - return a block of pages to the buddy free lists
 *
 * page:        first page of the block being freed
 * zone:        zone the page belongs to
 * order:       order of the block being freed
 * migratetype: selects which free_list of the final order the block joins
 *
 * Freeing is the reverse of allocation.  First check whether the block can
 * be merged with its neighbouring buddy of the same order.  If it can, the
 * merged block has order + 1 and the search is repeated for its buddy, which
 * would give an order + 2 block, and so on until no further merge is
 * possible.  The resulting block is then added to the free list of its
 * final order; if no merge was possible at all, that is the original order.
 *
 * Note what counts as a buddy.  With four blocks of some order laid out as
 * A -- B -- C -- D, AB and CD are buddies but BC are not, so the buddy index
 * is computed as
 *     page_idx ^ (1 << order)
 *
 * __find_buddy_index() finds the index of the matching buddy.
 * page_is_buddy() tells whether two blocks are buddies:
 * 0 is not buddy, 1 is buddy.
 */
static inline void __free_one_page(struct page *page,
		struct zone *zone, unsigned int order,
		int migratetype)
{
	unsigned long page_idx;
	unsigned long combined_idx;
	unsigned long uninitialized_var(buddy_idx);
	struct page *buddy;
	unsigned int max_order;

	// merge limit for the first pass: the smaller of MAX_ORDER and pageblock_order + 1
	max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);

	VM_BUG_ON(!zone_is_initialized(zone));

	if (unlikely(PageCompound(page)))
		if (unlikely(destroy_compound_page(page, order)))
			return;

	VM_BUG_ON(migratetype == -1);
	if (likely(!is_migrate_isolate(migratetype)))
		__mod_zone_freepage_state(zone, 1 << order, migratetype);

	// compute the page index: the low MAX_ORDER bits of the pfn
	page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);

	VM_BUG_ON(page_idx & ((1 << order) - 1));
	VM_BUG_ON(bad_range(zone, page));

continue_merging:
	/* 
	Start at the current order; whenever a merge succeeds, move up one
	order and try again.
	*/
	while (order < max_order - 1) {
		// find the buddy index of the block being freed, at the current order
		buddy_idx = __find_buddy_index(page_idx, order);
		buddy = page + (buddy_idx - page_idx);
		if (!page_is_buddy(page, buddy, order))//page and buddy cannot merge, go to done_merging
			goto done_merging;
		/*
		 * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
		 * merge with it and move up one order.
		 */
		// mergeable at this order: a guard-page buddy takes the if-branch, a buddy sitting on a free list takes the else-branch
		if (page_is_guard(buddy)) {
			clear_page_guard_flag(buddy);
			set_page_private(page, 0);
			__mod_zone_freepage_state(zone, 1 << order,
						  migratetype);
		} else {
		// first take the buddy that was found off its free list
			list_del(&buddy->lru);
			zone->free_area[order].nr_free--;
			if (is_migrate_cma(migratetype))
				zone->free_area[order].nr_free_cma--;
			rmv_page_order(buddy);
		}
		// then merge buddy and page: compute the index of the combined block
		combined_idx = buddy_idx & page_idx;
		page = page + (combined_idx - page_idx);
		page_idx = combined_idx;
		order++;
		// order has been raised by one; repeat the procedure
	}
	if (max_order < MAX_ORDER) {
		/* If we are here, it means order is >= pageblock_order.
		 * We want to prevent merge between freepages on isolate
		 * pageblock and normal pageblock. Without this, pageblock
		 * isolation could cause incorrect freepage or CMA accounting.
		 *
		 * We don't want to hit this code for the more frequent
		 * low-order merging.
		 */
		if (unlikely(has_isolate_pageblock(zone))) {
			int buddy_mt;

			buddy_idx = __find_buddy_index(page_idx, order);
			buddy = page + (buddy_idx - page_idx);
			buddy_mt = get_pageblock_migratetype(buddy);

			if (migratetype != buddy_mt
					&& (is_migrate_isolate(migratetype) ||
					is_migrate_isolate(buddy_mt)))
				goto done_merging;
		}
		/* raise the limit by one order and resume merging */
		max_order++;
		goto continue_merging;
	}

done_merging:

	// record the final order on the (possibly merged) block
	set_page_order(page, order);

	/*
	 * If this is not the largest possible page, check if the buddy
	 * of the next-highest order is free. If it is, it's possible
	 * that pages are being freed that will coalesce soon. In case,
	 * that is happening, add the free page to the tail of the list
	 * so it's less likely to be used soon and more likely to be merged
	 * as a higher order page
	 */
	 // merging stopped at this order: check whether the buddy of the next-higher (order + 1) block is already free
	if ((order < MAX_ORDER-2) && pfn_valid_within(page_to_pfn(buddy))) {
		struct page *higher_page, *higher_buddy;
		combined_idx = buddy_idx & page_idx;//index of the first page of the would-be merged block
		higher_page = page + (combined_idx - page_idx); //first page of the would-be merged block
		buddy_idx = __find_buddy_index(combined_idx, order + 1);
		higher_buddy = higher_page + (buddy_idx - combined_idx);
		if (page_is_buddy(higher_page, higher_buddy, order + 1)) {
		// add page to the tail of this order's free_list so it is less likely to be handed out before it can merge
			list_add_tail(&page->lru,
				&zone->free_area[order].free_list[migratetype]);
			goto out;
		}
	}
	// otherwise insert page at the head of this order's free_list
	list_add(&page->lru, &zone->free_area[order].free_list[migratetype]);
out:
	zone->free_area[order].nr_free++;
	if (is_migrate_cma(migratetype))
		zone->free_area[order].nr_free_cma++;
}


  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值