free_pages 是伙伴算法释放内存的入口,有两种情形:
一种是释放单个page(order 等于 0)到当前cpu的per-cpu缓存(pcp)中;
另一种是将 order 大于 0 的内存块直接释放到伙伴系统中;
具体看下面代码:
/*
 * free_pages - free the pages identified by a kernel virtual address.
 * @addr:  virtual address of the first page; 0 is silently ignored
 * @order: order passed through unchanged to __free_pages()
 *
 * The address is checked with virt_addr_valid() (under VM_BUG_ON) and then
 * converted to its struct page with virt_to_page().
 */
void free_pages(unsigned long addr, unsigned int order)
{
	if (addr == 0)
		return;

	VM_BUG_ON(!virt_addr_valid((void *)addr));
	__free_pages(virt_to_page((void *)addr), order);
}
/*
 * __free_pages - release a block of pages starting at @page.
 * @page:  first page of the block
 * @order: order of the block
 *
 * Nothing is freed unless put_page_testzero() returns true (presumably
 * meaning the last reference was just dropped -- confirm against its
 * definition). An order-0 page goes to free_hot_cold_page() as a hot page
 * (cold == 0); anything larger goes to __free_pages_ok().
 */
void __free_pages(struct page *page, unsigned int order)
{
	if (!put_page_testzero(page))
		return;

	if (order != 0)
		__free_pages_ok(page, order);
	else
		free_hot_cold_page(page, 0);
}
接下来先看释放单个page情形:释放时都是按照hot page进行释放(free_hot_cold_page 的具体实现见后文);
而order大于0的时候,通过下面过程进行内存回收:
/*
 * __free_pages_ok - free a block of pages through free_one_page().
 * @page:  first page of the block
 * @order: order of the block
 *
 * Returns without freeing when free_pages_prepare() returns zero.
 * Otherwise, with local interrupts disabled, it counts 1 << order PGFREE
 * events, stamps the page with its pageblock's migrate type and passes the
 * block to free_one_page() for the page's zone.
 */
static void __free_pages_ok(struct page *page, unsigned int order)
{
	unsigned long irq_state;
	struct zone *zone;
	int mt;

	if (!free_pages_prepare(page, order))
		return;

	local_irq_save(irq_state);

	__count_vm_events(PGFREE, 1 << order);

	mt = get_pageblock_migratetype(page);
	set_freepage_migratetype(page, mt);

	zone = page_zone(page);
	free_one_page(zone, page, order, mt);

	local_irq_restore(irq_state);
}
其中 free_one_page 最终调用的核心函数也是 __free_one_page(page, zone, order, migratetype);
接下来重点介绍下这个函数:
回顾一下,free_pages 释放内存有两种情形:
一种是释放单个page到当前cpu cache区pcp中;
另一种是将内存释放到伙伴系统中;
具体看下面代码:
/*
 * free_pages - free the pages identified by a kernel virtual address.
 * @addr:  virtual address of the first page; 0 is silently ignored
 * @order: order passed through unchanged to __free_pages()
 *
 * The address is checked with virt_addr_valid() (under VM_BUG_ON) and then
 * converted to its struct page with virt_to_page().
 */
void free_pages(unsigned long addr, unsigned int order)
{
	if (addr == 0)
		return;

	VM_BUG_ON(!virt_addr_valid((void *)addr));
	__free_pages(virt_to_page((void *)addr), order);
}
/*
 * __free_pages - release a block of pages starting at @page.
 * @page:  first page of the block
 * @order: order of the block
 *
 * Nothing is freed unless put_page_testzero() returns true (presumably
 * meaning the last reference was just dropped -- confirm against its
 * definition). An order-0 page goes to free_hot_cold_page() as a hot page
 * (cold == 0); anything larger goes to __free_pages_ok().
 */
void __free_pages(struct page *page, unsigned int order)
{
	if (!put_page_testzero(page))
		return;

	if (order != 0)
		__free_pages_ok(page, order);
	else
		free_hot_cold_page(page, 0);
}
接下来先看释放单个page情形:释放时都是按照hot page进行释放(cold 参数传 0),
表示这个page极有可能很快会被再次使用到;
/*
 * free_hot_cold_page - release a single (order-0) page.
 * @page: page to release
 * @cold: non-zero queues the page at the tail of the per-cpu list,
 *        zero queues it at the head
 *
 * The page is normally parked on the current CPU's per-cpu page list for
 * its migrate type. A page whose migrate type is an isolate type bypasses
 * the per-cpu list and goes directly to free_one_page(). Once the per-cpu
 * page count reaches pcp->high, pcp->batch pages are pushed back through
 * free_pcppages_bulk().
 */
void free_hot_cold_page(struct page *page, int cold)
{
	struct zone *zone = page_zone(page);
	struct per_cpu_pages *cache;
	unsigned long irq_state;
	int mt;

	/* Stop here if the pre-free preparation returns zero. */
	if (!free_pages_prepare(page, 0))
		return;

	/* Tag the page with the migrate type of the pageblock it lives in. */
	mt = get_pageblock_migratetype(page);
	set_freepage_migratetype(page, mt);

	local_irq_save(irq_state);
	__count_vm_event(PGFREE);

	/*
	 * Only unmovable, reclaimable and movable pages are tracked on the
	 * pcp lists. ISOLATE pages are being offlined, so they go straight
	 * back to the allocator. Every other type beyond the pcp range
	 * (e.g. RESERVE) is treated as movable so those areas can be
	 * reclaimed if necessary without freeing excessively into the page
	 * allocator.
	 */
	if (mt >= MIGRATE_PCPTYPES && unlikely(is_migrate_isolate(mt))) {
		free_one_page(zone, page, 0, mt);
	} else {
		if (mt >= MIGRATE_PCPTYPES)
			mt = MIGRATE_MOVABLE;

		/* Cold pages go to the tail, hot pages to the head. */
		cache = &this_cpu_ptr(zone->pageset)->pcp;
		if (!cold)
			list_add(&page->lru, &cache->lists[mt]);
		else
			list_add_tail(&page->lru, &cache->lists[mt]);

		/* Over the high mark: give one batch back in bulk. */
		if (++cache->count >= cache->high) {
			free_pcppages_bulk(zone, cache->batch, cache);
			cache->count -= cache->batch;
		}
	}

	local_irq_restore(irq_state);
}
/*
 * free_pcppages_bulk - hand pages from a per-cpu page structure to
 * __free_one_page().
 * @zone:  zone whose lock is held for the whole operation
 * @count: number of pages to take off the pcp lists
 * @pcp:   per-cpu page structure whose lists[] are drained
 *
 * Pages are removed from the tail (list->prev) of the pcp lists and passed
 * one at a time, at order 0, to __free_one_page(). pcp->count is not
 * touched here; free_hot_cold_page() subtracts the batch itself after
 * calling this function.
 *
 * NOTE(review): the list-selection loop below only terminates once it finds
 * a non-empty list, so callers presumably guarantee that the pcp lists hold
 * at least @count pages -- confirm.
 */
static void free_pcppages_bulk(struct zone *zone, int count,
struct per_cpu_pages *pcp)
{
int migratetype = 0;
int batch_free = 0;
int to_free = count;
spin_lock(&zone->lock);
zone->pages_scanned = 0;
while (to_free) {
struct page *page;
struct list_head *list;
/*
* Remove pages from lists in a round-robin fashion. A
* batch_free count is maintained that is incremented when an
* empty list is encountered. This is so more pages are freed
* off fuller lists instead of spinning excessively around empty
* lists
*/
do {
batch_free++;
if (++migratetype == MIGRATE_PCPTYPES)
migratetype = 0;
list = &pcp->lists[migratetype];
} while (list_empty(list));
/* This is the only non-empty list. Free them all. */
if (batch_free == MIGRATE_PCPTYPES)
batch_free = to_free;
/*
 * Free up to batch_free pages from the chosen list; stop early when
 * to_free reaches zero or the list runs empty.
 */
do {
int mt; /* migratetype of the to-be-freed page */
page = list_entry(list->prev, struct page, lru);
/* must delete as __free_one_page list manipulates */
list_del(&page->lru);
mt = get_freepage_migratetype(page);
/* If the zone has an isolated pageblock, use the pageblock's type instead. */
if (unlikely(has_isolate_pageblock(zone)))
mt = get_pageblock_migratetype(page);
/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
/* Release each page into the buddy system through the call below. */
__free_one_page(page, zone, 0, mt);
trace_mm_page_pcpu_drain(page, 0, mt);
} while (--to_free && --batch_free && !list_empty(list));
}
spin_unlock(&zone->lock);
}
order大于0的时候,通过下面过程进行内存回收:
/*
 * __free_pages_ok - free a block of pages through free_one_page().
 * @page:  first page of the block
 * @order: order of the block
 *
 * Returns without freeing when free_pages_prepare() returns zero.
 * Otherwise, with local interrupts disabled, it counts 1 << order PGFREE
 * events, stamps the page with its pageblock's migrate type and passes the
 * block to free_one_page() for the page's zone.
 */
static void __free_pages_ok(struct page *page, unsigned int order)
{
	unsigned long irq_state;
	struct zone *zone;
	int mt;

	if (!free_pages_prepare(page, order))
		return;

	local_irq_save(irq_state);

	__count_vm_events(PGFREE, 1 << order);

	mt = get_pageblock_migratetype(page);
	set_freepage_migratetype(page, mt);

	zone = page_zone(page);
	free_one_page(zone, page, order, mt);

	local_irq_restore(irq_state);
}
其中 free_one_page 最终调用的核心函数也是 __free_one_page(page, zone, order, migratetype);
接下来重点介绍下这个函数:
/*
 * __free_one_page - put a block of pages on the buddy free lists, merging
 * it with free buddies where possible.
 *
 * page:        first page of the block being freed
 * zone:        zone the page belongs to
 * order:       order of the block being freed
 * migratetype: selects which free_list of the final order receives the block
 *
 * Freeing is the reverse of allocation. First check whether the block can
 * be merged with its neighbouring buddy of the same order. If it can, look
 * for the buddy of the merged (order + 1) block and try to form an
 * order + 2 block, repeating until no further merge is possible. If no
 * merge is possible, the block is simply added to the free list of its
 * order.
 *
 * Note how buddies are paired: given four blocks A -- B -- C -- D of some
 * order, (A, B) and (C, D) are buddies but (B, C) are not. The buddy index
 * is therefore computed as
 *     page_idx ^ (1 << order);
 * __find_buddy_index() locates the buddy for an index.
 * page_is_buddy() tells whether two blocks are buddies: 0 is not buddy,
 * 1 is buddy.
 */
static inline void __free_one_page(struct page *page,
struct zone *zone, unsigned int order,
int migratetype)
{
unsigned long page_idx;
unsigned long combined_idx;
unsigned long uninitialized_var(buddy_idx);
struct page *buddy;
unsigned int max_order;
// merge limit for the first pass: the smaller of MAX_ORDER and pageblock_order + 1
max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
VM_BUG_ON(!zone_is_initialized(zone));
if (unlikely(PageCompound(page)))
if (unlikely(destroy_compound_page(page, order)))
return;
VM_BUG_ON(migratetype == -1);
if (likely(!is_migrate_isolate(migratetype)))
__mod_zone_freepage_state(zone, 1 << order, migratetype);
// page index: the low MAX_ORDER bits of the page frame number
page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
VM_BUG_ON(page_idx & ((1 << order) - 1));
VM_BUG_ON(bad_range(zone, page));
continue_merging:
/*
Start at the current order; every successful merge moves the search one
order higher.
*/
while (order < max_order - 1) {
// find the buddy index of the block being freed, at the current order
buddy_idx = __find_buddy_index(page_idx, order);
buddy = page + (buddy_idx - page_idx);
if (!page_is_buddy(page, buddy, order))// page and buddy cannot merge: jump to done_merging
goto done_merging;
/*
* Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
* merge with it and move up one order.
*/
// mergeable at this order: a guard-page buddy takes the if branch, any other buddy the else branch
if (page_is_guard(buddy)) {
clear_page_guard_flag(buddy);
set_page_private(page, 0);
__mod_zone_freepage_state(zone, 1 << order,
migratetype);
} else {
// first remove the buddy from its free list and fix up the counters
list_del(&buddy->lru);
zone->free_area[order].nr_free--;
if (is_migrate_cma(migratetype))
zone->free_area[order].nr_free_cma--;
rmv_page_order(buddy);
}
// then merge buddy and page: compute the index of the merged block
combined_idx = buddy_idx & page_idx;
page = page + (combined_idx - page_idx);
page_idx = combined_idx;
order++;
// order has been raised by one; repeat
}
if (max_order < MAX_ORDER) {
/* If we are here, it means order is >= pageblock_order.
* We want to prevent merge between freepages on isolate
* pageblock and normal pageblock. Without this, pageblock
* isolation could cause incorrect freepage or CMA accounting.
*
* We don't want to hit this code for the more frequent
* low-order merging.
*/
if (unlikely(has_isolate_pageblock(zone))) {
int buddy_mt;
buddy_idx = __find_buddy_index(page_idx, order);
buddy = page + (buddy_idx - page_idx);
buddy_mt = get_pageblock_migratetype(buddy);
if (migratetype != buddy_mt
&& (is_migrate_isolate(migratetype) ||
is_migrate_isolate(buddy_mt)))
goto done_merging;
}
// raise the merge limit by one order and go back to the merge loop
max_order++;
goto continue_merging;
}
done_merging:
// record the final order of this block on its first page
set_page_order(page, order);
/*
* If this is not the largest possible page, check if the buddy
* of the next-highest order is free. If it is, it's possible
* that pages are being freed that will coalesce soon. In case,
* that is happening, add the free page to the tail of the list
* so it's less likely to be used soon and more likely to be merged
* as a higher order page
*/
// check whether the block one order up would have a free buddy
// NOTE(review): buddy and buddy_idx hold whatever the last buddy lookup above left in them
if ((order < MAX_ORDER-2) && pfn_valid_within(page_to_pfn(buddy))) {
struct page *higher_page, *higher_buddy;
combined_idx = buddy_idx & page_idx;// index of the first page of the would-be merged block
higher_page = page + (combined_idx - page_idx); // first page of the would-be merged block
buddy_idx = __find_buddy_index(combined_idx, order + 1);
higher_buddy = higher_page + (buddy_idx - combined_idx);
if (page_is_buddy(higher_page, higher_buddy, order + 1)) {
// queue the block at the tail of this order's free list (see the comment above for why)
list_add_tail(&page->lru,
&zone->free_area[order].free_list[migratetype]);
goto out;
}
}
// otherwise insert the block at the head of this order's free list
list_add(&page->lru, &zone->free_area[order].free_list[migratetype]);
out:
zone->free_area[order].nr_free++;
if (is_migrate_cma(migratetype))
zone->free_area[order].nr_free_cma++;
}