Linux-3.14.12 Memory Management Notes: The Contiguous Memory Allocator (CMA)

According to the git merge history, CMA (the Contiguous Memory Allocator) was introduced in kernel 3.5. It was developed by Samsung engineers to improve the allocation of large physically contiguous buffers under the DMA mapping framework.

The implementation grabs a region of memory at boot time, marks it with the MIGRATE_CMA migratetype, and then hands the memory back to the system. During normal operation, the page allocator may satisfy only movable allocations (movable pages) from CMA-managed memory, e.g. page cache pages that are not currently being used for DMA. When a large contiguous buffer is requested through dma_alloc_from_contiguous(), those movable pages are migrated out of the CMA area, freeing up a contiguous span large enough to satisfy the request. The net effect is that a large contiguous buffer can be obtained at any time, as long as the system as a whole still has enough free memory.
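For orientation before diving in: on 3.14-era kernels this allocator is enabled with CONFIG_DMA_CMA, the size of the default global area comes from CONFIG_CMA_SIZE_MBYTES, and it can be overridden at boot with the cma= command-line parameter (for example cma=64M). These option names are quoted from memory, so double-check them against your tree.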

Let us start from initialization. In /drivers/base/dma-contiguous.c we find the init function cma_init_reserved_areas(), which is hooked into system startup via core_initcall().

First, the implementation of cma_init_reserved_areas():

【file:/drivers/base/dma-contiguous.c】

static int __init cma_init_reserved_areas(void)
{
    int i;

    for (i = 0; i < cma_area_count; i++) {
        int ret = cma_activate_area(&cma_areas[i]);
        if (ret)
            return ret;
    }

    return 0;
}

It simply walks the cma_areas array of CMA area descriptors and calls cma_activate_area() to initialize each one. The cma_areas entries are populated during early boot: start_kernel()->setup_arch()->dma_contiguous_reserve() parses the cmdline, and dma_contiguous_reserve_area() then reserves the memory and fills in the cma_areas information. We will not dig into that path here.
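For reference, the per-area descriptor stored in cma_areas is tiny. In 3.14 it sits at the top of the same file; quoted from memory, it looks roughly like this:

struct cma {
    unsigned long   base_pfn;  /* first page frame number of the area */
    unsigned long   count;     /* area size in pages */
    unsigned long   *bitmap;   /* one bit per page; set = allocated */
};

static struct cma cma_areas[MAX_CMA_AREAS];
static unsigned cma_area_count;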

Continuing with the implementation of cma_activate_area():

【file:/drivers/base/dma-contiguous.c】

static int __init cma_activate_area(struct cma *cma)
{
    int bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long);
    unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
    unsigned i = cma->count >> pageblock_order;
    struct zone *zone;

    cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);

    if (!cma->bitmap)
        return -ENOMEM;

    WARN_ON_ONCE(!pfn_valid(pfn));
    zone = page_zone(pfn_to_page(pfn));

    do {
        unsigned j;

        base_pfn = pfn;
        for (j = pageblock_nr_pages; j; --j, pfn++) {
            WARN_ON_ONCE(!pfn_valid(pfn));
            if (page_zone(pfn_to_page(pfn)) != zone)
                return -EINVAL;
        }
        init_cma_reserved_pageblock(pfn_to_page(base_pfn));
    } while (--i);

    return 0;
}

This function initializes a single CMA area. It first kzalloc()s the allocation bitmap, then walks the area in units of pageblock_nr_pages (the number of pages in a pageblock of order pageblock_order), checking that every page frame in each pageblock is valid and that they all belong to the same zone; if a page falls into a different zone, it returns -EINVAL. Each fully verified pageblock is then handed to init_cma_reserved_pageblock().
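As a quick sanity check of the bitmap sizing, consider a hypothetical 16 MiB area with 4 KiB pages: cma->count is 4096, BITS_TO_LONGS(4096) is 64 on a 64-bit machine, so the bitmap occupies 512 bytes. A minimal userspace sketch of the same arithmetic:

#include <stdio.h>

/* userspace stand-ins for the kernel macros */
#define BITS_PER_LONG    (8 * sizeof(long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

int main(void)
{
    unsigned long count = (16UL << 20) >> 12;   /* 16 MiB / 4 KiB = 4096 pages */
    printf("bitmap: %zu bytes\n", BITS_TO_LONGS(count) * sizeof(long));
    return 0;   /* prints 512 on a 64-bit machine */
}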

The per-pageblock initialization is done by init_cma_reserved_pageblock():

【file:/drivers/base/dma-contiguous.c】

/* Free whole pageblock and set its migration type to MIGRATE_CMA. */
void __init init_cma_reserved_pageblock(struct page *page)
{
    unsigned i = pageblock_nr_pages;
    struct page *p = page;

    do {
        __ClearPageReserved(p);
        set_page_count(p, 0);
    } while (++p, --i);

    set_pageblock_migratetype(page, MIGRATE_CMA);

    if (pageblock_order >= MAX_ORDER) {
        i = pageblock_nr_pages;
        p = page;

        do {
            set_page_refcounted(p);
            __free_pages(p, MAX_ORDER - 1);
            p += MAX_ORDER_NR_PAGES;
        } while (i -= MAX_ORDER_NR_PAGES);
    } else {
        set_page_refcounted(page);
        __free_pages(page, pageblock_order);
    }

    adjust_managed_page_count(page, pageblock_nr_pages);
}

The function first clears PG_reserved and resets every page's count with set_page_count(), then marks the pageblock as MIGRATE_CMA via set_pageblock_migratetype(). Next, set_page_refcounted() re-establishes a reference count and __free_pages() releases the memory into the buddy allocator, so the pages end up on zone->free_area[order].free_list[MIGRATE_CMA]. Here order is pageblock_order, or MAX_ORDER-1 in the branch that handles architectures where a pageblock is larger than the biggest buddy block and must therefore be freed in MAX_ORDER_NR_PAGES chunks. Finally, adjust_managed_page_count() updates the managed-page accounting.

That is essentially all there is to initialization.

Allocation from CMA then happens (on x86) through dma_generic_alloc_coherent():

【file:/arch/x86/kernel/pci-dma.c】

void *dma_generic_alloc_coherent(struct device *dev, size_t size,
                 dma_addr_t *dma_addr, gfp_t flag,
                 struct dma_attrs *attrs)
{
    unsigned long dma_mask;
    struct page *page;
    unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
    dma_addr_t addr;

    dma_mask = dma_alloc_coherent_mask(dev, flag);

    flag |= __GFP_ZERO;
again:
    page = NULL;
    /* CMA can be used only in the context which permits sleeping */
    if (flag & __GFP_WAIT)
        page = dma_alloc_from_contiguous(dev, count, get_order(size));
    /* fallback */
    if (!page)
        page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
    if (!page)
        return NULL;

    addr = page_to_phys(page);
    if (addr + size > dma_mask) {
        __free_pages(page, get_order(size));

        if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
            flag = (flag & ~GFP_DMA32) | GFP_DMA;
            goto again;
        }

        return NULL;
    }

    *dma_addr = addr;
    return page_address(page);
}

To be served from the CMA area, the allocation flags must permit sleeping (__GFP_WAIT); only then is dma_alloc_from_contiguous() tried, with alloc_pages_node() as the fallback.
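From a driver's perspective, all of this hides behind dma_alloc_coherent(). A minimal sketch of a hypothetical probe function (the foo_ names and the buffer size are made up for illustration):

#include <linux/dma-mapping.h>

#define FOO_BUF_SIZE (1 << 20)  /* hypothetical 1 MiB DMA buffer */

static int foo_probe(struct device *dev)
{
    dma_addr_t dma_handle;
    void *cpu_addr;

    /* GFP_KERNEL includes __GFP_WAIT, so this request may be
     * satisfied from the CMA area; GFP_ATOMIC would skip CMA
     * and go straight to the buddy-allocator fallback. */
    cpu_addr = dma_alloc_coherent(dev, FOO_BUF_SIZE, &dma_handle, GFP_KERNEL);
    if (!cpu_addr)
        return -ENOMEM;

    /* ... program the device with dma_handle ... */

    dma_free_coherent(dev, FOO_BUF_SIZE, cpu_addr, dma_handle);
    return 0;
}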

The implementation of dma_alloc_from_contiguous():

【file:/drivers/base/dma-contiguous.c】

/**
 * dma_alloc_from_contiguous() - allocate pages from contiguous area
 * @dev:   Pointer to device for which the allocation is performed.
 * @count: Requested number of pages.
 * @align: Requested alignment of pages (in PAGE_SIZE order).
 *
 * This function allocates memory buffer for specified device. It uses
 * device specific contiguous memory area if available or the default
 * global one. Requires architecture specific get_dev_cma_area() helper
 * function.
 */
struct page *dma_alloc_from_contiguous(struct device *dev, int count,
                       unsigned int align)
{
    unsigned long mask, pfn, pageno, start = 0;
    struct cma *cma = dev_get_cma_area(dev);
    struct page *page = NULL;
    int ret;

    if (!cma || !cma->count)
        return NULL;

    if (align > CONFIG_CMA_ALIGNMENT)
        align = CONFIG_CMA_ALIGNMENT;

    pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma,
         count, align);

    if (!count)
        return NULL;

    mask = (1 << align) - 1;

    mutex_lock(&cma_mutex);

    for (;;) {
        pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
                            start, count, mask);
        if (pageno >= cma->count)
            break;

        pfn = cma->base_pfn + pageno;
        ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
        if (ret == 0) {
            bitmap_set(cma->bitmap, pageno, count);
            page = pfn_to_page(pfn);
            break;
        } else if (ret != -EBUSY) {
            break;
        }
        pr_debug("%s(): memory range at %p is busy, retrying\n",
             __func__, pfn_to_page(pfn));
        /* try again with a bit different memory target */
        start = pageno + mask + 1;
    }

    mutex_unlock(&cma_mutex);
    pr_debug("%s(): returned %p\n", __func__, page);
    return page;
}

The function obtains the device's CMA area via dev_get_cma_area(), searches its bitmap with bitmap_find_next_zero_area() for a suitably sized and aligned run of unallocated pages, and then calls alloc_contig_range() to actually claim that range. On success, the corresponding bits are set with bitmap_set() to mark the range as in use, and pfn_to_page() converts the page frame number into the struct page of the first page, which is returned.
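A quick worked example of the alignment handling: for a hypothetical 1 MiB request with 4 KiB pages, the caller passes count = 256 and align = get_order(1 MiB) = 8, so mask = (1 << 8) - 1 = 255, and the bitmap search will only return offsets that are multiples of 256 pages; CONFIG_CMA_ALIGNMENT (default 8) caps how large that alignment can grow.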

Here is the implementation of bitmap_find_next_zero_area():

【file:/lib/bitmap.c】

/*
 * bitmap_find_next_zero_area - find a contiguous aligned zero area
 * @map: The address to base the search on
 * @size: The bitmap size in bits
 * @start: The bitnumber to start searching at
 * @nr: The number of zeroed bits we're looking for
 * @align_mask: Alignment mask for zero area
 *
 * The @align_mask should be one less than a power of 2; the effect is that
 * the bit offset of all zero areas this function finds is multiples of that
 * power of 2. A @align_mask of 0 means no alignment is required.
 */
unsigned long bitmap_find_next_zero_area(unsigned long *map,
                     unsigned long size,
                     unsigned long start,
                     unsigned int nr,
                     unsigned long align_mask)
{
    unsigned long index, end, i;
again:
    index = find_next_zero_bit(map, size, start);

    /* Align allocation */
    index = __ALIGN_MASK(index, align_mask);

    end = index + nr;
    if (end > size)
        return end;
    i = find_next_bit(map, end, index);
    if (i < end) {
        start = i + 1;
        goto again;
    }
    return index;
}

The function alternates between find_next_zero_bit() and find_next_bit(): it finds the next zero bit, aligns it, and then checks whether any set bit interrupts the following nr bits; if one does, the search restarts just past it. This continues until a zero run large enough for the allocation is found, or the end of the bitmap is reached (signalled by a return value >= size).
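To make the restart logic concrete, here is a minimal userspace re-implementation with a toy 32-bit map (the simplified single-word helpers stand in for the kernel's optimized bit-search routines):

#include <stdio.h>

#define NBITS 32

static unsigned int test_bit(unsigned long map, unsigned int i)
{
    return (map >> i) & 1;
}

static unsigned int find_next_zero_bit(unsigned long map, unsigned int size,
                                       unsigned int start)
{
    while (start < size && test_bit(map, start))
        start++;
    return start;
}

static unsigned int find_next_bit(unsigned long map, unsigned int size,
                                  unsigned int start)
{
    while (start < size && !test_bit(map, start))
        start++;
    return start;
}

static unsigned int find_zero_area(unsigned long map, unsigned int size,
                                   unsigned int start, unsigned int nr,
                                   unsigned int align_mask)
{
    unsigned int index, end, i;
again:
    index = find_next_zero_bit(map, size, start);
    index = (index + align_mask) & ~align_mask;  /* __ALIGN_MASK() */
    end = index + nr;
    if (end > size)
        return end;                 /* caller sees >= size: no room */
    i = find_next_bit(map, end, index);
    if (i < end) {                  /* run interrupted by a set bit */
        start = i + 1;              /* restart just past it */
        goto again;
    }
    return index;
}

int main(void)
{
    unsigned long map = 0x22F;  /* bits 0-3, 5 and 9 already allocated */

    /* ask for 4 free bits aligned to 4 (mask 3): finds bit 12 */
    printf("found at bit %u\n", find_zero_area(map, NBITS, 0, 4, 3));
    return 0;
}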

Now the implementation of alloc_contig_range():

【file:/mm/page_alloc.c】

/**
 * alloc_contig_range() -- tries to allocate given range of pages
 * @start:       start PFN to allocate
 * @end:         one-past-the-last PFN to allocate
 * @migratetype: migratetype of the underlaying pageblocks (either
 *               #MIGRATE_MOVABLE or #MIGRATE_CMA). All pageblocks
 *               in range must have the same migratetype and it must
 *               be either of the two.
 *
 * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES
 * aligned, however it's the caller's responsibility to guarantee that
 * we are the only thread that changes migrate type of pageblocks the
 * pages fall in.
 *
 * The PFN range must belong to a single zone.
 *
 * Returns zero on success or negative error code. On success all
 * pages which PFN is in [start, end) are allocated for the caller and
 * need to be freed with free_contig_range().
 */
int alloc_contig_range(unsigned long start, unsigned long end,
               unsigned migratetype)
{
    unsigned long outer_start, outer_end;
    int ret = 0, order;

    struct compact_control cc = {
        .nr_migratepages = 0,
        .order = -1,
        .zone = page_zone(pfn_to_page(start)),
        .sync = true,
        .ignore_skip_hint = true,
    };
    INIT_LIST_HEAD(&cc.migratepages);

    /*
     * What we do here is we mark all pageblocks in range as
     * MIGRATE_ISOLATE. Because pageblock and max order pages may
     * have different sizes, and due to the way page allocator
     * work, we align the range to biggest of the two pages so
     * that page allocator won't try to merge buddies from
     * different pageblocks and change MIGRATE_ISOLATE to some
     * other migration type.
     *
     * Once the pageblocks are marked as MIGRATE_ISOLATE, we
     * migrate the pages from an unaligned range (ie. pages that
     * we are interested in). This will put all the pages in
     * range back to page allocator as MIGRATE_ISOLATE.
     *
     * When this is done, we take the pages in range from page
     * allocator removing them from the buddy system. This way
     * page allocator will never consider using them.
     *
     * This lets us mark the pageblocks back as
     * MIGRATE_CMA/MIGRATE_MOVABLE so that free pages in the
     * aligned range but not in the unaligned, original range are
     * put back to page allocator so that buddy can use them.
     */

    ret = start_isolate_page_range(pfn_max_align_down(start),
                       pfn_max_align_up(end), migratetype,
                       false);
    if (ret)
        return ret;

    ret = __alloc_contig_migrate_range(&cc, start, end);
    if (ret)
        goto done;

    /*
     * Pages from [start, end) are within a MAX_ORDER_NR_PAGES
     * aligned blocks that are marked as MIGRATE_ISOLATE. What's
     * more, all pages in [start, end) are free in page allocator.
     * What we are going to do is to allocate all pages from
     * [start, end) (that is remove them from page allocator).
     *
     * The only problem is that pages at the beginning and at the
     * end of interesting range may be not aligned with pages that
     * page allocator holds, ie. they can be part of higher order
     * pages. Because of this, we reserve the bigger range and
     * once this is done free the pages we are not interested in.
     *
     * We don't have to hold zone->lock here because the pages are
     * isolated thus they won't get removed from buddy.
     */

    lru_add_drain_all();
    drain_all_pages();

    order = 0;
    outer_start = start;
    while (!PageBuddy(pfn_to_page(outer_start))) {
        if (++order >= MAX_ORDER) {
            ret = -EBUSY;
            goto done;
        }
        outer_start &= ~0UL << order;
    }

    /* Make sure the range is really isolated. */
    if (test_pages_isolated(outer_start, end, false)) {
        pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n",
            outer_start, end);
        ret = -EBUSY;
        goto done;
    }

    /* Grab isolated pages from freelists. */
    outer_end = isolate_freepages_range(&cc, outer_start, end);
    if (!outer_end) {
        ret = -EBUSY;
        goto done;
    }

    /* Free head and tail (if any) */
    if (start != outer_start)
        free_contig_range(outer_start, start - outer_start);
    if (end != outer_end)
        free_contig_range(end, outer_end - end);

done:
    undo_isolate_page_range(pfn_max_align_down(start),
                pfn_max_align_up(end), migratetype);
    return ret;
}

This function allocates a contiguous range of pages identified by page frame numbers. Notably, the range need not be pageblock- or MAX_ORDER-aligned, but the caller must guarantee single-threaded operation on the affected pageblocks, which is why dma_alloc_from_contiguous() holds cma_mutex around the call. The range must also not cross a zone boundary.
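The alignment helpers used at entry and exit round the range out to the larger of a pageblock and a maximal buddy block; in 3.14 they live in the same file and, quoted from memory, look roughly like this:

static unsigned long pfn_max_align_down(unsigned long pfn)
{
    return pfn & ~(max_t(unsigned long, MAX_ORDER_NR_PAGES,
                 pageblock_nr_pages) - 1);
}

static unsigned long pfn_max_align_up(unsigned long pfn)
{
    return ALIGN(pfn, max_t(unsigned long,
                MAX_ORDER_NR_PAGES, pageblock_nr_pages));
}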

To understand what it actually does, let us dig into the functions it calls, starting with start_isolate_page_range():

【file:/mm/page_isolation.c】

/*
 * start_isolate_page_range() -- make page-allocation-type of range of pages
 * to be MIGRATE_ISOLATE.
 * @start_pfn: The lower PFN of the range to be isolated.
 * @end_pfn: The upper PFN of the range to be isolated.
 * @migratetype: migrate type to set in error recovery.
 *
 * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
 * the range will never be allocated. Any free pages and pages freed in the
 * future will not be allocated again.
 *
 * start_pfn/end_pfn must be aligned to pageblock_order.
 * Returns 0 on success and -EBUSY if any part of range cannot be isolated.
 */
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
                 unsigned migratetype, bool skip_hwpoisoned_pages)
{
    unsigned long pfn;
    unsigned long undo_pfn;
    struct page *page;

    BUG_ON((start_pfn) & (pageblock_nr_pages - 1));
    BUG_ON((end_pfn) & (pageblock_nr_pages - 1));

    for (pfn = start_pfn;
         pfn < end_pfn;
         pfn += pageblock_nr_pages) {
        page = __first_valid_page(pfn, pageblock_nr_pages);
        if (page &&
            set_migratetype_isolate(page, skip_hwpoisoned_pages)) {
            undo_pfn = pfn;
            goto undo;
        }
    }
    return 0;
undo:
    for (pfn = start_pfn;
         pfn < undo_pfn;
         pfn += pageblock_nr_pages)
        unset_migratetype_isolate(pfn_to_page(pfn), migratetype);

    return -EBUSY;
}

Setting the migratetype to MIGRATE_ISOLATE means that free pages in the range will never be handed out by the allocator. Note that, just as with the page-migration code discussed earlier, the type change operates in units of pageblock_nr_pages; the BUG_ON()s at the top enforce that both ends of the range are pageblock-aligned (with 4 KiB pages and 2 MiB pageblocks, a multiple of 512 page frames).

Inside it, set_migratetype_isolate():

【file:/mm/page_isolation.c】

int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages)
{
    struct zone *zone;
    unsigned long flags, pfn;
    struct memory_isolate_notify arg;
    int notifier_ret;
    int ret = -EBUSY;

    zone = page_zone(page);

    spin_lock_irqsave(&zone->lock, flags);

    pfn = page_to_pfn(page);
    arg.start_pfn = pfn;
    arg.nr_pages = pageblock_nr_pages;
    arg.pages_found = 0;

    /*
     * It may be possible to isolate a pageblock even if the
     * migratetype is not MIGRATE_MOVABLE. The memory isolation
     * notifier chain is used by balloon drivers to return the
     * number of pages in a range that are held by the balloon
     * driver to shrink memory. If all the pages are accounted for
     * by balloons, are free, or on the LRU, isolation can continue.
     * Later, for example, when memory hotplug notifier runs, these
     * pages reported as "can be isolated" should be isolated(freed)
     * by the balloon driver through the memory notifier chain.
     */
    notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
    notifier_ret = notifier_to_errno(notifier_ret);
    if (notifier_ret)
        goto out;
    /*
     * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
     * We just check MOVABLE pages.
     */
    if (!has_unmovable_pages(zone, page, arg.pages_found,
                 skip_hwpoisoned_pages))
        ret = 0;

    /*
     * immobile means "not-on-lru" paes. If immobile is larger than
     * removable-by-driver pages reported by notifier, we'll fail.
     */

out:
    if (!ret) {
        unsigned long nr_pages;
        int migratetype = get_pageblock_migratetype(page);

        set_pageblock_migratetype(page, MIGRATE_ISOLATE);
        nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);

        __mod_zone_freepage_state(zone, -nr_pages, migratetype);
    }

    spin_unlock_irqrestore(&zone->lock, flags);
    if (!ret)
        drain_all_pages();
    return ret;
}

As the function shows, before a pageblock is switched to MIGRATE_ISOLATE it verifies via has_unmovable_pages() that the block contains no unmovable pages. After the type is set, move_freepages_block() moves the block's free pages off their old freelist onto the MIGRATE_ISOLATE freelist, from which they will never be allocated.

If start_isolate_page_range() completes without problems and returns 0, the next step is __alloc_contig_migrate_range():

【file:/mm/page_alloc.c】

/* [start, end) must belong to a single zone. */
static int __alloc_contig_migrate_range(struct compact_control *cc,
                    unsigned long start, unsigned long end)
{
    /* This function is based on compact_zone() from compaction.c. */
    unsigned long nr_reclaimed;
    unsigned long pfn = start;
    unsigned int tries = 0;
    int ret = 0;

    migrate_prep();

    while (pfn < end || !list_empty(&cc->migratepages)) {
        if (fatal_signal_pending(current)) {
            ret = -EINTR;
            break;
        }

        if (list_empty(&cc->migratepages)) {
            cc->nr_migratepages = 0;
            pfn = isolate_migratepages_range(cc->zone, cc,
                             pfn, end, true);
            if (!pfn) {
                ret = -EINTR;
                break;
            }
            tries = 0;
        } else if (++tries == 5) {
            ret = ret < 0 ? ret : -EBUSY;
            break;
        }

        nr_reclaimed = reclaim_clean_pages_from_list(cc->zone,
                              &cc->migratepages);
        cc->nr_migratepages -= nr_reclaimed;

        ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
                    0, MIGRATE_SYNC, MR_CMA);
    }
    if (ret < 0) {
        putback_movable_pages(&cc->migratepages);
        return ret;
    }
    return 0;
}

The migrate_prep() call drains the per-CPU LRU pagevecs so that pages sitting in them land on the LRU lists, where they can be isolated properly.
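migrate_prep() itself is tiny; in 3.14 (mm/migrate.c) it is essentially just the following (quoted from memory):

int migrate_prep(void)
{
    /* Clear the LRU pagevecs so pages can be isolated. */
    lru_add_drain_all();

    return 0;
}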

The rest of the function is the while loop that deals with the pages that are not free; the key helpers are isolate_migratepages_range(), reclaim_clean_pages_from_list() and migrate_pages().

First, the implementation of isolate_migratepages_range():

【file:/mm/compaction.c】

/**
 * isolate_migratepages_range() - isolate all migrate-able pages in range.
 * @zone:       Zone pages are in.
 * @cc:         Compaction control structure.
 * @low_pfn:    The first PFN of the range.
 * @end_pfn:    The one-past-the-last PFN of the range.
 * @unevictable: true if it allows to isolate unevictable pages
 *
 * Isolate all pages that can be migrated from the range specified by
 * [low_pfn, end_pfn). Returns zero if there is a fatal signal
 * pending), otherwise PFN of the first page that was not scanned
 * (which may be both less, equal to or more then end_pfn).
 *
 * Assumes that cc->migratepages is empty and cc->nr_migratepages is
 * zero.
 *
 * Apart from cc->migratepages and cc->nr_migratetypes this function
 * does not modify any cc's fields, in particular it does not modify
 * (or read for that matter) cc->migrate_pfn.
 */
unsigned long
isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
               unsigned long low_pfn, unsigned long end_pfn, bool unevictable)
{
    unsigned long last_pageblock_nr = 0, pageblock_nr;
    unsigned long nr_scanned = 0, nr_isolated = 0;
    struct list_head *migratelist = &cc->migratepages;
    isolate_mode_t mode = 0;
    struct lruvec *lruvec;
    unsigned long flags;
    bool locked = false;
    struct page *page = NULL, *valid_page = NULL;
    bool skipped_async_unsuitable = false;

    /*
     * Ensure that there are not too many pages isolated from the LRU
     * list by either parallel reclaimers or compaction. If there are,
     * delay for some time until fewer pages are isolated
     */
    while (unlikely(too_many_isolated(zone))) {
        /* async migration should just abort */
        if (!cc->sync)
            return 0;

        congestion_wait(BLK_RW_ASYNC, HZ/10);

        if (fatal_signal_pending(current))
            return 0;
    }

    /* Time to isolate some pages for migration */
    cond_resched();
    for (; low_pfn < end_pfn; low_pfn++) {
        /* give a chance to irqs before checking need_resched() */
        if (locked && !((low_pfn+1) % SWAP_CLUSTER_MAX)) {
            if (should_release_lock(&zone->lru_lock)) {
                spin_unlock_irqrestore(&zone->lru_lock, flags);
                locked = false;
            }
        }

        /*
         * migrate_pfn does not necessarily start aligned to a
         * pageblock. Ensure that pfn_valid is called when moving
         * into a new MAX_ORDER_NR_PAGES range in case of large
         * memory holes within the zone
         */
        if ((low_pfn & (MAX_ORDER_NR_PAGES - 1)) == 0) {
            if (!pfn_valid(low_pfn)) {
                low_pfn += MAX_ORDER_NR_PAGES - 1;
                continue;
            }
        }

        if (!pfn_valid_within(low_pfn))
            continue;
        nr_scanned++;

        /*
         * Get the page and ensure the page is within the same zone.
         * See the comment in isolate_freepages about overlapping
         * nodes. It is deliberate that the new zone lock is not taken
         * as memory compaction should not move pages between nodes.
         */
        page = pfn_to_page(low_pfn);
        if (page_zone(page) != zone)
            continue;

        if (!valid_page)
            valid_page = page;

        /* If isolation recently failed, do not retry */
        pageblock_nr = low_pfn >> pageblock_order;
        if (!isolation_suitable(cc, page))
            goto next_pageblock;

        /*
         * Skip if free. page_order cannot be used without zone->lock
         * as nothing prevents parallel allocations or buddy merging.
         */
        if (PageBuddy(page))
            continue;

        /*
         * For async migration, also only scan in MOVABLE blocks. Async
         * migration is optimistic to see if the minimum amount of work
         * satisfies the allocation
         */
        if (!cc->sync && last_pageblock_nr != pageblock_nr &&
            !migrate_async_suitable(get_pageblock_migratetype(page))) {
            cc->finished_update_migrate = true;
            skipped_async_unsuitable = true;
            goto next_pageblock;
        }

        /*
         * Check may be lockless but that's ok as we recheck later.
         * It's possible to migrate LRU pages and balloon pages
         * Skip any other type of page
         */
        if (!PageLRU(page)) {
            if (unlikely(balloon_page_movable(page))) {
                if (locked && balloon_page_isolate(page)) {
                    /* Successfully isolated */
                    cc->finished_update_migrate = true;
                    list_add(&page->lru, migratelist);
                    cc->nr_migratepages++;
                    nr_isolated++;
                    goto check_compact_cluster;
                }
            }
            continue;
        }

        /*
         * PageLRU is set. lru_lock normally excludes isolation
         * splitting and collapsing (collapsing has already happened
         * if PageLRU is set) but the lock is not necessarily taken
         * here and it is wasteful to take it just to check transhuge.
         * Check TransHuge without lock and skip the whole pageblock if
         * it's either a transhuge or hugetlbfs page, as calling
         * compound_order() without preventing THP from splitting the
         * page underneath us may return surprising results.
         */
        if (PageTransHuge(page)) {
            if (!locked)
                goto next_pageblock;
            low_pfn += (1 << compound_order(page)) - 1;
            continue;
        }

        /* Check if it is ok to still hold the lock */
        locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
                            locked, cc);
        if (!locked || fatal_signal_pending(current))
            break;

        /* Recheck PageLRU and PageTransHuge under lock */
        if (!PageLRU(page))
            continue;
        if (PageTransHuge(page)) {
            low_pfn += (1 << compound_order(page)) - 1;
            continue;
        }

        if (!cc->sync)
            mode |= ISOLATE_ASYNC_MIGRATE;

        if (unevictable)
            mode |= ISOLATE_UNEVICTABLE;

        lruvec = mem_cgroup_page_lruvec(page, zone);

        /* Try isolate the page */
        if (__isolate_lru_page(page, mode) != 0)
            continue;

        VM_BUG_ON_PAGE(PageTransCompound(page), page);

        /* Successfully isolated */
        cc->finished_update_migrate = true;
        del_page_from_lru_list(page, lruvec, page_lru(page));
        list_add(&page->lru, migratelist);
        cc->nr_migratepages++;
        nr_isolated++;

check_compact_cluster:
        /* Avoid isolating too much */
        if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) {
            ++low_pfn;
            break;
        }

        continue;

next_pageblock:
        low_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages) - 1;
        last_pageblock_nr = pageblock_nr;
    }

    acct_isolated(zone, locked, cc);

    if (locked)
        spin_unlock_irqrestore(&zone->lru_lock, flags);

    /*
     * Update the pageblock-skip information and cached scanner pfn,
     * if the whole pageblock was scanned without isolating any page.
     * This is not done when pageblock was skipped due to being unsuitable
     * for async compaction, so that eventual sync compaction can try.
     */
    if (low_pfn == end_pfn && !skipped_async_unsuitable)
        update_pageblock_skip(cc, valid_page, nr_isolated, true);

    trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);

    count_compact_events(COMPACTMIGRATE_SCANNED, nr_scanned);
    if (nr_isolated)
        count_compact_events(COMPACTISOLATED, nr_isolated);

    return low_pfn;
}

This function isolates the movable pages in the range [low_pfn, end_pfn) and collects them on the cc->migratepages list, in preparation for the migration step that follows.

Next, reclaim_clean_pages_from_list():

【file:/mm/vmscan.c】

unsigned long reclaim_clean_pages_from_list(struct zone *zone,
                        struct list_head *page_list)
{
    struct scan_control sc = {
        .gfp_mask = GFP_KERNEL,
        .priority = DEF_PRIORITY,
        .may_unmap = 1,
    };
    unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
    struct page *page, *next;
    LIST_HEAD(clean_pages);

    list_for_each_entry_safe(page, next, page_list, lru) {
        if (page_is_file_cache(page) && !PageDirty(page) &&
            !isolated_balloon_page(page)) {
            ClearPageActive(page);
            list_move(&page->lru, &clean_pages);
        }
    }

    ret = shrink_page_list(&clean_pages, zone, &sc,
                TTU_UNMAP|TTU_IGNORE_ACCESS,
                &dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true);
    list_splice(&clean_pages, page_list);
    __mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret);
    return ret;
}

This function directly reclaims the clean file-cache pages on the list (skipping dirty pages and isolated balloon pages); reclaiming them outright is cheaper than migrating them, since they can simply be re-read from disk later.

Then on to migrate_pages():

【file:/mm/migrate.c】

/*
 * migrate_pages - migrate the pages specified in a list, to the free pages
 *                 supplied as the target for the page migration
 *
 * @from:         The list of pages to be migrated.
 * @get_new_page: The function used to allocate free pages to be used
 *                as the target of the page migration.
 * @private:      Private data to be passed on to get_new_page()
 * @mode:         The migration mode that specifies the constraints for
 *                page migration, if any.
 * @reason:       The reason for page migration.
 *
 * The function returns after 10 attempts or if no pages are movable any more
 * because the list has become empty or no retryable pages exist any more.
 * The caller should call putback_lru_pages() to return pages to the LRU
 * or free list only if ret != 0.
 *
 * Returns the number of pages that were not migrated, or an error code.
 */
int migrate_pages(struct list_head *from, new_page_t get_new_page,
        unsigned long private, enum migrate_mode mode, int reason)
{
    int retry = 1;
    int nr_failed = 0;
    int nr_succeeded = 0;
    int pass = 0;
    struct page *page;
    struct page *page2;
    int swapwrite = current->flags & PF_SWAPWRITE;
    int rc;

    if (!swapwrite)
        current->flags |= PF_SWAPWRITE;

    for(pass = 0; pass < 10 && retry; pass++) {
        retry = 0;

        list_for_each_entry_safe(page, page2, from, lru) {
            cond_resched();

            if (PageHuge(page))
                rc = unmap_and_move_huge_page(get_new_page,
                        private, page, pass > 2, mode);
            else
                rc = unmap_and_move(get_new_page, private,
                        page, pass > 2, mode);

            switch(rc) {
            case -ENOMEM:
                goto out;
            case -EAGAIN:
                retry++;
                break;
            case MIGRATEPAGE_SUCCESS:
                nr_succeeded++;
                break;
            default:
                /*
                 * Permanent failure (-EBUSY, -ENOSYS, etc.):
                 * unlike -EAGAIN case, the failed page is
                 * removed from migration page list and not
                 * retried in the next outer loop.
                 */
                nr_failed++;
                break;
            }
        }
    }
    rc = nr_failed + retry;
out:
    if (nr_succeeded)
        count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
    if (nr_failed)
        count_vm_events(PGMIGRATE_FAIL, nr_failed);
    trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason);

    if (!swapwrite)
        current->flags &= ~PF_SWAPWRITE;

    return rc;
}

This function implements the actual page migration. Its core helper is unmap_and_move(), which allocates a new page (via the get_new_page callback), moves the old page's contents over and re-establishes the mappings, so that the old page can be freed. In summary, then, __alloc_contig_migrate_range() isolates the in-use pages in the range and migrates them out of it.
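The get_new_page callback passed in here is alloc_migrate_target(); in 3.14 (mm/page_isolation.c) it simply grabs a fresh movable page from anywhere. Quoted from memory and simplified (the hugetlb special case is omitted):

struct page *alloc_migrate_target(struct page *page, unsigned long private,
                  int **resultp)
{
    gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;

    /* keep a highmem page in highmem */
    if (PageHighMem(page))
        gfp_mask |= __GFP_HIGHMEM;

    return alloc_page(gfp_mask);
}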

Back in alloc_contig_range(): after __alloc_contig_migrate_range() returns, lru_add_drain_all() is called again, presumably to catch pages that were added to the LRU pagevecs while the migration slept; drain_all_pages() then flushes the per-CPU free-page caches, releasing those pages onto the free lists (they land on the MIGRATE_ISOLATE list, per their pageblock's marking). Next, test_pages_isolated() verifies that all pages in the range really are isolated, and isolate_freepages_range() pulls the free pages of the range out of the buddy system. Finally, undo_isolate_page_range() flips the pageblocks marked MIGRATE_ISOLATE back to MIGRATE_CMA: at this point the required contiguous pages have been allocated and removed from the allocator, so their migratetype no longer matters and can be restored.
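One subtlety worth a worked example is the head/tail trimming. Suppose (hypothetical numbers) the caller asked for pfns [1030, 1040), but pfn 1030 currently sits inside a free order-4 buddy block that starts at pfn 1024:

/*
 * The PageBuddy() walk lowers outer_start until it hits the head
 * of the free block containing `start`:
 *
 *   order = 1:  outer_start = 1030 & (~0UL << 1) = 1030   not PageBuddy
 *   order = 2:  outer_start = 1030 & (~0UL << 2) = 1028   not PageBuddy
 *   order = 3:  outer_start = 1030 & (~0UL << 3) = 1024   PageBuddy -> stop
 *
 * isolate_freepages_range() then grabs the whole span [1024, 1040),
 * and the unwanted head [1024, 1030) is handed straight back with
 * free_contig_range(1024, 6); a tail beyond `end` is trimmed the
 * same way.
 */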

Finally, releasing CMA-managed memory goes through free_contig_range():

【file:/mm/page_alloc.c】

void free_contig_range(unsigned long pfn, unsigned nr_pages)
{
    unsigned int count = 0;

    for (; nr_pages--; pfn++) {
        struct page *page = pfn_to_page(pfn);

        count += page_count(page) != 1;
        __free_page(page);
    }
    WARN(count != 0, "%d pages are still in use!\n", count);
}

So freeing ends up back at __free_page(), page by page, which we will not dig into again here.
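For completeness, the driver-visible release path is dma_release_from_contiguous(), which clears the bitmap bits and calls free_contig_range(); in 3.14 it looks roughly like this (quoted from memory):

bool dma_release_from_contiguous(struct device *dev, struct page *pages,
                 int count)
{
    struct cma *cma = dev_get_cma_area(dev);
    unsigned long pfn;

    if (!cma || !pages)
        return false;

    pfn = page_to_pfn(pages);

    /* not ours: let the caller fall back to the normal free path */
    if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
        return false;

    mutex_lock(&cma_mutex);
    bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count);
    free_contig_range(pfn, count);
    mutex_unlock(&cma_mutex);

    return true;
}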

I had not originally planned to analyze CMA; curiosity got the better of me, and having gone this far I decided to write it all down. Some details remain unclear to me and deserve deeper study later, given how much ground this code touches; I will refine this once I have dug further. If you spot any misunderstanding, corrections are welcome.
