内核中所有涉及到页面释放的函数最终都会调用到__free_pages
/*
 * Release the block of pages that starts at @page and has the given @order.
 * Nothing is freed unless put_page_testzero() returns true (presumably the
 * caller held the last reference -- confirm against its definition).
 * The leading numbers are line numbers of the quoted source listing.
 */
1683 fastcall void __free_pages(struct page *page, unsigned int order)
1684 {
1685 if (put_page_testzero(page)) {
1686 if (order == 0) /* order 0: take the free_hot_page() path */
1687 free_hot_page(page);
1688 else /* any other order: hand the block to __free_pages_ok() */
1689 __free_pages_ok(page, order);
1690 }
1691 }
1685 put_page_testzero先把页的引用计数减1,只有计数减到0时才会真正释放页面。
1686~1689 如果释放的是单页(order为0),那么不会直接还给buddy系统,而是先放到per-CPU页缓存中;否则调用__free_pages_ok释放多个页。
free_hot_page仅仅是free_hot_cold_page的一个封装(cold参数传0,即按“热页”处理):
985 /*
986 * Free a 0-order page
987 */
/*
 * The page is queued on a per-CPU list selected by @cold instead of being
 * handed back directly; once that list holds pcp->high or more pages,
 * pcp->batch of them are passed to free_pages_bulk().
 * The leading numbers are line numbers of the quoted source listing.
 */
988 static void fastcall free_hot_cold_page(struct page *page, int cold)
989 {
990 struct zone *zone = page_zone(page);
991 struct per_cpu_pages *pcp;
992 unsigned long flags;
993
994 if (PageAnon(page)) /* anonymous page: clear its mapping pointer */
995 page->mapping = NULL;
996 if (free_pages_check(page)) /* non-zero result: give up, page is not queued */
997 return;
998
999 if (!PageHighMem(page)) /* page_address() is only used for non-highmem pages */
1000 debug_check_no_locks_freed(page_address(page), PAGE_SIZE);
1001 arch_free_page(page, 0);
1002 kernel_map_pages(page, 1, 0);
1003
1004 pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; /* get_cpu() is paired with put_cpu() at the end */
1005 local_irq_save(flags); /* list and counter updates run between irq save/restore */
1006 __count_vm_event(PGFREE);
1007 list_add(&page->lru, &pcp->list); /* queue the page on the per-CPU list */
1008 set_page_private(page, get_pageblock_migratetype(page)); /* stash the migratetype in page private */
1009 pcp->count++;
1010 if (pcp->count >= pcp->high) { /* list reached its high mark */
1011 free_pages_bulk(zone, pcp->batch, &pcp->list, 0); /* pass pcp->batch pages on in one call */
1012 pcp->count -= pcp->batch;
1013 }
1014 local_irq_restore(flags);
1015 put_cpu();
1016 }
这个函数的前半部分(994~1002)主要是释放前的合法性检查和调试钩子,这里不深究。1004~1009 把页面挂到当前CPU的per-CPU页缓存链表(pcp->list)上,并增加计数pcp->count。
1010~1013 如果缓存的页面数达到或超过上限pcp->high,就把pcp->batch个页面一次性地还给buddy系统。该策略称为惰性合并(lazy coalescing),目的是避免反复的“释放 -> 分配 -> 释放”操作导致频繁的合并与拆分。
__free_pages_ok经过一些类似的检查之后,最终调用到__free_one_page:
389 /*
390 * Freeing function for a buddy system allocator.
391 *
392 * The concept of a buddy system is to maintain direct-mapped table
393 * (containing bit values) for memory blocks of various "orders".
394 * The bottom level table contains the map for the smallest allocatable
395 * units of memory (here, pages), and each level above it describes
396 * pairs of units from the levels below, hence, "buddies".
397 * At a high level, all that happens here is marking the table entry
398 * at the bottom level available, and propagating the changes upward
399 * as necessary, plus some accounting needed to play nicely with other
400 * parts of the VM system.
401 * At each level, we keep a list of pages, which are heads of continuous
402 * free pages of length of (1 << order) and marked with PG_buddy. Page's
403 * order is recorded in page_private(page) field.
404 * So when we are allocating or freeing one, we can derive the state of the
405 * other. That is, if we allocate a small block, and both were
406 * free, the remainder of the region must be split into blocks.
407 * If a block is freed, and its buddy is also free, then this
408 * triggers coalescing into a block of larger size.
409 *
410 * -- wli
411 */
412
/*
 * Put the block of (1 << order) pages starting at @page on a free list of
 * @zone, first merging it with its buddy for as long as page_is_buddy()
 * accepts the pair and order stays below MAX_ORDER-1.
 * The leading numbers are line numbers of the quoted source listing.
 */
413 static inline void __free_one_page(struct page *page,
414 struct zone *zone, unsigned int order)
415 {
416 unsigned long page_idx;
417 int order_size = 1 << order; /* number of pages in the block being freed */
418 int migratetype = get_pageblock_migratetype(page); /* picks the free list used at the end */
419
420 if (unlikely(PageCompound(page)))
421 destroy_compound_page(page, order);
422
423 page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); /* pfn reduced to its low MAX_ORDER bits */
424
425 VM_BUG_ON(page_idx & (order_size - 1)); /* block start must be aligned to its own size */
426 VM_BUG_ON(bad_range(zone, page));
427
428 __mod_zone_page_state(zone, NR_FREE_PAGES, order_size); /* account all pages of the block at once */
429 while (order < MAX_ORDER-1) {
430 unsigned long combined_idx;
431 struct page *buddy;
432
433 buddy = __page_find_buddy(page, page_idx, order);
434 if (!page_is_buddy(page, buddy, order)) /* buddy cannot be merged: stop here */
435 break; /* Move the buddy up one level. */
436
/* NOTE(review): the "Move the buddy up one level" remark above appears to describe the statements below, not the break. */
437 list_del(&buddy->lru); /* take the buddy off its free list */
438 zone->free_area[order].nr_free--;
439 rmv_page_order(buddy);
440 combined_idx = __find_combined_index(page_idx, order);
441 page = page + (combined_idx - page_idx); /* page now refers to the start of the merged block */
442 page_idx = combined_idx;
443 order++; /* retry one order higher */
444 }
445 set_page_order(page, order); /* only the first page of the final block is updated */
446 list_add(&page->lru,
447 &zone->free_area[order].free_list[migratetype]);
448 zone->free_area[order].nr_free++; /* one more free block at this order */
449 }
该函数是__free_pages的核心
429~444 在把内存区还给buddy系统之前,首先检查它的伙伴是否空闲;如果是则将两者合并,并到更高一阶继续尝试,直到无法合并或阶数达到MAX_ORDER-1为止。
433 __page_find_buddy是一个辅助函数,负责找到给定page和order对应的buddy
434 判断找到的buddy是否真的可以与page合并(即它本身也是一个空闲的、阶数同为order的内存区);如果不能,则终止循环。
437~442 先把buddy从它所在的空闲链表中摘下,然后用合并后区域的首个页面作为新的page来代表整个内存区。
445 从这一行可以看出,只有内存区的首个page需要记录order并设置buddy标记。
446~447 把这个内存区通过lru链接到对应阶free_area中相应迁移类型的空闲链表上。
448 增加该阶free_area的空闲计数nr_free(以内存区为单位,而不是以页为单位)。