linux内存管理之伙伴系统(建立)

 内核使用伙伴系统来解决内存分配引起的外部碎片问题。

一、数据结构描述

结构zone中的free_area数组用来描述伙伴系统,该数组的每个元素都是一个free_area结构:

/* Excerpt of struct zone; "……" marks members elided by the article. */
struct zone {
……
	/* Buddy-allocator free areas, indexed by allocation order (MAX_ORDER entries). */
	struct free_area	free_area[MAX_ORDER];
……
};
/*
 * Free-block bookkeeping for one allocation order of a zone.
 */
struct free_area {/* five list types according to the article; the per-type split is described as a newer addition */
	/* One list head per migrate type (MIGRATE_TYPES is defined elsewhere). */
	struct list_head	free_list[MIGRATE_TYPES];
	/* Free counter for this order; reset to 0 by zone_init_free_lists(). */
	unsigned long		nr_free;
};

下图为伙伴系统在管理区中的表示。


 

二、伙伴系统的初始化

伙伴系统是在初始化物理内存管理区(zone)的时候一并初始化的,具体实现位于下面这条调用链的最后一个函数中:

start_kernel()->setup_arch()->paging_init()->zone_sizes_init()->free_area_init_nodes()->free_area_init_node()->free_area_init_core()->init_currently_empty_zone()->zone_init_free_lists()

/*
 * Reset the buddy free lists of @zone: for every (order, migrate type)
 * pair the list head is initialised to empty, and the free counter of the
 * corresponding order is set to zero.
 */
static void __meminit zone_init_free_lists(struct zone *zone)
{
	int ord, mtype;

	for_each_migratetype_order(ord, mtype) {
		struct free_area *area = &zone->free_area[ord];

		INIT_LIST_HEAD(&area->free_list[mtype]);
		area->nr_free = 0;
	}
}

三、伙伴系统中数据初始化

将bootmem分配器所管理的内存(包括其位图本身占用的页面)回收到伙伴系统中,调用路径如下:

start_kernel()->mm_init()->mem_init()

/*
 * mem_init - hand boot-time memory over to the buddy allocator and report it.
 *
 * Steps visible here: release bootmem pages via free_all_bootmem(), count
 * reserved low-memory RAM pages, initialise highmem via
 * set_highmem_pages_init(), print the memory summary and the virtual
 * memory layout, sanity-check the layout boundaries (build time and run
 * time), then save the page directory and zap the low mappings.
 */
void __init mem_init(void)
{
	int codesize, reservedpages, datasize, initsize;
	int tmp;
/* Hardware-specific setup. */
	pci_iommu_alloc();

#ifdef CONFIG_FLATMEM
	BUG_ON(!mem_map);
#endif
	/* this will put all low memory onto the freelists */
	/*
	 * Release the bootmem memory to the buddy system, including the
	 * bitmap owned by bootmem itself; the return value is the total
	 * number of pages released.
	 */
	totalram_pages += free_all_bootmem();

	reservedpages = 0;
	for (tmp = 0; tmp < max_low_pfn; tmp++)
		/*
		 * Only count reserved RAM pages:
		 */
		if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
			reservedpages++;
	/* Initialise the highmem zones and put their pages into the buddy system. */
	set_highmem_pages_init();
	/* Sizes of the kernel code, data and init sections, in bytes. */
	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
	/* Print a summary of the memory state after initialisation. */
	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
			"%dk reserved, %dk data, %dk init, %ldk highmem)\n",
		nr_free_pages() << (PAGE_SHIFT-10),
		num_physpages << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10,
		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
	       );

	printk(KERN_INFO "virtual kernel memory layout:\n"
		"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
#ifdef CONFIG_HIGHMEM
		"    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
#endif
		"    vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
		"    lowmem  : 0x%08lx - 0x%08lx   (%4ld MB)\n"
		"      .init : 0x%08lx - 0x%08lx   (%4ld kB)\n"
		"      .data : 0x%08lx - 0x%08lx   (%4ld kB)\n"
		"      .text : 0x%08lx - 0x%08lx   (%4ld kB)\n",
		FIXADDR_START, FIXADDR_TOP,
		(FIXADDR_TOP - FIXADDR_START) >> 10,

#ifdef CONFIG_HIGHMEM
		PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
		(LAST_PKMAP*PAGE_SIZE) >> 10,
#endif

		VMALLOC_START, VMALLOC_END,
		(VMALLOC_END - VMALLOC_START) >> 20,

		(unsigned long)__va(0), (unsigned long)high_memory,
		((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,

		(unsigned long)&__init_begin, (unsigned long)&__init_end,
		((unsigned long)&__init_end -
		 (unsigned long)&__init_begin) >> 10,

		(unsigned long)&_etext, (unsigned long)&_edata,
		((unsigned long)&_edata - (unsigned long)&_etext) >> 10,

		(unsigned long)&_text, (unsigned long)&_etext,
		((unsigned long)&_etext - (unsigned long)&_text) >> 10);

	/*
	 * Check boundaries twice: Some fundamental inconsistencies can
	 * be detected at build time already.
	 */
	/*
	 * __FIXADDR_TOP and high_memory are temporarily redefined as
	 * constants for the BUILD_BUG_ON() checks and undefined again
	 * afterwards; the same checks are then repeated with BUG_ON().
	 */
#define __FIXADDR_TOP (-PAGE_SIZE)
#ifdef CONFIG_HIGHMEM
	BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE	> FIXADDR_START);
	BUILD_BUG_ON(VMALLOC_END			> PKMAP_BASE);
#endif
#define high_memory (-128UL << 20)
	BUILD_BUG_ON(VMALLOC_START			>= VMALLOC_END);
#undef high_memory
#undef __FIXADDR_TOP

#ifdef CONFIG_HIGHMEM
	BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE	> FIXADDR_START);
	BUG_ON(VMALLOC_END				> PKMAP_BASE);
#endif
	BUG_ON(VMALLOC_START				>= VMALLOC_END);
	BUG_ON((unsigned long)high_memory		> VMALLOC_START);

	if (boot_cpu_data.wp_works_ok < 0)
		test_wp_bit();

	save_pg_dir();
	/*
	 * Clear the low-memory mappings with zap_low_mappings().
	 * Article's explanation (not verifiable from this function alone):
	 * kernel threads only access kernel space, never user space; the
	 * low mappings that exist at this point are the identity mapping
	 * originally built for the first 8MB, which filled entries 0 and 1
	 * of the temporary kernel page global directory. The PGD entries
	 * covering user space (below 3G) are cleared here.
	 */
	zap_low_mappings(true);
}
/**
 * free_all_bootmem - hand the bootmem allocator's free pages to the buddy allocator
 *
 * Operates on the bootmem data attached to node 0.
 *
 * Returns the number of pages actually released.
 */
unsigned long __init free_all_bootmem(void)
{
	bootmem_data_t *node0_bdata = NODE_DATA(0)->bdata;

	return free_all_bootmem_core(node0_bdata);
}
/*
 * free_all_bootmem_core - release every free bootmem page of one node.
 *
 * Walks the node's bootmem bitmap one machine word (BITS_PER_LONG pages)
 * at a time, frees the pages whose bit is clear, and finally frees the
 * pages that hold the bitmap itself.
 *
 * @bdata: bootmem descriptor of the node to drain.
 *
 * Returns the total number of pages released (bitmap pages included),
 * or 0 if the node has no bootmem bitmap.
 */
static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
{
	int aligned;
	struct page *page;
	unsigned long start, end, pages, count = 0;

	if (!bdata->node_bootmem_map)
		return 0;
	/* First and last page frame numbers covered by this node's bootmem data. */
	start = bdata->node_min_pfn;
	end = bdata->node_low_pfn;

	/*
	 * If the start is aligned to the machines wordsize, we might
	 * be able to free pages in bulks of that order.
	 */
	aligned = !(start & (BITS_PER_LONG - 1));

	bdebug("nid=%td start=%lx end=%lx aligned=%d\n",
		bdata - bootmem_node_data, start, end, aligned);
	/* Release all memory tracked by this bootmem bitmap. */
	while (start < end) {
		unsigned long *map, idx, vec;

		map = bdata->node_bootmem_map;
		idx = start - bdata->node_min_pfn;/* offset relative to the first pfn */
		vec = ~map[idx / BITS_PER_LONG];/* inverted bitmap word: a set bit in vec is treated as a free page below */
		/*
		 * Bulk path: if the start pfn is word aligned, all
		 * BITS_PER_LONG pages of this word are free, and the whole
		 * word lies strictly below end, release them as a single
		 * block of order ilog2(BITS_PER_LONG) (32 pages, i.e. 1<<5,
		 * on a 32-bit machine).
		 */
		if (aligned && vec == ~0UL && start + BITS_PER_LONG < end) {
			int order = ilog2(BITS_PER_LONG);/* 5 on 32-bit */
			/* Release the block to the buddy system. */
			__free_pages_bootmem(pfn_to_page(start), order);
			count += BITS_PER_LONG;/* account the released pages */
		} else {
			unsigned long off = 0;
			/* vec != 0 means some page in this word is free; off is the bit index within the word, starting at 0 */
			while (vec && off < BITS_PER_LONG) {
				if (vec & 1) {/* this page is free */
                    /* translate the offset into its struct page */
					page = pfn_to_page(start + off);
                    /* pages are released one at a time on this path */
					__free_pages_bootmem(page, 0);/* release a single page */
					count++;/* account the released page */
				}
				vec >>= 1;/* shift right by one to look at the next page */
				off++;/* advance the bit index */
			}
		}
		start += BITS_PER_LONG;/* move on to the next bitmap word */
	}
	/*
	 * Convert the bitmap's virtual address to its struct page; the
	 * memory occupied by the bootmem bitmap itself is released next.
	 */
	page = virt_to_page(bdata->node_bootmem_map);
	pages = bdata->node_low_pfn - bdata->node_min_pfn;
	
	/* Number of pages used by the bootmem allocator, i.e. by its bitmap. */
	pages = bootmem_bootmap_pages(pages);
	count += pages;/* include the bitmap pages in the total */
	while (pages--)/* release the bitmap one page per iteration,
		pages iterations in total */
		__free_pages_bootmem(page++, 0);

	bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);

	return count;/* total number of pages released */
}
/*
 * permit the bootmem allocator to evade page validation on high-order frees
 *
 * __free_pages_bootmem - release a boot-time block of 2^order pages.
 * @page:  first page of the block
 * @order: log2 of the number of pages in the block
 *
 * Every page of the block has its reserved flag cleared and its reference
 * count set to 0; the head page is then given a reference so that the
 * final __free_pages() call drops it and hands the block to the buddy
 * allocator.
 *
 * The number of pages is derived from @order. The previous version looped
 * a fixed BITS_PER_LONG times, which was only correct for the single order
 * its caller happened to pass (ilog2(BITS_PER_LONG)). Order 0 no longer
 * needs a special case: the loop runs once and __free_pages(page, 0)
 * releases the single page.
 */
void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
{
	unsigned int nr_pages = 1U << order;
	unsigned int loop;

	/*
	 * prefetchw() is a cache hint: it asks the CPU to fetch the data for
	 * writing ahead of the stores below. It has no functional effect.
	 */
	prefetchw(page);
	for (loop = 0; loop < nr_pages; loop++) {
		struct page *p = &page[loop];

		/* Prefetch the next struct page while this one is processed. */
		if (loop + 1 < nr_pages)
			prefetchw(p + 1);
		__ClearPageReserved(p);
		set_page_count(p, 0);
	}

	/* Give the head page a reference for __free_pages() to drop. */
	set_page_refcounted(page);
	/*
	 * NOTE(review): which free list the block lands on is decided inside
	 * __free_pages() from information attached to the page, not here.
	 */
	__free_pages(page, order);
}
/*
 * Walk all zones and, for each highmem zone, release the pages in its pfn
 * span to the buddy allocator. Afterwards the highmem page total is added
 * to totalram_pages.
 */
void __init set_highmem_pages_init(void)
{
	struct zone *z;

	for_each_zone(z) {
		/* Zones that are not highmem are left untouched. */
		if (is_highmem(z)) {
			unsigned long first_pfn = z->zone_start_pfn;
			unsigned long limit_pfn = first_pfn + z->spanned_pages;
			/* Node this zone belongs to. */
			int node = zone_to_nid(z);

			printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n",
					z->name, node, first_pfn, limit_pfn);
			/* Put the memory of this pfn range into the buddy system. */
			add_highpages_with_active_regions(node, first_pfn,
					 limit_pfn);
		}
	}

	totalram_pages += totalhigh_pages;
}
/*
 * Release the highmem pages of node @nid that fall inside the pfn range
 * [@start_pfn, @end_pfn): the range is packed into an add_highpages_data
 * and add_highpages_work_fn() is run over the node's active regions.
 */
void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
					      unsigned long end_pfn)
{
	struct add_highpages_data window;

	window.end_pfn = end_pfn;
	window.start_pfn = start_pfn;

	/* Each active region of the node is offered to the work function. */
	work_with_active_regions(nid, add_highpages_work_fn, &window);
}
/*
 * Invoke @work_fn on every active range of node @nid, passing the range's
 * start/end pfn and the opaque @data pointer. Iteration stops at the first
 * non-zero return value. Used during highmem initialisation.
 */
void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
{
	int idx;

	for_each_active_range_index_in_nid(idx, nid) {
		if (work_fn(early_node_map[idx].start_pfn,
			    early_node_map[idx].end_pfn, data))
			break;
	}
}
/*
 * add_highpages_work_fn - release the highmem pages of one active region.
 * @start_pfn: first pfn of the active region
 * @end_pfn:   end pfn of the active region (exclusive)
 * @datax:     struct add_highpages_data describing the pfn range of interest
 *
 * Frees every valid page in the intersection of the active region and the
 * requested range to the buddy allocator.
 *
 * Always returns 0, so work_with_active_regions() keeps iterating.
 */
static int __init add_highpages_work_fn(unsigned long start_pfn,
					 unsigned long end_pfn, void *datax)
{
	struct add_highpages_data *data = datax;
	unsigned long final_start_pfn, final_end_pfn;
	/*
	 * The pfn cursor uses the same type as the bounds it is compared
	 * with; it used to be a plain int, which narrowed the start value
	 * and mixed signed and unsigned operands in the loop condition.
	 */
	unsigned long node_pfn;
	struct page *page;

	/* Intersection of the active region and the requested range. */
	final_start_pfn = max(start_pfn, data->start_pfn);
	final_end_pfn = min(end_pfn, data->end_pfn);
	if (final_start_pfn >= final_end_pfn)
		return 0;

	for (node_pfn = final_start_pfn; node_pfn < final_end_pfn;
	     node_pfn++) {
		/* Skip pfns that are not valid. */
		if (!pfn_valid(node_pfn))
			continue;
		page = pfn_to_page(node_pfn);
		/* Initialise the page's count and release it to the buddy system. */
		add_one_highpage_init(page, node_pfn);
	}

	return 0;
}
/*
 * add_one_highpage_init - release a single highmem page to the buddy system.
 * @page: the page to release
 * @pfn:  page frame number of @page (not used in this body)
 *
 * Also increments the global totalhigh_pages counter.
 */
static void __init add_one_highpage_init(struct page *page, int pfn)
{
	/* Clear the reserved flag in the page's flags; per the original note this marks the page as dynamic memory. */
	ClearPageReserved(page);
	init_page_count(page);/* per the original note: sets the page's count to 1 */
	__free_page(page);	/* release the page to the buddy system */
	totalhigh_pages++;/* update the total number of highmem pages */
}
/*
 * zap_low_mappings - clear the swapper_pg_dir entries below KERNEL_PGD_BOUNDARY.
 * @early: if true only __flush_tlb() is called afterwards, otherwise
 *         flush_tlb_all().
 */
void zap_low_mappings(bool early)
{
	int i;

	/*
	 * Zap initial low-memory mappings.
	 *
	 * Note that "pgd_clear()" doesn't do it for
	 * us, because pgd_clear() is a no-op on i386.
	 */
	/*
	 * Article's explanation: this simply clears the first several page
	 * global directory entries that were set up earlier in
	 * arch/x86/kernel/head_32.S. How many? (0xc0000000 >> 22) & 1023 =
	 * 768; those entries cover the first 3G of virtual addresses, i.e.
	 * the user area, and all of them are cleared here.
	 * NOTE(review): the 768 figure assumes a 3G/1G split without PAE;
	 * the loop itself uses KERNEL_PGD_BOUNDARY.
	 */
	for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
#ifdef CONFIG_X86_PAE
		set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
#else
		set_pgd(swapper_pg_dir+i, __pgd(0));
#endif
	}

	if (early)
		__flush_tlb();
	else
		flush_tlb_all();
}

到此,伙伴系统已经建立并且里面存放了应有的内存数据。要从伙伴系统中分配内存,必须要有分配和释放机制。后面总结具体的分配和释放工作。

  • 2
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值