==================================
本文系本站原创,欢迎转载!
转载请注明出处:http://blog.csdn.net/gdt_A20
==================================
一、摘要
前面说过的bootmem分配器只不过是早期内存分配的一种方法,linux最终底层是以伙伴系统为模型,
伙伴系统的基本原理以页为最小单元,可以按2^0,2^1,2^2,...2^20...2^n (n <= 11)进行分配,再组合,
这种分配方式下可以在一定程度上处理碎片问题.建立在页之上的结构是区,由于页的物理地址问题,
实际内存比较大,dma专用性强等,所以内核
把页划分为不同的区,常见区包括:
ZONE_DMA---用于dma操作,
ZONE_NORMAL---直接映射的页,比如耳熟能详的896M,
ZONE_HIGHMEM---高端内存。
pcp冷热页缓存
buddy部分有冷热页缓存的概念,用zone的pageset成员实现,页是热的代表页位于cpu告诉缓存中,数据能够更快的被
访问,冷页,表示页面不在告诉缓存中,
计划分三个部分来说明:
1.相关数据结构
2.分配与释放
3.从bootmem到buddy的过渡
二、相关数据结构
1.对于一页内存(比如4k),kernel中用一个struct page结构来标识
include/linux/mm_types.h中
struct page {
unsigned long flags; //页的状态
struct address_space *mapping; //该页所在地址空间的描述指针
struct {
union {
pgoff_t index;
void *freelist;
};
union {
unsigned long counters; //页的引用计数
struct {
union {
atomic_t _mapcount; //页映射计数器
struct {
unsigned inuse:16;
unsigned objects:15;
unsigned frozen:1;
};
};
atomic_t _count; /* Usage count, see below. */
};
};
};
/* Third double word block */
union {
struct list_head lru; /* Pageout list, eg. active_list
* protected by zone->lru_lock !
*/
struct { /* slub per cpu partial pages */
struct page *next; /* Next partial slab */
#ifdef CONFIG_64BIT
int pages; /* Nr of partial slabs left */
int pobjects; /* Approximate # of objects */
#else
short int pages;
short int pobjects;
#endif
};
};
/* Remainder is not double word aligned */
union {
unsigned long private;
#if USE_SPLIT_PTLOCKS
spinlock_t ptl;
#endif
struct kmem_cache *slab; /* SLUB: Pointer to slab */
struct page *first_page; /* Compound tail pages */
};
#if defined(WANT_PAGE_VIRTUAL)
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
unsigned long debug_flags; /* Use atomic bitops on this */
#endif
#ifdef CONFIG_KMEMCHECK
void *shadow;
#endif
}
2.区的标识,区的标识有同样的一个数据结构:
include/linux/mm_zone.h
struct zone {
/* Fields commonly accessed by the page allocator */
/* zone watermarks, access with *_wmark_pages(zone) macros */
unsigned long watermark[NR_WMARK];
/*
* When free pages are below this point, additional steps are taken
* when reading the number of free pages to avoid per-cpu counter
* drift allowing watermarks to be breached
*/
unsigned long percpu_drift_mark;
/*
* We don't know if the memory that we're going to allocate will be freeable
* or/and it will be released eventually, so to avoid totally wasting several
* GB of ram we must reserve some of the lower zone memory (otherwise we risk
* to run OOM on the lower zones despite there's tons of freeable ram
* on the higher zones). This array is recalculated at runtime if the
* sysctl_lowmem_reserve_ratio sysctl changes.
*/
unsigned long lowmem_reserve[MAX_NR_ZONES];
#ifdef CONFIG_NUMA
int node;
/*
* zone reclaim becomes active if more unmapped pages exist.
*/
unsigned long min_unmapped_pages;
unsigned long min_slab_pages;
#endif
struct per_cpu_pageset __percpu *pageset;
/*
* free areas of different sizes
*/
spinlock_t lock;
int all_unreclaimable; /* All pages pinned */
#ifdef CONFIG_MEMORY_HOTPLUG
/* see spanned/present_pages for more description */
seqlock_t span_seqlock;
#endif
struct free_area free_area[MAX_ORDER]; //空闲结构
#ifndef CONFIG_SPARSEMEM
/*
* Flags for a pageblock_nr_pages block. See pageblock-flags.h.
* In SPARSEMEM, this map is stored in struct mem_section
*/
unsigned long *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */
#ifdef CONFIG_COMPACTION
/*
* On compaction failure, 1<<compact_defer_shift compactions
* are skipped before trying again. The number attempted since
* last failure is tracked with compact_considered.
*/
unsigned int compact_considered;
unsigned int compact_defer_shift;
#endif
ZONE_PADDING(_pad1_)
/* Fields commonly accessed by the page reclaim scanner */
spinlock_t lru_lock;
struct zone_lru {
struct list_head list;
} lru[NR_LRU_LISTS];
struct zone_reclaim_stat reclaim_stat;
unsigned long pages_scanned; /* since last reclaim */
unsigned long flags; /* zone flags, see below */
/* Zone statistics */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
/*
* The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
* this zone's LRU. Maintained by the pageout code.
*/
unsigned int inactive_ratio;
ZONE_PADDING(_pad2_)
/* Rarely used or read-mostly fields */
/*
* wait_table -- the array holding the hash table
* wait_table_hash_nr_entries -- the size of the hash table array
* wait_table_bits -- wait_table_size == (1 << wait_table_bits)
wait_queue_head_t * wait_table;
unsigned long wait_table_hash_nr_entries;
unsigned long wait_table_bits;
/*
* Discontig memory support fields.
*/
struct pglist_data *zone_pgdat; //与此连接的管理结构
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
unsigned long zone_start_pfn; //起始标号
unsigned long spanned_pages; /* total size, including holes */
unsigned long present_pages; /* amount of memory (excluding holes) */
/*
* rarely used fields:
*/
const char *name; //name
} ____cacheline_internodealigned_in_smp;
3、节点
/linux/bootmem.h
typedef struct bootmem_data {
unsigned long node_min_pfn; //该节点的最小页号
unsigned long node_low_pfn; //该节点的最大页号
void *node_bootmem_map; //指向bootmem位图
unsigned long last_end_off; //用于快速分配
unsigned long hint_idx;
struct list_head list; //连接到下一个节点
} bootmem_data_t;
4.全局页mem_map结构
setup_arch-->paging_init--->bootmem_init-->arm_bootmem_free-->free_area_init_node-->alloc_node_mem_map
size = (end - start) * sizeof(struct page);
map = alloc_remap(pgdat->node_id, size);
分配内存用于存放该节点所有页
用pgdat->node_mem_map标识
static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
{
/* Skip empty nodes */
if (!pgdat->node_spanned_pages)
return;
#ifdef CONFIG_FLAT_NODE_MEM_MAP
/* ia64 gets its own node_mem_map, before this, without bootmem */
if (!pgdat->node_mem_map) {
unsigned long size, start, end;
struct page *map;
/*
* The zone's endpoints aren't required to be MAX_ORDER
* aligned but the node_mem_map endpoints must be in order
* for the buddy allocator to function correctly.
*/
start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
end = pgdat->node_start_pfn + pgdat->node_spanned_pages;
end = ALIGN(end, MAX_ORDER_NR_PAGES);
size = (end - start) * sizeof(struct page);
map = alloc_remap(pgdat->node_id, size);
if (!map)
map = alloc_bootmem_node_nopanic(pgdat, size);
pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
/*
* With no DISCONTIG, the global mem_map is just set as node 0's
*/
if (pgdat == NODE_DATA(0)) {
mem_map = NODE_DATA(0)->node_mem_map;
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
}
#endif
#endif /* CONFIG_FLAT_NODE_MEM_MAP */
}
三、总结:简要罗列了buddy会用到的相关数据结构;