This chapter describes the structures used to keep account of memory banks (nodes), pages, and flags that affect VM behavior.
Node:
typedef struct pglist_data {
____zone_t node_zones[MAX_NR_ZONES];
/*This is the order of zones that allocations are preferred from.*/
____zonelist_t node_zonelists[GFP_ZONEMASK+1];
____int nr_zones;
____struct page *node_mem_map; /*指向node中page数组的第一个page的指针*/
____unsigned long *valid_addr_bitmap;
____struct bootmem_data *bdata; /*boot memory allocator used it*/
____unsigned long node_start_paddr; /*结点的起始物理地址*/
____unsigned long node_start_mapnr; /*结点在全局mem_map中的偏移*/
____unsigned long node_size; /*node中的页面总数*/
____int node_id;
____struct pglist_data *node_next;
} pg_data_t;
Zone:
/*
* On machines where it is needed (eg PCs) we divide physical memory
* into multiple physical zones. On a PC we have 3 zones:
*
* ZONE_DMA_ < 16 MB___ISA DMA capable memory
* ZONE_NORMAL__16-896 MB___direct mapped by the kernel
* ZONE_HIGHMEM_ > 896 MB___only page cache and user processes
*/
typedef struct zone_struct {
____/*
____ * Commonly accessed fields:
____ */
____spinlock_t______lock;
____unsigned long_______free_pages; /*zone available pages*/
____unsigned long_______pages_min, pages_low, pages_high;
____int_________need_balance; /*当可用页面到达watermarks时,需要kswapd balance管理区*/
____/*
____ * free areas of different sizes
____ */
____free_area_t_____free_area[MAX_ORDER]; /*伙伴系统使用*/
/*This is a hash table of wait queues of processes waiting on a page to be freed. */
____wait_queue_head_t___* wait_table;
/* This is the number of queues in the hash table, which is a power of 2. */
____unsigned long_______wait_table_size;
____unsigned long_______wait_table_shift;
____/*
____ * Discontig memory support fields.
____ */
____struct pglist_data__*zone_pgdat;
/*This is the first page in the global mem map that this zone refers to.*/
____struct page_____*zone_mem_map;
____unsigned long_______zone_start_paddr;/*zone的起始物理地址*/
____unsigned long_______zone_start_mapnr;/*zone中在mem_map中的偏移*/
____/*
____ * rarely used fields:
____ */
____char____________*name;
____unsigned long_______size;
} zone_t;
Memory within ZONE_NORMAL is directly mapped by the kernel into the upper region of the linear address space.
ZONE_HIGHMEM is the remaining available memory in the system and is not directly mapped by the kernel.
1、node, zone。
2、zone watermarks:
pages_low:
当zone中空闲页面个数降到pages_low时,buddy allocator会唤醒kswapd(kswapd is woken up by the buddy allocator),开始释放页面。
pages_min:
当进程分配页面时,如果剩余的空闲页面个数降到pages_min,分配页面的进程会自己同步地(synchronously)执行本应由kswapd完成的释放页面工作。
pages_high:
当kswapd执行释放页面后,系统中空闲的页面数量达到pages_high时,kswapd will go back to sleep.
每个zone的大小怎么计算?通过setup_memory()来计算。
min_low_pfn:PFN_UP(__pa(&_end));
max_low_pfn:max_pfn
max_pfn:
/*
 * Excerpt: walk every e820 map entry and keep the largest end page
 * frame number seen in max_pfn. Code left out of the excerpt is marked
 * with "...".
 */
for (i = 0; i < e820.nr_map; i++) {
    /* ... */
    start = PFN_UP(e820.map[i].addr);
    end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
    /* Skip entries whose computed range is empty. */
    if (start >= end)
        continue;
    if (end > max_pfn)
        max_pfn = end;
    /* ... */
}
zone initialization:
每个zone大小的计算与设置:
paging_init()->zone_sizes_init()
/*
 * Compute the size of each zone and hand the resulting table to
 * free_area_init().
 *
 * The sizes are derived from three values: max_dma (MAX_DMA_ADDRESS
 * converted with virt_to_phys() and shifted right by PAGE_SHIFT),
 * max_low_pfn and highend_pfn.
 */
static void __init zone_sizes_init(void)
{
    unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
    unsigned int max_dma, high, low;

    max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
    low = max_low_pfn;
    high = highend_pfn;

    if (low < max_dma) {
        /* Everything below max_low_pfn fits under the DMA limit. */
        zones_size[ZONE_DMA] = low;
    } else {
        zones_size[ZONE_DMA] = max_dma;
        zones_size[ZONE_NORMAL] = low - max_dma;
#ifdef CONFIG_HIGHMEM
        /* Whatever lies between max_low_pfn and highend_pfn. */
        zones_size[ZONE_HIGHMEM] = high - low;
#endif
    }

    free_area_init(zones_size);
}
/*
 * Thin wrapper: forwards zones_size to free_area_init_core() with node
 * id 0, contig_page_data as the node descriptor and mem_map as the
 * global map; the remaining three arguments are passed as 0.
 */
void __init free_area_init(unsigned long *zones_size)
{
    free_area_init_core(0, &contig_page_data, &mem_map, zones_size, 0, 0, 0);
}
/*
 * Excerpt of free_area_init_core(): only the part that records each
 * zone's size is shown. Code left out of the excerpt is marked with
 * "...".
 */
void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap,
    unsigned long *zones_size, unsigned long zone_start_paddr,
    unsigned long *zholes_size, struct page *lmem_map)
{
    /* ... */
    for (j = 0; j < MAX_NR_ZONES; j++) {
        /* ... */
        /* The size computed by zone_sizes_init() ends up in zone->size. */
        realsize = size = zones_size[j];
        zone->size = size;
        /* ... */
    }
    /* ... */
}