一. 物理内存初始化
本文主要介绍系统启动后物理内存的初始化流程,以及 bootmem 分配器的初始化,并对相关数据结构和函数作详细说明。
数据结构
// Memory information for the whole system, described as a list of banks.
struct meminfo {
int nr_banks; // number of banks in use
unsigned long end; // end physical address of all memory
struct {
unsigned long start; // start physical address of the bank
unsigned long size; // size of the bank
int node; // id of the node this bank belongs to (-1 marks an empty bank)
} bank[NR_BANKS];
};
// Per-node page-frame range and bootmem bitmap size.
struct node_info {
unsigned int start; // first page frame number of the node (bank start addresses are rounded up to a page)
unsigned int end; // end page frame number of the node; 0 means the node has no pages
int bootmap_pages; // number of pages occupied by this node's bootmem bitmap
};
// Boot memory allocator state for one node.
typedef struct bootmem_data {
unsigned long node_boot_start; // start physical address of the node
unsigned long node_low_pfn; // end page frame number of the node
void *node_bootmem_map; // virtual address of the node's bootmem bitmap
unsigned long last_offset; // in-page offset of the previous bootmem allocation (per the original note; not used in the code shown here)
unsigned long last_pos; // index of the page where the previous bootmem allocation ended (per the original note; not used in the code shown here)
} bootmem_data_t;
// A memory node.
typedef struct pglist_data {
	zone_t node_zones[MAX_NR_ZONES];		// zones contained in this node
	zonelist_t node_zonelists[GFP_ZONEMASK+1];	// zone preference order used for allocations
	int nr_zones;					// number of zones
	struct page *node_mem_map;			// first struct page of the node, located somewhere inside the mem_map array
	unsigned long *valid_addr_bitmap;		// bitmap of valid addresses, for memory systems with holes
	struct bootmem_data *bdata;			// boot memory allocator state for this node
	unsigned long node_start_paddr;			// start physical address of the node
	unsigned long node_start_mapnr;			// offset of the node within the global mem_map array
	unsigned long node_size;			// number of pages in the node
	int node_id;					// node id, starting from 0
	struct pglist_data *node_next;			// next node in the node list
} pg_data_t;
内存初始化流程
内存初始化入口setup_arch
setup_arch :
- 使用默认配置初始化meminfo
- 初始化init_mm:_text、_etext、_edata、_end 都是链接器(链接脚本)预定义的符号,&_text、&_etext、&_edata、&_end 依次表示代码段的起始地址、代码段结束地址、数据段结束地址、bss段结束地址。这类符号只有地址而没有存储空间,因此用法与普通C变量不同(只能取地址),关于在源码中引用链接脚本符号,详见LD说明手册3.5.5节(Source Code Reference)
- 调用parse_cmdline 解析启动参数,配置meminfo
- bootmem_init : boot memory allocator初始化
- paging_init
/*
 * Excerpt of the architecture-specific setup entry point; elided parts
 * are marked with "...".  The visible memory-related steps are:
 *   [1] install a default single-bank meminfo if none is configured,
 *   [2] record the kernel section boundaries in init_mm,
 *   [3] parse the command line (which may reconfigure meminfo),
 *   [4] initialise the bootmem allocator,
 *   [5] initialise paging.
 */
void __init setup_arch(char **cmdline_p)
{
struct tag *tags = (struct tag *)&init_tags;
struct machine_desc *mdesc;
char *from = default_command_line;
...
// [1] No bank has been configured yet: fall back to the default configuration.
if (meminfo.nr_banks == 0) {
meminfo.nr_banks = 1;
meminfo.bank[0].start = PHYS_OFFSET;
meminfo.bank[0].size = MEM_SIZE;
} // end of [1]
// [2] _text, _etext, _edata and _end are symbols whose addresses are taken
// here; only their addresses are used, never their values.
init_mm.start_code = (unsigned long) &_text;
init_mm.end_code = (unsigned long) &_etext;
init_mm.end_data = (unsigned long) &_edata;
init_mm.brk = (unsigned long) &_end;
// end of [2]
....
parse_cmdline(&meminfo, cmdline_p, from); // [3] parse the boot arguments and configure meminfo
bootmem_init(&meminfo); // [4] physical memory (bootmem allocator) initialisation
paging_init(&meminfo, mdesc); // [5] paging initialisation
request_standard_resources(&meminfo, mdesc);
...
}
parse_cmdline
1.根据启动参数mem=size@addr , 重新配置meminfo
/*
 * Excerpt of the command-line parser; elided parts are marked with "...",
 * so the braces shown here do not balance.  The visible code appends one
 * memory bank to *mi from a size[@start] argument and copies the remaining
 * characters of the command line into the buffer that *cmdline_p is set to.
 */
parse_cmdline(struct meminfo *mi, char **cmdline_p, char *from)
{
...
// Default start address, used when no "@start" part is given.
start = PHYS_OFFSET;
// NOTE(review): "from + 4" presumably skips the "mem=" prefix - confirm against the elided code.
size = memparse(from + 4, &from);
if (*from == '@')
start = memparse(from + 1, &from);
// [1] Append the parsed range as a new bank and count it.
mi->bank[mi->nr_banks].start = start;
mi->bank[mi->nr_banks].size = size;
mi->bank[mi->nr_banks].node = PHYS_TO_NID(start);
mi->nr_banks += 1;
...
} // end of [1]
// Copy one character; stop at the terminating NUL or once
// COMMAND_LINE_SIZE characters have been counted.
c = *from++;
if (!c)
break;
if (COMMAND_LINE_SIZE <= ++len)
break;
*to++ = c;
}
*to = '\0';
*cmdline_p = command_line;
}
bootmem_init
- find_memend_and_nodes(mi, np); // 返回bootmem allocator全部节点映射表(bitmap)所占的总页数
- find_bootmap_pfn:返回启动页映射表起始页帧号
/*
 * Initialise the bootmem allocator for all nodes. This is called
 * early during the architecture specific initialisation.
 *
 * This is an excerpt; an elided part is marked with "...".
 */
void __init bootmem_init(struct meminfo *mi)
{
struct node_info node_info[NR_NODES], *np = node_info;
unsigned int bootmap_pages, bootmap_pfn, map_pg;
int node, initrd_node;
bootmap_pages = find_memend_and_nodes(mi, np); // 1. total number of bootmem bitmap pages over all nodes
bootmap_pfn = find_bootmap_pfn(0, mi, bootmap_pages); // 2. first page frame number of the bootmem bitmaps (searched on node 0)
initrd_node = check_initrd(mi);
// map_pg walks through the bitmap area, one node's bitmap after another.
map_pg = bootmap_pfn;
/*
* Initialise the bootmem nodes.
*
* What we really want to do is:
*
* unmap_all_regions_except_kernel();
* for_each_node_in_reverse_order(node) {
* map_node(node);
* allocate_bootmem_map(node);
* init_bootmem_node(node);
* free_bootmem_node(node);
* }
*
* but this is a 2.5-type change. For now, we just set
* the nodes up in reverse order.
*
* (we could also do with rolling bootmem_init and paging_init
* into one generic "memory_init" type function).
*/
np += numnodes - 1;
for (node = numnodes - 1; node >= 0; node--, np--) {
/*
* If there are no pages in this node, ignore it.
* Note that node 0 must always have some pages.
*/
if (np->end == 0) {
if (node == 0)
BUG();
continue;
}
/*
* Initialise the bootmem allocator.
*/
init_bootmem_node(NODE_DATA(node), map_pg, np->start, np->end);
free_bootmem_node_bank(node, mi);
map_pg += np->bootmap_pages;
/*
* If this is node 0, we need to reserve some areas ASAP -
* we may use bootmem on node 0 to setup the other nodes.
*/
if (node == 0)
reserve_node_zero(bootmap_pfn, bootmap_pages);
}
...
// Sanity check: the per-node bitmaps must exactly fill the area sized above.
if (map_pg != bootmap_pfn + bootmap_pages)
BUG();
}
/*
 * Walk the bank table in @mi and derive, for every node:
 *   - its page-frame range (written to @np[node].start / .end),
 *   - the number of pages its bootmem bitmap needs (@np[node].bootmap_pages).
 *
 * As side effects the function grows numnodes to cover every node id seen,
 * marks empty banks with node -1, and updates max_low_pfn and mi->end from
 * the highest end page frame found.
 *
 * Returns the total number of bootmem bitmap pages over all nodes.
 */
static unsigned int __init
find_memend_and_nodes(struct meminfo *mi, struct node_info *np)
{
	unsigned int idx;
	unsigned int total_bitmap_pages = 0;
	unsigned int highest_pfn = 0;
	struct node_info *slot;

	/* Start every node off as "empty": start at the maximum, end at 0. */
	for (slot = np; slot < np + NR_NODES; slot++) {
		slot->start = -1U;
		slot->end = 0;
		slot->bootmap_pages = 0;
	}

	for (idx = 0; idx < mi->nr_banks; idx++) {
		unsigned long first_pfn, last_pfn;
		int nid;

		/* An empty bank gets an invalid node number and is skipped. */
		if (mi->bank[idx].size == 0) {
			mi->bank[idx].node = -1;
			continue;
		}

		nid = mi->bank[idx].node;
		if (nid >= numnodes) {
			numnodes = nid + 1;

			/*
			 * More nodes than this configuration supports is
			 * fatal (limiting instead of bugging might be an
			 * option).
			 */
			if (numnodes > NR_NODES)
				BUG();
		}

		/* Page-frame range covered by this bank. */
		first_pfn = O_PFN_UP(mi->bank[idx].start);
		last_pfn = O_PFN_DOWN(mi->bank[idx].start + mi->bank[idx].size);

		/* Widen the node's range and the global end as needed. */
		slot = &np[nid];
		if (first_pfn < slot->start)
			slot->start = first_pfn;
		if (last_pfn > slot->end)
			slot->end = last_pfn;
		if (last_pfn > highest_pfn)
			highest_pfn = last_pfn;
	}

	/* Size the bootmem bitmap of every node that actually has pages. */
	for (idx = 0; idx < numnodes; idx++) {
		slot = &np[idx];
		if (slot->end != 0) {
			slot->bootmap_pages =
				bootmem_bootmap_pages(slot->end - slot->start);
			total_bitmap_pages += slot->bootmap_pages;
		}
	}

	/*
	 * max_low_pfn no longer seems to be used by the Linux memory
	 * manager; dropping it would let some of the code above go too.
	 */
	max_low_pfn = highest_pfn - O_PFN_DOWN(PHYS_OFFSET);
	mi->end = highest_pfn << PAGE_SHIFT;

	return total_bitmap_pages;
}
bootmem bitmap : 页的位映射图,每一个页对应一个位,节点映射图的大小根据节点包含页数计算,大小以页为单位对齐。
find_memend_and_nodes: 输入meminfo,输出node_info ,计算每个节点的 bitmap pages ,返回the number of bootmem bitmap pages,即节点 bitmap pages之和。
/*
 * Find where the bootmem bitmaps can be placed.
 *
 * Scans the banks of @node in order and picks the first one that, once
 * clipped so it does not start below the page frame following &_end, still
 * has at least @bootmap_pages page frames.  Returns the first page frame
 * number of that area; calls BUG() when no bank qualifies.
 */
static unsigned int __init
find_bootmap_pfn(int node, struct meminfo *mi, unsigned int bootmap_pages)
{
	const unsigned int kernel_end_pfn = V_PFN_UP(&_end);
	unsigned int found_pfn = 0;
	unsigned int idx;

	for (idx = 0; idx < mi->nr_banks; idx++) {
		unsigned int first, limit;

		if (mi->bank[idx].node != node)
			continue;

		first = O_PFN_UP(mi->bank[idx].start);
		limit = O_PFN_DOWN(mi->bank[idx].size +
				   mi->bank[idx].start);

		/* Bank lies entirely below the end of the kernel image. */
		if (limit < kernel_end_pfn)
			continue;

		/* Never hand out page frames below the end of the image. */
		if (first < kernel_end_pfn)
			first = kernel_end_pfn;

		/* Take the first non-empty bank that is large enough. */
		if (limit > first && limit - first >= bootmap_pages) {
			found_pfn = first;
			break;
		}
	}

	if (!found_pfn)
		BUG();

	return found_pfn;
}
find_bootmap_pfn:
第6行:_end 是链接脚本(vmlinux-armv.lds.in)中的符号,不同于源码中的符号。
“_end = .” 的含义:符号 _end 的地址等于bss段的结束地址。链接脚本定义的符号只有地址,链接器并没有为它分配存储空间,因此在C代码中只能取它的地址(&_end),不能读取或保存它的“值”。关于链接的符号定义详见LD说明手册3.5.5节
因此start_pfn是紧接在KERNEL的bss段之后的第一个页帧(向上页对齐);bootmap_pfn取该节点中第一个在start_pfn之后仍能容纳bootmap_pages个页的bank区间的起始页帧,也就是说bootmap通常紧跟在KERNEL的后面(页对齐)
init_bootmem_core :
/*
 * One-time setup of the bootmem allocator for a node.
 *
 * @pgdat:    node being set up; it is pushed onto the head of pgdat_list
 * @mapstart: page frame number where the node's bitmap is stored
 * @start:    first page frame number of the node
 * @end:      end page frame number of the node
 *
 * Returns the bitmap size in bytes (one bit per page frame, rounded up to
 * a multiple of sizeof(long)).
 */
static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
	unsigned long mapstart, unsigned long start, unsigned long end)
{
	bootmem_data_t *bd = pgdat->bdata;
	unsigned long bitmap_bytes;

	/* Link the node in at the head of the global node list. */
	pgdat->node_next = pgdat_list;
	pgdat_list = pgdat;

	/* One bit per page frame, rounded up to whole bytes, then to whole longs. */
	bitmap_bytes = ((end - start) + 7) / 8;
	bitmap_bytes = (bitmap_bytes + (sizeof(long) - 1UL)) & ~(sizeof(long) - 1UL);

	bd->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
	bd->node_boot_start = (start << PAGE_SHIFT);
	bd->node_low_pfn = end;

	/*
	 * Every page starts out reserved; setup_arch() has to register
	 * the free RAM areas explicitly afterwards.
	 */
	memset(bd->node_bootmem_map, 0xff, bitmap_bytes);

	return bitmap_bytes;
}
该函数先初始化每个节点的bootmem 分配器的数据结构bootmem_data_t:
node_bootmem_map:节点对应的bootmap起始虚拟地址
node_boot_start :节点起始物理地址
node_low_pfn :节点的结束页帧号
并初始化该节点的bootmap:保留全部页
/*
 * Mark the whole pages inside [addr, addr + size) as free in the node's
 * bootmem bitmap.
 *
 * The start is rounded up and the end rounded down to a page boundary, so
 * partially covered pages stay reserved.  BUG() is raised when @size is 0,
 * when the range ends beyond the node's last page frame, or when a page in
 * the range was not marked reserved.
 */
static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size)
{
	unsigned long range_end = addr + size;
	/* Round the end down: a partially covered last page stays reserved. */
	unsigned long last_bit = (range_end - bdata->node_boot_start) / PAGE_SIZE;
	unsigned long end_pfn = range_end / PAGE_SIZE;
	unsigned long first_pfn;
	unsigned long bit;

	if (!size)
		BUG();
	if (end_pfn > bdata->node_low_pfn)
		BUG();

	/* Round the start up to the next page boundary. */
	first_pfn = (addr + PAGE_SIZE - 1) / PAGE_SIZE;
	bit = first_pfn - (bdata->node_boot_start / PAGE_SIZE);

	while (bit < last_bit) {
		/* The bit must have been set; freeing a free page is a bug. */
		if (!test_and_clear_bit(bit, bdata->node_bootmem_map))
			BUG();
		bit++;
	}
}
free_bootmem_core: 清除节点bootmap中 [addr, addr+size) 范围对应的位(起始地址向上取整到页,结束地址向下取整到页,不完整的页仍视为保留),即把该范围内的完整页标记为已释放;如果某一位原本就已被清除(重复释放),则触发BUG()。
参考文献:
LD链接脚本
https://sourceware.org/binutils/docs/ld/Source-Code-Reference.html