本地的笔记有点长,先把bootmem位图分配器的建立 及 使用过程做下梳理。
都是代码,上面做了标注。开始的汇编部分省略了(涉及的内容不多,除了swapper_pg_dir的分配)。
该记录不会再添加说明,看下记录中的注释就明白了bootmem的建立及使用。
该记录中考虑了高端内存……
从start_kernel开始……
start_kernel()
|---->page_address_init()
| 考虑支持高端内存
| 业务:初始化page_address_pool链表;
| 将page_address_maps数组元素按索
| 引降序插入page_address_pool链表;
| 初始化page_address_htable数组
|
|---->setup_arch(&command_line);
|
void setup_arch(char **cmdline_p)
|---->parse_tags(tags);
|---->parse_tag_mem32(tag)
|---->arm_add_memory(tag->u.mem.start,
| tag->u.mem.size);
|---->为meminfo添加内存信息
| meminfo.bank[meminfo.
| nr_banks].start = start;
| meminfo.bank[meminfo.
| nr_banks].size = size;
| meminfo.bank[meminfo.
| nr_banks].node = 0;
| meminfo.nr_banks++;
|
|----init_mm.start_code = (unsigned long)_text;
| init_mm.end_code = (unsigned long)_etext;
| init_mm.end_data = (unsigned long)_edata;
| init_mm.brk = (unsigned long)_end;
|
|---->parse_early_param()
| 注意,这里也会根据boot传入的command_line中信息来修
| 正meminfo的内存信息,此处忽略(假定command_line不含内存信息)。
|---->early_initrd(char *p)
| ramdisk
|---->phys_initrd_start = start;
|---->phys_initrd_size = size;
|
|---->paging_init(mdesc);
| bootmem位图分配器初始化,I/O空间、中断向量空间映射,
| PKMAP空间映射初始化,"0"页面建立.
|---->request_standard_resources(&meminfo, mdesc);
|
|---->smp_init_cpus()
| 对于2.6.34的ARM,我能说这个函数有问题么,这时做了ioremap?
| 获取核的个数,并在cpu_possible_bits上标注核的存在性
|
|---->cpu_init()
| 为每个核的irq、abt、und状态设置栈,每个状态只有12字节
| 栈空间(static struct stack stacks[NR_CPUS]),因为
| 基本所有的事情都在svc状态即被处理
|
|---->tcm_init()//tightly coupled memory, tks gaohao
||
|---->early_trap_init()
|---->memcpy(vectors, __vectors_start,
| __vectors_end - __vectors_start);
| memcpy(vectors + 0x200, __stubs_start,
| __stubs_end - __stubs_start);
| 拷贝中断向量
|---->memcpy(vectors + 0x1000 - kuser_sz,
| __kuser_helper_start, kuser_sz);
| ARM的特殊之处,为用户态进行原子操作提供接口,
| 即用户态直接进入该部分(3G~4G),中断处将做
| 特别检查和相应的处理.见__kuser_helper_version
|
|---->memcpy(KERN_SIGRETURN_CODE, sigreturn_codes,
| sizeof(sigreturn_codes));
|---->memcpy(KERN_RESTART_CODE, syscall_restart_code,
| sizeof(syscall_restart_code));
|
|---->flush_icache_range(vectors, vectors + PAGE_SIZE);
|---->modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
//paging_init 非常重要:
void paging_init(struct machine_desc *mdesc)
|---->build_mem_type_table()
| 此处没有深入查看ARM的页表项,
| ARM的页表项和unicore不同,我的疑问在于:
| ARM页表项中没有提供Dirty、Accessed位,那么kswap线程进行页面回收时,
| 它是怎样判定该操作哪些页?关于页表项就按unicore的理解,比较简单.
|---->sanity_check_meminfo();
| 以一块2G DRAM为例,前期meminfo.nr_banks = 1;
| 开启高端内存支持,则需将meminfo分成两个bank,
| (为什么以bank作为变量名,DRAM的物理组成就有bank的概念,
| 此处需要作出区分)
|---->struct membank *bank = &meminfo.bank[0];
| memmove(bank + 1, bank, sizeof(*bank));
| meminfo.nr_banks++;
| bank[1].size -= VMALLOC_MIN - __va(bank->start);
| bank[1].start = __pa(VMALLOC_MIN - 1) + 1;
| bank[1].highmem = 1;
| bank->size = VMALLOC_MIN - __va(bank->start);
|
|---->prepare_page_table();
| 将swapper_pg_dir处的页表清除(部分页表项已缓存在TLB中,在
| bootmem_init中会间接调用create_mapping(&map),其中会再次建立)
|
|---->bootmem_init();
| bootmem分配器初始化.
|
|---->devicemaps_init(mdesc);
| 为中断向量和I/O空间的虚拟与物理地址建立映射关系
|
|---->kmap_init()
| 永久映射区域保留,对于ARM,该区域位于3G-2M ~ 3G(即PKMAP_BASE = PAGE_OFFSET - PMD_SIZE,ARM上PMD_SIZE为2M)
|
|---->top_pmd = pmd_off_k(0xffff0000);
| 记录0xffff0000相应的一级页表项地址.
|---->zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
| 分配一个“0”页面.
|---->empty_zero_page = virt_to_page(zero_page);
| 管理"0"页面所对应的struct page虚拟地址.
|---->__flush_dcache_page(NULL, empty_zero_page);
//bootmem_init 完成位图分配器的建立,bootmem_init也使用了位图分配器进行内存分配
void bootmem_init(void)
|---->struct meminfo *mi = &meminfo;
| sort(&mi->bank, mi->nr_banks, sizeof(mi->bank[0]),
| meminfo_cmp, NULL);
| 将meminfo中的bank数组元素按其start地址升序排序
|
|---->int initrd_node = 0;
| initrd_node = check_initrd(mi);
| ramdisk在meminfo下的哪个bank
| check_initrd(mi)
|-->struct membank *bank = &mi->bank[i];
| if (bank_phys_start(bank) <= phys_initrd_start &&
| end <= bank_phys_end(bank))
| initrd_node = bank->node;
|
| return initrd_node
|
|---->for_each_node(node)
| UMA体系,只有一个node, 仅循环一次
| |---->find_node_limits(node, mi, &min, &node_low, &node_high);
| | 此处两个bank(高、低)
| | min:物理内存的最小页帧号(pfn)
| | node_low:物理内存中低端内存的最大页帧号
| | node_high:物理内存中高端内存的最大页帧号
| |
| | max_low:物理内存中低端内存的最大页帧号
| | max_high:物理内存中高端内存的最大页帧号
| |
| |---->bootmem_init_node(node, mi, min, node_low);
| | 详见后文标注;
| | 业务在于:将低端内存部分与虚拟空间做固定偏移映射,而且采用一级页表完成;
| | 采集位图分配器信息,并存放在contig_page_data.bdata
| | 内,而且将位图分配器自身所占用的物理内存在位图分配器内标记为
| | 占用,此位图分配器暂时只管理低端内存(依据meminfo.bank[0],
| | 未使用meminfo.bank[1]).
| |
| |
| |---->reserve_node_zero(&contig_page_data)
| | |---->reserve_bootmem_node(pgdat, __pa(_stext),
| | | _end - _stext, BOOTMEM_DEFAULT);
| | | 把内核中内核所占物理内存在位图分配器中标记为被占用
| | |
| | |---->reserve_bootmem_node(pgdat, __pa(swapper_pg_dir),
| | | PTRS_PER_PGD * sizeof(pgd_t), BOOTMEM_DEFAULT);
| | | 把0进程的一级页表所占用的物理内存标记为被占用,
| | | 该一级页表是我们迄今为止惟一没有在内核编译时所占用的空间
| | |
| |---->bootmem_reserve_initrd(node)
| | |---->res = reserve_bootmem_node(pgdat,
| | | phys_initrd_start,
| | | phys_initrd_size, BOOTMEM_EXCLUSIVE);
| | | 这里有个疑问:为什么是BOOTMEM_EXCLUSIVE
| | |---->initrd_start = __phys_to_virt(phys_initrd_start);
| | | initrd_end = initrd_start + phys_initrd_size;
| | | 文件系统的虚拟起始地址和结束地址
| | |
|---->for_each_node(node)
| UMA体系,只有一个node, 仅循环一次
| |---->find_node_limits(node, mi, &min, &max_low, &max_high);
| | 此处两个bank(高、低)
| | min:物理内存的最小页帧号(pfn)
| | max_low:物理内存中低端内存的最大页帧号
| | max_high:物理内存中高端内存的最大页帧号
| |
| |---->unsigned long zone_size[MAX_NR_ZONES],
| | zhole_size[MAX_NR_ZONES];
| | memset(zone_size, 0, sizeof(zone_size));
| |
| | zone_size[0] = max_low - min;
| | ZONE_NORMAL区的页帧数
| |
| | zone_size[ZONE_HIGHMEM] = max_high - max_low;
| | ZONE_HIGHMEM的页帧
| |
| | memcpy(zhole_size, zone_size, sizeof(zhole_size));
| | 从zhole_size的各个区中减去各个zone_size,
| | 结果是zhole_size数组元素都为0
| |
| |---->free_area_init_node(node, zone_size, min, zhole_size);
| | 完善contig_page_data,并调用重量级函数:
| | free_area_init_core
| |
|---->high_memory = __va((max_low << PAGE_SHIFT) - 1) + 1;
| 获取高端内存的起始虚拟地址
|
|---->max_low_pfn = max_low - PHYS_PFN_OFFSET;
| 低端内存所对应的页帧数
|
|---->max_pfn = max_high - PHYS_PFN_OFFSET;
| 总共的物理内存页帧数
void free_area_init_node(int nid, unsigned long *zones_size,
unsigned long node_start_pfn, unsigned long *zholes_size)
|---->pg_data_t *pgdat = &contig_page_data;
| pgdat->node_id = nid; (即0)
| pgdat->node_start_pfn = node_start_pfn;
| 物理内存起始地址的页帧号
|
|---->calculate_node_totalpages(pgdat, zones_size, zholes_size);
| |---->totalpages = 该pgdata下的各个区(zone)所含页的页数
| |---->pgdat->node_spanned_pages = totalpages;
| |---->realtotalpages = totalpages;
| |---->realtotalpages -= 该pgdata下各个区(zone)所含的洞的页数
| | 对于连续型,实际上不存在“洞”
| |---->pgdat->node_present_pages = realtotalpages;
| |
|---->alloc_node_mem_map(pgdat);
| 为pglist_data建立mem_map(struct page数组)
| |---->start = pgdat->node_start_pfn &
| | ~(MAX_ORDER_NR_PAGES - 1);
| | 因为最后要迁移到伙伴系统,因此做了调整
| |
| |---->end = pgdat->node_start_pfn + pgdat->node_spanned_pages;
| | end = ALIGN(end, MAX_ORDER_NR_PAGES);
| |
| |---->size = (end - start) * sizeof(struct page);
| | 为了管理pglist所跨越的总的页数目,首先获得需要申请的
| | struct page实例的内存大小.
| |
| |---->struct page *map = NULL;
| | map = alloc_bootmem_node(pgdat, size);
| | 依bootmem位图分配器申请内存
| |__alloc_bootmem_node(pgdat, size, SMP_CACHE_BYTES,
| | __pa(MAX_DMA_ADDRESS))
| | |---->ptr = alloc_bootmem_core(pgdat->bdata, size,
| | | align, goal, 0);
| | | 若位图中出现连续的未被占用的页数满足size的要求,则将在位图中
| | | 找到的相应bit位置1(标记被占用),并将对应物理页清0,返回对应
| | | 物理页的虚拟起始地址.
| | | return ptr;
| | |
| |---->pgdat->node_mem_map = map + (pgdat->node_start_pfn
| | - start);
| | 终于为pglist_data的node_mem_map域建立好了空间,所有的
| | struct page 实例均存于该空间内.
| |---->mem_map = (&contig_page_data)->node_mem_map
| |
|---->free_area_init_core(pgdat, zones_size, zholes_size)
| |详见下文
| | 初始化pgdat下的各个zone及相关信息
void free_area_init_core(struct pglist_data *pgdat,
unsigned long *zones_size,
unsigned long *zholes_size)
|---->init_waitqueue_head(&pgdat->kswapd_wait);
| pgdat->kswapd_max_order = 0;
|
|---->pgdat->nr_zones = 0;
|
|---->for(j = 0; j < MAX_NR_ZONES; j++)
| 依次建立pglist_data下的每个zone.
|
| struct zone *zone = pgdat->node_zones + j;
| unsigned long size, realsize, memmap_pages;
| enum lru_list l;
| |
| |---->size = zone_spanned_pages_in_node(nid, j, zones_size);
| | 获取该区所跨越的页的总数
| |
| | realsize = size - zone_absent_pages_in_node(nid, j,
| | zholes_size);
| | 获取该区实际可用的物理页的总数(除去“洞”)
| |
| |---->memmap_pages = PAGE_ALIGN(size * sizeof(struct page))
| | >> PAGE_SHIFT;
| | 获取因管理该区所使用的struct page实例的内存大小
| |
| |---->realsize -= memmap_pages;
| | 获取该区实际可用的物理页的总数(除去管理结构所占用页数)
| |
| |---->if(!is_highmem_idx(j)) nr_kernel_pages += realsize;
| | 将非高端内存区中,还未被所占用的页数计入nr_kernel_pages
| |
| |---->nr_all_pages += realsize;
| | 将所有还未被占用的页数计入nr_all_pages
| |
| |
| |开始为pglist_data下的各个区建立信息
| |---->zone->spanned_pages = size;
| | 将该区跨越的页数存入pglist_data下相应的
| | zone->spanned_pages.
| |---->zone->present_pages = realsize;
| | 将该区可以使用的实际页数存入pglist_data下相应的
| | zone->present_pages.
| |---->zone->name = zones_names[j];
| | 为pglist_data下相应的zone添加名称
| |---->spin_lock_init(&zone->lock);
| | spin_lock_init(&zone->lru_lock);
| |---->zone->zone_pgdat = pgdat;
| | 记录zone所在的pglist_data
| |---->zone->prev_priority = DEF_PRIORITY;
| |---->zone_pcp_init(zone);
| | WHAT:????????????????????
| |---->for_each_lru(l)
| | {INIT_LIST_HEAD(&zone->lru[l].list);
| | zone->reclaim_stat.nr_saved_scan[l] = 0;}
| |---->zone->reclaim_stat.recent_rotated[0] = 0;
| | zone->reclaim_stat.recent_rotated[1] = 0;
| | zone->reclaim_stat.recent_scanned[0] = 0;
| | zone->reclaim_stat.recent_scanned[1] = 0;
| |---->memset(zone->vm_stat, 0, sizeof(zone->vm_stat));
| |---->zone->flags = 0;
| |
| |---->setup_usemap(pgdat, zone, size);
| | 将管理该zone中的pageblock的比特位图的起始地址
| | 存入zone->pageblock_flags.
| |
| |---->init_currently_empty_zone(zone, zone_start_pfn,
| | size, MEMMAP_EARLY);
| | 详见下文
| | 分配zone的hash资源(用于进程请求页时阻塞);
| | 初始化zone的free_area,以及free_area元素下
| | 的各类free_list.
| |
| |---->memmap_init(size, nid, j, zone_start_pfn)
| | 即:memmap_init_zone(size, nid, j,
| | zone_start_pfn, MEMMAP_EARLY);
| | 详细见下文
| | 该函数的业务:
| | 修正最高的页帧数highest_memmap_pfn;
| | 获取zone所管理的页对应的struct page实例,
| | 在struct page中的flags中标注各种标志;
| | 将页所隶属的pageblock的位图标记为MIGRATE_MOVABLE;
| |
| |---->zone_start_pfn += size;
void memmap_init_zone(unsigned long size, int nid,
unsigned long zone,
unsigned long start_pfn, enum memmap_context context)
|---->struct page *page = NULL;
| unsigned long end_pfn = start_pfn + size;
| unsigned long pfn = 0;
| struct zone *z = NULL;
|
|---->if(highest_memmap_pfn < end_pfn - 1)
| highest_memmap_pfn = end_pfn - 1;
| 修正最高的页帧数
|
|---->z = &NODE_DATA(nid)->node_zones[zone];
| 获取需要操作的zone
|
|-->for(pfn = start_pfn; pfn < end_pfn; pfn++)
|-->page = pfn_to_page(pfn);
| 获取页帧号所对应的struct page实例地址
|
|-->set_page_links(page, zone, nid, pfn);
| |-->set_page_zone(page, zone);
| | 在struct page->flags中记录该页是属于哪个zone
| |-->set_page_node(page, node);
| | set_page_section(page, pfn_to_section_nr(pfn));
| | 对于单个node,实际上无需在page->flags中
| | 存储node,section信息.
| |
|-->init_page_count(page)
| |-->atomic_set(&page->_count, 1);
| | page的访问计数,当为0时,说明page是空闲的,当大于0的时
| | 候,说明page被一个或多个进程正在使用该页或者有进程在等待该页.
| | .
| |
|-->reset_page_mapcount(page)
| |-->atomic_set(&(page)->_mapcount, -1);
| |
|-->SetPageReserved(page);
| | 关于SetPageReserved请参阅:page-flags.h
| | 定义了许多宏以及page->flags各位的意义.
| |
|-->INIT_LIST_HEAD(&page->lru)
| |
|--->set_pageblock_migratetype(page, MIGRATE_MOVABLE);
| 实际上此处是先测试,若满足条件再执行,一般直接执行也没问题。
| 我们已经知道,内存中的一些页隶属于同一个pageblock,
| 而且内存所对应的zone中,已存储了管理pageblock的位图
| pageblock_flags的起始地址。此函数的任务在于将每个page
| 所属于的pageblock标记为MIGRATE_MOVABLE(即:属于该
| pageblock 中的页均MIGRATE_MOVABLE)
void setup_usemap(struct pglist_data *pgdat, struct zone *zone, unsigned long zonesize)
|---->unsigned long usemapsize = usemap_size(zonesize);
| 每个zone中的页按pageblock被分成几个block,一个
| pageblock所含页数为(1 << (MAX_ORDER - 1)),每个
| pageblock需要几个bit位来存储信息(这几个bit位的
| 作用,暂时不知道),usemap_size的作用就在于计算
| 该zone中的pageblock数所对应的bit位数,并转化成字节数.
|
|---->zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize);
| 将管理该zone中的pageblock的比特位图的起始地址
| 存入zone->pageblock_flags.
当对一个page做I/O操作的时候,该page需要被锁住,防止不正确的数据被访问。进程在访问page前,调用wait_on_page()函数,使进程加入一个等待队列。访问完后,UnlockPage()函数解除对该page的锁定,其他正在等待队列中的进程被唤醒。每个page都可以有一个等待队列,但是太多分离的等待队列会占用过多内存、花费太多的内存访问周期。替代的解决方法,就是将所有的队列放在struct zone数据结构中。如果struct zone中只有一个队列,则当一个page unlock的时候,访问这个zone里内存page的所有休眠的进程都将被唤醒,这样就会出现惊群(thundering herd)的问题。建立一个哈希表管理多个等待队列,能解决这个问题,zone->wait_table就是这个哈希表。哈希表的方法仍可能(因哈希冲突)造成一些进程不必要的唤醒。
int init_currently_empty_zone(struct zone *zone,
unsigned long zone_start_pfn,
unsigned long size,
enum memmap_context context)
|---->zone_wait_table_init(zone, size);
| 初始化zone下的hash表(用于进程等待页资源时使用,
| 我们可以将等待队列存放在各个struct page内,但是
| 这样会使struct page结构体空间太大,造成浪费,
| 因此放在了zone中,并用hash表实现).
| |---->zone->wait_table_hash_nr_entries =
| | wait_table_hash_nr_entries(size);
| | 获取所需的hash表的数组元素个数
| |
| |---->zone->wait_table_bits =
| | wait_table_bits(zone->wait_table_hash_nr_entries);
| | 获取值wait_table_hash_nr_entries中首个bit位值为1的序号
| | (从最低位0开始记起,例如1,则获取值为0)
| |
| |---->alloc_size = zone->wait_table_hash_nr_entries *
| | sizeof(wait_queue_head_t);
| | 获取所需的hash表的数组所需空间大小
| |
| |---->zone->wait_table = (wait_queue_head_t *)
| | alloc_bootmem_node(pgdat, alloc_size);
| | 分配hash表数组空间
| |
| |---->init_waitqueue_head(
| | zone->wait_table[0...wait_table_hash_nr_entries]);
| | 初始化各个队列头
| |
|---->pgdat->nr_zones = zone_idx(zone) + 1;
| 更新pgdat下的zone的数目
|
|---->zone->zone_start_pfn = zone_start_pfn;
|
|---->zone_init_free_lists(zone);
|-->for(order = 0; order < MAX_ORDER; order++)
| for(type = 0; type < MIGRATE_TYPES; type++)
| {INIT_LIST_HEAD(&zone->free_area[order].free_list[type]);
| zone->free_area[order].nr_free = 0;}
| 可以看出,每个zone除了被分为pageblock外,
| 还被分为数个free_area, 每个free_area又被
| 分为不同类型的free_list,各个free_area下
| 的各自的free_list所含的页数是不同的.
|
static void bootmem_init_node(int node, struct meminfo *mi,
unsigned long start_pfn, unsigned long end_pfn)
|---->unsigned long boot_pfn;
| unsigned int boot_pages;
| pg_data_t *pgdat;
| int i;
|
|---->for_each_nodebank(i, mi, node)
| i依次取得meminfo中的bank索引
| struct membank *bank = &mi->bank[i];
| if(!bank->highmem) map_memory_bank(bank);
| 对于低端内存所在的bank,需执行map_memory_bank(bank);
|
| map_memory_bank(bank)
|---->struct map_desc map;
| map.pfn = bank_pfn_start(bank);
| map.virtual = __phys_to_virt(bank_phys_start(bank));
| map.length = bank_phys_size(bank);
| map.type = MT_MEMORY;
| create_mapping(&map);
| |---->此处以超页映射(低端内存,一级页表即可完成映射,
| | ,减少TLB刷新)
|
|---->boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn)
| 对于低端内存,先用位图进行管理,获取bit位所需的页数
|
|---->boot_pfn = find_bootmap_pfn(node, mi, boot_pages);
| 获取内核结束地址的页号,作为寻找位图页的起始页
|
|---->pg_data_t *pgdat = NODE_DATA(node);
|---->init_bootmem_node(pgdat, boot_pfn, start_pfn, end_pfn);
|---->init_bootmem_core(pgdat->bdata, boot_pfn,
| start_pfn, end_pfn);
| 见后文对此函数的标注
|
|---->for_each_nodebank(i, mi, node)
| i依次取得meminfo中的bank索引
| struct membank *bank = &mi->bank[i];
| if(!bank->highmem)
| free_bootmem_node(pgdat, bank_phys_start(bank),
| bank_phys_size(bank));
| 对于低端内存所在的bank,需执行free_bootmem_node
|
| free_bootmem_node---->
| mark_bootmem_node(pgdat->bdata, start, end, 0, 0)
| start为低端内存起始物理页帧号,
| end为低端内存终止页帧号
|---->__free(bdata, sidx, eidx);
| sidx:低端内存起始页号(需减去bdata->node_min_pfn);
| eidx:低端内存终止页号(需减去bdata->node_min_pfn);
| 业务:将bdata中的页图标注为未被占用
|
|---->reserve_bootmem_node(pgdat, boot_pfn << PAGE_SHIFT,
| boot_pages << PAGE_SHIFT, BOOTMEM_DEFAULT);
| 业务:即将位图分配器自身所占用的内存标记为被占用
|---->mark_bootmem_node(pgdat->bdata, start, end, 1, 0);
| start:低端内存中,位图所占用的物理内存起始页帧号
| end:低端内存中,位图所占用的物理内存终止页帧号
|---->sidx = start - bdata->node_min_pfn;
| eidx = end - bdata->node_min_pfn;
|---->__reserve(bdata, sidx, eidx, flags)
| sidx:低端内存起始页号(需减去bdata->node_min_pfn);
| eidx:低端内存终止页号(需减去bdata->node_min_pfn);
| 业务:将bdata中的页图标注为被占用
|
unsigned long init_bootmem_core(bootmem_data_t *bdata,
unsigned long mapstart, unsigned long start, unsigned long end)
|---->bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
| bdata->node_bootmem_map存放位图页的虚拟地址
|---->bdata->node_min_pfn = start;
| 存放低端内存的起始物理页号
|---->bdata->node_low_pfn = end;
| 存放低端内存的结束物理页号
|
|---->link_bootmem(bdata);
| 将bdata按照node_min_pfn值的升序顺序插入到bdata_list链表中
|---->unsigned long mapsize = bootmap_bytes(end - start);
| 获取位图所需的字节数
| memset(bdata->node_bootmem_map, 0XFF, mapsize);
| 将位图全部标记为已被占用(后期会再做修改, 注意文件系统位置)
static void devicemaps_init(struct machine_desc *mdesc)
|---->void *vectors = NULL;
| vectors = alloc_bootmem_low_pages(PAGE_SIZE);
| 为中断向量申请内存空间,
| 实际上仍是通过alloc_bootmem_core函数完成内存分配.
|
|---->for(addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
| pmd_clear(pmd_off_k(addr))
| 将VMALLOC_END ~ 4G的页表映射全部清除
|---->map.pfn = __phys_to_pfn(virt_to_phys(vectors));
| map.virtual = CONFIG_VECTORS_BASE;
| map.length = PAGE_SIZE;
| map.type = MT_HIGH_VECTORS;
| create_mapping(&map);
| 将物理地址和虚拟地址建立映射关系,此处即为:
| 为中断向量虚拟地址寻找一个物理页面,并且建立映射关系.
|
|---->mdesc->map_io()
| 为I/O空间建立映射,注意页表中cache的属性,
| 这部分完全和SOC设计相关,将需要建立的映射关系
| 存放于一个struct map_desc实例数组中,调用
| create_mapping完成I/O空间映射.
|---->local_flush_tlb_all();
| flush_cache_all();
| 同步硬件缓存与物理内存.
void kmap_init(void)
|---->pmd_t *pmd = pmd_off_k(PKMAP_BASE);
| 获取PKMAP_BASE虚拟地址所对应的一级页表项的的地址
| 关于PKMAP_BASE,网上有很多都说是接近4G,但是我只在
| X86上看到是这样,而ARM或者unicore都是:
| PAGE_OFFSET - PMD_SIZE 暂时不知作出改动的原因.
|---->pte_t *pte = alloc_bootmem_low_pages(
| PTRS_PER_PTE * sizeof(pte_t));
| PKMAP空间需做二级页表映射,此处获得二级页表的起始
| 地址.
|---->__pmd_populate(pmd, __pa(pte) |
| _PAGE_KERNEL_TABLE);
| 在相应的一级页表项中计入二级页表项的物理地址,并设置好
| 一级页表项的属性.
|---->pkmap_page_table = pte + PTRS_PER_PTE
| 记录PKMAP虚拟空间的二级页表项的物理末尾地址.
//request_standard_resources描述得有些不准确
void request_standard_resources(struct meminfo *mi,
struct machine_desc *mdesc)
|---->kernel_code.start = virt_to_phys(_text);
| kernel_code.end = virt_to_phys(_etext - 1);
| kernel_data.start = virt_to_phys(_data);
| kernel_data.end = virt_to_phys(_end - 1);
|
|---->for(i = 0; i < mi->nr_banks; i++)
| 此处是将meminfo的资源放入iomem_resource树中,
| 同时将内核镜像资源也放入iomem_resource树中.
| 注意内核镜像资源如何放入.
|
| struct resource *res = NULL;
| res = alloc_bootmem_low(sizeof(*res));
| res->name = "System RAM"
| res->start = mi->bank[i].start;
| res->end = mi->bank[i].start + mi->bank[i].size - 1;
| res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
|
| request_resource(&iomem_resource, res)
| 将内存资源放入iomem_resource树中.
|
| if(kernel_code.start >= res->start &&
| kernel_code.end <= res->end)
| request_resource(res, &kernel_code);
| if(kernel_data.start >= res->start &&
| kernel_data.end <= res->end)
| request_resource(res, &kernel_data);
| 将内核镜像资源放入iomem_resource树中.
bootmem_init
|---->bootmem_init_node
|---->map_memory_bank(bank) 对内存0x3000_0000,64M空间进行映射
|
|
|---->bootmem_bootmap_pages
|---->find_bootmap_pfn
|
|
|---->init_bootmem_node
|
|---->free_bootmem_node
|
|---->reserve_bootmem_node
|---->reserve_node_zero
|
|
|---->zone_size[0] = end_pfn - start_pfn
|
|
|---->arch_adjust_zones
|
|
|---->free_area_init_node
1. map_memory_bank
/*
 * Fragment of the caller: visit every bank yielded by
 * for_each_nodebank(i, mi, node), grow [start_pfn, end_pfn] so that it
 * spans all visited banks, and map each bank via map_memory_bank().
 */
for_each_nodebank(i, mi, node) {
struct membank *bank = &mi->bank[i];
unsigned long start, end;
/* Bank start address and end address (start + size) as page frame numbers. */
start = bank->start >> PAGE_SHIFT;
end = (bank->start + bank->size) >> PAGE_SHIFT;
/* Keep the lowest start pfn seen so far. */
if (start_pfn > start)
start_pfn = start;
/* Keep the highest end pfn seen so far. */
if (end_pfn < end)
end_pfn = end;
/* Build the mapping for this bank (see map_memory_bank). */
map_memory_bank(bank);
}
/*
 * map_memory_bank - describe one memory bank as a struct map_desc and hand
 * it to create_mapping().
 * @bank: bank whose ->start and ->size define the region to map.
 *
 * Compiles to an empty function when CONFIG_MMU is not defined.
 */
static inline void map_memory_bank(struct membank *bank)
{
#ifdef CONFIG_MMU
struct map_desc map;
/* pfn and virtual address are both derived from the bank's start address. */
map.pfn = __phys_to_pfn(bank->start);
map.virtual = __phys_to_virt(bank->start);
map.length = bank->size;
map.type = MT_MEMORY;
/*
 * Establish the virtual-to-physical mapping. Per the author's note the
 * entries are written as section entries into the table that was set up at
 * the very beginning of initialisation; the mapped size is 64 MB.
 * Example values for the author's board (64 MB of RAM at 0x30000000):
 *   map.pfn     = 0x30000
 *   map.virtual = 0xc0000000
 *   map.length  = 0x4000000   (64 MB)
 */
create_mapping(&map);
#endif
}
这部分代码会调用底层的create_mapping函数对物理内存进行一个映射。映射的实际情况是:
map_memory_bank pfn=0x30000, vitual=0xc0000000, length=0x4000000, type=0x8
creatmap pgd=0xc0007000, addr=0xc0000000, next=0xc0200000, phys=0x30000000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x600
pmd=0xc0007000, pmdcontext=0x3000041e, addr=0xc0000000, end=0xc0200000, phys=0x30000000
pmd=0xc0007004, pmdcontext=0x3010041e, addr=0xc0100000, end=0xc0200000, phys=0x30100000
creatmap pgd=0xc0007008, addr=0xc0200000, next=0xc0400000, phys=0x30200000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x601
pmd=0xc0007008, pmdcontext=0x3020041e, addr=0xc0200000, end=0xc0400000, phys=0x30200000
pmd=0xc000700c, pmdcontext=0x3030041e, addr=0xc0300000, end=0xc0400000, phys=0x30300000
creatmap pgd=0xc0007010, addr=0xc0400000, next=0xc0600000, phys=0x30400000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x602
pmd=0xc0007010, pmdcontext=0x3040041e, addr=0xc0400000, end=0xc0600000, phys=0x30400000
pmd=0xc0007014, pmdcontext=0x3050041e, addr=0xc0500000, end=0xc0600000, phys=0x30500000
creatmap pgd=0xc0007018, addr=0xc0600000, next=0xc0800000, phys=0x30600000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x603
pmd=0xc0007018, pmdcontext=0x3060041e, addr=0xc0600000, end=0xc0800000, phys=0x30600000
pmd=0xc000701c, pmdcontext=0x3070041e, addr=0xc0700000, end=0xc0800000, phys=0x30700000
creatmap pgd=0xc0007020, addr=0xc0800000, next=0xc0a00000, phys=0x30800000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x604
pmd=0xc0007020, pmdcontext=0x3080041e, addr=0xc0800000, end=0xc0a00000, phys=0x30800000
pmd=0xc0007024, pmdcontext=0x3090041e, addr=0xc0900000, end=0xc0a00000, phys=0x30900000
creatmap pgd=0xc0007028, addr=0xc0a00000, next=0xc0c00000, phys=0x30a00000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x605
pmd=0xc0007028, pmdcontext=0x30a0041e, addr=0xc0a00000, end=0xc0c00000, phys=0x30a00000
pmd=0xc000702c, pmdcontext=0x30b0041e, addr=0xc0b00000, end=0xc0c00000, phys=0x30b00000
creatmap pgd=0xc0007030, addr=0xc0c00000, next=0xc0e00000, phys=0x30c00000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x606
pmd=0xc0007030, pmdcontext=0x30c0041e, addr=0xc0c00000, end=0xc0e00000, phys=0x30c00000
pmd=0xc0007034, pmdcontext=0x30d0041e, addr=0xc0d00000, end=0xc0e00000, phys=0x30d00000
creatmap pgd=0xc0007038, addr=0xc0e00000, next=0xc1000000, phys=0x30e00000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x607
pmd=0xc0007038, pmdcontext=0x30e0041e, addr=0xc0e00000, end=0xc1000000, phys=0x30e00000
pmd=0xc000703c, pmdcontext=0x30f0041e, addr=0xc0f00000, end=0xc1000000, phys=0x30f00000
creatmap pgd=0xc0007040, addr=0xc1000000, next=0xc1200000, phys=0x31000000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x608
pmd=0xc0007040, pmdcontext=0x3100041e, addr=0xc1000000, end=0xc1200000, phys=0x31000000
pmd=0xc0007044, pmdcontext=0x3110041e, addr=0xc1100000, end=0xc1200000, phys=0x31100000
creatmap pgd=0xc0007048, addr=0xc1200000, next=0xc1400000, phys=0x31200000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x609
pmd=0xc0007048, pmdcontext=0x3120041e, addr=0xc1200000, end=0xc1400000, phys=0x31200000
pmd=0xc000704c, pmdcontext=0x3130041e, addr=0xc1300000, end=0xc1400000, phys=0x31300000
creatmap pgd=0xc0007050, addr=0xc1400000, next=0xc1600000, phys=0x31400000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x60a
pmd=0xc0007050, pmdcontext=0x3140041e, addr=0xc1400000, end=0xc1600000, phys=0x31400000
pmd=0xc0007054, pmdcontext=0x3150041e, addr=0xc1500000, end=0xc1600000, phys=0x31500000
creatmap pgd=0xc0007058, addr=0xc1600000, next=0xc1800000, phys=0x31600000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x60b
pmd=0xc0007058, pmdcontext=0x3160041e, addr=0xc1600000, end=0xc1800000, phys=0x31600000
pmd=0xc000705c, pmdcontext=0x3170041e, addr=0xc1700000, end=0xc1800000, phys=0x31700000
creatmap pgd=0xc0007060, addr=0xc1800000, next=0xc1a00000, phys=0x31800000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x60c
pmd=0xc0007060, pmdcontext=0x3180041e, addr=0xc1800000, end=0xc1a00000, phys=0x31800000
pmd=0xc0007064, pmdcontext=0x3190041e, addr=0xc1900000, end=0xc1a00000, phys=0x31900000
creatmap pgd=0xc0007068, addr=0xc1a00000, next=0xc1c00000, phys=0x31a00000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x60d
pmd=0xc0007068, pmdcontext=0x31a0041e, addr=0xc1a00000, end=0xc1c00000, phys=0x31a00000
pmd=0xc000706c, pmdcontext=0x31b0041e, addr=0xc1b00000, end=0xc1c00000, phys=0x31b00000
creatmap pgd=0xc0007070, addr=0xc1c00000, next=0xc1e00000, phys=0x31c00000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x60e
pmd=0xc0007070, pmdcontext=0x31c0041e, addr=0xc1c00000, end=0xc1e00000, phys=0x31c00000
pmd=0xc0007074, pmdcontext=0x31d0041e, addr=0xc1d00000, end=0xc1e00000, phys=0x31d00000
creatmap pgd=0xc0007078, addr=0xc1e00000, next=0xc2000000, phys=0x31e00000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x60f
pmd=0xc0007078, pmdcontext=0x31e0041e, addr=0xc1e00000, end=0xc2000000, phys=0x31e00000
pmd=0xc000707c, pmdcontext=0x31f0041e, addr=0xc1f00000, end=0xc2000000, phys=0x31f00000
creatmap pgd=0xc0007080, addr=0xc2000000, next=0xc2200000, phys=0x32000000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x610
pmd=0xc0007080, pmdcontext=0x3200041e, addr=0xc2000000, end=0xc2200000, phys=0x32000000
pmd=0xc0007084, pmdcontext=0x3210041e, addr=0xc2100000, end=0xc2200000, phys=0x32100000
creatmap pgd=0xc0007088, addr=0xc2200000, next=0xc2400000, phys=0x32200000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x611
pmd=0xc0007088, pmdcontext=0x3220041e, addr=0xc2200000, end=0xc2400000, phys=0x32200000
pmd=0xc000708c, pmdcontext=0x3230041e, addr=0xc2300000, end=0xc2400000, phys=0x32300000
creatmap pgd=0xc0007090, addr=0xc2400000, next=0xc2600000, phys=0x32400000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x612
pmd=0xc0007090, pmdcontext=0x3240041e, addr=0xc2400000, end=0xc2600000, phys=0x32400000
pmd=0xc0007094, pmdcontext=0x3250041e, addr=0xc2500000, end=0xc2600000, phys=0x32500000
creatmap pgd=0xc0007098, addr=0xc2600000, next=0xc2800000, phys=0x32600000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x613
pmd=0xc0007098, pmdcontext=0x3260041e, addr=0xc2600000, end=0xc2800000, phys=0x32600000
pmd=0xc000709c, pmdcontext=0x3270041e, addr=0xc2700000, end=0xc2800000, phys=0x32700000
creatmap pgd=0xc00070a0, addr=0xc2800000, next=0xc2a00000, phys=0x32800000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x614
pmd=0xc00070a0, pmdcontext=0x3280041e, addr=0xc2800000, end=0xc2a00000, phys=0x32800000
pmd=0xc00070a4, pmdcontext=0x3290041e, addr=0xc2900000, end=0xc2a00000, phys=0x32900000
creatmap pgd=0xc00070a8, addr=0xc2a00000, next=0xc2c00000, phys=0x32a00000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x615
pmd=0xc00070a8, pmdcontext=0x32a0041e, addr=0xc2a00000, end=0xc2c00000, phys=0x32a00000
pmd=0xc00070ac, pmdcontext=0x32b0041e, addr=0xc2b00000, end=0xc2c00000, phys=0x32b00000
creatmap pgd=0xc00070b0, addr=0xc2c00000, next=0xc2e00000, phys=0x32c00000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x616
pmd=0xc00070b0, pmdcontext=0x32c0041e, addr=0xc2c00000, end=0xc2e00000, phys=0x32c00000
pmd=0xc00070b4, pmdcontext=0x32d0041e, addr=0xc2d00000, end=0xc2e00000, phys=0x32d00000
creatmap pgd=0xc00070b8, addr=0xc2e00000, next=0xc3000000, phys=0x32e00000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x617
pmd=0xc00070b8, pmdcontext=0x32e0041e, addr=0xc2e00000, end=0xc3000000, phys=0x32e00000
pmd=0xc00070bc, pmdcontext=0x32f0041e, addr=0xc2f00000, end=0xc3000000, phys=0x32f00000
creatmap pgd=0xc00070c0, addr=0xc3000000, next=0xc3200000, phys=0x33000000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x618
pmd=0xc00070c0, pmdcontext=0x3300041e, addr=0xc3000000, end=0xc3200000, phys=0x33000000
pmd=0xc00070c4, pmdcontext=0x3310041e, addr=0xc3100000, end=0xc3200000, phys=0x33100000
creatmap pgd=0xc00070c8, addr=0xc3200000, next=0xc3400000, phys=0x33200000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x619
pmd=0xc00070c8, pmdcontext=0x3320041e, addr=0xc3200000, end=0xc3400000, phys=0x33200000
pmd=0xc00070cc, pmdcontext=0x3330041e, addr=0xc3300000, end=0xc3400000, phys=0x33300000
creatmap pgd=0xc00070d0, addr=0xc3400000, next=0xc3600000, phys=0x33400000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x61a
pmd=0xc00070d0, pmdcontext=0x3340041e, addr=0xc3400000, end=0xc3600000, phys=0x33400000
pmd=0xc00070d4, pmdcontext=0x3350041e, addr=0xc3500000, end=0xc3600000, phys=0x33500000
creatmap pgd=0xc00070d8, addr=0xc3600000, next=0xc3800000, phys=0x33600000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x61b
pmd=0xc00070d8, pmdcontext=0x3360041e, addr=0xc3600000, end=0xc3800000, phys=0x33600000
pmd=0xc00070dc, pmdcontext=0x3370041e, addr=0xc3700000, end=0xc3800000, phys=0x33700000
creatmap pgd=0xc00070e0, addr=0xc3800000, next=0xc3a00000, phys=0x33800000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x61c
pmd=0xc00070e0, pmdcontext=0x3380041e, addr=0xc3800000, end=0xc3a00000, phys=0x33800000
pmd=0xc00070e4, pmdcontext=0x3390041e, addr=0xc3900000, end=0xc3a00000, phys=0x33900000
creatmap pgd=0xc00070e8, addr=0xc3a00000, next=0xc3c00000, phys=0x33a00000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x61d
pmd=0xc00070e8, pmdcontext=0x33a0041e, addr=0xc3a00000, end=0xc3c00000, phys=0x33a00000
pmd=0xc00070ec, pmdcontext=0x33b0041e, addr=0xc3b00000, end=0xc3c00000, phys=0x33b00000
creatmap pgd=0xc00070f0, addr=0xc3c00000, next=0xc3e00000, phys=0x33c00000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x61e
pmd=0xc00070f0, pmdcontext=0x33c0041e, addr=0xc3c00000, end=0xc3e00000, phys=0x33c00000
pmd=0xc00070f4, pmdcontext=0x33d0041e, addr=0xc3d00000, end=0xc3e00000, phys=0x33d00000
creatmap pgd=0xc00070f8, addr=0xc3e00000, next=0xc4000000, phys=0x33e00000, type=0xc0360390, intit_mm_pgd=0xc0004000, pgd_index=0x61f
pmd=0xc00070f8, pmdcontext=0x33e0041e, addr=0xc3e00000, end=0xc4000000, phys=0x33e00000
pmd=0xc00070fc, pmdcontext=0x33f0041e, addr=0xc3f00000, end=0xc4000000, phys=0x33f00000
/*
 * init_bootmem_node - per-node entry point for setting up the bootmem bitmap.
 * @pgdat:    node descriptor; only its ->bdata member is used here.
 * @freepfn:  passed through as the 'mapstart' argument of init_bootmem_core().
 * @startpfn: passed through as the 'start' argument.
 * @endpfn:   passed through as the 'end' argument.
 *
 * Thin wrapper: forwards everything to init_bootmem_core() and returns its
 * result unchanged.
 */
unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
unsigned long startpfn, unsigned long endpfn)
{
return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);
}
该函数作用:
调用init_bootmem_core函数,初始化pgdat->bdata(bootmem_data_t类型)中的成员
先看一下pgdat->bdata所指向的bootmem_data结构
/*
 * Bookkeeping for one bootmem bitmap; filled in by init_bootmem_core().
 */
typedef struct bootmem_data {
unsigned long node_min_pfn; /* first page frame number covered (set to 'start' by init_bootmem_core) */
unsigned long node_low_pfn; /* end page frame number covered (set to 'end' by init_bootmem_core) */
void *node_bootmem_map; /* virtual address of the bitmap: init_bootmem_core stores phys_to_virt(PFN_PHYS(mapstart)) here */
unsigned long last_end_off; /* per the original note: offset at which the previous allocation ended */
unsigned long hint_idx; /* per the original note: index of the last page of the previous allocation */
struct list_head list; /* list linkage; presumably what link_bootmem() uses - confirm */
} bootmem_data_t;
/*
 * init_bootmem_core - initialise one bootmem descriptor and its bitmap.
 * @bdata:    descriptor to fill in.
 * @mapstart: page frame number at which the bitmap itself is stored.
 * @start:    first page frame number the bitmap covers.
 * @end:      end page frame number the bitmap covers.
 *
 * Records the bitmap location and pfn range in @bdata, links @bdata via
 * link_bootmem(), and sets every byte of the bitmap to 0xff so that all
 * pages start out marked as reserved.
 *
 * Returns the bitmap size in bytes as computed by bootmap_bytes().
 */
static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
unsigned long mapstart, unsigned long start, unsigned long end)
{
unsigned long mapsize;
mminit_validate_memmodel_limits(&start, &end);// empty function here, per the original note
/* The bitmap is accessed through its virtual address. */
bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
bdata->node_min_pfn = start;
bdata->node_low_pfn = end;
link_bootmem(bdata);
/*
* Initially all pages are reserved - setup_arch() has to
* register free RAM areas explicitly.
*/
/* Bytes needed for a bitmap covering (end - start) pages. */
mapsize = bootmap_bytes(end - start);
memset(bdata->node_bootmem_map, 0xff, mapsize);
bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
bdata - bootmem_node_data, start, mapstart, end, mapsize);
return mapsize;
}
假如调用函数传进来的参数值如下:
mapstart = 0x30431; start = 0x30000; end = 0x31000;
那么:
bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
#define PFN_PHYS(x) ((phys_addr_t)(x) << PAGE_SHIFT)
bdata->node_bootmem_map = phys_to_virt(0x30431000) = 0xc0431000
bdata->node_min_pfn = start = 0x30000
bdata->node_low_pfn = end = 0x31000
mapsize = bootmap_bytes(end - start) = 512
memset(bdata->node_bootmem_map, 0xff, mapsize) 也即
memset(bdata->node_bootmem_map, 0xff, 512)
到这里就可以看出该函数的主要作用:
1,初始化pgdat变量中的主要成员,这些成员在随后即将用到。
2,将用来管理内存状况的那512个字节的数据全部赋值为0xff,也即将4096位全部置1