Linux内核启动时如何知道系统有多少可用内存呢?
我们可以猜猜大概有这么几种方式:
1.内核写死;
2.通过启动参数传递给内核;
3.内核代码自动探测可用内存大小;
具体是怎样的呢,下面结合代码具体分析一下。
start_kernel()启动后调用setup_arch()对平台相关的代码进行初始化。
[arch/mips/kernel/setup.c]
/*
 * setup_arch - architecture-specific boot-time initialisation (abridged
 * excerpt; the "..." line marks code omitted by the article).
 *
 * Of the calls shown here, the two that matter for memory discovery are
 * prom_init(), which on this platform obtains the DRAM size and registers
 * the RAM regions, and arch_mem_init(), which receives cmdline_p and
 * finishes building the boot memory map.
 */
void __init setup_arch(char **cmdline_p)
{
cpu_probe();
/* Platform firmware hook: RAM regions are registered from here */
prom_init();
cpu_report();
check_bugs_early();
...
/* Board memory setup, init-section bookkeeping, bootmem and paging */
arch_mem_init(cmdline_p);
resource_init();
plat_smp_setup();
}
[arch/mips/brcmstb/prom.c]
/*
 * prom_init - brcmstb firmware/boot-loader interface setup (abridged
 * excerpt; "..." lines mark code omitted by the article).
 *
 * For memory discovery it does three things:
 *   1. reads the boot-loader configuration (cfe_read_configuration());
 *   2. settles the final DRAM sizes in MB (board_get_ram_size());
 *   3. registers the MEMC0 RAM with add_memory_region(), split into a lower
 *      window at physical 0 and, if configured, an upper window at
 *      UPPERMEM_START. Any remainder is reported as extra "bmem".
 */
void __init prom_init(void)
{
char *ptr;
cfe_init(cfe_handle, cfe_entry);
...
cfe_read_configuration();
brcm_setup_early_printk();
...
/* Fill in brcm_dram{0,1}_size_mb: kernel config override, CFE, or probing */
board_get_ram_size(&
brcm_dram0_size_mb, &brcm_dram1_size_mb);
/* do { } while (0) is used only so that "break" can leave the block early */
do {
unsigned long dram0_mb = brcm_dram0_size_mb, mb;
/* Lower window: at most BRCM_MAX_LOWER_MB of RAM starting at physical 0 */
mb = min(dram0_mb, BRCM_MAX_LOWER_MB);
/* per the article: #define BRCM_MAX_LOWER_MB 256 */
dram0_mb -= mb;
/* mb << 20 converts megabytes to bytes */
add_memory_region(0, mb << 20, BOOT_MEM_RAM);
/* Everything fitted in the lower window: nothing more to register */
if (!dram0_mb)
break;
#ifdef CONFIG_BRCM_UPPER_MEMORY
/* More RAM than the lower window holds (i.e. more than 256 MB) */
mb = min(dram0_mb, BRCM_MAX_UPPER_MB);
/* per the article: #define BRCM_MAX_UPPER_MB _AC(768, UL) */
dram0_mb -= mb;
/* NOTE(review): presumably sets up wired TLB entries for the upper window - confirm */
plat_wired_tlb_setup();
add_memory_region(UPPERMEM_START, mb << 20, BOOT_MEM_RAM);
/* per the article: #define UPPERMEM_START _AC(0x20000000, UL) */
if (!dram0_mb)
break;
#endif
/*
* We wound up here because the chip's architecture cannot
* make use of all MEMC0 RAM in Linux. i.e. no suitable
* HIGHMEM or upper memory options are supported by the CPU.
*
* But we can still report the excess memory as a "bonus"
* reserved (bmem) region, so the application can manage it.
*/
mb = brcm_dram0_size_mb - dram0_mb; /* Linux memory */
/* Only reuse the dram1 variables when no second bank size was reported */
if (!brcm_dram1_size_mb) {
printk(KERN_INFO "MEMC0 split: %lu MB -> Linux; "
"%lu MB -> extra bmem\n", mb, dram0_mb);
brcm_dram1_size_mb = dram0_mb;
brcm_dram1_start = UPPERMEM_START;
}
} while (0);
...
board_ebase_setup = &bmips_ebase_setup;
register_smp_ops(&bmips_smp_ops);
}
/*
 * cfe_read_configuration - pull configuration values from the boot loader
 * (abridged excerpt; "..." marks omitted code).
 *
 * The two entries shown store DRAM0_SIZE and DRAM1_SIZE, parsed with
 * parse_ulong, into brcm_dram0_size_mb and brcm_dram1_size_mb.
 * NOTE(review): FETCH is defined outside this excerpt; presumably it looks
 * up the named CFE variable and bumps the "fetched" counter printed below -
 * confirm against its definition.
 */
static void __init __maybe_unused cfe_read_configuration(void)
{
FETCH("DRAM0_SIZE", parse_ulong, &
brcm_dram0_size_mb);
FETCH("DRAM1_SIZE", parse_ulong, &
brcm_dram1_size_mb);
...
printk(KERN_CONT "found %d vars.\n", fetched);
}
Broadcom平台的bootloader是CFE,这里就是从CFE提供的变量(DRAM0_SIZE、DRAM1_SIZE)中读取DRAM的大小,单位为MB。
/*
 * board_get_ram_size - settle the DRAM bank sizes, in megabytes.
 * @dram0_mb: in/out; size of bank 0. On entry it may already hold the value
 *            read from CFE (non-zero) or be zero if nothing was reported.
 * @dram1_mb: in/out; size of bank 1. Only written when the kernel
 *            configuration forces a value.
 *
 * Source of the size, in order of precedence:
 *   - CONFIG_BRCM_OVERRIDE_RAM_SIZE: values fixed in the kernel configuration;
 *   - a non-zero *dram0_mb on entry: keep it, just log it;
 *   - otherwise: probe the hardware with probe_ram_size().
 */
void __init board_get_ram_size(unsigned long *dram0_mb, unsigned long *dram1_mb)
{
#ifndef CONFIG_BRCM_OVERRIDE_RAM_SIZE
	/* Nothing reported by CFE: fall back to probing bank 0 */
	if (*dram0_mb == 0) {
		*dram0_mb = probe_ram_size();
		return;
	}

	/* DRAM0_SIZE variable from CFE was already stored by the caller */
	pr_info("Using %lu MB + %lu MB RAM (from CFE)\n",
		*dram0_mb, *dram1_mb);
#else
	/* Sizes are pinned by the kernel configuration */
	*dram0_mb = CONFIG_BRCM_FORCED_DRAM0_SIZE;
# ifdef CONFIG_BRCM_FORCED_DRAM1_SIZE
	*dram1_mb = CONFIG_BRCM_FORCED_DRAM1_SIZE;
# endif
	pr_info("Using %lu MB + %lu MB RAM (from kernel configuration)\n",
		*dram0_mb, *dram1_mb);
#endif
}
/*
 * probe_ram_size - detect the amount of RAM by looking for address wrap-around.
 *
 * Writes a magic value through the KSEG1 base address and reads it back at
 * KSEG1 + 4 MB, 8 MB, ... 128 MB. The first offset at which the same value
 * shows up (confirmed with a second magic value) is taken as the memory size.
 * Runs with local interrupts disabled and restores the overwritten word.
 *
 * Return: detected size in MB, or 256 if no wrap-around was seen.
 */
static inline unsigned int __init probe_ram_size(void)
{
unsigned long addr = KSEG1, taddr;
uint32_t olddata;
unsigned long flags;
unsigned int i, memsize = 256;
pr_info("Probing system memory size... ");
local_irq_save(flags);
/* Interrupts are off from here until local_irq_restore() below */
cache_op(Hit_Writeback_Inv_D, KSEG0);
/* Write back and invalidate the data-cache line at the KSEG0 base */
olddata = DEV_RD(addr);
/* Save the word at the KSEG1 base (physical address 0 per the article) */
/*
* Try to figure out where memory wraps around. If it does not
* wrap, assume 256MB
*/
for (i = 4; i <= 128; i <<= 1) {
taddr = KSEG1 + i * 1048576;
/* 1048576 bytes == 1 MB, so taddr is i MB above the base */
DEV_WR(addr, MAGIC0);
/* Store the first magic value at the base address */
if (DEV_RD(taddr) == MAGIC0) {
/*
 * Reading back the value just written to the base means taddr aliases
 * the base address (wrap-around), so i MB is the candidate RAM size.
 */
DEV_WR(addr, MAGIC1);
/* Repeat with a different value to rule out a coincidental match */
if (DEV_RD(taddr) == MAGIC1) {
memsize = i;
break;
}
}
}
DEV_WR(addr, olddata);
/* Put back the original word at the base address */
cache_op(Hit_Writeback_Inv_D, KSEG0);
local_irq_restore(flags);
pr_cont("found %u MB\n", memsize);
return memsize;
}
上面这段代码其实非常好理解。假设系统只有8M内存:第一次循环(i=4)先向0地址写入MAGIC0,此时4M处读出来的肯定不是MAGIC0,于是进入第二次循环(i=8),再次向0地址写入MAGIC0。因为系统内存只有8M,可寻址的物理空间是0~0x7FFFFF,只需要23根地址线;0x7FFFFF就是23根地址线上全是1,对应这8M空间的最后一个字节。那么用0x800000去寻址会怎样?此时第24根地址线上是1,低23根地址线全为0,但系统内存只有8M,第24根地址线悬空(不参与译码),只有低23根地址线用于寻址,所以0x800000和0地址访问到的是同一个内存单元,这就是wraps around(地址回绕)。因此从8M处读出来的就是MAGIC0,换成MAGIC1再验证一次仍然相等,于是就得到了系统内存为8M。
如果支持BRCM_HAS_XKS01,那么开启后内核地址空间的映射变为:
/*
* 1024MB Broadcom 256+768 virtual address map
*
* 8000_0000 - 8fff_ffff: 256MB RAM @ 0000_0000, cached
* 9000_0000 - 9fff_ffff: 256MB EBI/Registers @ 1000_0000, uncached
* a000_0000 - cfff_ffff: 768MB RAM @ 2000_0000, cached
* d000_0000 - dfff_ffff: TBD
* e000_0000 - ff1f_7fff: vmalloc region
* ff1f_8000 - ff1f_ffff: FIXMAP
* ff40_0000 - ff7f_ffff: CONSISTENT region
*
* PA 5000_0000 and above are accessed through HIGHMEM (BMIPS5000 only).
*/
此时KSEG0空间得到拓展,内核可直接映射的RAM达到1024M(256M+768M):0x8000_0000~0xcfff_ffff这段内核虚拟地址不通过TLB,直接映射到物理地址(虚拟地址-0x8000_0000)。
通过add_memory_region()记录物理内存分布
/*
 * add_memory_region - record one physical memory range in boot_mem_map.
 * @start: physical start address of the range
 * @size:  length of the range in bytes
 * @type:  region type (e.g. BOOT_MEM_RAM, BOOT_MEM_INIT_RAM)
 *
 * A range that directly follows the most recently added entry and has the
 * same type is merged into that entry; otherwise a new entry is appended.
 * Ranges whose end wraps around, and additions beyond BOOT_MEM_MAP_MAX
 * entries, are rejected with a log message.
 */
void __init add_memory_region(phys_t start, phys_t size, long type)
{
	int x = boot_mem_map.nr_map;

	/* Sanity check: start + size must not wrap around */
	if (start + size < start) {
		pr_warning("Trying to add an invalid memory region, skipped\n");
		return;
	}

	/*
	 * Try to merge with previous entry if any. This is far less than
	 * perfect but is sufficient for most real world cases.
	 *
	 * The pointer to the previous entry is formed only when one exists:
	 * computing &map[-1] for an empty map is undefined behaviour even if
	 * the pointer is never dereferenced.
	 */
	if (x > 0) {
		struct boot_mem_map_entry *prev = &boot_mem_map.map[x - 1];

		if (prev->addr + prev->size == start && prev->type == type) {
			prev->size += size;
			return;
		}
	}

	if (x == BOOT_MEM_MAP_MAX) {
		pr_err("Ooops! Too many entries in the memory map!\n");
		return;
	}

	boot_mem_map.map[x].addr = start;
	boot_mem_map.map[x].size = size;
	boot_mem_map.map[x].type = type;
	boot_mem_map.nr_map++;
}
当然,也可以直接通过启动参数mem=size@start(例如mem=256M@0)指定物理内存的分布情况;第一次解析到mem参数时,会先丢弃之前自动生成的内存布局:
/*
 * early_parse_mem - handle the "mem=size[@start]" boot parameter.
 * @p: the text after "mem=", or NULL if the option was given without a value
 *
 * The first "mem=" seen discards every automatically generated entry in
 * boot_mem_map; each occurrence then adds one BOOT_MEM_RAM region. When
 * "@start" is omitted the region starts at physical address 0.
 *
 * Return: 0 on success, non-zero if no value was supplied.
 */
static int __init early_parse_mem(char *p)
{
	/*
	 * memparse() returns unsigned long long. Keep the full width here so
	 * that values of 4 GiB and above are not truncated on 32-bit kernels
	 * before being handed to add_memory_region().
	 */
	unsigned long long start = 0, size;

	/* A bare "mem" without "=value" gives us nothing to parse */
	if (!p)
		return -1;

	/*
	 * If a user specifies memory size, we
	 * blow away any automatically generated
	 * size.
	 */
	if (usermem == 0) {
		boot_mem_map.nr_map = 0;
		usermem = 1;
	}

	size = memparse(p, &p);
	if (*p == '@')
		start = memparse(p + 1, &p);

	add_memory_region(start, size, BOOT_MEM_RAM);
	return 0;
}
early_param("mem", early_parse_mem);
/*
 * arch_mem_init - finish the boot memory map and bring up early memory
 * management (abridged excerpt; "..." marks omitted code).
 * @cmdline_p: passed through from setup_arch(); not used in the lines shown
 *
 * After the board hook has run, this makes sure the kernel's init section
 * is represented in boot_mem_map (adding it as BOOT_MEM_INIT_RAM if its
 * start is not inside any recorded region), then calls bootmem_init(),
 * sparse_init(), paging_init() and related setup routines.
 */
static void __init
arch_mem_init(char **cmdline_p)
{
phys_t init_mem, init_end, init_size;
extern void plat_mem_setup(void);
/* call board setup routine */
plat_mem_setup();
/*
 * Physical bounds of the init section, in whole pages: the start is
 * rounded up and the end rounded down to a page frame boundary.
 * NOTE(review): if the section does not span a full page, init_end could
 * end up below init_mem and the subtraction would wrap - confirm this
 * cannot happen.
 */
init_mem = PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT;
init_end = PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT;
init_size = init_end - init_mem;
if (init_size) {
/* Make sure it is in the boot_mem_map */
int i, found;
found = 0;
/* Only the start address of the init section is tested for containment */
for (i = 0; i < boot_mem_map.nr_map; i++) {
if (init_mem >= boot_mem_map.map[i].addr &&
init_mem < (boot_mem_map.map[i].addr +
boot_mem_map.map[i].size)) {
found = 1;
break;
}
}
/* Not covered by any known region: record it as its own entry */
if (!found)
add_memory_region(init_mem, init_size,
BOOT_MEM_INIT_RAM);
}
...
bootmem_init();
device_tree_init();
sparse_init();
plat_swiotlb_setup();
paging_init();
}
除了记录了物理内存的分布外,另外还记录了init段的分布。
那么记录着物理内存的分布之后该如何管理呢,请看下一节:linux 内存管理---bootmem(三)