有关临时页表的建立和主内核页表的建立swapper_pg_dir
1.swapper_pg_dir的地址是怎样决定的
.equ swapper_pg_dir, KERNEL_RAM_VADDR - PG_DIR_SIZE
其中KERNEL_RAM_VADDR == KERNEL_START,而KERNEL_START = 0xc0008000;
又 PG_DIR_SIZE = 0x4000,
所以:swapper_pg_dir = 0xc0008000 - 0x4000 = 0xc0004000,也就是位于kernel代码段
起始地址下方的16K区域(页表紧贴在代码段之下)。
/*
* swapper_pg_dir is the virtual address of the initial page table.
* We place the page tables 16K below KERNEL_RAM_VADDR. Therefore, we must
* make sure that KERNEL_RAM_VADDR is correctly set. Currently, we expect
* the least significant 16 bits to be 0x8000, but we could probably
* relax this restriction to KERNEL_RAM_VADDR >= PAGE_OFFSET + 0x4000.
*/
.globl swapper_pg_dir
.equ swapper_pg_dir, KERNEL_RAM_VADDR - PG_DIR_SIZE
#define PG_DIR_SIZE 0x4000
#define PMD_ORDER 2
已知swapper_pg_dir的地址是0xc0004000, 所以可知道KERNEL_RAM_VADDR = 0xc0008000;
#define KERNEL_START KERNEL_RAM_VADDR,
也就是KERNEL_START = 0xc0008000。
也就是kernel的入口地址
ENTRY(stext)
可知代码的入口是stext
crash> sym stext
c0008000 (T) stext
crash> dis stext
0xc0008000 <.head.text>: msr CPSR_c, #211 ; 0xd3
2.硬件相关的RAM地址是怎样告诉内核的
0xc0008014 <stext+0x14>: add r3, pc, #44 ; 0x2c
0xc0008018 <stext+0x18>: ldm r3, {r4, r8}
0xc000801c <stext+0x1c>: sub r4, r3, r4
0xc0008020 <stext+0x20>: add r8, r8, r4
r3通过pc相对寻址得到该数据区的物理地址(此时MMU未开,pc是物理地址);
ldm把数据区内容load到r4(该处的链接虚拟地址".")和r8(PAGE_OFFSET);
sub r4, r3, r4 算出phys-virt的线性偏移,再由 add r8, r8, r4 得到
PAGE_OFFSET对应的物理地址:PHYS_OFFSET
.long .
.long PAGE_OFFSET
从这里可以看出PHYS_OFFSET不是用参数直接传进来的,而是上面的方法得到的。
3.__create_page_tables
3.1 Clear the swapper page table
r8:物理地址PHYS_OFFSET,
r4:物理地址swapper_pg_dir
该代码的作用是清零swapper_pg_dir,即从r4开始的16K(0x4000字节)区域
0xc0008050 <__create_page_tables>: add r4, r8, #16384 ; 0x4000
0xc0008054 <__create_page_tables+0x4>: mov r0, r4
0xc0008058 <__create_page_tables+0x8>: mov r3, #0
0xc000805c <__create_page_tables+0xc>: add r6, r0, #16384 ; 0x4000
0xc0008060 <__create_page_tables+0x10>: str r3, [r0], #4
0xc0008064 <__create_page_tables+0x14>: str r3, [r0], #4
0xc0008068 <__create_page_tables+0x18>: str r3, [r0], #4
0xc000806c <__create_page_tables+0x1c>: str r3, [r0], #4
0xc0008070 <__create_page_tables+0x20>: teq r0, r6
0xc0008074 <__create_page_tables+0x24>: bne 0xc0008060
3.2 Create identity mapping to cater for __enable_mmu
0xc0008078 <__create_page_tables+0x28>: ldr r7, [r10, #8]/*根据物理地址寻址,得到变量的虚拟地址,根据线性关系又得到变量的物理地址*/
0xc000807c <__create_page_tables+0x2c>: add r0, pc, #128 ; 0x80
0xc0008080 <__create_page_tables+0x30>: ldm r0, {r3, r5, r6}
/*virt->phys offset*/
0xc0008084 <__create_page_tables+0x34>: sub r0, r0, r3
/*phys __turn_mmu_on, phys __turn_mmu_on_end*/
0xc0008088 <__create_page_tables+0x38>: add r5, r5, r0
0xc000808c <__create_page_tables+0x3c>: add r6, r6, r0
/*物理地址右移20位,再左移2位,得到对应的table index
 *(这里建立的是identity mapping恒等映射,虚拟地址==物理地址,
 *所以用物理地址计算index是正确的)
 *由base address+index,就可得到descriptor的地址
 *
 *描述符的内容来自PROCINFO_MM_MMUFLAGS,
 */
0xc0008090 <__create_page_tables+0x40>: lsr r5, r5, #20
0xc0008094 <__create_page_tables+0x44>: lsr r6, r6, #20
0xc0008098 <__create_page_tables+0x48>: orr r3, r7, r5, lsl #20
0xc000809c <__create_page_tables+0x4c>: str r3, [r4, r5, lsl #2]
0xc00080a0 <__create_page_tables+0x50>: cmp r5, r6
0xc00080a4 <__create_page_tables+0x54>: addcc r5, r5, #1
0xc00080a8 <__create_page_tables+0x58>: bcc 0xc0008098 <__create_page_tables+72>
3.3 setup the pagetables for our kernel direct mapped region.
0xc00080ac <__create_page_tables+0x5c>: mov r3, pc /*PC为物理地址*/
0xc00080b0 <__create_page_tables+0x60>: lsr r3, r3, #20
0xc00080b4 <__create_page_tables+0x64>: orr r3, r7, r3, lsl #20
0xc00080b8 <__create_page_tables+0x68>: add r0, r4, #12288 ; 0x3000
0xc00080bc <__create_page_tables+0x6c>: str r3, [r0, #0]!
/*r0: 当前表项地址, r4: 页表基地址
 *r6装入的是内核末尾的虚拟地址(来自0xc0008100处的字面量)
 *crash> rd 0xc0008100 :c0008100: c0815e33
 *r6 = r4 + (c0815e33 >> 18):虚地址右移20位得index,再乘4(即整体右移18位)
 *得到末表项偏移约0x3020,与 add r0, r4, #12288 ; 0x3000(PAGE_OFFSET的表项)
 *同属内核直接映射区的表项范围
 */
0xc00080c0 <__create_page_tables+0x70>: ldr r6, [pc, #56] ; 0xc0008100
0xc00080c4 <__create_page_tables+0x74>: add r0, r0, #4
0xc00080c8 <__create_page_tables+0x78>: add r6, r4, r6, lsr #18
0xc00080cc <__create_page_tables+0x7c>: cmp r0, r6
0xc00080d0 <__create_page_tables+0x80>: add r3, r3, #1048576 ; 0x100000
0xc00080d4 <__create_page_tables+0x84>: strls r3, [r0], #4
0xc00080d8 <__create_page_tables+0x88>: bls 0xc00080cc <__create_page_tables+124>
建立section描述符的过程是:
1.物理地址右移20位,再左移20位(取1MB对齐的段基址);2.得到控制位(MMUFLAGS);
3.段基址和控制位orr合并得到描述符;
4.把描述符写到页表相应位置:表项地址 = 基址 + index*4,4字节对齐,低2位为00。
4.进入C语言后的页表创建
paging_init -> create_mapping -> pgd = pgd_offset_k(addr);/* to find an entry in a kernel page-table-directory */
#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
#define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
/*
 * The kernel's initial address space.  Its pgd points at swapper_pg_dir,
 * so the page tables built by head.S become the master kernel page
 * tables once C code (paging_init et al.) takes over.
 */
struct mm_struct init_mm = {
.mm_rb = RB_ROOT,
.pgd = swapper_pg_dir,  /* master kernel page directory (0xc0004000) */
.mm_users = ATOMIC_INIT(2),
.mm_count = ATOMIC_INIT(1),
.mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
INIT_MM_CONTEXT(init_mm)
};
5. the procedure of page creation
setup_arch:
parse_early_param();
sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL);
sanity_check_meminfo();
arm_memblock_init(&meminfo, mdesc);
/* Top-level boot-time memory bookkeeping: all RAM plus reserved carve-outs. */
struct memblock {
phys_addr_t current_limit;  /* upper bound for memblock allocations */
struct memblock_type memory;  /* all RAM regions added from meminfo banks */
struct memblock_type reserved;  /* regions already claimed (kernel image, etc.) */
};
memblock_type分两种:memory为系统可用内存;reserved为已保留内存,即已经有固定用途
/*
 * Populate the memblock allocator from the boot-time meminfo banks and
 * reserve every region that must not be handed out as free memory.
 */
void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
{
int i;
/*1. add the RAM regions defined in meminfo as available memory */
for (i = 0; i < mi->nr_banks; i++)
memblock_add(mi->bank[i].start, mi->bank[i].size);
/*2. Register the kernel text, kernel data and initrd with memblock. */
memblock_reserve(__pa(_stext), _end - _stext);
arm_mm_memblock_reserve();  /* presumably reserves the initial page tables -- TODO confirm */
arm_dt_memblock_reserve();  /* presumably reserves device-tree memory -- TODO confirm */
/*3. reserve any platform specific memblock areas */
if (mdesc->reserve)
mdesc->reserve();
/*4.
* reserve memory for DMA contiguous allocations,
* must come from DMA area inside low memory
*/
dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit));
arm_memblock_steal_permitted = false;
memblock_allow_resize();
memblock_dump_all();
}
/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps, and sets up the zero page, bad page and bad page tables.
 */
void __init paging_init(struct machine_desc *mdesc)
{
void *zero_page;
memblock_set_current_limit(arm_lowmem_limit);  /* early allocations must stay in lowmem */
build_mem_type_table();  /* fill in mem_types[] protection bits (see dump above) */
prepare_page_table();  /* clear stale boot-time mappings */
map_lowmem();  /* linear-map all lowmem banks with MT_MEMORY */
dma_contiguous_remap();
devicemaps_init(mdesc);
kmap_init();
top_pmd = pmd_off_k(0xffff0000);  /* pmd covering the high vectors page */
/* allocate the zero page. */
zero_page = early_alloc(PAGE_SIZE);
bootmem_init();
empty_zero_page = virt_to_page(zero_page);
__flush_dcache_page(NULL, empty_zero_page);
}
/* Per-mapping-type protection bits used while building the page tables. */
struct mem_type {
pteval_t prot_pte;  /* level-2 (pte) protection bits */
pmdval_t prot_l1;  /* level-1 bits when the entry points to an L2 table */
pmdval_t prot_sect;  /* level-1 bits for a 1MB section (0 = no section form) */
unsigned int domain;  /* ARM MMU domain number for this mapping */
};
typedef u32 pteval_t;
typedef u32 pmdval_t;
/*
* Architecture ioremap implementation.
*/
/* Memory type indices -- each one selects an entry of the mem_types[] array. */
#define MT_DEVICE 0
#define MT_DEVICE_NONSHARED 1
#define MT_DEVICE_CACHED 2
#define MT_DEVICE_WC 3
/* types 0-3 are defined in asm/io.h */
#define MT_UNCACHED 4
#define MT_CACHECLEAN 5
#define MT_MINICLEAN 6
#define MT_LOW_VECTORS 7
#define MT_HIGH_VECTORS 8
#define MT_MEMORY 9 /* normal kernel RAM -- used by map_lowmem() */
#define MT_ROM 10
#define MT_MEMORY_NONCACHED 11
#define MT_MEMORY_DTCM 12
#define MT_MEMORY_ITCM 13
#define MT_MEMORY_SO 14
#define MT_MEMORY_DMA_READY 15
crash> p mem_types
mem_types = $18 =
{{
prot_pte = 0x653,
prot_l1 = 0x41,
prot_sect = 0x11452,
domain = 0x2
}, {
prot_pte = 0x273,
prot_l1 = 0x41,
prot_sect = 0x1452,
domain = 0x2
}, {
prot_pte = 0x66f,
prot_l1 = 0x41,
prot_sect = 0x1045e,
domain = 0x2
}, {
prot_pte = 0x667,
prot_l1 = 0x41,
prot_sect = 0x10456,
domain = 0x2
}, {
prot_pte = 0x243,
prot_l1 = 0x41,
prot_sect = 0x52,
domain = 0x2
}, {
prot_pte = 0x0,
prot_l1 = 0x0,
prot_sect = 0x841e,
domain = 0x0
}, {
prot_pte = 0x0,
prot_l1 = 0x0,
prot_sect = 0x941a,
domain = 0x0
}, {
prot_pte = 0x4df,
prot_l1 = 0x21,
prot_sect = 0x0,
domain = 0x1
}, {
prot_pte = 0x5df,
prot_l1 = 0x21,
prot_sect = 0x0,
domain = 0x1
}, {
prot_pte = 0x45f,
prot_l1 = 0x1,
prot_sect = 0x1140e,
domain = 0x0
}, {
prot_pte = 0x0,
prot_l1 = 0x0,
prot_sect = 0x940e,
domain = 0x0
}, {
prot_pte = 0x447,
prot_l1 = 0x1,
prot_sect = 0x10406,
domain = 0x0
}, {
prot_pte = 0x243,
prot_l1 = 0x1,
prot_sect = 0x12,
domain = 0x0
}, {
prot_pte = 0x43,
prot_l1 = 0x1,
prot_sect = 0x0,
domain = 0x0
}, {
prot_pte = 0x43,
prot_l1 = 0x1,
prot_sect = 0x10412,
domain = 0x0
}, {
prot_pte = 0x45f,
prot_l1 = 0x1,
prot_sect = 0x0,
domain = 0x0
}}
/*
 * Remove boot-time mappings that are no longer wanted before the real
 * kernel mappings are created: everything below the kernel image, and
 * everything from the end of the first lowmem block up to VMALLOC_START.
 */
static inline void prepare_page_table(void)
{
unsigned long addr;
phys_addr_t end;
/*
 * Clear out all the mappings below the kernel image.
 */
for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE)
pmd_clear(pmd_off_k(addr));
for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE)
pmd_clear(pmd_off_k(addr));
/*
 * Find the end of the first block of lowmem.
 */
end = memblock.memory.regions[0].base + memblock.memory.regions[0].size;
if (end >= arm_lowmem_limit)
end = arm_lowmem_limit;  /* never treat highmem as part of this block */
/*
 * Clear out all the kernel space mappings, except for the first
 * memory bank, up to the vmalloc region.
 */
for (addr = __phys_to_virt(end);
addr < VMALLOC_START; addr += PMD_SIZE)
pmd_clear(pmd_off_k(addr));
}
static inline pmd_t *pmd_off_k(unsigned long virt)
{
return pmd_offset(pud_offset(pgd_offset_k(virt), virt), virt);
}
/* to find an entry in a kernel page-table-directory */
#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
#define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
/* to find an entry in a page-table-directory */
#define pgd_index(addr) ((addr) >> PGDIR_SHIFT)
/* shift of 21: each pgd entry spans 2MB, i.e. two 1MB hardware sections */
#define PMD_SHIFT 21
#define PGDIR_SHIFT 21
/*
 * ARM's 2-level tables have no real pud level: the pud is folded into
 * the pgd, so "offsetting" into it just returns the pgd entry itself.
 */
static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address)
{
return (pud_t *)pgd;
}
/* Likewise the pmd is folded into the (already folded) pud: identity cast. */
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
{
return (pmd_t *)pud;
}
从外想内, pud_offset(pgd_offset_k(virt), virt) -> pgd_offset_k(virt) -> pgd_offset(&init_mm, addr)
-> ((mm)->pgd + pgd_index(addr))
/*
 * Clear one kernel pmd.  A "pmd" here is a pair of adjacent first-level
 * entries (PGDIR_SHIFT is 21 = 2MB, vs 1MB per hardware entry), so both
 * words are zeroed before the cache-clean makes them visible to the MMU.
 */
#define pmd_clear(pmdp) \
do { \
pmdp[0] = __pmd(0); \
pmdp[1] = __pmd(0); \
clean_pmd_entry(pmdp); \
} while (0)
/*
 * Clean the D-cache line holding a just-modified pmd entry so the
 * hardware table walker sees the update ("flush_pmd" in the cp15 ops).
 */
static inline void clean_pmd_entry(void *pmd)
{
const unsigned int __tlb_flag = __cpu_tlb_flags;
tlb_op(TLB_DCLEAN, "c7, c10, 1 @ flush_pmd", pmd);
tlb_l2_op(TLB_L2CLEAN_FR, "c15, c9, 1 @ L2 flush_pmd", pmd);
}
/* pte/pmd/pgprot constructors are identity macros here (no strict typechecks). */
#define __pte(x) (x)
#define __pmd(x) (x)
#define __pgprot(x) (x)
/*
 * Linear-map every lowmem memblock region into the kernel direct-mapped
 * area (virtual = __phys_to_virt(phys)) with MT_MEMORY attributes.
 */
static void __init map_lowmem(void)
{
struct memblock_region *reg;
phys_addr_t start;
phys_addr_t end;
struct map_desc map;
/* Map all the lowmem memory banks. */
for_each_memblock(memory, reg) {
start = reg->base;
end = start + reg->size;
if (end > arm_lowmem_limit)
end = arm_lowmem_limit;  /* clamp: highmem is not linearly mapped */
if (start >= end)
break;
map.pfn = __phys_to_pfn(start);
map.virtual = __phys_to_virt(start);
map.length = end - start;
map.type = MT_MEMORY;
create_mapping(&map, false);
}
}
/*
 * Create the page directory entries and any necessary
 * page tables for the mapping specified by `md'. We
 * are able to cope here with varying sizes and address
 * offsets, and we take full advantage of sections and
 * supersections.
 */
static void __init create_mapping(struct map_desc *md, bool force_pages)
{
unsigned long addr, length, end;
phys_addr_t phys;
const struct mem_type *type;
pgd_t *pgd;
type = &mem_types[md->type];  /* protection bits for this mapping type */
addr = md->virtual & PAGE_MASK;
phys = __pfn_to_phys(md->pfn);
/* round the length up to also cover the sub-page part of md->virtual */
length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
pgd = pgd_offset_k(addr);
end = addr + length;
do {
/* process one pgd-entry-sized span per iteration */
unsigned long next = pgd_addr_end(addr, end);
alloc_init_pud(pgd, addr, next, phys, type, force_pages);
phys += next - addr;
addr = next;
} while (pgd++, addr != end);
}
/*
 * Fill first-level entries for [addr, end): use 1MB section descriptors
 * when addr, end and phys are all section-aligned (and pages are not
 * forced); otherwise fall back to second-level tables via alloc_init_pte().
 */
static void __init alloc_init_section(pud_t *pud, unsigned long addr,
unsigned long end, phys_addr_t phys,
const struct mem_type *type,
bool force_pages)
{
pmd_t *pmd = pmd_offset(pud, addr);
/*
 * Try a section mapping - end, addr and phys must all be aligned
 * to a section boundary. Note that PMDs refer to the individual
 * L1 entries, whereas PGDs refer to a group of L1 entries making
 * up one logical pointer to an L2 table.
 */
if (type->prot_sect && ((addr | end | phys) & ~SECTION_MASK) == 0 &&
!force_pages) {
pmd_t *p = pmd;
if (addr & SECTION_SIZE)  /* start in the odd (second) half of the pmd pair */
pmd++;
do {
*pmd = __pmd(phys | type->prot_sect);  /* one 1MB section descriptor */
phys += SECTION_SIZE;
} while (pmd++, addr += SECTION_SIZE, addr != end);
flush_pmd_entry(p);  /* make the new entries visible to the table walker */
} else {
/*
 * No need to loop; pte's aren't interested in the
 * individual L1 entries.
 */
alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
}
}
/*
 * Map [addr, end) with individual pte entries.  early_pte_alloc()
 * presumably returns (allocating if needed) the L2 table for this pmd
 * -- TODO confirm against its definition.
 */
static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
unsigned long end, unsigned long pfn,
const struct mem_type *type)
{
pte_t *start_pte = early_pte_alloc(pmd);
pte_t *pte = start_pte + pte_index(addr);
/* If replacing a section mapping, the whole section must be replaced */
BUG_ON(pmd_bad(*pmd) && ((addr | end) & ~PMD_MASK));
do {
set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0);  /* one page */
pfn++;
} while (pte++, addr += PAGE_SIZE, addr != end);
/* finally point the pmd at the new pte table, with the L1 protection bits */
early_pte_install(pmd, start_pte, type->prot_l1);
}
arch/arm/include/asm/glue-proc.h:
#ifdef CONFIG_CPU_V7
/* If a CPU_NAME was already chosen by an earlier block, more than one CPU
 * type is configured, so switch to the MULTI_CPU indirection instead. */
# ifdef CPU_NAME
# undef MULTI_CPU
# define MULTI_CPU
# else
# define CPU_NAME cpu_v7
# endif
#endif
#ifndef MULTI_CPU
/* Single-CPU build: glue each generic cpu_* operation directly to its
 * CPU-specific implementation, e.g. cpu_set_pte_ext -> cpu_v7_set_pte_ext. */
#define cpu_proc_init __glue(CPU_NAME,_proc_init)
#define cpu_proc_fin __glue(CPU_NAME,_proc_fin)
#define cpu_reset __glue(CPU_NAME,_reset)
#define cpu_do_idle __glue(CPU_NAME,_do_idle)
#define cpu_dcache_clean_area __glue(CPU_NAME,_dcache_clean_area)
#define cpu_do_switch_mm __glue(CPU_NAME,_switch_mm)
#define cpu_set_pte_ext __glue(CPU_NAME,_set_pte_ext)
#define cpu_suspend_size __glue(CPU_NAME,_suspend_size)
#define cpu_do_suspend __glue(CPU_NAME,_do_suspend)
#define cpu_do_resume __glue(CPU_NAME,_do_resume)
#endif
arch/arm/mm/proc-v7-2level.S
/*
 * cpu_v7_set_pte_ext(ptep, pte)
 *
 * Set a level 2 translation table entry.
 *
 * - ptep - pointer to level 2 translation table entry
 * (hardware version is stored at +2048 bytes)
 * - pte - PTE value to store
 * - ext - value for extended PTE bits
 *
 * Stores the software (Linux) pte as-is, then derives the hardware
 * descriptor from it in r3 and stores that 2KB higher in the table.
 */
ENTRY(cpu_v7_set_pte_ext)
str r1, [r0] @ linux version
bic r3, r1, #0x000003f0 @ drop Linux-only status bits [9:4]
bic r3, r3, #PTE_TYPE_MASK @ clear the descriptor type field
orr r3, r3, r2 @ merge caller-supplied extended bits (ext)
orr r3, r3, #PTE_EXT_AP0 | 2 @ AP0 + small-page descriptor type
tst r1, #1 << 4 @ bit 4 of the Linux pte set?
orrne r3, r3, #PTE_EXT_TEX(1) @ ...then set TEX(1)
eor r1, r1, #L_PTE_DIRTY @ invert DIRTY so "clean" now tests as set
tst r1, #L_PTE_RDONLY | L_PTE_DIRTY @ read-only, or not dirty?
orrne r3, r3, #PTE_EXT_APX @ ...then hardware write-protect (APX)
tst r1, #L_PTE_USER @ user-accessible?
orrne r3, r3, #PTE_EXT_AP1 @ ...then grant user access (AP1)
tst r1, #L_PTE_XN @ execute-never requested?
orrne r3, r3, #PTE_EXT_XN @ ...then set hardware XN
tst r1, #L_PTE_YOUNG @ only valid if young AND present...
tstne r1, #L_PTE_PRESENT
moveq r3, #0 @ ...otherwise store a faulting (zero) entry
ARM( str r3, [r0, #2048]! ) @ hardware copy lives 2KB above the Linux one
mcr p15, 0, r0, c7, c10, 1 @ flush_pte
mov pc, lr
ENDPROC(cpu_v7_set_pte_ext)