下周准备做linux启动的技术讲座,在这里我慢慢整理下自己的材料,这次我写的是Image的启动过程,也就是zImage解压缩结束后的启动代码,这时候的代码开始地址仍然是0x30008000,下面我结合代码来讲吧:
Image的启动代码是在/arch/arm/kernel/head.S中的:
__INIT
.type
stext, %function
ENTRY(stext)
msr
cpsr_c, #PSR_F_BIT | PSR_I_BIT | MODE_SVC @ ensure svc mode
@ and irqs disabled
bl
__lookup_processor_type
@ r5=procinfo r9=cpuid
// __lookup_processor_type: walk the table between __proc_info_begin and __proc_info_end
// looking for an entry whose (value, mask) pair matches the CPU ID read from CP15 c0.
// Out: r5 = address of the matching entry, or 0 if no entry matched; r9 = CPU ID.
// Scratch: r3, r4, r6. Returns with "mov pc, lr".
.type
__lookup_processor_type, %function
__lookup_processor_type:
adr
r3, 3f
ldmda
r3, {r5, r6, r9}
@ ldmda loads downward from r3: [r3]->r9, [r3-4]->r6, [r3-8]->r5
sub
r3, r3, r9
@ get offset between virt&phys
// r3 = (run-time address of label 3) - (address stored at label 3 by ".long ."),
// i.e. the difference between the load address and the link address.
// The two adds below apply that offset so r5/r6 hold the run-time (RAM) addresses of
// __proc_info_begin and __proc_info_end.
add
r5, r5, r3
@ convert virt addresses to
add
r6, r6, r3
@ physical address space
mrc
p15, 0, r9, c0, c0
@ get processor id via the coprocessor read; the original annotation reports r9=0x41807202 on the sep4020
// ldmia without writeback loads upward from r5: [r5]->r3, [r5+4]->r4 (lower address into the
// lower-numbered register); r5 itself is unchanged after the instruction.
1:
ldmia
r5, {r3, r4}
@ value, mask
and
r4, r4, r9
@ mask wanted bits
// r4 = CPU ID & entry mask, compared below with the entry's expected value in r3
// (the original annotation gives the arm720t mask as 0xffffff00 -- not visible in this listing)
teq
r3, r4
beq
2f
@ match: jump straight to label 2 with r5 still pointing at this entry
add
r5, r5, #PROC_INFO_SZ
@ sizeof(proc_info_list)=48
cmp
r5, r6
blo
1b
mov
r5, #0
@ unknown processor
2:
mov
pc, lr
// lookup_processor_type: wrapper around __lookup_processor_type that preserves registers.
// Pushes r4-r6, r9 and lr, runs the lookup, and returns the lookup's r5 result in r0.
ENTRY(lookup_processor_type)
stmfd
sp!, {r4 - r6, r9, lr}
bl
__lookup_processor_type
mov
r0, r5
// restore the saved registers; the saved lr is popped into pc, which performs the return
ldmfd
sp!, {r4 - r6, r9, pc}
// Address table shared by the two lookup routines, anchored at local label 3:
//   [3 - 8]  __proc_info_begin
//   [3 - 4]  __proc_info_end
//   [3]      the link-time address of label 3 itself (".long .")
//   [3 + 4]  __arch_info_begin
//   [3 + 8]  __arch_info_end
// __lookup_processor_type reads downward from 3 with ldmda into r5, r6, r9;
// __lookup_machine_type reads upward from 3 with ldmia into r4, r5, r6.
.long
__proc_info_begin
.long
__proc_info_end
3:
.long
.
.long
__arch_info_begin
.long
__arch_info_end
movs
r10, r5
@ 是无效的处理器吗 (r5=0)? 即没有找到匹配的720t核
beq
__error_p
@ yes, error 'p'
bl
__lookup_machine_type
@ r5=machinfo
.type
__lookup_machine_type, %function
__lookup_machine_type:
adr
r3, 3b
ldmia
r3, {r4, r5, r6}
sub
r3, r3, r4
@ get offset between virt&phys
//r5=__arch_info_begin的加载地址
add
r5, r5, r3
@ convert virt addresses to
add
r6, r6, r3
@ physical address space
unsigned int
nr;
unsigned int __deprecated phys_ram;
unsigned int
phys_io;
unsigned int
io_pg_offst;
const char
*name;
unsigned long
boot_params;
unsigned int
video_start;
unsigned int
video_end;
unsigned int
reserve_lp0 :1;
unsigned int
reserve_lp1 :1;
unsigned int
reserve_lp2 :1;
unsigned int
soft_reboot :1;
void
(*fixup)(struct machine_desc *,
struct tag *, char **,
struct meminfo *);
void
(*map_io)(void);
void
(*init_irq)(void);
struct sys_timer
*timer;
void
(*init_machine)(void);
};
而对于我们的SEP4020其真正的定义是在/arch/arm/mach-sep4020/4020.c中
MACHINE_START(GFD4020, "4020 board")
.phys_io
= 0x10000000,
.io_pg_offst
= ((0xe0000000) >> 18) & 0xfffc,
.boot_params
= 0x30000100,
.fixup
= fixup_gfd4020,
.map_io
= sep4020_map_io,
.init_irq
=
sep4020_init_irq,
.init_machine
= sep4020_init,
.timer
= &sep4020_timer,
MACHINE_END
看到这里,我们就不难明白下边这条指令了,struct machine_desc 中第一个就是
nr,即 architecture number
r3 = MACH_TYPE_GFD4020
――――――――――――――――――――――――――――――――――――――――――――――*/
1:
ldr
r3, [r5, #MACHINFO_TYPE]
@ get machine type ,MACHINFO_TYPE = 0
//r1是由解压缩程序/arch/arm/boot/compressed/head.S最后传过来的,或者是uboot传过来的体系结构号
teq
r3, r1
@ matches loader number?
beq
2f
@ found
add
r5, r5, #SIZEOF_MACHINE_DESC
@ next machine_desc
cmp
r5, r6
blo
1b
mov
r5, #0
@ unknown machine
2:
mov
pc, lr
movs
r8, r5
@ invalid machine (r5=0)?是不是我们的SEP4020系统结构
beq
__error_a
@ yes, error 'a'
bl
__create_page_tables
.type
__create_page_tables, %function
__create_page_tables:
// Page offset: 3GB
PAGE_OFFSET是内核虚拟地址空间的起始地址(3GB处),定义在/include/asm/memory.h
#define PAGE_OFFSET
UL(0xc0000000)
#ifndef __virt_to_phys
#define __virt_to_phys(x)
((x) - PAGE_OFFSET + PHYS_OFFSET)
#define __phys_to_virt(x)
((x) - PHYS_OFFSET + PAGE_OFFSET)
#endif
而这其中的PHYS_OFFSET是主存RAM基址的物理地址,需要针对我们的SEP4020自行定义,我们是在/include/asm-arm/arch-sep4020/memory.h中定义的
#define PHYS_OFFSET
UL(0x30000000)
#define KERNEL_RAM_ADDR
(PAGE_OFFSET + TEXT_OFFSET)
@其中TEXT_OFFSET = 0x8000
//swapper_pg_dir是放启动时的临时页表的页表基址(虚地址)
.globl
swapper_pg_dir
.equ
swapper_pg_dir, KERNEL_RAM_ADDR - 0x4000
//这个宏就是根据内核ram首址(虚拟地址)计算出我们内核页表的页表基址(物理地址)
.macro
pgtbl, rd
ldr
\rd, =(__virt_to_phys(KERNEL_RAM_ADDR - 0x4000))
.endm
―――――――――――――――――――――――――――――――――――――――――――――― */
pgtbl
r4
@ page table address
//这样r4 = 内核页表的页表基址(物理地址)
mov
r0, r4
mov
r3, #0
//r6 = 页表基址(物理地址) + 0x4000,即16KB一级页表的结束地址,也就是KERNEL_RAM_ADDR对应的物理地址
add
r6, r0, #0x4000
//首先对16k的一级页表内容清0
1:
str
r3, [r0], #4
str
r3, [r0], #4
str
r3, [r0], #4
str
r3, [r0], #4
teq
r0, r6
bne
1b
//PROCINFO_MMUFLAGS = 8;这样 r7 = 0x00000c1e
mmuflags,一级段描述符 ,在proc-arm720.S 中定义
ldr
r7, [r10, #PROCINFO_MMUFLAGS]
@ mmuflags
//获取当前pc所在的段号(pc >> 20,每段1MB),保存到r6
mov
r6, pc, lsr #20
@ start of kernel section
//将段地址 或上mmuflags,然后赋值给r3
orr
r3, r7, r6, lsl #20
@ flags + kernel base
//开始放内核代码的段映射描述符,段表基址为r4,段表内的索引为r6,对应的字节偏移为r6<<2;
str
r3, [r4, r6, lsl #2]
@ identity mapping
//这样做(建立虚拟地址等于物理地址的一一映射)是为了防止后面在__turn_mmu_on函数中刚打开MMU时pc值飞掉
//#define TEXTADDR
KERNEL_RAM_ADDR即等于0xc0008000
//虚拟地址TEXTADDR(0xc0008000)所在的段使用与前面pc所在段相同的段描述符r3,即两者映射到同一块物理内存(同一份代码),这样打开MMU后跳转到内核的链接地址(虚拟地址)时仍能继续执行
add
r0, r4,
#(TEXTADDR & 0xff000000) >> 18
@ start of kernel
//把start of kernel(0xc0008000)的虚拟地址映射起来,即将它的段描述符保存到段表的相应索引处位置
str
r3, [r0, #(TEXTADDR & 0x00f00000) >> 18]!
//建立start of kernel+1MB的虚拟地址映射
add
r3, r3, #1 << 20
str
r3, [r0, #4]!
@ KERNEL + 1MB
//建立start of kernel+2MB的虚拟地址映射
add
r3, r3, #1 << 20
str
r3, [r0, #4]!
@ KERNEL + 2MB
//建立start of kernel+3MB的虚拟地址映射
add
r3, r3, #1 << 20
str
r3, [r0, #4]
@ KERNEL + 3MB
add
r0, r4, #PAGE_OFFSET >> 18
orr
r6, r7, #PHYS_OFFSET
str
r6, [r0]
mov
pc, lr
―――――――――――――从__create_page_tables返回――――――――――――――――――――――――――――――――― */
//__switch_data是一个标签(即是一个地址),这个即是r13 = __mmap_switched(函数指针)
ldr
r13, __switch_data
@ address to jump to after
@ mmu has been enabled
//将__enable_mmu的地址(用adr取得,是与位置无关的地址)赋值给lr,作为返回地址,相当于模拟了一次函数调用:后面的setup函数执行mov pc, lr时就会转到__enable_mmu
adr
lr, __enable_mmu
@ return (PIC) address
add
pc, r10, #PROCINFO_INITFUNC
//转到__arm720_setup函数来执行
// __arm720_setup: CPU-specific setup that runs before the MMU is enabled.
// Invalidates the caches and the TLB, then builds the new control-register value in r0
// from the current CP15 c1 contents and the arm720_cr1_clear / arm720_cr1_set masks.
// Out: r0 = control-register value to be written later. Scratch: r5.
// Returns through lr, which the caller in this listing sets to __enable_mmu.
.type
__arm720_setup, #function
__arm720_setup:
mov
r0, #0
// write CP15 c7 to invalidate the cache contents
mcr
p15, 0, r0, c7, c7, 0
@ invalidate caches
// invalidate every address-translation entry held in the TLB
mcr
p15, 0, r0, c8, c7, 0
@ flush TLB (v4)
// read CP15 register c1 into r0
mrc
p15, 0, r0, c1, c0
@ get control register
ldr
r5, arm720_cr1_clear
bic
r0, r0, r5
ldr
r5, arm720_cr1_set
// per the original annotation r0 becomes 0x013d here (MMU on, alignment check off, cache on,
// write buffer on, little-endian, system protection on, ROM protection off); the mask values
// themselves are defined outside this listing
orr
r0, r0, r5
// return via lr; with lr = __enable_mmu this continues in __enable_mmu
mov
pc, lr
@ __ret (head.S)
.size
__arm720_setup, . - __arm720_setup
// __enable_mmu: last adjustments before the MMU is switched on.
// In:  r0 = control-register value prepared by the CPU setup routine,
//      r4 = page table address.
// Adjusts r0 according to the CONFIG_* options below, loads the domain access register (c3)
// and the page table pointer (c2), then branches to __turn_mmu_on. Scratch: r5.
.type
__enable_mmu, %function
__enable_mmu:
// The original annotation says "#define CONFIG_ALIGNMENT_TRAP 1" lives in /include/linux/Autoconfig.h
// (presumably include/linux/autoconf.h -- confirm), a header generated from the Kconfig files by make.
#ifdef CONFIG_ALIGNMENT_TRAP
orr
r0, r0, #CR_A
@ CR_A: alignment-check bit (bit 1 per the original annotation)
#else
bic
r0, r0, #CR_A
#endif
#ifdef CONFIG_CPU_DCACHE_DISABLE
bic
r0, r0, #CR_C
@ CR_C: D-cache enable bit (bit 2 per the original annotation); cleared here to disable it
#endif
#ifdef CONFIG_CPU_BPREDICT_DISABLE
bic
r0, r0, #CR_Z
@ CR_Z: branch-prediction bit (bit 11 per the original annotation)
#endif
#ifdef CONFIG_CPU_ICACHE_DISABLE
bic
r0, r0, #CR_I
@ CR_I: I-cache enable bit (bit 12 per the original annotation); cleared here to disable it
#endif
//#define domain_val(dom,type)
((type) << (2*(dom)))在/include/asm-arm/Domain.h定义
// An MMU domain is a collection of sections, large pages or small pages; bits in each page-table descriptor name the domain (0-15) that the memory region belongs to.
mov
r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
domain_val(DOMAIN_IO, DOMAIN_CLIENT))
// program the CP15 domain access control register
mcr
p15, 0, r5, c3, c0, 0
@ load domain access register
// program the CP15 page table base register c2; r4 is the start address of the page table
mcr
p15, 0, r4, c2, c0, 0
@ load page table pointer
b
__turn_mmu_on
// __turn_mmu_on: write the prepared control-register value (r0) to CP15 c1, which turns the
// MMU on, then jump to the address held in r13.
// In: r0 = control-register value, r13 = address to continue at (loaded from __switch_data
// by the caller in this listing). Clobbers r3.
.align
5
.type
__turn_mmu_on, %function
__turn_mmu_on:
mov
r0, r0
// write CP15 c1: this is the instruction that enables the MMU
mcr
p15, 0, r0, c1, c0, 0
@ write control reg
// per the original annotation: the ARM720T has a three-stage pipeline, so three more
// instructions are executed to let the pipeline fill
// read the ID register
mrc
p15, 0, r3, c0, c0, 0
@ read id reg
// right after the MMU comes on, this code runs under the identity (virtual == physical)
// mapping, so pc needs no adjustment yet
mov
r3, r3
mov
r3, r3
// jump to the address in r13 (__mmap_switched per the annotation at the load site)
mov
pc, r13
// __mmap_switched: runs after the MMU is on.
// Using the addresses listed in __switch_data it (1) copies the data segment from __data_loc to
// __data_start when the two differ, (2) zeroes the region from __bss_start up to _end,
// (3) stores the CPU ID (r9), the machine type (r1) and the control-register values (r0, and
// r0 with CR_A cleared) into processor_id, __machine_arch_type and cr_alignment, (4) loads sp,
// and finally branches to start_kernel.
// In: r0 = control-register value, r1 = machine type, r9 = CPU ID.
.type
__mmap_switched, %function
__mmap_switched:
// r3 = address of the second word of __switch_data (the slot holding __data_loc)
adr
r3, __switch_data + 4
ldmia
r3!, {r4, r5, r6, r7}
cmp
r4, r5
@ Copy data segment if needed
// copy words from [r4] to [r5] until r5 reaches r6; skipped entirely when r4 == r5
1:
cmpne
r5, r6
ldrne
fp, [r4], #4
@ fp is the frame pointer register, i.e. r11
strne
fp, [r5], #4
bne
1b
// zero the BSS region
mov
fp, #0
@ Clear BSS (and zero fp)
1:
cmp
r6, r7
strcc
fp, [r6],#4
@ cc = carry clear, i.e. unsigned lower
bcc
1b
ldmia
r3, {r4, r5, r6, sp}
// store the CPU ID to the word r4 points at (processor_id per __switch_data)
str
r9, [r4]
@ Save processor ID
// store the machine number to the word r5 points at (__machine_arch_type per __switch_data)
str
r1, [r5]
@ Save machine type
bic
r4, r0, #CR_A
@ Clear 'A' bit
stmia
r6, {r0, r4}
@ Save control register values
// enter start_kernel
b
start_kernel
// __switch_data: table of addresses consumed around the MMU switch.
// Word 0 (__mmap_switched) is loaded into r13 before the MMU is enabled; __mmap_switched then
// reads the remaining words in two ldmia groups, into the registers named on each line.
.type
__switch_data, %object
__switch_data:
.long
__mmap_switched
.long
__data_loc
@ r4
.long
__data_start
@ r5
.long
__bss_start
@ r6
.long
_end
@ r7
.long
processor_id
@ r4
.long
__machine_arch_type
@ r5
.long
cr_alignment
@ r6
.long
init_thread_union + THREAD_START_SP @ sp