这个题目取得好:好像底层离我们越来越远了,因为内核正在一步一步脱壳,很快,它漂亮的形状就会展现在我们眼前。在这之前,我们得屏气凝神,静静地等待这一时刻的到来。进入真实内核的第一个文件是 arch/x86/kernel/head_32.S。
/*
* 32-bit kernel entrypoint; only used by the boot CPU. On entry,
* %esi points to the real-mode code as a 32-bit pointer.
* CS and DS must be 4 GB flat segments, but we don't depend on
* any particular GDT layout, because we load our own as soon as we
* can.
*
* This first part of startup_32 reloads the data segments (unless the
* boot loader asked us not to), zeroes the BSS, and copies the boot
* parameters and the kernel command line into kernel buffers.
*/
__HEAD
ENTRY(startup_32)
/* test KEEP_SEGMENTS flag to see if the bootloader is asking
us to not reload segments */
testb $(1<<6), BP_loadflags(%esi) # Bit 6 of loadflags: the boot loader asks us to keep its segments
jnz 2f
/*
* Set segments to known values.
*/
lgdt pa(boot_gdt_descr) # Load the boot GDT; boot_gdt_descr is defined outside this excerpt
movl $(__BOOT_DS),%eax
movl %eax,%ds
movl %eax,%es
movl %eax,%fs
movl %eax,%gs
2:
/*
* Clear BSS first so that there are no surprises...
*/
# Zero the BSS: %ecx = (__bss_stop - __bss_start) / 4 dwords, %eax = 0
cld /* string instructions advance %esi/%edi upwards */
xorl %eax,%eax
movl $pa(__bss_start),%edi
movl $pa(__bss_stop),%ecx
subl %edi,%ecx
shrl $2,%ecx
rep ; stosl
/*
* Copy bootup parameters out of the way.
* Note: %esi still has the pointer to the real-mode data.
* With the kexec as boot loader, parameter segment might be loaded beyond
* kernel image and might not even be addressable by early boot page tables.
* (kexec on panic case). Hence copy out the parameters before initializing
* page tables.
*/
# Copy PARAM_SIZE bytes of real-mode data from (%esi) into boot_params
movl $pa(boot_params),%edi
movl $(PARAM_SIZE/4),%ecx /* dword count; PARAM_SIZE is presumably sizeof(struct boot_params) - confirm */
cld
rep
movsl /* copy %ds:(%esi) to %es:(%edi) */
# Copy the kernel command line into boot_command_line.
# First fetch the command line pointer from the copied boot_params.
movl pa(boot_params) + NEW_CL_POINTER,%esi /* boot_params->hdr.cmd_line_ptr, per the original annotation */
andl %esi,%esi /* sets ZF when the pointer is NULL */
jz 1f # No command line
movl $pa(boot_command_line),%edi /* destination buffer; the original annotation places it in init/main.c */
movl $(COMMAND_LINE_SIZE/4),%ecx
rep
movsl
1:
# OLPC OpenFirmware only; can be skipped on a first read
#ifdef CONFIG_OLPC_OPENFIRMWARE /* OLPC firmware support */
/* save OFW's pgdir table for later use when calling into OFW */
movl %cr3, %eax
movl %eax, pa(olpc_ofw_pgd)
#endif
# Paravirtualized environments only; can be skipped on a first read.
# Dispatches to a per-subarchitecture entry point chosen by
# boot_params.hardware_subarch.
#ifdef CONFIG_PARAVIRT /* virtual environment */
/* This can only trip for a broken bootloader... */
cmpw $0x207, pa(boot_params + BP_version)
jb default_entry
/* Paravirt-compatible boot parameters. Look to see what architecture
we're booting under. */
movl pa(boot_params + BP_hardware_subarch), %eax
cmpl $num_subarch_entries, %eax
jae bad_subarch /* unsigned: taken when eax >= num_subarch_entries */
movl pa(subarch_entries)(,%eax,4), %eax /* eax = subarch_entries[eax] (4-byte table entries) */
subl $__PAGE_OFFSET, %eax /* presumably converts the link-time address to the one usable before paging */
jmp *%eax
bad_subarch:
WEAK(lguest_entry)
WEAK(xen_entry)
/* Unknown implementation; there's really
nothing we can do at this point. */
ud2a
__INITDATA
subarch_entries:
.long default_entry /* normal x86/PC */ /* hardware_subarch = 0 */
.long lguest_entry /* lguest hypervisor */ /* hardware_subarch = 1 */
.long xen_entry /* Xen hypervisor */ /* hardware_subarch = 2 */
.long default_entry /* Moorestown MID */
num_subarch_entries = (. - subarch_entries) / 4
.previous
#endif /* CONFIG_PARAVIRT */
/*
* Initialize page tables. This creates a PDE and a set of page
* tables, which are located immediately beyond __brk_base. The variable
* _brk_end is set up to point to the first "safe" location.
* Mappings are created both at virtual address 0 (identity mapping)
* and PAGE_OFFSET for up to _end.
*
* Note that the stack is not yet set up!
*/
# This section is critical: up to here paging has not been enabled.
# The code below fills in the page directory and page tables; paging
# itself is switched on later.
default_entry:
# PAE (physical address extension, up to 64G) variant; the original
# author suggests skipping it on a first read.
#ifdef CONFIG_X86_PAE /* Physical Address Extension, 64G */
/*
* In PAE mode swapper_pg_dir is statically defined to contain enough
* entries to cover the VMSPLIT option (that is the top 1, 2 or 3
* entries). The identity mapping is handled by pointing two PGD
* entries to the first kernel PMD.
*
* Note the upper half of each PMD or PTE are always zero at
* this stage.
*/
#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
/* Original annotation: __PAGE_OFFSET = 0xc0000000 */
xorl %ebx,%ebx /* %ebx is kept at zero */
movl $pa(__brk_base), %edi /* %edi = where the next page table is written */
movl $pa(swapper_pg_pmd), %edx /* %edx = next PMD entry to fill */
movl $PTE_IDENT_ATTR, %eax /* %eax = next PTE value (page address | attributes) */
10:
leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */
movl %ecx,(%edx) /* Store PMD entry */
/* Upper half already zero */
addl $8,%edx /* PMD entries are 8 bytes wide */
movl $512,%ecx /* 512 eight-byte PTEs per page table */
11:
stosl /* low dword of the PTE */
xchgl %eax,%ebx
stosl /* high dword of the PTE (zero, from %ebx) */
xchgl %eax,%ebx
addl $0x1000,%eax /* advance to the next 4k page */
loop 11b
/*
* End condition: we must map up to the end + MAPPING_BEYOND_END.
*/
movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
cmpl %ebp,%eax
jb 10b
1:
addl $__PAGE_OFFSET, %edi /* end of the page tables as a virtual address */
movl %edi, pa(_brk_end)
shrl $12, %eax /* drop the attribute bits: number of 4k pages mapped */
movl %eax, pa(max_pfn_mapped)
/* Do early initialization of the fixmap area */
movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax
movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) /* last 8-byte entry of the KPMDS pages at swapper_pg_pmd */
#else /* Not PAE */
# Without PAE the linear address space is 4G; per the original author
# this is the usual case.
/* 4G physical address */
# Important: page_pde_offset is the byte offset, inside the page
# directory, of the entry that maps the start of kernel space.
# In Linux the process address space is 0-3G and the top 1G is used by
# the kernel, so kernel mappings start at 3G.
# The page directory is 4k in size with 4-byte entries,
# and each entry covers 4M of linear address space.
# Offset 3k therefore corresponds to 3G: 3k / 4 * 4M = 3G.
page_pde_offset = (__PAGE_OFFSET >> 20); /* 0xc00 = 3k when __PAGE_OFFSET is 0xc0000000 */
# The symbol __brk_base comes from vmlinux.lds.S; after linking it marks
# where the page tables start. The next eight lines are an excerpt of that
# linker script quoted by the article, not instructions of this file.
. = ALIGN(PAGE_SIZE);
.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
__brk_base = .;
. += 64 * 1024; /* 64k alignment slop space */
*(.brk_reservation) /* areas brk users have reserved */
__brk_limit = .;
}
_end = .;
# Original annotation: the total size is 64k, enough page tables for 64M
# of linear addresses.
# NOTE(review): the excerpt also places *(.brk_reservation) before
# __brk_limit, so the region may be larger than 64k - confirm.
movl $pa(__brk_base), %edi /* %edi = physical address of the first page table */
movl $pa(swapper_pg_dir), %edx # Physical address of the page directory
# PTE_IDENT_ATTR is only a set of attribute bits.
# Page directory entries and page table entries are both 4 bytes wide,
# and the low 12 bits of each entry hold the attributes.
movl $PTE_IDENT_ATTR, %eax /* original annotation: PTE_IDENT_ATTR = 0x00000067 */
10:
# Outer loop: one iteration per page directory entry.
# %edi holds the physical address of the page table about to be filled.
leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */
# Store the new entry twice in the page directory that %edx points into
movl %ecx,(%edx) /* Store identity PDE entry */
movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
# Advance to the next page directory entry
addl $4,%edx
movl $1024, %ecx
11: /* fill one 4k page table (1024 entries) */
# Inner loop: %edi walks the page table, %eax is the entry value to store
stosl /* store %eax at %es:(%edi), then %edi += 4 */
addl $0x1000,%eax # Low 12 bits are attributes, high 20 bits the 4k-aligned page address
loop 11b
/*
* End condition: we must map up to the end + MAPPING_BEYOND_END.
*/ /* original annotation: _end - __brk_base = 64k */
# Here %eax is the next PTE value, i.e. the physical address of the first
# page not yet mapped, plus the attribute bits. Keep adding page tables
# while that address is below pa(_end) + MAPPING_BEYOND_END.
# The original author notes not fully understanding MAPPING_BEYOND_END
# (defined outside this excerpt) and computing it as 0x0c400000 -
# unverified here.
movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
cmpl %ebp,%eax
jb 10b /* unsigned: loop while eax < ebp */
# Store the virtual address of the end of the page tables in _brk_end
addl $__PAGE_OFFSET, %edi
movl %edi, pa(_brk_end) /* _brk_end = edi + __PAGE_OFFSET */
# Shifting out the attribute bits leaves the number of 4k pages mapped
shrl $12, %eax
movl %eax, pa(max_pfn_mapped) /* number of 4k physical pages mapped so far */
/* Do early initialization of the fixmap area */
# One more page directory entry is added, pointing at swapper_pg_fixmap.
# It goes into the last entry of the page directory (offset 0xffc).
movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax
movl %eax,pa(swapper_pg_dir+0xffc) /* last 4-byte entry of the 4k page directory */
#endif
# Skip over the secondary-CPU entry point to the common code at label 3
jmp 3f
/*
* Non-boot CPU entry point; entered from trampoline.S
* We can't lgdt here, because lgdt itself uses a data segment, but
* we know the trampoline has already loaded the boot_gdt for us.
*
* If cpu hotplug is not supported then this code can go in init section
* which will be freed later
*/
__CPUINIT
# Multiprocessor (SMP) builds only: load __BOOT_DS into the data segment
# registers, then fall through to label 3.
#ifdef CONFIG_SMP
ENTRY(startup_32_smp)
cld
movl $(__BOOT_DS),%eax
movl %eax,%ds
movl %eax,%es
movl %eax,%fs
movl %eax,%gs
#endif /* CONFIG_SMP */
3:
/*
* New page tables may be in 4Mbyte page mode and may
* be using the global pages.
*
* NOTE! If we are on a 486 we may have no cr4 at all!
* So we do not try to touch it unless we really have
* some bits in it to set. This won't work if the BSP
* implements cr4 but this AP does not -- very unlikely
* but be warned! The same applies to the pse feature
* if not equally supported. --macro
*
* NOTE! We have to correct for the fact that we're
* not yet offset PAGE_OFFSET..
*/
# When mmu_cr4_features is non-zero its bits (for example PSE, which
# selects 4M pages) are OR-ed into CR4 here.
# The original author suggests skipping ahead to label 6 on a first read.
#define cr4_bits pa(mmu_cr4_features) /* CR4 bits to enable, read from mmu_cr4_features */
movl cr4_bits,%edx
andl %edx,%edx /* any CR4 bits requested? */
jz 6f /* none requested: leave CR4 untouched */
movl %cr4,%eax # Turn on paging options (PSE,PAE,..)
orl %edx,%eax
movl %eax,%cr4
testb $X86_CR4_PAE, %al # check if PAE is enabled
jz 6f
/* Check if extended functions are implemented */
movl $0x80000000, %eax
cpuid
/* Value must be in the range 0x80000001 to 0x8000ffff */
subl $0x80000001, %eax
cmpl $(0x8000ffff-0x80000001), %eax
ja 6f
mov $0x80000001, %eax
cpuid
/* Execute Disable bit supported? */
btl $(X86_FEATURE_NX & 31), %edx
jnc 6f
/* Setup EFER (Extended Feature Enable Register) */
movl $MSR_EFER, %ecx
rdmsr
btsl $_EFER_NX, %eax
/* Make changes effective */
wrmsr
6:
/*
* Enable paging
*/
# The original annotation describes this as fully entering protected
# mode; what the code does is load CR3 and set CR0.PG, i.e. turn paging on.
# The value stored at initial_page_table (a memory operand, not an
# immediate) is loaded and written to CR3 as the page directory base.
movl pa(initial_page_table), %eax
movl %eax,%cr3 /* set the page table pointer.. */
# Set the paging bit in CR0
movl %cr0,%eax
orl $X86_CR0_PG,%eax /* original annotation: X86_CR0_PG = 0x80000000 */
movl %eax,%cr0 /* ..and set paging (PG) bit */
# Far jump to the next instruction so that execution continues with
# paging in effect
ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */
1: /* paging is enabled from here on */
/* Set up the stack pointer */
# Load %ss:%esp from stack_start
lss stack_start,%esp
/*
* Initialize eflags. Some BIOS's leave bits like NT set. This would
* confuse the debugger if this code is traced.
* XXX - best to initialize before switching to protected mode.
*/
# Load EFLAGS with zero
pushl $0
popfl
# SMP only; can be skipped on a first read. A non-zero ready flag sends
# the CPU straight to checkCPUtype.
#ifdef CONFIG_SMP
cmpb $0, ready
jz 1f /* Initial CPU cleans BSS */
jmp checkCPUtype
1:
#endif /* CONFIG_SMP */
/*
* start system 32-bit setup. We need to re-do some of the things done
* in 16-bit mode for the "real" operations.
*/
# Populate the IDT (interrupt descriptor table). The setup_idt header
# notes that the table is only filled in here and is not loaded yet.
call setup_idt
# Detect the CPU type
checkCPUtype:
movl $-1,X86_CPUID # -1 for no CPUID initially
/* check if it is 486 or 386. */
/*
* XXX - this does a lot of unnecessary setup. Alignment checks don't
* apply at our cpl of 0 and the stack ought to be aligned already, and
* we don't need to preserve eflags.
*/
# The original author suggests skipping this detection code on a first
# read and continuing at label is386.
movb $3,X86 # at least 386
pushfl # push EFLAGS
popl %eax # get EFLAGS
movl %eax,%ecx # save original EFLAGS
xorl $0x240000,%eax # flip AC and ID bits in EFLAGS
pushl %eax # copy to EFLAGS
popfl # set EFLAGS
pushfl # get new EFLAGS
popl %eax # put it in eax
xorl %ecx,%eax # change in flags
pushl %ecx # restore original EFLAGS
popfl
testl $0x40000,%eax # check if AC bit changed
je is386
movb $4,X86 # at least 486
testl $0x200000,%eax # check if ID bit changed
je is486
/* get vendor info */
xorl %eax,%eax # call CPUID with 0 -> return vendor ID
cpuid
movl %eax,X86_CPUID # save CPUID level
movl %ebx,X86_VENDOR_ID # lo 4 chars
movl %edx,X86_VENDOR_ID+4 # next 4 chars
movl %ecx,X86_VENDOR_ID+8 # last 4 chars
orl %eax,%eax # do we have processor info as well?
je is486
movl $1,%eax # Use the CPUID instruction to get CPU type
cpuid
movb %al,%cl # save reg for future use
andb $0x0f,%ah # mask processor family
movb %ah,X86
andb $0xf0,%al # mask model
shrb $4,%al
movb %al,X86_MODEL
andb $0x0f,%cl # mask revision
movb %cl,X86_MASK
movl %edx,X86_CAPABILITY
is486: movl $0x50022,%ecx # set AM, WP, NE and MP
jmp 2f
# What follows sets the CR0 bits chosen in %ecx, probes the x87, loads the
# GDT and IDT, and reloads the segment registers.
is386: movl $2,%ecx # set MP
2: movl %cr0,%eax
andl $0x80000011,%eax # Save PG,PE,ET
orl %ecx,%eax
movl %eax,%cr0
call check_x87
lgdt early_gdt_descr
lidt idt_descr
ljmp $(__KERNEL_CS),$1f
1: movl $(__KERNEL_DS),%eax # reload all the segment registers
movl %eax,%ss # after changing gdt.
movl $(__USER_DS),%eax # DS/ES contains default USER segment
movl %eax,%ds
movl %eax,%es
movl $(__KERNEL_PERCPU), %eax
movl %eax,%fs # set this cpu's percpu
# Stack protector support; can be skipped on a first read
#ifdef CONFIG_CC_STACKPROTECTOR
/*
* The linker can't handle this by relocation. Manually set
* base address in stack canary segment descriptor.
*/
cmpb $0,ready
jne 1f
movl $gdt_page,%eax
movl $stack_canary,%ecx
movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) /* address bits 15..0 into descriptor bytes 2-3 */
shrl $16, %ecx
movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) /* address bits 23..16 into descriptor byte 4 */
movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax) /* address bits 31..24 into descriptor byte 7 */
1:
#endif
# Execution continues here after the optional stack protector block
movl $(__KERNEL_STACK_CANARY),%eax
movl %eax,%gs
xorl %eax,%eax # Clear LDT
lldt %ax
cld # gcc2 wants the direction flag cleared at all times
pushl $0 # fake return address for unwinder
# SMP only; can be skipped on a first read. Sets ready to 1; a CPU that
# found it already non-zero reloads %esp from stack_start.
#ifdef CONFIG_SMP
movb ready, %cl
movb $1, ready
cmpb $0,%cl # the first CPU calls start_kernel
je 1f
movl (stack_start), %esp
1:
#endif /* CONFIG_SMP */
# Key point: jump through the pointer stored at initial_code. Per the
# original author this is where execution enters C code.
jmp *(initial_code)
/*
* We depend on ET to be correct. This checks for 287/387.
*
* Writes X86_HARD_MATH: 1 when the low byte of the FPU status word reads
* back as zero after fninit, 0 otherwise (in which case bit 2 of CR0 is
* toggled). Clobbers %eax.
*/
# Called from the CR0 setup code at label 2, which both the is386 and
# is486 paths reach.
check_x87:
movb $0,X86_HARD_MATH
clts
fninit
fstsw %ax
cmpb $0,%al /* low byte of the status word zero after fninit? */
je 1f
movl %cr0,%eax /* no coprocessor: have to set bits */
xorl $4,%eax /* set EM (xor toggles bit 2; the CR0 mask 0x80000011 applied by the caller left it clear) */
movl %eax,%cr0
ret
ALIGN
1: movb $1,X86_HARD_MATH
.byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */
ret
/*
* setup_idt
*
* sets up a idt with 256 entries pointing to
* ignore_int, interrupt gates. It doesn't actually load
* idt - that can be done only after paging has been enabled
* and the kernel moved to PAGE_OFFSET. Interrupts
* are enabled elsewhere, when we can be relatively
* sure everything is ok.
*
* Warning: %esi is live across this function.
*
* NOTE(review): this excerpt stops after the fill loop; the rest of the
* routine (including its return) is not shown here.
*/
# Fill in the IDT
setup_idt:
# Address of the default interrupt handler
lea ignore_int,%edx
# The gate must use a code segment selector, here the kernel code segment
movl $(__KERNEL_CS << 16),%eax /* selector goes in the upper 16 bits of the low dword */
movw %dx,%ax /* selector = 0x0010 = cs */
movw $0x8E00,%dx /* interrupt gate - dpl=0, present */
lea idt_table,%edi # Start address of the IDT
mov $256,%ecx # Fill 256 entries in total
rp_sidt:
movl %eax,(%edi) /* low dword: selector and handler address bits 15..0 */
movl %edx,4(%edi) /* high dword: handler address bits 31..16 and gate flags */
addl $8,%edi /* each gate descriptor is 8 bytes */
dec %ecx
jne rp_sidt
linux内核学习(13)我们开始慢慢地往上爬...(上)
最新推荐文章于 2022-11-06 16:58:25 发布