Little Kernel代码学习笔记

fy_闷油瓶

已于 2023-08-31 20:55:40 修改

阅读量337

点赞数

分类专栏： Linux内核文章标签：学习笔记

于 2023-08-25 12:06:03 首次发布

本文链接：https://blog.csdn.net/fyadf/article/details/132490423

版权

Linux内核专栏收录该内容

1 篇文章 0 订阅

订阅专栏

虚拟地址转换为物理地址

// start.S

/*
 * Address conversion helpers used by the early boot code.
 * MEMBASE, KERNEL_BASE and KERNEL_LOAD_OFFSET are defined outside this snippet.
 */
/* address the kernel image is loaded at: MEMBASE plus the load offset */
#define PHYS_LOAD_ADDRESS (MEMBASE + KERNEL_LOAD_OFFSET)
/* distance between the KERNEL_BASE-relative address and the load address */
#define PHYS_ADDR_DELTA (KERNEL_BASE + KERNEL_LOAD_OFFSET - PHYS_LOAD_ADDRESS)
/* subtract that distance from x (presumably a link-time virtual address) to get its physical address */
#define PHYS(x) ((x) - PHYS_ADDR_DELTA)

PHYS(x) 将x转换为物理地址

内核启动

Multiboot头部结构

// start.S

/* boot entry point: lives in its own section and is assembled as 32-bit code */
.section ".text.boot"
.code32
.global _start
_start:
    /* jump over the multiboot header data that follows */
    jmp real_start

.align 8

/* flags for multiboot header */
#define MULTIBOOT_HEADER_FLAGS (MULTIBOOT_PAGE_ALIGN | MULTIBOOT_MEMORY_INFO | MULTIBOOT_AOUT_KLUDGE)
/* flag values: MULTIBOOT_PAGE_ALIGN 0x00000001, MULTIBOOT_MEMORY_INFO 0x00000002, MULTIBOOT_AOUT_KLUDGE 0x00010000 */
.type multiboot_header,STT_OBJECT
multiboot_header:
    /* magic */
    .int MULTIBOOT_HEADER_MAGIC
    /* flags */
    .int MULTIBOOT_HEADER_FLAGS
    /* checksum: chosen so that magic + flags + checksum adds up to zero */
    .int -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)

    /* the five address fields below are all emitted as physical addresses via PHYS() */
    /* header_addr: address of this header */
    .int PHYS(multiboot_header)
    /* load_addr: start of the image (_start) */
    .int PHYS(_start)
    /* load_end_addr: end of the data section */
    .int PHYS(__data_end)
    /* bss_end_addr: end of the bss section */
    .int PHYS(__bss_end)
    /* entry_addr: where execution begins (real_start) */
    .int PHYS(real_start)

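刚启动时CPU处于32位保护模式，因此使用32位指令集（.code32）。MULTIBOOT_HEADER_FLAGS 用于告知引导加载程序内核需要哪些功能：此处要求按4K字节（页）对齐、multiboot_info中必须包含mem_*字段，并声明Multiboot头部偏移12-28处的地址字段（header_addr、load_addr、load_end_addr、bss_end_addr、entry_addr）有效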
图中代码在multiboot.h
multiboot_info结构
图中代码在start.S
Multiboot header地址含义可以参考Multiboot技术文档3.1.3小节，Multiboot_info可参考3.3小节

启动时的寄存器状态

Multiboot协议规定，EAX = 0x2BADB002（魔数）表明操作系统是被符合Multiboot的加载程序进行加载的，此外Multiboot协议规定，EBX必须包含Multiboot_info的32位物理地址。有关机器启动时的状态可参考文档3.2小节。

real_start

// start.S

/*
 * real_start: 32-bit entry point reached from _start.
 *
 * In:  eax = boot magic, ebx = value handed over by the boot loader
 *      (stored to _multiboot_info only when eax equals
 *      MULTIBOOT_BOOTLOADER_MAGIC).
 * Every symbol is referenced through PHYS() because this code runs
 * before paging is turned on.
 * Leaves via lret to .Lfarjump with CS = CODE_SELECTOR.
 */
real_start:
    cmpl $MULTIBOOT_BOOTLOADER_MAGIC, %eax
    jne 0f
    movl %ebx, PHYS(_multiboot_info)

0:
    /* load our new gdt by physical pointer */
    lgdt PHYS(_gdtr_phys)

    /* load our data selectors; each segment register is loaded exactly once,
     * with %ss last so that it is immediately followed by the %esp load */
    movw $DATA_SELECTOR, %ax
    movw %ax, %ds
    movw %ax, %es
    movw %ax, %fs
    movw %ax, %gs
    movw %ax, %ss

    /* load initial stack pointer: top of the 4096-byte _kstack area */
    movl $PHYS(_kstack + 4096), %esp

    /* far jump to load the CS from our GDT: lret pops the offset, then the selector */
    pushl $CODE_SELECTOR
    pushl $PHYS(.Lfarjump)
    lret

在real_start开始部分，先检查EAX中的值是否等于Multiboot魔数，等于则将EBX的值（multiboot_info的物理地址）保存到变量_multiboot_info中（此时尚未开启分页，因此通过PHYS()以物理地址访问该变量），否则直接跳转到标号0处执行。

将全局描述符表gdt的物理地址加载到GDTR中，然后将段寄存器的值设置为DATA_SELECTOR = 0x10 = 0001 0000

段选择子

在这里插入图片描述
Requestor Privilege-Level (RPL)表示请求访问该段时所使用的特权级别（请求特权级）；处理器当前运行的特权级别称为CPL
Table Indicator (TI)表示选择哪个描述符表，TI=0使用GDT，TI=1使用LDT
Selector Index Field(SI)表示索引
因此DATA_SELECTOR = 0x10 = 0001 0000 表示RPL=0，即最高权限；TI=0，使用GDT；Index为2，即使用GDT中索引为2的段描述符（索引0为空描述符，所以它是第二个有效的段描述符）
图中代码在gdt.S
在这里插入图片描述

段寄存器设置好后，用一个4K的数组作为栈，数组末尾作为栈顶，然后将CODE_SELECTOR和.Lfarjump的物理地址压栈，再通过lret指令弹出这两个值，从而加载CS并跳转到.Lfarjump处运行。
CODE_SELECTOR = 0x08 = 0000 1000 即选择GDT中索引为1的段描述符（第一个有效的段描述符）

初始化BSS段

//start.S

.Lfarjump:

    /*
     * zero the bss section, one 32-bit word at a time.
     * Uses edi (write pointer) and ecx (remaining word count).
     */
bss_setup:
    movl $PHYS(__bss_start), %edi /* starting address of the bss */
    movl $PHYS(__bss_end), %ecx
    subl %edi, %ecx     /* ecx = length of the bss in bytes */
    shrl $2, %ecx       /* convert to 32 bit words, since the bss is aligned anyway */
    jz   .Lbss_done     /* empty bss: skip the loop instead of storing past the end */
2:
    movl $0, (%edi)
    addl $4, %edi
    decl %ecx
    jnz  2b
.Lbss_done:

初始化BSS段，其中__bss_start、__bss_end在kernel.ld文件中定义，在链接阶段分配地址

页表转换设置

CR4、CR3、EFER寄存器设置

//start.S

paging_setup:
    /* Preparing 64 bit paging. We will use 2MB pages covering 1GB
     * for initial bootstrap, this page table will be 1 to 1.
     */

    /* PAE bit (bit 5 of CR4) must be enabled for 64 bit paging */
    mov %cr4, %eax
    btsl $(5), %eax
    mov %eax, %cr4

    /* load the physical pointer to the top level page table */
    movl $PHYS(kernel_pml4), %eax
    mov %eax, %cr3

    /* set EFER_LME in the EFER MSR: ecx selects the MSR, and rdmsr/wrmsr
     * move its low half through eax. Paging itself is switched on later,
     * by the CR0 write further down in start.S. */
    movl $MSR_EFER ,%ecx
    rdmsr
    orl $EFER_LME,%eax
    wrmsr

将CR4位5置1，启用PAE(Physical-Address Extensions)，并将PML4的物理地址存储在CR3中，然后将EFER寄存器的LME位置1，允许进入长模式（长模式要等到后面开启分页后才真正激活）。

//start.S

/* MSR number loaded into ecx before rdmsr/wrmsr to select the EFER register */
#define MSR_EFER 0xc0000080
/* bit mask (bit 8) OR-ed into the EFER value to set the long mode enable flag */
#define EFER_LME 0x00000100

MSR_EFER = 0xc0000080 是一个宏定义，它的值是EFER寄存器的MSR编号（地址，在AMD手册3.1.7中给出），rdmsr/wrmsr指令通过ECX中的这个编号来访问EFER寄存器
在这里插入图片描述

页表映射

//mmu.c

/* top level kernel page tables, initialized in start.S */
map_addr_t kernel_pml4[NO_OF_PT_ENTRIES] __ALIGNED(PAGE_SIZE);
map_addr_t kernel_pdp[NO_OF_PT_ENTRIES] __ALIGNED(PAGE_SIZE); /* temporary */
map_addr_t kernel_pte[NO_OF_PT_ENTRIES] __ALIGNED(PAGE_SIZE);

/* top level pdp needed to map the -512GB..0 space */
map_addr_t kernel_pdp_high[NO_OF_PT_ENTRIES] __ALIGNED(PAGE_SIZE);

/*
 * a big pile of page tables needed to map 64GB of memory into kernel space using 2MB pages.
 * start.S ORs flag bits into the physical address of this array and walks it
 * in 4096-byte steps, so it has to start on a page boundary just like the
 * tables above; the alignment is requested explicitly rather than left to
 * the linker's placement.
 */
map_addr_t kernel_linear_map_pdp[(64ULL*GB) / (2*MB)] __ALIGNED(PAGE_SIZE);

kernel_pml4、kernel_pdp、kernel_pte都是一个4K大小的数组，kernel_linear_map_pdp是一个4*64K大小的数组，在这里的作用是作为64个4K的kernel_pte

//start.S

    /*
     * Build the bootstrap page tables. Every table is addressed through
     * PHYS(), and only the low 32 bits of each 8-byte entry are written in
     * this section.
     * NOTE(review): the upper halves are presumably still zero from the bss
     * clearing done earlier -- confirm that these tables live in the bss.
     */

    /* Setting the First PML4E with a PDP table reference at index 0 */
    movl $PHYS(kernel_pdp), %eax
    orl  $X86_KERNEL_PD_FLAGS, %eax
    movl %eax, PHYS(kernel_pml4)

    /* Setting the First PDPTE with a Page table reference at index 0 */
    movl $PHYS(kernel_pte), %eax
    orl  $X86_KERNEL_PD_FLAGS, %eax
    movl %eax, PHYS(kernel_pdp)

    /* point the pml4e at the second high PDP (for -2GB mapping) at index 511 */
    movl $PHYS(kernel_pdp_high),   %eax
    orl  $X86_KERNEL_PD_FLAGS, %eax
    movl %eax, PHYS(kernel_pml4 + 8*511)

    /* point the second pdp at the same low level page table (entry 510) */
    movl $PHYS(kernel_pte), %eax
    orl  $X86_KERNEL_PD_FLAGS, %eax
    movl %eax, PHYS(kernel_pdp_high + 8*510)

    /* map the first 1GB in this table */
    movl $PHYS(kernel_pte), %esi    /* esi = address of the entry being written */
    movl $0x200, %ecx               /* 512 entries */
    xor  %eax, %eax                 /* start off at address 0 */

0:
    mov  %eax, %ebx
    shll $21, %ebx                  /* ebx = entry index * 2MB */
    orl  $X86_KERNEL_PD_LP_FLAGS, %ebx
    movl %ebx, (%esi)
    addl $8,%esi                    /* entries are 8 bytes apart */
    inc  %eax
    loop 0b                         /* dec ecx and loop while > 0 */

这里使用的是2MB的大页，因此kernel_pte中的表项实际上是页目录项（PDE），把kernel_pte换成kernel_pde会更贴切，但这只是个名字，并不影响实际运行。映射的结果如图：
在这里插入图片描述

    /* set up a linear map of the first 64GB at 0xffffff8000000000 */
    movl $PHYS(kernel_linear_map_pdp), %esi   /* esi = address of the entry being written */
    movl $32768, %ecx                         /* number of 8-byte entries to fill */
    xor  %eax, %eax                           /* eax = index of the current 2MB page */

    /* loop across these page tables, incrementing the address by 2MB */
0:
    mov  %eax, %ebx
    shll $21, %ebx      # low 32 bits of index * 2MB
    orl  $X86_KERNEL_PD_LP_FLAGS, %ebx    # lower word of the entry
    movl %ebx, (%esi)
    mov  %eax, %ebx
    shrl $11, %ebx      # upper word of the entry: bits 32 and up of index * 2MB
    movl %ebx, 4(%esi)
    addl $8,%esi
    inc  %eax
    loop 0b

    /* point the high pdp at our linear mapping page tables:
     * 64 consecutive entries, each referencing the next 4096-byte table */
    movl $PHYS(kernel_pdp_high), %esi
    movl $64, %ecx
    movl $PHYS(kernel_linear_map_pdp), %eax
    orl  $X86_KERNEL_PD_FLAGS, %eax

0:
    movl %eax, (%esi)   /* only the low 32 bits of the entry are written */
    add  $8, %esi
    addl $4096, %eax    /* advance to the next table; the flag bits stay in place */
    loop 0b

    /* Enabling Paging (bit 31 of CR0) and from this point we are in 32 bit compatibility mode */
    mov %cr0,  %eax
    btsl $(31), %eax
    mov %eax,  %cr0

1G内存映射完后，继续建立64G内存的映射关系，如下图所示，最后将CR0的31位（PG位）置1，开启分页。
在这里插入图片描述

初始化IDT

    /* Use a far jump to get into 64bit mode: lret pops the physical address
     * of farjump64 and then CODE_64_SELECTOR into CS */
    pushl $CODE_64_SELECTOR
    pushl $PHYS(farjump64)
    lret

.align 8
.code64
farjump64:
    /* branch to our high address: highaddr is used here without PHYS(),
     * and the jump is made indirectly through rax */
    mov  $highaddr, %rax
    jmp  *%rax

highaddr:
    /* load the high kernel stack (same _kstack area, now without PHYS()) */
    mov  $(_kstack + 4096), %rsp

    /* reload the gdtr, this time from _gdtr rather than _gdtr_phys */
    lgdt _gdtr

    /* set up the idt */
    call setup_idt

    /* call the main module */
    call lk_main

    /* reached only if lk_main returns */
0:                          /* just sit around waiting for interrupts */
    hlt                     /* interrupts will unhalt the processor */
    pause
    jmp 0b                  /* so jump back to halt to conserve power */

启动分页机制后，需要重新加载堆栈地址以及GDTR的虚拟地址，然后调用setup_idt初始化IDT。setup_idt在exceptions.S文件中。_isr是中断服务程序的起始地址，一共有256个中断向量，所以循环256次。下图中，两个nop的作用是为了保证每个中断服务程序的长度相等。
在这里插入图片描述
使用objdump查看反汇编结果可以看到，两个nop占用2个字节，pushq $0也占用2个字节；因为每个中断服务程序的首尾都设置了16字节对齐（.align 16），所以得到的中断服务程序长度都是16B。

/* number of interrupt vectors; one stub is generated per vector */
#define NUM_INT 0x100
/* not referenced in this snippet */
#define NUM_EXC 0x14

.text

/*
 * _isr: table of NUM_INT interrupt entry stubs generated with .rept.
 * Each stub leaves an error-code slot and the vector number i on the stack
 * and then jumps to interrupt_common.
 */
_isr:
.set i, 0
.rept NUM_INT

100:  /* unnamed label for start of isr stub */

/* vectors 8, 10-14 and 17: the error code is already on the stack */
.if i == 8 || (i >= 10 && i <= 14) || i == 17
.align 16
    nop             /* error code pushed by exception */
    nop             /* 2 nops are the same length as push byte */
    pushq $i        /* interrupt number */
    jmp interrupt_common
.align 16
.else
.align 16
    pushq $0        /* fill in error code in iframe */
    pushq $i        /* interrupt number */
    jmp interrupt_common
.align 16
.endif

.set i, i + 1
.endr

/* figure out the length of a single isr stub: the distance from the start of
 * the last stub (the most recent 100: label) to the end of the table. The
 * .align 16 directives pad every stub to a multiple of 16 bytes. */
.set isr_stub_len, . - 100b

/* annoying, but force AS to use the same (longer) encoding of jmp for all of the stubs */
.fill 256

/*
 * interrupt_common: shared tail of every stub in _isr.
 * On entry the stub has already pushed an error-code slot and the vector
 * number. Saves the general purpose registers, calls x86_exception_handler
 * with rdi pointing at the saved frame, restores the registers in reverse
 * order and returns with iretq.
 */
interrupt_common:

    /* clear the direction bit */
    cld

    /* save general purpose registers */
    pushq %r15
    pushq %r14
    pushq %r13
    pushq %r12
    pushq %r11
    pushq %r10
    pushq %r9
    pushq %r8
    pushq %rax
    pushq %rcx
    pushq %rdx
    pushq %rbx
    pushq %rbp
    pushq %rsi
    pushq %rdi

    /* pass the iframe using rdi: rsp now points at the saved rdi, the lowest address of the frame */
    movq %rsp, %rdi

    call x86_exception_handler

    /* restore general purpose registers, exact reverse of the push order above */
    popq %rdi
    popq %rsi
    popq %rbp
    popq %rbx
    popq %rdx
    popq %rcx
    popq %rax
    popq %r8
    popq %r9
    popq %r10
    popq %r11
    popq %r12
    popq %r13
    popq %r14
    popq %r15

    /* drop vector number and error code (two 8-byte slots) */
    addq $16, %rsp
    iretq

在将通用寄存器压栈前，硬件已经将SS、RSP、RFLAGS、CS、RIP压栈（部分异常还会压入错误码）；将通用寄存器压栈后，调用x86_exception_handler，中断处理程序执行完后，按照相反的顺序弹出通用寄存器。最后将rsp+16，是因为在jmp interrupt_common之前中断向量号i和错误码已被压栈，为了让rsp指向返回地址RIP以便iretq返回，需要将rsp+16。

/*
 * setup_idt: write the address of each of the NUM_INT stubs that start at
 * _isr into the matching 16-byte gate of _idt, then load the IDT register
 * from _idtr.
 *
 * In:    nothing
 * Out:   nothing
 * Clobb: rax, rcx, rsi, rdi, flags (all caller-saved; rbx is left untouched)
 */
FUNCTION(setup_idt)
    /* setup isr stub descriptors in the idt */
    mov  $_isr, %rsi        /* rsi = address of the current stub */
    mov  $_idt, %rdi        /* rdi = address of the current gate */
    movl $NUM_INT, %ecx     /* rcx = gates left to fill */

.Lloop:
    mov  %rsi, %rax
    movw %ax, (%rdi)        /* offset [0:15] in IDT(n).low */
    shr  $16, %rax
    movw %ax, 6(%rdi)       /* offset [16:31] in IDT(n).high */
    shr  $16, %rax
    movl %eax, 8(%rdi)      /* offset [32:63] */

    add  $isr_stub_len, %rsi    /* index the next ISR stub */
    add  $16, %rdi          /* index the next IDT entry */

    loop .Lloop

    lidt _idtr

    ret

在setup_idt中，循环256次，每次循环，将中断服务程序的地址填入IDT的门描述符中，最后使用lidt将IDT表的地址载入IDTR中，下图为长模式下门描述符格式。
在这里插入图片描述

.align 8
/* operand for lidt: a 16-bit limit followed by the 64-bit base address of the table */
DATA(_idtr)
    .short _idt_end - _idt - 1  /* IDT limit: size of the table in bytes, minus one */
    .quad _idt                  /* IDT base address */
/* eight bytes of padding after the descriptor */
.fill 8

.align 8
/*
 * interrupt descriptor table (IDT): NUM_INT gate descriptors of 16 bytes each.
 * Every handler-offset field is emitted as zero here; setup_idt writes the
 * real stub addresses into offsets 0, 6 and 8 of each gate.
 */
DATA(_idt)

.set i, 0
.rept NUM_INT
    .short 0, CODE_64_SELECTOR  /* ISR offset [0:15], then the code segment selector */
    .byte  0, 0x8e              /* zero byte, then: present, ring 0, 64-bit interrupt gate */
    .short 0                    /* ISR offset [16:31] */
    .long  0                    /* ISR offset [32:63] */
    .long  0                    /* 32bits Reserved */

.set i, i + 1
.endr