Redox bootloader实现分析

Redox bootloader概述

Redox bootloader用于引导Redox OS启动,支持传统BIOS以及UEFI两种启动方式。用户可以通过编译选项进行配置,这里需要注意的是,只有x86架构可以选择以传统BIOS或UEFI的方式进行启动;对于Aarch64架构,则只支持UEFI的启动方式,因此在分析代码时,需要根据处理器体系架构进行区分。

Redox bootloader的代码路径位于https://gitlab.redox-os.org/redox-os/bootloader。在分析代码前,可以先梳理下对应的代码结构。

Makefile组织

  1. 顶层Makefile
  2. 特定于体系结构的Makefile,存放于mk目录下:
    • aarch64-unknown-uefi.mk:用于编译生成Aarch64体系架构下的UEFI引导程序;
    • x86_64-unknown-uefi.mk:用于编译生成x86_64体系结构下的UEFI引导程序;
    • x86-unknown-none.mk:用于编译生成传统BIOS引导程序。

代码组织

Redox bootloader的代码大体可以分为两个部分:公共部分代码,以及与特定引导方式相关的代码。公共部分代码包括:

  • src/arch目录下代码:特定于体系结构相关的代码,包括页面映射等实现;
  • src/main.rs:包含了体系无关的通用main函数实现,BIOS与UEFI引导方式都会调用;
  • src/logger.rs:日志打印模块实现;
  • src/serial_16550.rs:串口模块实现。

BIOS引导程序

BIOS引导程序核心代码主要包含两个部分:

  • asm目录下的汇编代码:其中bootloader.asm是启动代码,通过include语句引入了stage1.asm和stage2.asm文件:
    • stage1.asm:第一启动阶段实现,即引导程序,最后会烧录到磁盘的第一个扇区中;
    • stage2.asm:第二启动阶段实现,为系统进入保护模式和长模式进行准备,并加载第三阶段的boot程序执行;
  • src/os/bios目录下的rust代码:redox-bootloader术语中的第三启动阶段,包含了基于BIOS方式启动的核心代码实现,负责加载OS内核。

UEFI引导程序

UEFI引导程序核心代码主要存放在src/os/uefi目录下。UEFI引导代码最后会编译生成EFI应用程序文件,并在系统上电时,由UEFI固件加载执行。下面是一个Aarch64系统下EFI程序的文件信息:

[root@localhost BOOT]# file BOOTAA64.EFI
BOOTAA64.EFI: MS-DOS executable PE32+ executable (EFI application) Aarch64 (stripped to external PDB), for MS Windows

传统BIOS启动

传统BIOS启动方式下,计算机开机上电会先加载BIOS固件执行,完成硬件自检、初始化BIOS服务等工作,然后按照约定读取存放在磁盘第一个扇区内的引导程序,并将CPU控制权移交给引导程序,从这里开始就是bootloader的工作范畴。

bootloader.asm

bootloader.asm的代码实现如下:

; Top-level image layout: stage 1 (boot sector), GPT area, stage 2, ISO
; compatibility area, stage 3 (an included binary), padded to 384 KiB in total.
sectalign off

; stage 1 is sector 0, loaded at 0x7C00
%include "stage1.asm"	// boot program in the first sector; the BIOS loads it at 0x7C00 and the first executed instruction is at that address

; GPT area from sector 1 to 33, loaded at 0x7E00
times (33*512) db 0		// skip the sectors occupied by the GPT structures

; stage 2, loaded at 0xC000
stage2:
    %include "stage2.asm"	// the second-stage program is loaded at memory address 0xC000
    align 512, db 0
stage2.end:

; the maximum size of stage2 is 4 KiB
times (4*1024)-($-stage2) db 0

; ISO compatibility, uses up space until 0x12400
%include "iso.asm"

times 3072 db 0 ; Pad to 0x13000

; stage3, loaded at 0x13000
stage3:
    %defstr STAGE3_STR %[STAGE3]	// the third-stage program is loaded at memory address 0x13000; STAGE3 expands to the path of the binary to include
    incbin STAGE3_STR
    align 512, db 0
.end:

; the maximum size of the boot loader portion is 384 KiB
times (384*1024)-($-$$) db 0

第一启动阶段

第一启动阶段在stage1.asm文件中实现,代码如下:

; Stage 1 (boot sector): sets up segments and stack, records the boot disk and
; its CHS geometry, loads stage 2 and stage 3 from disk, then jumps to stage2.entry.
ORG 0x7C00		// after power-on the BIOS loads the boot program at 0x7C00 and executes it there
SECTION .text
USE16

stage1: ; dl comes with disk
    ; initialize segment registers
    xor ax, ax
    mov ds, ax
    mov es, ax
    mov ss, ax

    ; initialize stack
    mov sp, 0x7C00		// initialize the stack pointer to 0x7C00

    ; initialize CS
    push ax		// ax is 0 here; it is the CS value that retf will load
    push word .set_cs		// push the offset of .set_cs; retf then far-returns to 0:.set_cs
    retf

.set_cs:

    ; save disk number
    mov [disk], dl

    mov si, stage_msg
    call print
    mov al, '1'
    call print_char
    call print_line

    ; read CHS geometry
    ;  CL (bits 0-5) = maximum sector number
    ;  CL (bits 6-7) = high bits of max cylinder number
    ;  CH = low bits of maximum cylinder number
    ;  DH = maximum head number
    mov ah, 0x08
    mov dl, [disk]
    xor di, di
    int 0x13
    jc error ; carry flag set on error
    ; bx = maximum cylinder number: low 8 bits from CH, high 2 bits from CL[7:6]
    mov bl, ch
    mov bh, cl
    shr bh, 6
    mov [chs.c], bx
    ; dx = DH + 1 = number of heads
    shr dx, 8
    inc dx ; returns heads - 1
    mov [chs.h], dx
    ; cl = maximum sector number (low 6 bits only)
    and cl, 0x3f
    mov [chs.s], cl

    ; load everything from stage2 up to stage3.end to the address of stage2
    mov eax, (stage2 - stage1) / 512    ; start sector (LBA) of stage 2 in the image
    mov bx, stage2                      ; destination offset
    mov cx, (stage3.end - stage2) / 512 ; sector count covering stage 2 through stage 3
    mov dx, 0                           ; destination segment
    call load

    mov si, stage_msg
    call print
    mov al, '2'
    call print_char
    call print_line

    jmp stage2.entry		// jump to the second-stage entry point

; load some sectors from disk to a buffer in memory
; buffer has to be below 1MiB
; IN
;   eax: start sector (LBA; the full 32-bit eax is stored into DAPACK.addr)
;   bx: offset of buffer
;   cx: number of sectors (512 Bytes each)
;   dx: segment of buffer
; CLOBBER
;   ax, bx, cx, dx, si
; TODO rewrite to (eventually) move larger parts at once
; if that is done increase buffer_size_sectors in startup-common to that (max 0x80000 - startup_end)
load:
    ; requests of more than 127 sectors are split into 127-sector pieces
    cmp cx, 127
    jbe .good_size

    ; read the first 127 sectors with a nested call, keeping the caller's registers
    pusha
    mov cx, 127
    call load
    popa
    ; advance the LBA by 127 sectors and the segment by 127*512 bytes (in 16-byte paragraphs)
    add eax, 127
    add dx, 127 * 512 / 16
    sub cx, 127

    jmp load
.good_size:
    ; fill in the disk address packet for this request
    mov [DAPACK.addr], eax
    mov [DAPACK.buf], bx
    mov [DAPACK.count], cx
    mov [DAPACK.seg], dx

    call print_dapack

    ; a non-zero sectors value recorded by stage1 selects the CHS read path
    cmp byte [chs.s], 0
    jne .chs
    ;INT 0x13 extended read does not work on CDROM!
    mov dl, [disk]
    mov si, DAPACK
    mov ah, 0x42
    int 0x13
    jc error ; carry flag set on error
    ret

.chs:
    ; calculate CHS
    xor edx, edx
    mov eax, [DAPACK.addr]
    div dword [chs.s] ; divide by sectors
    mov ecx, edx ; move sector remainder to ecx
    xor edx, edx
    div dword [chs.h] ; divide by heads
    ; eax has cylinders, edx has heads, ecx has sectors

    ; Sector cannot be greater than 63
    inc ecx ; Sector is base 1
    cmp ecx, 63
    ja error_chs

    ; Head cannot be greater than 255
    cmp edx, 255
    ja error_chs

    ; Cylinder cannot be greater than 1023
    cmp eax, 1023
    ja error_chs

    ; Move CHS values to parameters
    ; ch = low 8 bits of cylinder, cl = sector | (high 2 cylinder bits << 6), dh = head
    mov ch, al
    shl ah, 6
    and cl, 0x3f
    or cl, ah
    shl dx, 8

    ; read from disk using CHS
    mov al, [DAPACK.count]
    mov ah, 0x02 ; disk read (CHS)
    mov bx, [DAPACK.buf]
    mov dl, [disk]
    push es ; save ES
    mov es, [DAPACK.seg]
    int 0x13
    pop es ; restore ES
    jc error ; carry flag set on error
    ret

; Print the current disk address packet as
;   <addr high word><addr low word>#<count> <seg>:<buf>
; followed by a line break. Each field is passed in bx to print_hex, which,
; like print_char and print_line, is defined outside this listing.
print_dapack:
    ; high 16 bits of the low dword of the LBA
    mov bx, [DAPACK.addr + 2]
    call print_hex

    ; low 16 bits of the LBA
    mov bx, [DAPACK.addr]
    call print_hex

    mov al, '#'
    call print_char

    ; sector count
    mov bx, [DAPACK.count]
    call print_hex

    mov al, ' '
    call print_char

    ; destination segment
    mov bx, [DAPACK.seg]
    call print_hex

    mov al, ':'
    call print_char

    ; destination offset
    mov bx, [DAPACK.buf]
    call print_hex

    call print_line

    ret

; error_chs: entry used when a computed CHS value is out of range; it clears ah
; so that the code printed below is 0, then falls through to error.
error_chs:
    mov ah, 0

; error: print a line break, the value of ah in hex, a space and "ERROR",
; then halt forever. Never returns.
error:
    call print_line

    ; bx = zero-extended ah, the value handed to print_hex
    mov bh, 0
    mov bl, ah
    call print_hex

    mov al, ' '
    call print_char

    mov si, error_msg
    call print
    call print_line
.halt:
    ; stop with interrupts disabled; loop in case hlt is ever left
    cli
    hlt
    jmp .halt

; Stage 1 data, followed by the padding, partition table area and boot signature
; that complete the 512-byte boot sector.
%include "print.asm"

stage_msg: db "Stage ",0
error_msg: db "ERROR",0

; boot disk number saved from dl at stage1 entry
disk: db 0

; disk geometry recorded by stage1 from int 0x13, ah=0x08
chs:
.c: dd 0    ; maximum cylinder number
.h: dd 0    ; number of heads
.s: dd 0    ; maximum sector number; only the low byte is written

; disk address packet passed in si to the int 0x13 extended read (ah=0x42)
DAPACK:
        db 0x10
        db 0
.count: dw 0 ; int 13 resets this to # of blocks actually read/written
.buf:   dw 0 ; memory buffer destination address (0:7c00)
.seg:   dw 0 ; in memory page zero
.addr:  dq 0 ; put the lba to read in this spot

times 446-($-$$) db 0		// the boot code occupies the first 446 bytes of the sector; any unused bytes are filled with 0
partitions: times 4 * 16 db 0
; boot sector signature bytes
db 0x55
db 0xaa

第二启动阶段

第二启动阶段主体流程在stage2.asm中实现。在分析stage2.asm的流程前,可以先看下stage2.asm通过include指令引入的一系列asm文件,对各个asm实现的功能描述如下:

  • cpuid.asm:CPU特性检查,提供stage2入口处调用的cpuid_check例程;
  • gdt.asm:保护模式和长模式下的gdt表项配置;
  • protected_mode.asm:从实地址模式切换到保护模式的流程;
  • long_mode.asm:从保护模式切换到长模式的流程;
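  • thunk.asm:提供thunk.int10、thunk.int13、thunk.int15、thunk.int16等入口,这些入口的地址会作为参数传递给第三阶段程序(从命名推测,用于在保护模式下回调对应的BIOS中断服务)。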

stage2.asm入口代码主要完成CPU的相关特性检查,并通过0x92端口打开A20地址线(使1MB以上的地址不再发生回绕),随后开始为进入保护模式进行准备:

; Stage 2 entry: check CPU features, enable the A20 line, then switch to
; protected mode with stage3.entry as the continuation.
stage2.entry:
    ; check for required features
    call cpuid_check	// check the CPU feature information

    ; enable A20-Line via IO-Port 92, might not work on all motherboards
    in al, 0x92	// set bit 1 of port 0x92 to enable the A20 line, lifting the 1 MiB real-mode addressing limit
    or al, 2
    out 0x92, al

    mov dword [protected_mode.func], stage3.entry	// protected_mode.func holds the address of stage3.entry
    jmp protected_mode.entry	// jump to protected_mode.entry to switch from real mode to protected mode

从实模式切换到保护模式

从实地址模式切换到保护模式的流程代码包含在protected_mode.asm文件中:

; Switch from real mode to 32-bit protected mode, then jump to the address
; stored in protected_mode.func.
SECTION .text
USE16

protected_mode:

.func: dd 0	// protected_mode.func; stage2.entry sets it to the address of stage3.entry

.entry:
    ; disable interrupts
    cli		// the Intel manual requires interrupts to be disabled before switching to protected mode

    ; load protected mode GDT
    lgdt [gdtr]		// load the GDT base and limit into GDTR; GDTR holds a linear address, which equals the physical address while paging is off

    ; set protected mode bit of cr0
    mov eax, cr0
    or eax, 1
    mov cr0, eax			// bit 0 of CR0 is the PE flag that enables protected mode

    ; far jump to load CS with 32 bit segment
    jmp gdt.pm32_code:.inner	// segment registers must be reloaded in protected mode; cs is reloaded by this far jmp

USE32	

.inner:
    ; load all the other segments with 32 bit data segments
    mov eax, gdt.pm32_data	// reload the remaining segment registers
    mov ds, eax
    mov es, eax
    mov fs, eax
    mov gs, eax
    mov ss, eax

    ; jump to specified function
    mov eax, [.func]
    jmp eax	// continue at stage3.entry

stage3.entry

stage3.entry负责加载第三阶段的程序执行:

; Set up the stage 3 stack, push the arguments of the stage 3 entry function
; in reverse order, and call the entry address read from the stage 3 ELF header.
stage3.entry:
   ; stage3 stack at 448 KiB (512KiB minus 64KiB disk buffer)
   mov esp, 0x70000

   ; push arguments
   mov eax, thunk.int16	// prepare the arguments for the third-stage entry point (last argument first)
   push eax
   mov eax, thunk.int15
   push eax
   mov eax, thunk.int13
   push eax
   mov eax, thunk.int10
   push eax
   xor eax, eax
   mov al, [disk]
   push eax
   mov eax, kernel.entry	// pushed last, so it becomes the first argument (kernel_entry) of start, which calls it once the kernel is loaded
   push eax
   mov eax, [stage3 + 0x18]	// stage 3 is an ELF executable; offset 0x18 of the ELF header holds the entry address (the start function, as shown later)
   call eax
.halt:
   ; reached only if the stage 3 entry function returns
   cli
   hlt
   jmp .halt

redox-bootloader在实现时同时考虑了OS内核是32位系统和64位系统两种情况,在利用第三阶段程序探测到OS内核程序的信息后,会执行kernel.entry。kernel.entry会根据OS内核信息进行处理:若是32位OS内核程序,直接调用内核入口函数执行;若是64位OS内核程序,则先将处理器切换到长模式后,调用OS内核入口函数执行。

; kernel.entry is called from stage 3 with
;   (page_table: usize, stack: u64, func: u64, args: *const KernelArgs, long_mode: usize)
; on the 32-bit stack; [esp] is the return address. It saves the arguments and
; continues at .inner32 or, through long_mode.entry, at .inner64 (both outside
; this listing).
kernel:
.stack: dq 0	// kernel stack address
.func: dq 0	// kernel entry function address
.args: dq 0	// kernel arguments address (only the low dword is written below)

.entry:
    ; page_table: usize
    mov eax, [esp + 4]	// first argument: address of the top-level page table
    mov [long_mode.page_table], eax

    ; stack: u64
    mov eax, [esp + 8]		// second argument: stack address (low dword, then high dword)
    mov [.stack], eax
    mov eax, [esp + 12]
    mov [.stack + 4], eax

    ; func: u64
    mov eax, [esp + 16]	// third argument: kernel entry address (low dword, then high dword)
    mov [.func], eax
    mov eax, [esp + 20]
    mov [.func + 4], eax

    ; args: *const KernelArgs	
    mov eax, [esp + 24]	// fourth argument: address of the kernel arguments
    mov [.args], eax

    ; long_mode: usize
    mov eax, [esp + 28]	// fifth argument: zero for a 32-bit kernel, non-zero for a 64-bit kernel
    test eax, eax
    jz .inner32	// 32-bit kernel: continue at .inner32

    mov eax, .inner64
    mov [long_mode.func], eax		// long_mode.func holds the address of kernel.inner64
    jmp long_mode.entry		// jump to long_mode.entry to switch from protected mode to long mode

从保护模式切换到长模式

Intel处理器为了增加对64位的支持,新增了IA-32e模式,也称作long-mode,即长模式,而且Intel手册要求必须在保护模式下完成向长模式的切换。长模式切换的代码在long_mode.asm中:

; Switch from 32-bit protected mode to long mode using the page table stored in
; long_mode.page_table, then jump to the address stored in long_mode.func.
SECTION .text
USE32

long_mode:
.func: dq 0	// continuation address; kernel.entry stores the address of kernel.inner64 here
.page_table: dd 0	; page table address, written by kernel.entry and loaded into cr3 below

.entry:
    ; disable interrupts
    cli		// interrupts must be disabled before the mode switch

    ; disable paging
    mov eax, cr0
    and eax, 0x7FFFFFFF
    mov cr0, eax	// per the Intel manual, paging must be turned off in protected mode before switching to long mode

    ; enable FXSAVE/FXRSTOR, Page Global, Page Address Extension, and Page Size Extension
    mov eax, cr4
    or eax, 1 << 9 | 1 << 7 | 1 << 5 | 1 << 4
    mov cr4, eax	// sets CR4 bits 9, 7, 5 and 4, the features listed in the comment above, including PAE

    ; load long mode GDT
    lgdt [gdtr]		// load the GDT used in long mode

    ; enable long mode
    mov ecx, 0xC0000080               ; Read from the EFER MSR.
    rdmsr
    or eax, 1 << 11 | 1 << 8          ; Set the Long-Mode-Enable and NXE bit.
    wrmsr	// enable long mode

    ; set page table
    mov eax, [.page_table]
    mov cr3, eax	// load the top-level page table address

    ; enabling paging and protection simultaneously
    mov eax, cr0
    or eax, 1 << 31 | 1 << 16 | 1                ;Bit 31: Paging, Bit 16: write protect kernel, Bit 0: Protected Mode
    mov cr0, eax	// turn paging on

    ; far jump to enable Long Mode and load CS with 64 bit segment
    jmp gdt.lm64_code:.inner	// jump to .inner; the far jump also reloads cs

USE64

.inner:
    ; load all the other segments with 64 bit data segments
    mov rax, gdt.lm64_data	// reload all data segment registers
    mov ds, rax
    mov es, rax
    mov fs, rax
    mov gs, rax
    mov ss, rax

    ; jump to specified function
    mov rax, [.func]	// continue at the stored address (kernel.inner64)
    jmp rax

第三启动阶段

第三启动阶段的代码正式使用Rust语言进行实现,包含在src目录下,与传统BIOS启动相关的核心代码位于src/os/bios目录下。通过查看Makefile文件x86-unknown-none.mk,第三启动阶段程序最终会生成名为bootloader.elf的ELF可执行文件。在linkers目录下,有一个链接脚本文件`x86_64-unknown-none.ld`,用于指示bootloader.elf如何进行链接:

ENTRY(start)	// 第三阶段的入口函数地址为start
OUTPUT_FORMAT(elf32-i386)

SECTIONS {
    /* The start address must match bootloader.asm */
    . = 0x13000;	// 第三阶段程序最终会被加载到内存地址0x13000处,这里需要通过链接指定起始地址

    . += SIZEOF_HEADERS;	// 增加ELF文件头的偏移
    . = ALIGN(4096);		// 4k边界对齐

    .text : {
        __text_start = .;
        *(.text*)
        . = ALIGN(4096);
        __text_end = .;
    }
...
}

第三启动阶段程序入口

通过链接脚本,可以看到第三启动阶段程序的入口是名为start的函数:

src/os/bios/mod.rs

/// Entry point of the third boot stage on BIOS systems.
///
/// Receives the `kernel_entry` trampoline, the boot disk number and the four
/// thunk functions, initializes the heap allocator, runs the shared
/// `crate::main` and finally calls `kernel_entry`. Never returns.
#[no_mangle]
pub unsafe extern "C" fn start(
    kernel_entry: extern "C" fn(
        page_table: usize,
        stack: u64,
        func: u64,
        args: *const KernelArgs,
        long_mode: usize,
    ) -> !,
    boot_disk: usize,
    thunk10: extern "C" fn(),
    thunk13: extern "C" fn(),
    thunk15: extern "C" fn(),
    thunk16: extern "C" fn(),
) -> ! {
    ...
    
    let mut os = OsBios {
        boot_disk,
        thunk10,
        thunk13,
        thunk15,
        thunk16,
    };

    // Obtain the heap region through thunk15
    let (heap_start, heap_size) = memory_map(os.thunk15)
        .expect("No memory for heap");

    ALLOCATOR.lock().init(heap_start as *mut u8, heap_size);	// initialize the heap allocator

    let (page_phys, func, args) = crate::main(&mut os);	// call the architecture-independent main; it returns the page table address, the kernel entry address and the kernel arguments

	// kernel_entry is the kernel.entry address pushed by stage3.entry; call it here
    kernel_entry(	
        page_phys,		// page table address
        // top of the kernel stack, offset by PHYS_OFFSET of the target architecture
        args.stack_base + args.stack_size + if crate::KERNEL_64BIT {
            crate::arch::x64::PHYS_OFFSET as u64
        } else {
            crate::arch::x32::PHYS_OFFSET as u64
        },
        func,	// kernel entry point
        &args,		// kernel arguments
        if crate::KERNEL_64BIT { 1 } else { 0 },	// 1 for a 64-bit kernel, 0 for a 32-bit kernel
    );
}

体系结构无关main函数

src/main.rs

fn main<
    D: Disk,
    V: Iterator<Item=OsVideoMode>
>(os: &mut dyn Os<D, V>) -> (usize, u64, KernelArgs) {
    println!("Redox OS Bootloader {} on {}", env!("CARGO_PKG_VERSION"), os.name());

    let (mut fs, password_opt) = redoxfs(os);

    print!("RedoxFS ");
    for i in 0..fs.header.uuid().len() {
        if i == 4 || i == 6 || i == 8 || i == 10 {
            print!("-");
        }

        print!("{:>02x}", fs.header.uuid()[i]);
    }
    println!(": {} MiB", fs.header.size() / MIBI as u64);
    println!();

    let mut mode_opts = Vec::new();
    for output_i in 0..os.video_outputs() {
        if output_i > 0 {
            os.clear_text();
        }
        mode_opts.push(select_mode(os, output_i));
    }

    let stack_size = 128 * KIBI;
    let stack_base = os.alloc_zeroed_page_aligned(stack_size);
    if stack_base.is_null() {
        panic!("Failed to allocate memory for stack");
    }
    
    ...

    let (kernel, kernel_entry) = {
        let kernel = load_to_memory(os, &mut fs, "boot", "kernel", Filetype::Elf);		// 加载OS内核镜像到内存中
        let (kernel_entry, kernel_64bit) = elf_entry(kernel);		// 读取ELF文件头获取OS内核入口地址,及确定是否是64位系统
        unsafe { KERNEL_64BIT = kernel_64bit; }
        (kernel, kernel_entry)
    };

    let (bootstrap_size, bootstrap_base, bootstrap_entry, initfs_offset, initfs_len) = {
        let bootstrap_slice = load_to_memory(os, &mut fs, "boot", "bootstrap", Filetype::Elf);
        let bootstrap_len = (bootstrap_slice.len()+4095)/4096*4096;
        let (bootstrap_entry, bootstrap_64bit) = elf_entry(bootstrap_slice);
        unsafe { assert_eq!(KERNEL_64BIT, bootstrap_64bit); }

        let initfs_slice = load_to_memory(os, &mut fs, "boot", "initfs", Filetype::Other);		// 加载initfs到内存中
        let initfs_len = (initfs_slice.len()+4095)/4096*4096;

        let memory = unsafe {
            let total_size = initfs_len + bootstrap_len;
            let ptr = os.alloc_zeroed_page_aligned(total_size);
            assert!(!ptr.is_null(), "failed to allocate bootstrap+initfs memory");
            core::slice::from_raw_parts_mut(ptr, total_size)
        };
        memory[..bootstrap_slice.len()].copy_from_slice(bootstrap_slice);
        memory[bootstrap_len..bootstrap_len + initfs_slice.len()].copy_from_slice(initfs_slice);

        (memory.len() as u64, memory.as_mut_ptr() as u64, bootstrap_entry, bootstrap_len, initfs_len)
    };

    let page_phys = unsafe {
        paging_create(os, kernel.as_ptr() as u64, kernel.len() as u64)		// 为OS内核镜像建立页面映射
    }.expect("Failed to set up paging");
    //TODO: properly reserve page table allocations so kernel does not re-use them

    let mut env_size = 4 * KIBI;
    let env_base = os.alloc_zeroed_page_aligned(env_size);
    if env_base.is_null() {
        panic!("Failed to allocate memory for stack");
    }

    {
        let mut w = SliceWriter {
            slice: unsafe {
                slice::from_raw_parts_mut(env_base, env_size)
            },
            i: 0,
        };
        
        ...
        
    (
        page_phys,
        kernel_entry,
        KernelArgs {
            kernel_base: kernel.as_ptr() as u64,
            kernel_size: kernel.len() as u64,
            stack_base: stack_base as u64,
            stack_size: stack_size as u64,
            env_base: env_base as u64,
            env_size: env_size as u64,
            acpi_rsdps_base: 0,
            acpi_rsdps_size: 0,
            areas_base: unsafe {
                AREAS.as_ptr() as u64
            },
            areas_size: unsafe {
                (AREAS.len() * mem::size_of::<OsMemoryEntry>()) as u64
            },
            bootstrap_base,
            bootstrap_size,
            bootstrap_entry,
        }
    )
}

加载相关系统文件

load_to_memory用于加载文件系统的相关文件到内存中,包括OS内核镜像、bootstrap、initfs等文件:

/// Loads the file `dirname`/`filename` from the RedoxFS file system into
/// freshly allocated page-aligned memory and returns it as a byte slice.
///
/// Panics if the directory or file cannot be found, if the allocation or a
/// read fails, or if `filetype` is `Filetype::Elf` and the data does not start
/// with the ELF magic number.
fn load_to_memory<D: Disk>(os: &mut dyn Os<D, impl Iterator<Item=OsVideoMode>>, fs: &mut redoxfs::FileSystem<D>, dirname: &str, filename: &str, filetype: Filetype) -> &'static mut [u8] {
    fs.tx(|tx| {
        // Resolve the directory from the file system root, then the file inside it
        let dir_node = tx.find_node(redoxfs::TreePtr::root(), dirname)
            .unwrap_or_else(|err| panic!("Failed to find {} directory: {}", dirname, err));

        let node = tx.find_node(dir_node.ptr(), filename)
            .unwrap_or_else(|err| panic!("Failed to find {} file: {}", filename, err));

        let size = node.data().size();

        print!("{}: 0/{} MiB", filename, size / MIBI as u64);

        let ptr = os.alloc_zeroed_page_aligned(size as usize);		// allocate zeroed, page-aligned memory for the whole file
        if ptr.is_null() {
            panic!("Failed to allocate memory for {}", filename);
        }

        let slice = unsafe {
            slice::from_raw_parts_mut(ptr, size as usize)
        };

        // Read the file in MIBI-sized chunks, rewriting the progress line before each read;
        // `i` is the file offset of the next read
        let mut i = 0;
        for chunk in slice.chunks_mut(MIBI) {
            print!("\r{}: {}/{} MiB", filename, i / MIBI as u64, size / MIBI as u64);
            i += tx.read_node_inner(&node, i, chunk)
                .unwrap_or_else(|err| panic!("Failed to read `{}` file: {}", filename, err)) as u64;
        }
        println!("\r{}: {}/{} MiB", filename, i / MIBI as u64, size / MIBI as u64);

        // ELF files must begin with the bytes 0x7F 'E' 'L' 'F'
        if filetype == Filetype::Elf {
            let magic = &slice[..4];
            if magic != b"\x7FELF" {
                panic!("{} has invalid magic number {:#X?}", filename, magic);
            }
        }

        Ok(slice)
    }).unwrap_or_else(|err| panic!("RedoxFS transaction failed while loading `{}`: {}", filename, err))
}

映射内核镜像

这里以x86_64体系结构为例,说明bootloader是如何完成OS内核镜像的页面映射的。x86_64支持寻址48位的地址空间,并使用四级页表,在Intel手册描述中,依次称作:PML4、PDP、PD、PT。

src/arch/x86/x64.rs 

/// Builds the initial x86_64 page tables: an identity mapping of the first
/// 8 GiB using 2 MiB pages, linked from PML4 entries 0 and 256, and a 4 KiB
/// page mapping of the kernel image linked from PML4 entry 510.
///
/// Returns the address of the PML4, or `None` if a page table allocation fails.
pub unsafe fn paging_create<
    D: Disk,
    V: Iterator<Item=OsVideoMode>
>(os: &mut dyn Os<D, V>, kernel_phys: u64, kernel_size: u64) -> Option<usize> {
    // Create PML4
    let pml4 = paging_allocate(os)?;		// allocate one page for the PML4

    {
        // Create PDP for identity mapping
        let pdp = paging_allocate(os)?;	// allocate one page for the PDP

        // Link first user and first kernel PML4 entry to PDP
        pml4[0] = pdp.as_ptr() as u64 | 1 << 1 | 1;
        pml4[256] = pdp.as_ptr() as u64 | 1 << 1 | 1;

        // Identity map 8 GiB using 2 MiB pages
        for pdp_i in 0..8 {		// map the first 8 GiB of physical memory, one PD (1 GiB) per iteration
            let pd = paging_allocate(os)?;
            pdp[pdp_i] = pd.as_ptr() as u64 | 1 << 1 | 1;
            for pd_i in 0..pd.len() {
                let addr = pdp_i as u64 * 0x4000_0000 + pd_i as u64 * 0x20_0000;
                pd[pd_i] = addr | 1 << 7 | 1 << 1 | 1;		// 2 MiB pages are described at the PD level, so no PT is needed (bit 7 set)
            }
        }
    }

    {
        // Create PDP for kernel mapping
        let pdp = paging_allocate(os)?;		// allocate one page for the PDP that maps the kernel

        // Link second to last PML4 entry to PDP
        pml4[510] = pdp.as_ptr() as u64 | 1 << 1 | 1;		// fill PML4 entry 510, whose linear address range starts at 0xFFFFFF0000000000

        // Map kernel_size at kernel offset
        let mut kernel_mapped = 0;
        let mut pdp_i = 0;
        while kernel_mapped < kernel_size && pdp_i < pdp.len() {
            let pd = paging_allocate(os)?;
            pdp[pdp_i] = pd.as_ptr() as u64 | 1 << 1 | 1;
            pdp_i += 1;

            let mut pd_i = 0;
            while kernel_mapped < kernel_size && pd_i < pd.len(){
                let pt = paging_allocate(os)?;
                pd[pd_i] = pt.as_ptr() as u64 | 1 << 1 | 1;
                pd_i += 1;

                let mut pt_i = 0;
                while kernel_mapped < kernel_size && pt_i < pt.len() {		// fill the PT one 4 KiB page at a time, mapping consecutive linear addresses to the kernel's physical pages
                    let addr = kernel_phys + kernel_mapped;
                    pt[pt_i] = addr | 1 << 1 | 1;
                    pt_i += 1;
                    kernel_mapped += PAGE_SIZE as u64;
                }
            }
        }
        // Fails if the tables under this PDP could not cover the whole kernel image
        assert!(kernel_mapped >= kernel_size);
    }

    Some(pml4.as_ptr() as usize)
}

UEFI启动

与传统BIOS启动方式相比,UEFI的启动步骤简化了许多,计算机上电后,会先加载UEFI固件,然后UEFI加载磁盘特定文件系统上的EFI程序,对应于上面提到的第三阶段流程,随后由这个EFI程序完成OS内核的引导。

UEFI引导执行入口

src/os/uefi/mod.rs

/// Entry point of the UEFI boot application: clears the watchdog timer
/// setting, tries to select the maximum console mode, then runs the
/// architecture-specific `arch::main`.
#[no_mangle]
pub extern "C" fn main() -> Status {
    let uefi = std::system_table();

    // The result of SetWatchdogTimer is deliberately ignored
    let _ = (uefi.BootServices.SetWatchdogTimer)(0, 0, 0, ptr::null());

    // Failure to change the console mode is reported but not fatal
    if let Err(err) = set_max_mode(uefi.ConsoleOut) {
        println!("Failed to set max mode: {:?}", err);
    }

    if let Err(err) = arch::main() {		// call the architecture-specific main function
        panic!("App error: {:?}", err);
    }

    // Reached only if arch::main returns Ok: request a cold reset
    (uefi.RuntimeServices.ResetSystem)(ResetType::Cold, Status(0), 0, ptr::null());
}

体系结构相关的main函数

x86_64体系结构main函数

src/os/uefi/arch/x86_64.rs

/// x86_64 UEFI main: initializes logging and the `OsEfi` wrapper, locates the
/// ACPI table pointers, runs the shared `crate::main`, then hands over to the
/// kernel through `kernel_entry`.
pub fn main() -> Result<()> {
    LOGGER.init();

    let mut os = OsEfi::new();

    // Disable cursor
    let _ = (os.st.ConsoleOut.EnableCursor)(os.st.ConsoleOut, false);

    find_acpi_table_pointers(&mut os);

    let (page_phys, func, mut args) = crate::main(&mut os);	// call the architecture-independent main, shared with the legacy BIOS boot path


    unsafe {
        // crate::main leaves the ACPI RSDP fields at 0; fill them in here
        args.acpi_rsdps_base = RSDPS_AREA_BASE as u64;
        args.acpi_rsdps_size = RSDPS_AREA_SIZE as u64;

        kernel_entry(		// call kernel_entry to switch to the OS kernel
            page_phys,
            // top of the kernel stack, offset by PHYS_OFFSET of the target architecture
            args.stack_base + args.stack_size + if crate::KERNEL_64BIT {
                crate::arch::x64::PHYS_OFFSET as u64
            } else {
                crate::arch::x32::PHYS_OFFSET as u64
            },
            func,
            &args,
        );
    }
}

调用kernel_entry切换到OS内核执行:

/// Leaves UEFI boot services, configures CR4, EFER, CR3 and CR0 for the
/// kernel's page tables, switches to the kernel stack and calls the kernel
/// entry point with `args`. Never returns.
unsafe extern "C" fn kernel_entry(
    page_phys: usize,
    stack: u64,
    func: u64,
    args: *const KernelArgs,
) -> ! {
    // Read memory map and exit boot services
    {
        let mut memory_iter = memory_map();
        memory_iter.exit_boot_services();
        memory_iter.set_virtual_address_map(if crate::KERNEL_64BIT {
            crate::arch::x64::PHYS_OFFSET as u64
        } else {
            crate::arch::x32::PHYS_OFFSET as u64
        });
        // Skip the iterator's destructor
        mem::forget(memory_iter);
    }

    // Disable interrupts
    asm!("cli");

    // Enable FXSAVE/FXRSTOR, Page Global, Page Address Extension, and Page Size Extension
    let mut cr4 = controlregs::cr4();
    cr4 |= Cr4::CR4_ENABLE_SSE
        | Cr4::CR4_ENABLE_GLOBAL_PAGES
        | Cr4::CR4_ENABLE_PAE
        | Cr4::CR4_ENABLE_PSE;
    controlregs::cr4_write(cr4);

    // Enable Long mode and NX bit
    let mut efer = msr::rdmsr(msr::IA32_EFER);
    efer |= 1 << 11 | 1 << 8;
    msr::wrmsr(msr::IA32_EFER, efer);	// enable long mode

    // Set new page map
    controlregs::cr3_write(page_phys as u64);		// install the page tables built by the bootloader

    // Enable paging, write protect kernel, protected mode
    let mut cr0 = controlregs::cr0();
    cr0 |= Cr0::CR0_ENABLE_PAGING
        | Cr0::CR0_WRITE_PROTECT
        | Cr0::CR0_PROTECTED_MODE;
    controlregs::cr0_write(cr0);

    // Set stack
    asm!("mov rsp, {}", in(reg) stack);

    // Call kernel entry
    let entry_fn: extern "sysv64" fn(*const KernelArgs) -> ! = mem::transmute(func);
    entry_fn(args);		// the bootloader's job is done; control passes to the OS kernel
}

Aarch64体系结构main函数

src/os/uefi/arch/aarch64.rs

/// AArch64 UEFI main: initializes logging and the `OsEfi` wrapper, locates the
/// ACPI table pointers, runs the shared `crate::main`, then hands over to the
/// kernel through `kernel_entry`.
pub fn main() -> Result<()> {
    LOGGER.init();

    //TODO: support this in addition to ACPI?
    // let dtb = find_dtb()?;

    let mut os = OsEfi::new();

    // Disable cursor
    let _ = (os.st.ConsoleOut.EnableCursor)(os.st.ConsoleOut, false);

    find_acpi_table_pointers(&mut os);

    let (page_phys, func, mut args) = crate::main(&mut os);		// call the architecture-independent main, shared with the legacy BIOS boot path

    unsafe {
        // crate::main leaves the ACPI RSDP fields at 0; fill them in here
        args.acpi_rsdps_base = RSDPS_AREA_BASE as u64;
        args.acpi_rsdps_size = RSDPS_AREA_SIZE as u64;

        kernel_entry(		// call kernel_entry to switch to the OS kernel
            page_phys,
            // top of the kernel stack, offset by PHYS_OFFSET
            args.stack_base + args.stack_size + PHYS_OFFSET,
            func,
            &args,
        );
    }
}

执行kernel_entry切换到OS内核执行:

src/os/uefi/arch/aarch64.rs

/// Leaves UEFI boot services, reprograms the EL1 translation setup
/// (SCTLR_EL1, TTBR1_EL1, MAIR_EL1, TCR_EL1) for the kernel's page tables,
/// switches to the kernel stack and calls the kernel entry point with `args`.
/// Never returns.
unsafe extern "C" fn kernel_entry(
    page_phys: usize,
    stack: u64,
    func: u64,
    args: *const KernelArgs,
) -> ! {
    // Read memory map and exit boot services
    {
        let mut memory_iter = memory_map();
        memory_iter.exit_boot_services();
        memory_iter.set_virtual_address_map(PHYS_OFFSET);
        // Skip the iterator's destructor
        mem::forget(memory_iter);
    }

    // Disable interrupts
    asm!("msr daifset, #2");

    // Disable MMU
    asm!(
        "mrs {0}, sctlr_el1", // Read system control register
        "bic {0}, {0}, 1", // Clear MMU enable bit
        "msr sctlr_el1, {0}", // Write system control register
        "isb", // Instruction sync barrier
        out(reg) _,
    );

    // Set page tables
    asm!(		// install the page table address
        "dsb sy", // Data sync barrier
        "msr ttbr1_el1, {0}", // Set higher half page table
        "isb", // Instruction sync barrier
        "tlbi vmalle1is", // Invalidate TLB
        in(reg) page_phys,
    );

    // Set MAIR
    asm!(
        "msr mair_el1, {0}",
        in(reg) 0xff4400, // MAIR: Arrange for Device, Normal Non-Cache, Normal Write-Back access types
    );

    // Set TCR
    asm!(
        "mrs {1}, id_aa64mmfr0_el1", // Read memory model feature register
        "bfi {0}, {1}, #32, #3", // Insert its low 3 bits into bits 32..34 of the TCR value
        "msr tcr_el1, {0}", // Write translaction control register
        "isb", // Instruction sync barrier
        in(reg) 0x1085100510u64, // TCR: (TxSZ, ASID_16, TG1_4K, Cache Attrs, SMP Attrs)
        out(reg) _,
    );

    // Enable MMU
    asm!(
        "mrs {2}, sctlr_el1", // Read system control register
        "bic {2}, {2}, {0}", // Clear bits
        "orr {2}, {2}, {1}", // Set bits
        "msr sctlr_el1, {2}", // Write system control register
        "isb", // Instruction sync barrier
        in(reg) 0x32802c2,  // Clear SCTLR bits: (EE, EOE, IESB, WXN, UMA, ITD, THEE, A)
        in(reg) 0x3485d13d, // Set SCTLR bits: (LSMAOE, nTLSMD, UCI, SPAN, nTWW, nTWI, UCT, DZE, I, SED, SA0, SA, C, M, CP15BEN)
        out(reg) _,
    );

    // Set stack
    asm!("mov sp, {}", in(reg) stack);

    // Call kernel entry
    let entry_fn: extern "C" fn(*const KernelArgs) -> ! = mem::transmute(func);
    entry_fn(args);		// the bootloader's job is done; control passes to the OS kernel
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值