Rust编译器研究+.NET 9 Preview 7

点击上方蓝字 江湖评谈设为关注/星标

outside_default.png

前言

看下Rust在Linux上的编译与glibc启动部分,以及.NET 9 Preview 7的一个更新。

.NET 9 Preview 7

值类型一般分配在栈上,但是有时候不得不把值类型封装(装箱)成对象,再进行一些操作。比如:

// Compares two values passed as object references.
// If either argument is null, the result is whether both references are equal
// (true only when both are null); otherwise the result is x.Equals(y).
static bool Compare(object? x, object? y)
{
    // Null on either side: fall back to a reference comparison.
    if ((x == null) || (y == null))
    {
        return x == y;
    }


    // Both non-null: defer to the (virtual) Equals of x.
    return x.Equals(y);
}


// Entry point: compares the int literals 3 and 4 through the object-typed
// Compare and returns 0 when it reports equality, 100 otherwise.
public static int Main()
{
    // The int arguments must be converted to object to match Compare's parameters.
    bool result = Compare(3, 4);
    return result ? 0 : 100;
}

3和4是值类型,传参的时候会被装箱(boxing),即构建了堆对象并分配了堆内存。但是实际上就这段代码而言,只需要返回100即可。优化之后生成的代码如下:

mov      eax, 100
ret

而未优化之前

push     rbx
sub      rsp, 32
mov      rcx, 0x7FFB9F8074D0      ; System.Int32
call     CORINFO_HELP_NEWSFAST
mov      rbx, rax
mov      dword ptr [rbx+0x08], 3
mov      rcx, 0x7FFB9F8074D0      ; System.Int32
call     CORINFO_HELP_NEWSFAST
mov      dword ptr [rax+0x08], 4
add      rbx, 8
mov      ecx, dword ptr [rbx]
cmp      ecx, dword ptr [rax+0x08]
sete     al
movzx    rax, al
xor      ecx, ecx
mov      edx, 100
test     eax, eax
mov      eax, edx
cmovne   eax, ecx
add      rsp, 32
pop      rbx
ret


Rust详情

Rust的可执行文件由编译器编译、链接生成之后才开始执行。它第一步是通过内核态的缺页异常(exc_page_fault)切换,来调用Glibc的_start;然后通过_start调用Rust的main入口,最后运行起整个Rust程序。

编译这块可以大致分为两步:其一是对Rust语言的语法进行加载、分析、变形,这期间形成BB(Basic Block)块和IR中间表示,并进行一定程度的优化;其二是通过LLVM后端编译成目标机器码来运行。这一点与.NET非常相似(当然细节上差异极大,这点是自然的,毕竟是两种不同的体系),关注本公众号的朋友可以看下之前的文章。唯一不同点是,微软的后端机器码生成由JIT来做,而Rust由LLVM来做。

下面实际看下,_start作为用户态的入口调用了Glibc:

(lldb) b main
(lldb) bt
* thread #1, name = 'rustfirstproj', stop reason = breakpoint 1.1
  * frame #0: 0x000055555555b850 rustfirstproj`main
    frame #1: 0x00007ffff7c29d90 libc.so.6`__libc_start_call_main(main=(rustfirstproj`main), argc=1, argv=0x00007fffffffdf08) at libc_start_call_main.h:58:16
    frame #2: 0x00007ffff7c29e40 libc.so.6`__libc_start_main_impl(main=(rustfirstproj`main), argc=1, argv=0x00007fffffffdf08, init=0x00007ffff7ffd040, fini=<unavailable>, rtld_fini=<unavailable>, stack_end=0x00007fffffffdef8) at libc-start.c:392:3
    frame #3: 0x000055555555b635 rustfirstproj`_start + 37

Glibc并没有直接进入Rust的main函数,而是先进入编译器生成的C风格main入口,再由它调用lang_start:

(lldb) di
rustfirstproj`main:
->  0x55555555b850 <+0>:  push   rax
    0x55555555b851 <+1>:  mov    rdx, rsi
    0x55555555b854 <+4>:  movsxd rsi, edi
    0x55555555b857 <+7>:  lea    rdi, [rip - 0x3e]         ; rustfirstproj::main::h2d069b53148117c9
    0x55555555b85e <+14>: xor    ecx, ecx
    0x55555555b860 <+16>: call   0x55555555b710            ; std::rt::lang_start::hbf501259140f0729
    0x55555555b865 <+21>: pop    rcx
    0x55555555b866 <+22>: ret

lang_start源码(调用了lang_start_internal)

//rust-lang/rust/blob/1.80.0/library/std/src/rt.rs#L158
// Runtime entry point registered as the `start` lang item.
//
// Wraps the user's `main` in a closure and passes it, together with `argc`,
// `argv` and `sigpipe`, to `lang_start_internal`, returning the `isize` that
// call yields.
#[cfg(not(any(test, doctest)))]
#[lang = "start"]
fn lang_start<T: crate::process::Termination + 'static>(
    main: fn() -> T,
    argc: isize,
    argv: *const *const u8,
    sigpipe: u8,
) -> isize {
    // `lang_start_internal` is declared to return `Result<isize, !>`, so this
    // `let Ok(v)` pattern needs no `Err` arm.
    let Ok(v) = lang_start_internal(
        // Run `main` via `__rust_begin_short_backtrace`, then convert its `T`
        // result with `report().to_i32()` (presumably the process exit status).
        &move || crate::sys_common::backtrace::__rust_begin_short_backtrace(main).report().to_i32(),
        argc,
        argv,
        sigpipe,
    );
    v
}

lang_start_internal(其调用了rust-main入口,可以通过cargo建立的项目分析,单文件不行)

#[cfg(not(test))]
fn lang_start_internal(
    main: &(dyn Fn() -> i32 + Sync + crate::panic::RefUnwindSafe),
    argc: isize,
    argv: *const *const u8,
    sigpipe: u8,
) -> Result<isize, !> {
    use crate::{mem, panic};
    let rt_abort = move |e| {
        mem::forget(e);
        rtabort!("initialization or cleanup bug");
    };
    // Guard against the code called by this function from unwinding outside of the Rust-controlled
    // code, which is UB. This is a requirement imposed by a combination of how the
    // `#[lang="start"]` attribute is implemented as well as by the implementation of the panicking
    // mechanism itself.
    //
    // There are a couple of instances where unwinding can begin. First is inside of the
    // `rt::init`, `rt::cleanup` and similar functions controlled by bstd. In those instances a
    // panic is a std implementation bug. A quite likely one too, as there isn't any way to
    // prevent std from accidentally introducing a panic to these functions. Another is from
    // user code from `main` or, more nefariously, as described in e.g. issue #86030.
    // SAFETY: Only called once during runtime initialization.
    panic::catch_unwind(move || unsafe { init(argc, argv, sigpipe) }).map_err(rt_abort)?;
    let ret_code = panic::catch_unwind(move || panic::catch_unwind(main).unwrap_or(101) as isize)
        .map_err(move |e| {
            mem::forget(e);
            rtabort!("drop of the panic payload panicked");
        });
    panic::catch_unwind(cleanup).map_err(rt_abort)?;

这里面继续跟踪下去,即来到了main入口

(lldb) bt
* thread #1, name = 'hello-rust', stop reason = breakpoint 1.1
  * frame #0: 0x000055555555b7b0 hello-rust`hello_rust::main::h90afc128a8411154 at main.rs:1
    frame #1: 0x000055555555b76b hello-rust`core::ops::function::FnOnce::call_once::h66ec4e6a79d6f1df((null)=(hello-rust`hello_rust::main::h90afc128a8411154 at main.rs:1), (null)=<unavailable>) at function.rs:250:5
    frame #2: 0x000055555555b88e hello-rust`std::sys_common::backtrace::__rust_begin_short_backtrace::h70d836f5ed7ef6e7(f=(hello-rust`hello_rust::main::h90afc128a8411154 at main.rs:1)) at backtrace.rs:155:18
    frame #3: 0x000055555555b861 hello-rust`std::rt::lang_start::_$u7b$$u7b$closure$u7d$$u7d$::h2a770df763246f35 at rt.rs:159:18
    frame #4: 0x000055555557110d hello-rust`std::rt::lang_start_internal::h63a185b0ddd212e9 [inlined] core::ops::function::impls::_$LT$impl$u20$core..ops..function..FnOnce$LT$A$GT$$u20$for$u20$$RF$F$GT$::call_once::hb84c63630a35bb05 at function.rs:284:13
    frame #5: 0x000055555557110a hello-rust`std::rt::lang_start_internal::h63a185b0ddd212e9 [inlined] std::panicking::try::do_call::h8d62108d97b3e028 at panicking.rs:559:40
    frame #6: 0x000055555557110a hello-rust`std::rt::lang_start_internal::h63a185b0ddd212e9 [inlined] std::panicking::try::h0c0b9a214b9691f1 at panicking.rs:523:19
    frame #7: 0x000055555557110a hello-rust`std::rt::lang_start_internal::h63a185b0ddd212e9 [inlined] std::panic::catch_unwind::ha8912f28da143edb at panic.rs:149:14
    frame #8: 0x000055555557110a hello-rust`std::rt::lang_start_internal::h63a185b0ddd212e9 [inlined] std::rt::lang_start_internal::_$u7b$$u7b$closure$u7d$$u7d$::h2baf77487fc7f90d at rt.rs:141:48
    frame #9: 0x000055555557110a hello-rust`std::rt::lang_start_internal::h63a185b0ddd212e9 [inlined] std::panicking::try::do_call::h36fdc82521d3343f at panicking.rs:559:40
    frame #10: 0x000055555557110a hello-rust`std::rt::lang_start_internal::h63a185b0ddd212e9 [inlined] std::panicking::try::h58aa1415c41e30ec at panicking.rs:523:19
    frame #11: 0x000055555557110a hello-rust`std::rt::lang_start_internal::h63a185b0ddd212e9 [inlined] std::panic::catch_unwind::h3c4b5a8c3b1c4acf at panic.rs:149:14
    frame #12: 0x000055555557110a hello-rust`std::rt::lang_start_internal::h63a185b0ddd212e9 at rt.rs:141:20
    frame #13: 0x000055555555b83a hello-rust`std::rt::lang_start::h1bde715c2df689f0(main=(hello-rust`hello_rust::main::h90afc128a8411154 at main.rs:1), argc=1, argv=0x00007fffffffde98, sigpipe='\0') at rt.rs:158:17
    frame #14: 0x000055555555b7fe hello-rust`main + 30
    frame #15: 0x00007ffff7c29d90 libc.so.6`__libc_start_call_main(main=(hello-rust`main), argc=1, argv=0x00007fffffffde98) at libc_start_call_main.h:58:16
    frame #16: 0x00007ffff7c29e40 libc.so.6`__libc_start_main_impl(main=(hello-rust`main), argc=1, argv=0x00007fffffffde98, init=0x00007ffff7ffd040, fini=<unavailable>, rtld_fini=<unavailable>, stack_end=0x00007fffffffde88) at libc-start.c:392:3
    frame #17: 0x000055555555b635 hello-rust`_start + 37

这里的内核态缺页异常切换到用户态的Glibc入口_start,这里似乎有点问题,猜测要通过busybox来构建。

outside_default.png

但Go是没有问题的

outside_default.png

暂且按下不表。姑且认定它是

DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF,  exc_page_fault);


Go/.NET

Go是不经过Glibc的,它自己通过一套非常底层的汇编库取代了Glibc来进行Go-main的调用。.NET是经过Glibc的,.NET通过Glibc启动了CLR的入口,然后构建MSIL,JIT IR,编译等等,最后才是执行托管的C#代码。Rust也经过Glibc启动;它的IR构建和LLVM调用发生在编译阶段,流程上与.NET非常相似,只是.NET是在运行时由JIT完成这一步。

往期精彩回顾

Golang入口彻底研究+CMakelist(Clang)+单文件

从.NET9到Rust

从.NET9看Golang

outside_default.png

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值