C/C++、C#、F#、Go AMD x86-64 编译器内部实现乘法运算

94 篇文章 2 订阅

我们知道,编译型语言的编译器通常并不是直接生成“目标 CPU 平台的机器码”;即便是 C/C++ 编译器也是如此:源码需要先被编译为 ASM 汇编文本源码,再交给汇编器(ASM 编译器)静态汇编为机器码,例如 VC++ 的 CL 内部就集成了 MASM 汇编器。

而本文列出的各语言的汇编代码,就是在这一步骤中由编译器生成的汇编源码。

原型表达式:C/C++

// Returns the product of x and y. This one-line function is the C/C++
// specimen whose clang and MSVC output is listed further down.
int mul(int x, int y) { return x * y; }

原型表达式:Go

package main

// mul returns the product of x and y.
// In the gccgo listing shown later (main.mul) the multiply is emitted as a
// 64-bit imul on rdi/rsi, preceded by a stack check against fs:112.
func mul(x int, y int) int {
    return x * y
}

// main is empty; presumably it is present only so the package builds as a
// program -- the article never calls mul from it.
func main() {}

原型表达式:C#

class Program
{
    // Returns x * y (expression-bodied static method). It appears as
    // Program:mul(int,int):int in the .NET JIT listings below.
    static int mul(int x, int y) => x * y;
}

原型表达式:F#

module Program

// Multiplies x by y. The parameters are written in tupled form; the
// Native AOT listing below shows the compiled signature as mul(int,int):int.
let mul(x, y) = x * y

C/C++ on x86-64 clang 6.0.0

mul(int, int):                               # @mul(int, int)
        imul    edi, esi
        mov     eax, edi
        ret

C/C++ on MSVC v19.10 (WINE)(VC++ 2019;VC++ 2022 的版本号大于 19.29.30140.0)

x$ = 8
y$ = 16
int mul(int,int) PROC                                  ; mul
        imul    ecx, edx
        mov     eax, ecx
        ret     0
int mul(int,int) ENDP                                  ; mul

C# .NET 6.0(dotnet core)

Program:.ctor():this:
       ret      

Program:mul(int,int):int:
       mov      eax, edi
       imul     eax, esi
       ret      

C# .NET Framework 4.0 JIT (Intel x86-32)DEBUG【会有多余的(无实际作用的)CPU 指令】

static int mul(int x, int y) => x * y;

## 完整函数实现(等价于上面其它语言ASM被编译为最终机器代码)
0516B040 55                   push        ebp  
0516B041 8B EC                mov         ebp,esp  // 部署函数堆栈
0516B043 57                   push        edi  
0516B044 56                   push        esi  
0516B045 53                   push        ebx  
0516B046 83 EC 34             sub         esp,34h  // 扩大52字节计算堆栈
0516B049 33 C0                xor         eax,eax  // 置空(位运算同值异或)

## 将局部变量槽清零(初始化),并把经 ECX、EDX 寄存器传入的两个参数(X、Y)保存到函数栈帧的局部变量区
0516B04B 89 45 F0             mov         dword ptr [ebp-10h],eax  
0516B04E 89 45 E4             mov         dword ptr [ebp-1Ch],eax  
0516B051 89 4D C4             mov         dword ptr [ebp-3Ch],ecx  
0516B054 89 55 C0             mov         dword ptr [ebp-40h],edx  
0516B057 83 3D F0 42 E8 00 00 cmp         dword ptr ds:[0E842F0h],0  
0516B05E 74 05                je          Ppp.Windows.PppApplication+Program.mul(Int32, Int32)+025h (0516B065h)  
0516B060 E8 0B 2C EB 6D       call        7301DC70  

## 该 C# 函数执行乘法运算的汇编指令(很清晰):
0516B065 8B 45 C0             mov         eax,dword ptr [ebp-40h]  
0516B068 0F AF 45 C4          imul        eax,dword ptr [ebp-3Ch]  

## 平衡函数堆栈并返回,注:EAX累加寄存器在X86汇编中常用于代表返回值
0516B06C 8D 65 F4             lea         esp,[ebp-0Ch]  
0516B06F 5B                   pop         ebx  
0516B070 5E                   pop         esi  
0516B071 5F                   pop         edi  
0516B072 5D                   pop         ebp  
0516B073 C3                   ret         ## 等价:RETN 0(就是上面未编译为机器汇编的源文本形式的:ret 0)

F# on dotNET Native AOT

Program:mul(int,int):int:
       mov      eax, edi
       imul     eax, esi
       ret      

Golang on x86 gccgo 12.2.0(已开启编译器最高优化级别)

剔除其它代码后,至少还需要执行以下汇编指令,而这只是做一次 X、Y 的简单乘法运算而已……(其中开头的 cmp/jb 与 .L125 处的 __morestack 调用是函数入口的栈空间检查,真正的乘法只有 mov 与 imul 两条指令。)

main.mul:
        cmp     rsp, QWORD PTR fs:112
        jb      .L125
.L124:
        mov     rax, rdi
        imul    rax, rsi
        ret
.L125:
        xor     r10d, r10d
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L124

编译器生成完整汇编源代码:

main.struct_4runtime_0gList_cruntime_0n_bint32_5..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L6
.L4:
        mov     rdx, QWORD PTR [rsi]
        xor     eax, eax
        cmp     QWORD PTR [rdi], rdx
        jne     .L1
        mov     eax, DWORD PTR [rsi+8]
        cmp     DWORD PTR [rdi+8], eax
        sete    al
.L1:
        ret
.L6:
        xor     r10d, r10d
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L4
main._661_7struct_4Size_buint32_cMallocs_buint64_cFrees_buint64_5..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L16
.L15:
        xor     edx, edx
.L9:
        mov     r9, QWORD PTR [rdi+8+rdx]
        mov     r8, QWORD PTR [rdi+16+rdx]
        mov     rax, QWORD PTR [rsi+8+rdx]
        mov     rcx, QWORD PTR [rsi+16+rdx]
        mov     r10d, DWORD PTR [rsi+rdx]
        cmp     DWORD PTR [rdi+rdx], r10d
        je      .L17
        xor     eax, eax
.L7:
        ret
.L17:
        cmp     r9, rax
        sete    al
        cmp     r8, rcx
        sete    cl
        and     al, cl
        je      .L7
        add     rdx, 24
        cmp     rdx, 1464
        jne     .L9
        ret
.L16:
        xor     r10d, r10d
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L15
main.struct_4Size_buint32_cMallocs_buint64_cFrees_buint64_5..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L23
.L22:
        mov     edx, DWORD PTR [rsi]
        xor     eax, eax
        cmp     DWORD PTR [rdi], edx
        jne     .L18
        mov     rcx, QWORD PTR [rsi+8]
        cmp     QWORD PTR [rdi+8], rcx
        je      .L24
.L18:
        ret
.L24:
        mov     rax, QWORD PTR [rsi+16]
        cmp     QWORD PTR [rdi+16], rax
        sete    al
        ret
.L23:
        xor     r10d, r10d
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L22
main._633_7float64..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L33
.L32:
        xor     eax, eax
        jmp     .L28
.L35:
        add     rax, 8
        cmp     rax, 264
        je      .L34
.L28:
        movsd   xmm0, QWORD PTR [rdi+rax]
        ucomisd xmm0, QWORD PTR [rsi+rax]
        jp      .L29
        je      .L35
.L29:
        xor     eax, eax
        ret
.L34:
        mov     eax, 1
        ret
.L33:
        xor     r10d, r10d
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L32
main._632_7uintptr..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L39
.L37:
        sub     rsp, 8
        mov     edx, 256
        call    runtime.memequal
        add     rsp, 8
        ret
.L39:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L37
main._6256_7uint64..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L43
.L41:
        sub     rsp, 8
        mov     edx, 2048
        call    runtime.memequal
        add     rsp, 8
        ret
.L43:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L41
main._6122_7uintptr..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L47
.L45:
        sub     rsp, 8
        mov     edx, 976
        call    runtime.memequal
        add     rsp, 8
        ret
.L47:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L45
main._68_7uint64..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L51
.L49:
        sub     rsp, 8
        mov     edx, 64
        call    runtime.memequal
        add     rsp, 8
        ret
.L51:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L49
main._6128_7uint8..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L55
.L53:
        sub     rsp, 8
        mov     edx, 128
        call    runtime.memequal
        add     rsp, 8
        ret
.L55:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L53
main._64096_7uint8..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L59
.L57:
        sub     rsp, 8
        mov     edx, 4096
        call    runtime.memequal
        add     rsp, 8
        ret
.L59:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L57
main._668_7uint16..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L63
.L61:
        sub     rsp, 8
        mov     edx, 136
        call    runtime.memequal
        add     rsp, 8
        ret
.L63:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L61
main._665_7uint32..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L67
.L65:
        sub     rsp, 8
        mov     edx, 260
        call    runtime.memequal
        add     rsp, 8
        ret
.L67:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L65
main._64_7uintptr..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L71
.L69:
        sub     rsp, 8
        mov     edx, 32
        call    runtime.memequal
        add     rsp, 8
        ret
.L71:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L69
main._65_7uint..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L75
.L73:
        sub     rsp, 8
        mov     edx, 40
        call    runtime.memequal
        add     rsp, 8
        ret
.L75:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L73
main._6512_7uint8..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L79
.L77:
        sub     rsp, 8
        mov     edx, 512
        call    runtime.memequal
        add     rsp, 8
        ret
.L79:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L77
main._6249_7uint8..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L83
.L81:
        sub     rsp, 8
        mov     edx, 249
        call    runtime.memequal
        add     rsp, 8
        ret
.L83:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L81
main._6129_7uint8..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L87
.L85:
        sub     rsp, 8
        mov     edx, 129
        call    runtime.memequal
        add     rsp, 8
        ret
.L87:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L85
main._632_7uint8..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L91
.L89:
        sub     rsp, 8
        mov     edx, 32
        call    runtime.memequal
        add     rsp, 8
        ret
.L91:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L89
main._627_7string..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L104
.L102:
        push    r12
        mov     r12, rdi
        push    rbp
        mov     rbp, rsi
        push    rbx
        xor     ebx, ebx
        jmp     .L95
.L106:
        cmp     rdi, rsi
        je      .L97
        call    memcmp
        test    eax, eax
        jne     .L93
.L97:
        add     rbx, 16
        cmp     rbx, 432
        je      .L105
.L95:
        movdqu  xmm0, XMMWORD PTR [r12+rbx]
        mov     rdi, QWORD PTR [r12+rbx]
        movdqu  xmm0, XMMWORD PTR [rbp+0+rbx]
        mov     rsi, QWORD PTR [rbp+0+rbx]
        mov     rdx, QWORD PTR [rbp+8+rbx]
        cmp     rdx, QWORD PTR [r12+8+rbx]
        je      .L106
.L93:
        xor     eax, eax
        pop     rbx
        pop     rbp
        pop     r12
        ret
.L105:
        mov     eax, 1
        pop     rbx
        pop     rbp
        pop     r12
        ret
.L104:
        mov     r10d, 24
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L102
main._61024_7uint8..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L110
.L108:
        sub     rsp, 8
        mov     edx, 1024
        call    runtime.memequal
        add     rsp, 8
        ret
.L110:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L108
main._62_7int32..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L114
.L112:
        sub     rsp, 8
        mov     edx, 8
        call    runtime.memequal
        add     rsp, 8
        ret
.L114:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L112
main._664_7uint8..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L118
.L116:
        sub     rsp, 8
        mov     edx, 64
        call    runtime.memequal
        add     rsp, 8
        ret
.L118:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L116
main._6256_7uint8..eq:
        cmp     rsp, QWORD PTR fs:112
        jb      .L122
.L120:
        sub     rsp, 8
        mov     edx, 256
        call    runtime.memequal
        add     rsp, 8
        ret
.L122:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L120
main.mul:
        cmp     rsp, QWORD PTR fs:112
        jb      .L125
.L124:
        mov     rax, rdi
        imul    rax, rsi
        ret
.L125:
        xor     r10d, r10d
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L124
main.main:
        cmp     rsp, QWORD PTR fs:112
        jb      .L128
        ret
.L128:
        xor     r10d, r10d
        xor     r11d, r11d
        call    __morestack
        ret
        ret
__go_init_main:
        cmp     rsp, QWORD PTR fs:112
        jb      .L132
.L130:
        sub     rsp, 8
        mov     esi, OFFSET FLAT:go..typelists
        mov     edi, 11
        call    runtime.registerTypeDescriptors
        call    internal_1cpu..import
        call    runtime..import
        add     rsp, 8
        ret
.L132:
        mov     r10d, 8
        xor     r11d, r11d
        call    __morestack
        ret
        jmp     .L130
go..typelists:
        .quad   internal_1cpu..types
        .quad   runtime..types
        .quad   internal_1abi..types
        .quad   internal_1bytealg..types
        .quad   internal_1goarch..types
        .quad   internal_1goexperiment..types
        .quad   internal_1goos..types
        .quad   runtime_1internal_1atomic..types
        .quad   runtime_1internal_1math..types
        .quad   runtime_1internal_1sys..types
        .quad   main..types
main..types:
        .zero   16
main._6256_7uint8..eq..f:
        .quad   main._6256_7uint8..eq
main._664_7uint8..eq..f:
        .quad   main._664_7uint8..eq
main._62_7int32..eq..f:
        .quad   main._62_7int32..eq
main._61024_7uint8..eq..f:
        .quad   main._61024_7uint8..eq
main._627_7string..eq..f:
        .quad   main._627_7string..eq
main._632_7uint8..eq..f:
        .quad   main._632_7uint8..eq
main._6129_7uint8..eq..f:
        .quad   main._6129_7uint8..eq
main._6249_7uint8..eq..f:
        .quad   main._6249_7uint8..eq
main._6512_7uint8..eq..f:
        .quad   main._6512_7uint8..eq
main._65_7uint..eq..f:
        .quad   main._65_7uint..eq
main._64_7uintptr..eq..f:
        .quad   main._64_7uintptr..eq
main._665_7uint32..eq..f:
        .quad   main._665_7uint32..eq
main._633_7float64..eq..f:
        .quad   main._633_7float64..eq
main._668_7uint16..eq..f:
        .quad   main._668_7uint16..eq
main._64096_7uint8..eq..f:
        .quad   main._64096_7uint8..eq
main._6128_7uint8..eq..f:
        .quad   main._6128_7uint8..eq
main._68_7uint64..eq..f:
        .quad   main._68_7uint64..eq
main._6122_7uintptr..eq..f:
        .quad   main._6122_7uintptr..eq
main.struct_4Size_buint32_cMallocs_buint64_cFrees_buint64_5..eq..f:
        .quad   main.struct_4Size_buint32_cMallocs_buint64_cFrees_buint64_5..eq
main._661_7struct_4Size_buint32_cMallocs_buint64_cFrees_buint64_5..eq..f:
        .quad   main._661_7struct_4Size_buint32_cMallocs_buint64_cFrees_buint64_5..eq
main._6256_7uint64..eq..f:
        .quad   main._6256_7uint64..eq
main._632_7uintptr..eq..f:
        .quad   main._632_7uintptr..eq
main.struct_4runtime_0gList_cruntime_0n_bint32_5..eq..f:
        .quad   main.struct_4runtime_0gList_cruntime_0n_bint32_5..eq

从上述内容中,大家可以自行看出很多门道;如果不严格地从目标平台 CPU 实际执行的机器(汇编)代码出发,去评判某种编程语言编译后代码的执行效率,是没有意义的。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值