C代码
// cd_call: returns a1 + a2 + a3 + a4 + a5, declared with the _cdecl calling convention.
// Each argument is first copied into its own local (t_a1..t_a5); the listing further
// down in this file shows those locals at [ebp-4]..[ebp-14].
int _cdecl cd_call(int a1, int a2, int a3, int a4, int a5)
{
// Copy every argument into a separate local variable.
int t_a1 = a1;
int t_a2 = a2;
int t_a3 = a3;
int t_a4 = a4;
int t_a5 = a5;
// The result is the sum of the five locals.
return t_a1 + t_a2 + t_a3 + t_a4 + t_a5;
}
// std_call: returns a1 + a2 + a3 + a4 + a5, declared with the _stdcall calling convention.
// The body is the same as cd_call's; only the calling convention differs.
int _stdcall std_call(int a1, int a2, int a3, int a4, int a5)
{
// Copy every argument into a separate local variable.
int t_a1 = a1;
int t_a2 = a2;
int t_a3 = a3;
int t_a4 = a4;
int t_a5 = a5;
// The result is the sum of the five locals.
return t_a1 + t_a2 + t_a3 + t_a4 + t_a5;
}
// fast_call: returns a1 + a2 + a3 + a4 + a5, declared with the _fastcall calling convention.
// In the listing further down in this file, a1 arrives in ecx, a2 in edx, and a3..a5 on the stack.
int _fastcall fast_call(int a1, int a2, int a3, int a4, int a5)
{
// Copy every argument into a separate local variable.
int t_a1 = a1;
int t_a2 = a2;
int t_a3 = a3;
int t_a4 = a4;
int t_a5 = a5;
// The result is the sum of the five locals.
return t_a1 + t_a2 + t_a3 + t_a4 + t_a5;
}
// vector_call: returns a1 + a2 + a3 + a4 + a5, declared with the _vectorcall calling convention.
// With these all-int arguments the listing further down in this file matches the _fastcall one.
int _vectorcall vector_call(int a1, int a2, int a3, int a4, int a5)
{
// Copy every argument into a separate local variable.
int t_a1 = a1;
int t_a2 = a2;
int t_a3 = a3;
int t_a4 = a4;
int t_a5 = a5;
// The result is the sum of the five locals.
return t_a1 + t_a2 + t_a3 + t_a4 + t_a5;
}
// main: calls each of the four functions once with the arguments (1, 2, 3, 4, 5).
// The return values are not used; the calls exist so their call sites can be inspected.
int main()
{
cd_call(1, 2, 3, 4, 5);
std_call(1, 2, 3, 4, 5);
fast_call(1, 2, 3, 4, 5);
vector_call(1, 2, 3, 4, 5);
}
汇编代码
_cdecl
调用函数代码
// Call site of cd_call(1, 2, 3, 4, 5): all five arguments go on the stack, last argument first.
push 5 // a5
push 4 // a4
push 3 // a3
push 2 // a2
push 1 // a1
call <0623_x86_x64_asm.cdcall>
add esp,14 // the caller removes the 0x14 bytes (5 * 4) of arguments after the call returns
被调函数代码
// Callee listing for cd_call: sets up an ebp frame, copies the five stack arguments
// into locals, sums the locals in eax and returns without removing the arguments.
push ebp // save the caller's ebp
mov ebp,esp // ebp anchors this frame: arguments at [ebp+8] and up, locals below ebp
sub esp,54 // reserve 0x54 bytes below ebp; the locals use [ebp-4]..[ebp-14]
push ebx // ebx, esi and edi are saved here and restored before returning
push esi
push edi
mov eax,dword ptr ss:[ebp+8] // [ebp+8] is the first argument, a1
mov dword ptr ss:[ebp-4],eax // [ebp-4] is the first local variable, t_a1
mov eax,dword ptr ss:[ebp+C] // a2
mov dword ptr ss:[ebp-8],eax // t_a2
mov eax,dword ptr ss:[ebp+10] // a3
mov dword ptr ss:[ebp-C],eax // t_a3
mov eax,dword ptr ss:[ebp+14] // a4
mov dword ptr ss:[ebp-10],eax // t_a4
mov eax,dword ptr ss:[ebp+18] // a5
mov dword ptr ss:[ebp-14],eax // t_a5
mov eax,dword ptr ss:[ebp-4] // eax = t_a1
add eax,dword ptr ss:[ebp-8] // + t_a2
add eax,dword ptr ss:[ebp-C] // + t_a3
add eax,dword ptr ss:[ebp-10] // + t_a4
add eax,dword ptr ss:[ebp-14] // + t_a5; the sum is left in eax
pop edi // restore the registers saved after the frame was set up
pop esi
pop ebx
mov esp,ebp // discard the local area
pop ebp // restore the caller's ebp
ret // plain ret: the arguments stay on the stack; the call site removes them with add esp,14
_stdcall
调用函数代码
// Call site of std_call(1, 2, 3, 4, 5): same pushes as the _cdecl call site,
// but no add esp follows the call in this listing.
push 5 // a5
push 4 // a4
push 3 // a3
push 2 // a2
push 1 // a1
call <0623_x86_x64_asm.stdcall>
被调函数代码
// Callee listing for std_call: identical to the _cdecl callee except for the final
// ret 14, which also removes the five arguments from the stack.
push ebp // save the caller's ebp
mov ebp,esp // ebp anchors this frame: arguments at [ebp+8] and up, locals below ebp
sub esp,54 // reserve 0x54 bytes below ebp; the locals use [ebp-4]..[ebp-14]
push ebx // ebx, esi and edi are saved here and restored before returning
push esi
push edi
mov eax,dword ptr ss:[ebp+8] // a1
mov dword ptr ss:[ebp-4],eax // t_a1
mov eax,dword ptr ss:[ebp+C] // a2
mov dword ptr ss:[ebp-8],eax // t_a2
mov eax,dword ptr ss:[ebp+10] // a3
mov dword ptr ss:[ebp-C],eax // t_a3
mov eax,dword ptr ss:[ebp+14] // a4
mov dword ptr ss:[ebp-10],eax // t_a4
mov eax,dword ptr ss:[ebp+18] // a5
mov dword ptr ss:[ebp-14],eax // t_a5
mov eax,dword ptr ss:[ebp-4] // eax = t_a1
add eax,dword ptr ss:[ebp-8] // + t_a2
add eax,dword ptr ss:[ebp-C] // + t_a3
add eax,dword ptr ss:[ebp-10] // + t_a4
add eax,dword ptr ss:[ebp-14] // + t_a5; the sum is left in eax
pop edi // restore the registers saved after the frame was set up
pop esi
pop ebx
mov esp,ebp // discard the local area
pop ebp // restore the caller's ebp
ret 14 // return and remove the 0x14 bytes (5 * 4) of arguments pushed by the caller
_fastcall
调用函数代码
// Call site of fast_call(1, 2, 3, 4, 5): a3..a5 are pushed, a2 and a1 are passed in registers.
push 5 // a5
push 4 // a4
push 3 // a3
mov edx,2 // a2 is passed in edx
mov ecx,1 // a1 is passed in ecx
call <0623_x86_x64_asm.fastcall>
被调函数代码
// Callee listing for fast_call: a1 and a2 arrive in ecx and edx and are first stored
// into the frame; a3..a5 are read from the stack. The five values are copied into
// locals, summed in eax, and ret C removes the three stack arguments.
push ebp // save the caller's ebp
mov ebp,esp // ebp anchors this frame: stack arguments at [ebp+8] and up, locals below ebp
sub esp,5C // reserve 0x5C bytes below ebp; [ebp-4]..[ebp-1C] are used below
push ebx // ebx, esi and edi are saved here and restored before returning
push esi
push edi
mov dword ptr ss:[ebp-8],edx // store the second argument a2 (from edx) at [ebp-8]
mov dword ptr ss:[ebp-4],ecx // store the first argument a1 (from ecx) at [ebp-4]
mov eax,dword ptr ss:[ebp-4] // a1 (stored copy)
mov dword ptr ss:[ebp-C],eax // t_a1
mov eax,dword ptr ss:[ebp-8] // a2 (stored copy)
mov dword ptr ss:[ebp-10],eax // t_a2
mov eax,dword ptr ss:[ebp+8] // a3, the first stack argument
mov dword ptr ss:[ebp-14],eax // t_a3
mov eax,dword ptr ss:[ebp+C] // a4
mov dword ptr ss:[ebp-18],eax // t_a4
mov eax,dword ptr ss:[ebp+10] // a5
mov dword ptr ss:[ebp-1C],eax // t_a5
mov eax,dword ptr ss:[ebp-C] // eax = t_a1
add eax,dword ptr ss:[ebp-10] // + t_a2
add eax,dword ptr ss:[ebp-14] // + t_a3
add eax,dword ptr ss:[ebp-18] // + t_a4
add eax,dword ptr ss:[ebp-1C] // + t_a5; the sum is left in eax
pop edi // restore the registers saved after the frame was set up
pop esi
pop ebx
mov esp,ebp // discard the local area
pop ebp // restore the caller's ebp
ret C // return and remove the 0xC bytes (3 * 4) of arguments the caller pushed (a3..a5)
_vectorcall
调用函数代码
// Call site of vector_call(1, 2, 3, 4, 5): a3..a5 are pushed, a2 and a1 are passed in registers.
push 5 // a5
push 4 // a4
push 3 // a3
mov edx,2 // a2 is passed in edx
mov ecx,1 // a1 is passed in ecx
call <0623_x86_x64_asm.vectorcall> // NOTE(review): the call line was missing from this listing; symbol name inferred from the sibling listings (cdcall/stdcall/fastcall) — confirm against the debugger
被调函数代码
// Callee listing for vector_call: instruction-for-instruction the same as the _fastcall
// callee in this file. a1 and a2 arrive in ecx and edx, a3..a5 are read from the stack.
push ebp // save the caller's ebp
mov ebp,esp // ebp anchors this frame: stack arguments at [ebp+8] and up, locals below ebp
sub esp,5C // reserve 0x5C bytes below ebp; [ebp-4]..[ebp-1C] are used below
push ebx // ebx, esi and edi are saved here and restored before returning
push esi
push edi
mov dword ptr ss:[ebp-8],edx // store the second argument a2 (from edx) at [ebp-8]
mov dword ptr ss:[ebp-4],ecx // store the first argument a1 (from ecx) at [ebp-4]
mov eax,dword ptr ss:[ebp-4] // a1 (stored copy)
mov dword ptr ss:[ebp-C],eax // t_a1
mov eax,dword ptr ss:[ebp-8] // a2 (stored copy)
mov dword ptr ss:[ebp-10],eax // t_a2
mov eax,dword ptr ss:[ebp+8] // a3, the first stack argument
mov dword ptr ss:[ebp-14],eax // t_a3
mov eax,dword ptr ss:[ebp+C] // a4
mov dword ptr ss:[ebp-18],eax // t_a4
mov eax,dword ptr ss:[ebp+10] // a5
mov dword ptr ss:[ebp-1C],eax // t_a5
mov eax,dword ptr ss:[ebp-C] // eax = t_a1
add eax,dword ptr ss:[ebp-10] // + t_a2
add eax,dword ptr ss:[ebp-14] // + t_a3
add eax,dword ptr ss:[ebp-18] // + t_a4
add eax,dword ptr ss:[ebp-1C] // + t_a5; the sum is left in eax
pop edi // restore the registers saved after the frame was set up
pop esi
pop ebx
mov esp,ebp // discard the local area
pop ebp // restore the caller's ebp
ret C // return and remove the 0xC bytes (3 * 4) of arguments the caller pushed (a3..a5)
分析
1.传参顺序都是从右至左传参。
2._fastcall和_vectorcall传递整型参数时,至多借用两个寄存器(ecx,edx),且只用来传递第一个参数(ecx)和第二个参数(edx)。如果参数超过两个,其余参数仍然用push传参。
3.对于_cdecl和_stdcall而言,在本文使用的编译配置下,即使函数内部不创建任何局部变量,也要设置0x40大小的栈帧(这0x40字节是编译器额外预留的,并非调用约定本身的要求);如果创建了局部变量,就要设置0x40+sizeof(局部变量)大小的栈帧(本例为0x40+0x14=0x54)。对于_fastcall和_vectorcall而言,要设置的栈帧更大,因为栈帧的大小是0x40+sizeof(局部变量)+sizeof(参数1,参数2)(本例为0x40+0x14+0x8=0x5C)。
4.对于_fastcall和_vectorcall而言,假设传了两个参数,参数传到寄存器后,函数内部会将其转存到ebp-4(第一个参数)和ebp-8(第二个参数)的位置,说白了就是把第一个和第二个参数当局部变量来存。
5.除了_cdecl是外平栈(由调用者在call之后用add esp,14平栈),其余三种都是内平栈(由被调函数用ret N平栈)。对于_cdecl和_stdcall,要平栈的大小都是sizeof(全部参数),本例为0x14;对于_fastcall和_vectorcall,由于头两个参数通过寄存器传递,调用者并没有把它们压栈(被调函数只是把它们当作局部变量存进了自己的栈帧),所以平栈大小只需sizeof(全部参数)-sizeof(参数1,参数2),本例为0xC。
6.在此文的分析中_fastcall和_vectorcall好像完全一样,那是由于实验数据都为整型;在传递浮点型数据时,这两种调用约定才有区别,但此文就略过不提了。
堆栈图
_cdecl和_stdcall
---------> t_a5
---------> t_a4
---------> t_a3
---------> t_a2
---------> t_a1
ebp------> 上一个函数的栈帧的EBP
---------> 要ret的地址
---------> a1
---------> a2
---------> a3
---------> a4
---------> a5
_fastcall和_vectorcall
---------> t_a5
---------> t_a4
---------> t_a3
---------> t_a2
---------> t_a1
---------> a2
---------> a1
ebp------> 上一个函数的栈帧的EBP
---------> 要ret的地址
---------> a3
---------> a4
---------> a5