所谓的面向对象及继承、多态等等只是在语言层面的实现,机器在执行程序时根本就不知道这些。这篇博客的目的是探讨一下,编译器对C++虚函数的实现。
先探讨最简单的情况,代码如下(vtable.cpp):
#include <stdio.h>
// Minimal polymorphic class used to inspect the compiler-generated vtable.
// fun1..fun3 are virtual and appear in the vtable in declaration order;
// fun4, the constructor and the destructor are non-virtual and get no slot.
class Base {
public:
    // Announces construction.
    Base() { puts("Base()"); }

    // Virtual members: each one only prints its own name and never
    // touches `this`.
    virtual void fun1() { puts("fun1()"); }
    virtual void fun2() { puts("fun2()"); }
    virtual void fun3() { puts("fun3()"); }

    // Ordinary (non-virtual) member: always called directly.
    void fun4() { puts("Normal fun4()"); }

    // Non-virtual destructor: announces destruction.
    ~Base() { puts("~Base()"); }
};
/* Signature used to call vtable entries directly. Nothing is passed as
 * `this` through such a pointer, so it is only usable with member functions
 * that never touch `this`. */
typedef void (*FUN) (void);

/*
 * Reads the vtable pointer stored in the first word of a Base object, pulls
 * the three virtual-function addresses out of the table, and calls each of
 * them through a plain function pointer.
 *
 * This relies on the object layout shown in the assembly listing (vtable
 * pointer at offset 0, one pointer-sized slot per virtual function); it is
 * not guaranteed by the C++ standard.
 *
 * The table is walked as an array of void* rather than int so the slot
 * stride equals the pointer size on every target. On the 32-bit target of
 * the listing, where sizeof(int) == sizeof(void*), this is equivalent to the
 * (int**) casts quoted in the listing's annotations.
 *
 * Returns 0.
 */
int main()
{
    FUN pfun1 = NULL;
    FUN pfun2 = NULL;
    FUN pfun3 = NULL;
    Base b;
    //printf("sizeof b = %d\n",sizeof b);
    printf("&b = %p\n", (void *)(&b));
    /* Prints the address of the word that stores the vtable pointer. That
     * word is the first one in b, so the value printed equals &b. */
    printf("vtable = %p\n", (void *)(&b));
    //printf("b = 0x%x\n",*(int*)(&b));

    /* Slot 0: fun1. */
    pfun1 = (FUN)**(void ***)(&b);
    printf("fun1 = %p\n", (void *)pfun1);
    pfun1();

    /* Slot 1: fun2. */
    pfun2 = (FUN)*(*(void ***)(&b) + 1);
    printf("fun2 = %p\n", (void *)pfun2);
    pfun2();

    /* Slot 2: fun3. */
    pfun3 = (FUN)*(*(void ***)(&b) + 2);
    printf("fun3 = %p\n", (void *)pfun3);
    pfun3();

    /* Non-virtual member: ordinary direct call. */
    b.fun4();
    return 0;
}
用 gcc -S vtable.cpp 生成汇编文件 vtable.s(下面是 32 位 x86、AT&T 语法的输出):
其中的注释已经很详细了,只需要了解几条简单的汇编指令(mov、call、ret、push、pop)就能看懂。
清单中 # 之后的内容是注释。
.file "vtable.cpp"
# Base::Base(): stores the vtable pointer into the object, then prints "Base()".
.section .rodata # read-only data section
.LC0:
.string "Base()" # defines a NUL-terminated string; roughly `char LC0[] = "Base()";` in C
.section .text._ZN4BaseC1Ev,"axG",@progbits,_ZN4BaseC1Ev,comdat
.align 2
.weak _ZN4BaseC1Ev
.type _ZN4BaseC1Ev, @function
_ZN4BaseC1Ev: # Base's constructor
.LFB2:
.cfi_startproc
.cfi_personality 0x0,__gxx_personality_v0
pushl %ebp
.cfi_def_cfa_offset 8
movl %esp, %ebp
.cfi_offset 5, -8
.cfi_def_cfa_register 5
subl $24, %esp
movl 8(%ebp), %eax # Load `this` from 8(%ebp). Why skip 8 bytes? %ebp was just set equal to %esp, and two 4-byte values sit below the argument:
# (a) the return address (%eip) that `call` saved so `ret` can return to the right place (stack-smashing attacks work by overwriting this saved value to redirect the return),
# (b) the old %ebp saved by `pushl %ebp` above. So 8(%ebp) is the first stack argument, which here is the hidden `this` pointer.
movl $_ZTV4Base+8, (%eax) # Key step, equivalent to `*(void **)this = _ZTV4Base + 8`. `$_ZTV4Base` is the address of the _ZTV4Base label; +8 skips its first two 4-byte entries, so the stored pointer addresses the slot that holds fun1's address
movl $.LC0, (%esp) # set up the stack argument for puts: address of "Base()"
call puts # function call
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE2:
.size _ZN4BaseC1Ev, .-_ZN4BaseC1Ev
# Base::fun1(): prints "fun1()" with puts.
# The body never reads the `this` argument at 8(%ebp).
.section .rodata
.LC1:
.string "fun1()"
.section .text._ZN4Base4fun1Ev,"axG",@progbits,_ZN4Base4fun1Ev,comdat
.align 2
.weak _ZN4Base4fun1Ev
.type _ZN4Base4fun1Ev, @function
_ZN4Base4fun1Ev:
.LFB3:
.cfi_startproc
.cfi_personality 0x0,__gxx_personality_v0
pushl %ebp
.cfi_def_cfa_offset 8
movl %esp, %ebp
.cfi_offset 5, -8
.cfi_def_cfa_register 5
subl $24, %esp
# Argument for puts: address of the "fun1()" string.
movl $.LC1, (%esp)
call puts
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE3:
.size _ZN4Base4fun1Ev, .-_ZN4Base4fun1Ev
# Base::fun2(): prints "fun2()" with puts.
# The body never reads the `this` argument at 8(%ebp).
.section .rodata
.LC2:
.string "fun2()"
.section .text._ZN4Base4fun2Ev,"axG",@progbits,_ZN4Base4fun2Ev,comdat
.align 2
.weak _ZN4Base4fun2Ev
.type _ZN4Base4fun2Ev, @function
_ZN4Base4fun2Ev:
.LFB4:
.cfi_startproc
.cfi_personality 0x0,__gxx_personality_v0
pushl %ebp
.cfi_def_cfa_offset 8
movl %esp, %ebp
.cfi_offset 5, -8
.cfi_def_cfa_register 5
subl $24, %esp
# Argument for puts: address of the "fun2()" string.
movl $.LC2, (%esp)
call puts
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE4:
.size _ZN4Base4fun2Ev, .-_ZN4Base4fun2Ev
# Base::fun3(): prints "fun3()" with puts.
# The body never reads the `this` argument at 8(%ebp).
.section .rodata
.LC3:
.string "fun3()"
.section .text._ZN4Base4fun3Ev,"axG",@progbits,_ZN4Base4fun3Ev,comdat
.align 2
.weak _ZN4Base4fun3Ev
.type _ZN4Base4fun3Ev, @function
_ZN4Base4fun3Ev:
.LFB5:
.cfi_startproc
.cfi_personality 0x0,__gxx_personality_v0
pushl %ebp
.cfi_def_cfa_offset 8
movl %esp, %ebp
.cfi_offset 5, -8
.cfi_def_cfa_register 5
subl $24, %esp
# Argument for puts: address of the "fun3()" string.
movl $.LC3, (%esp)
call puts
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE5:
.size _ZN4Base4fun3Ev, .-_ZN4Base4fun3Ev
# Base::fun4(), the non-virtual member: prints "Normal fun4()" with puts.
# The body never reads the `this` argument at 8(%ebp), and this symbol does
# not appear among the entries of the _ZTV4Base table.
.section .rodata
.LC4:
.string "Normal fun4()"
.section .text._ZN4Base4fun4Ev,"axG",@progbits,_ZN4Base4fun4Ev,comdat
.align 2
.weak _ZN4Base4fun4Ev
.type _ZN4Base4fun4Ev, @function
_ZN4Base4fun4Ev:
.LFB6:
.cfi_startproc
.cfi_personality 0x0,__gxx_personality_v0
pushl %ebp
.cfi_def_cfa_offset 8
movl %esp, %ebp
.cfi_offset 5, -8
.cfi_def_cfa_register 5
subl $24, %esp
# Argument for puts: address of the "Normal fun4()" string.
movl $.LC4, (%esp)
call puts
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE6:
.size _ZN4Base4fun4Ev, .-_ZN4Base4fun4Ev
# Base::~Base(): re-stores the Base vtable pointer into the object, then
# prints "~Base()".
.section .rodata
.LC5:
.string "~Base()"
.section .text._ZN4BaseD1Ev,"axG",@progbits,_ZN4BaseD1Ev,comdat
.align 2
.weak _ZN4BaseD1Ev
.type _ZN4BaseD1Ev, @function
_ZN4BaseD1Ev:
.LFB9:
.cfi_startproc
.cfi_personality 0x0,__gxx_personality_v0
pushl %ebp
.cfi_def_cfa_offset 8
movl %esp, %ebp
.cfi_offset 5, -8
.cfi_def_cfa_register 5
subl $24, %esp
# Load the hidden `this` argument.
movl 8(%ebp), %eax
# Same store as in the constructor: the object's first word = _ZTV4Base+8.
movl $_ZTV4Base+8, (%eax)
# Argument for puts: address of the "~Base()" string.
movl $.LC5, (%esp)
call puts
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE9:
.size _ZN4BaseD1Ev, .-_ZN4BaseD1Ev
# main(): format strings first, then the function body.
# Locals in main's frame: b at 16(%esp), pfun1 at 20(%esp), pfun2 at
# 24(%esp), pfun3 at 28(%esp).
.section .rodata
.LC6:
.string "&b = %p\n"
.LC7:
.string "vtable = %p\n"
.LC8:
.string "fun1 = %p\n"
.LC9:
.string "fun2 = %p\n"
.LC10:
.string "fun3 = %p\n"
.globl _Unwind_Resume
.text
.globl main
.type main, @function
main: # NOTE(review): the original annotation here is cut off. It appears to say that some directives were removed from main for readability (no .cfi_startproc is present although .cfi_endproc is) -- confirm
.LFB10:
pushl %ebp
movl %esp, %ebp
andl $-16, %esp
pushl %esi
pushl %ebx
subl $40, %esp
movl $0, 20(%esp) # pfun1 = NULL
movl $0, 24(%esp) # pfun2 = NULL
movl $0, 28(%esp) # pfun3 = NULL
leal 16(%esp), %eax # %esp is the stack pointer; this puts %esp+16, the address of b, into %eax
movl %eax, (%esp) # store &b at the top of the stack for the call below: it is the hidden `this` argument
.LEHB0:
.cfi_escape 0x10,0x3,0x8,0x75,0x0,0x9,0xf0,0x1a,0x9,0xf8,0x22
.cfi_escape 0x10,0x6,0x8,0x75,0x0,0x9,0xf0,0x1a,0x9,0xfc,0x22 # .cfi_escape only emits raw call-frame (unwind) information bytes and generates no instructions. The trailing 0x22 is one byte of that data, not "22 bytes of stack-overflow protection" as the original note claimed. NOTE(review): the bytes look like DWARF expressions locating the saved %ebx/%esi -- confirm
call _ZN4BaseC1Ev # call Base's constructor. It stores _ZTV4Base+8 into the first word of b; that address is the table slot declared as `.long _ZN4Base4fun1Ev`, i.e. the slot holding the address of Base's first virtual function
# In C terms: after construction the first word of b is the vtable pointer (_ZTV4Base+8), and the vtable is an array of function addresses, which is why the source casts &b to int**
.LEHE0:
leal 16(%esp), %eax # address of b
movl %eax, 4(%esp) # second printf argument: &b
movl $.LC6, (%esp) # first printf argument: the format string
.LEHB1:
call printf # prints &b
leal 16(%esp), %eax
movl %eax, 4(%esp)
movl $.LC7, (%esp)
call printf # prints the "vtable = %p" line; the value passed is again &b
leal 16(%esp), %eax
movl (%eax), %eax
movl (%eax), %eax # second load: %eax = the address stored in the first vtable slot
movl %eax, 20(%esp) # pfun1 = (FUN)**(int**)(&b);
movl 20(%esp), %eax
movl %eax, 4(%esp) # second printf argument: pfun1
movl $.LC8, (%esp) # first printf argument: the format string
call printf # printf("fun1 = %p\n",pfun1);
movl 20(%esp), %eax # %eax changes across the call, so reload fun1's address
call *%eax # pfun1(); nothing is stored to (%esp) before this call, so it still holds the $.LC8 format-string address rather than &b
leal 16(%esp), %eax
movl (%eax), %eax
addl $4, %eax # *(int**)(&b)+1 : advance by one 4-byte slot
movl (%eax), %eax
movl %eax, 24(%esp) # pfun2 = (FUN)*(*(int**)(&b)+1);
movl 24(%esp), %eax
movl %eax, 4(%esp)
movl $.LC9, (%esp)
call printf # printf("fun2 = %p\n",pfun2);
movl 24(%esp), %eax
call *%eax # pfun2();
leal 16(%esp), %eax
movl (%eax), %eax
addl $8, %eax # *(int**)(&b)+2 : advance by two 4-byte slots
movl (%eax), %eax
movl %eax, 28(%esp) # pfun3 = (FUN)*(*(int**)(&b)+2);
movl 28(%esp), %eax
movl %eax, 4(%esp)
movl $.LC10, (%esp)
call printf # printf("fun3 = %p\n",pfun3);
movl 28(%esp), %eax
call *%eax # pfun3();
leal 16(%esp), %eax
movl %eax, (%esp)
call _ZN4Base4fun4Ev # b.fun4(); a direct call to the symbol, with &b passed at (%esp)
.LEHE1:
movl $0, %ebx
leal 16(%esp), %eax
movl %eax, (%esp)
.LEHB2:
call _ZN4BaseD1Ev # call the destructor on b
.LEHE2:
movl %ebx, %eax # return value: the 0 saved in %ebx above; the rest restores the stack and saved registers
addl $40, %esp
popl %ebx
.cfi_remember_state
.cfi_restore 3
popl %esi
.cfi_restore 6
movl %ebp, %esp
.cfi_def_cfa_register 4
popl %ebp
.cfi_restore 5
.cfi_def_cfa_offset 4
ret
# Cleanup path named by the exception table for the .LEHB1-.LEHE1 region:
# saves %eax/%edx, runs b's destructor, restores them and calls
# _Unwind_Resume.
.L17:
.cfi_restore_state
.L15:
movl %edx, %ebx
movl %eax, %esi
leal 16(%esp), %eax
movl %eax, (%esp)
call _ZN4BaseD1Ev
movl %esi, %eax
movl %ebx, %edx
movl %eax, (%esp)
.LEHB3:
call _Unwind_Resume
.LEHE3:
.cfi_endproc
.LFE10:
.size main, .-main
# Exception-handling table for main, emitted into .gcc_except_table.
# After a three-byte header and a length, the entries come in groups of four
# .uleb128 values. Each group starts with a code region given as
# (start offset from .LFB10, length). Only the .LEHB1-.LEHE1 group names a
# code label (.L17) in its third field; every other group has 0 there.
# NOTE(review): presumably the usual call-site record layout
# (start, length, landing pad, action) -- confirm.
.globl __gxx_personality_v0
.section .gcc_except_table,"a",@progbits
.LLSDA10:
.byte 0xff
.byte 0xff
.byte 0x1
.uleb128 .LLSDACSE10-.LLSDACSB10
.LLSDACSB10:
# Region .LEHB0-.LEHE0: the constructor call.
.uleb128 .LEHB0-.LFB10
.uleb128 .LEHE0-.LEHB0
.uleb128 0x0
.uleb128 0x0
# Region .LEHB1-.LEHE1: the printf calls through b.fun4(); handled at .L17.
.uleb128 .LEHB1-.LFB10
.uleb128 .LEHE1-.LEHB1
.uleb128 .L17-.LFB10
.uleb128 0x0
# Region .LEHB2-.LEHE2: the destructor call.
.uleb128 .LEHB2-.LFB10
.uleb128 .LEHE2-.LEHB2
.uleb128 0x0
.uleb128 0x0
# Region .LEHB3-.LEHE3: the _Unwind_Resume call.
.uleb128 .LEHB3-.LFB10
.uleb128 .LEHE3-.LEHB3
.uleb128 0x0
.uleb128 0x0
.LLSDACSE10:
# _ZTV4Base: the vtable for Base. Five 4-byte entries (20 bytes, matching
# .size). The constructor and destructor store _ZTV4Base+8 into each object,
# so objects point at the third entry, not at the start of this table.
.text
.weak _ZTV4Base
.section .rodata._ZTV4Base,"aG",@progbits,_ZTV4Base,comdat
.align 8
.type _ZTV4Base, @object
.size _ZTV4Base, 20
_ZTV4Base:
# NOTE(review): presumably the "offset to top" field of the C++ ABI -- confirm.
.long 0
# Address of Base's type-info object.
.long _ZTI4Base
.long _ZN4Base4fun1Ev # this slot is at _ZTV4Base+8, the address stored in every Base object as its vtable pointer; slot 0 = fun1
# Slot 1 = fun2.
.long _ZN4Base4fun2Ev
# Slot 2 = fun3.
.long _ZN4Base4fun3Ev
# _ZTS4Base: the name string referenced by Base's type-info object.
# "4Base" plus the terminating NUL is 6 bytes, matching .size.
.weak _ZTS4Base
.section .rodata._ZTS4Base,"aG",@progbits,_ZTS4Base,comdat
.type _ZTS4Base, @object
.size _ZTS4Base, 6
_ZTS4Base:
.string "4Base"
# _ZTI4Base: the type-info object for Base, referenced from the second entry
# of _ZTV4Base. Two 4-byte words (8 bytes, matching .size).
.weak _ZTI4Base
.section .rodata._ZTI4Base,"aG",@progbits,_ZTI4Base,comdat
.align 4
.type _ZTI4Base, @object
.size _ZTI4Base, 8
_ZTI4Base:
# Word 0: an address 8 bytes into the runtime's __class_type_info table (the
# same +8 convention used for Base's own vtable pointer).
.long _ZTVN10__cxxabiv117__class_type_infoE+8
# Word 1: address of the "4Base" name string.
.long _ZTS4Base
.ident "GCC: (GNU) 4.4.2 20091027 (Red Hat 4.4.2-7)"
.section .note.GNU-stack,"",@progbits
为了加深理解隐含参数this,将fun2改为:
virtual void fun2()
{
fun1();
puts("fun2()");
}
对应的汇编代码
# Base::fun2() after the change: calls fun1() through the vtable of `this`,
# then prints "fun2()".
.section .rodata
.LC2:
.string "fun2()"
.section .text._ZN4Base4fun2Ev,"axG",@progbits,_ZN4Base4fun2Ev,comdat
.align 2
.weak _ZN4Base4fun2Ev
.type _ZN4Base4fun2Ev, @function
_ZN4Base4fun2Ev:
.LFB4:
.cfi_startproc
.cfi_personality 0x0,__gxx_personality_v0
pushl %ebp
.cfi_def_cfa_offset 8
movl %esp, %ebp
.cfi_offset 5, -8
.cfi_def_cfa_register 5
subl $24, %esp
movl 8(%ebp), %eax # load the hidden `this` argument
# %eax = first word of *this, the vtable pointer.
movl (%eax), %eax
# %edx = first vtable slot, the address of fun1.
movl (%eax), %edx
movl 8(%ebp), %eax
movl %eax, (%esp) # pass `this` on as the callee's hidden argument
call *%edx # fun1(); the first word of *this is the vtable pointer, and the first vtable slot holds fun1's address
movl $.LC2, (%esp)
call puts
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE4:
.size _ZN4Base4fun2Ev, .-_ZN4Base4fun2Ev
此时如果仍用原来的调用方式(pfun2()),由于没有把 this 作为参数传入,fun2 从 8(%ebp) 取到的并不是对象地址,再据此去读虚表指针和虚表项时,通常会在 movl (%eax), %edx 或随后的 call *%edx 处发生段错误。
修改调用代码
//pfun2();  // previous call through the raw vtable entry: passes no `this`
// Ordinary member call: the compiler passes &b as the hidden `this` argument.
b.fun2();
对应的汇编代码改为
leal 16(%esp), %eax # %eax = &b, the `this` pointer
movl %eax, (%esp) # store `this` at the top of the stack as the call's argument
# Direct call to the fun2 symbol, not an indirect call through a register.
call _ZN4Base4fun2Ev
如果把 fun2 中调用的 fun1 改为 fun4:
virtual void fun2()
{
fun4();
puts("fun2()");
}
则最开始的调用方式(不隐式传递 this 指针、直接通过函数指针调用 pfun2())不会出问题:fun4 不是虚函数,对它的调用是直接 call,不需要通过 this 查虚表;fun4 内部也没有调用任何虚函数或访问成员变量,相当于完全没有用到 this 指针。
Base的内存模型如下图