编译器是如何用汇编语言实现C++的虚函数表和隐式传递this指针(一)

所谓的面向对象及继承、多态等等只是在语言层面的实现,机器在执行程序时根本就不知道这些。这篇博客的目的是探讨一下,编译器对C++虚函数的实现。


先探讨最简单的情况:


代码如下 vtable.cpp:
#include <stdio.h>

// Demo class used to inspect the compiler-generated vtable.
// It has no data members; every member function only prints its own name
// with puts(), so the output shows which function actually ran.
class Base{
public: 
    // Constructor: prints "Base()".
    Base(){
        puts("Base()");
    }
    // fun1, fun2 and fun3 are virtual: their addresses are the three
    // function entries of _ZTV4Base, in this declaration order.
    virtual void fun1()
    {
        puts("fun1()");
    }
    virtual void fun2()
    {
        puts("fun2()");
    }
    virtual void fun3()
    {
        puts("fun3()");
    }
    // Non-virtual member function: it has no vtable entry and is called
    // directly by its symbol name.
    void fun4(){
        puts("Normal fun4()");
    }
    // Non-virtual destructor: it has no vtable entry either.
    ~Base(){
        puts("~Base()");
    }
};

typedef void (*FUN) (void);

// Reads the three virtual-function addresses straight out of b's vtable,
// prints them and calls them through plain function pointers, then calls the
// non-virtual fun4() the normal way.
// NOTE(review): the int*/int** casts assume pointers and int have the same
// size (true for the 32-bit assembly listing in this article, where each
// vtable slot is 4 bytes) -- this is a demonstration, not portable C++.
int main()
{
    // Plain function pointers that will receive the addresses read from the vtable.
    FUN pfun1 = NULL;
    FUN pfun2 = NULL;
    FUN pfun3 = NULL;

    Base b;
    //printf("sizeof b = %d\n",sizeof b);
    printf("&b = %p\n",(&b));
    // NOTE(review): (int *)(&b) is still just the address of b, so this prints
    // the same value as the line above (where the vptr is stored), not the
    // vtable address held inside b.
    printf("vtable = %p\n",(int *)(&b));
    //printf("b = 0x%x\n",*(int*)(&b));

    // *(int**)(&b) is the first word of b, i.e. the vptr; dereferencing it
    // again yields vtable slot 0, the address of fun1.
    pfun1 = (FUN)**(int**)(&b);
    printf("fun1 = %p\n",pfun1);
    // Called as a free function: no `this` is passed. That is harmless here
    // only because fun1's body never uses `this`.
    pfun1();
    // vptr + 1 -> slot 1 -> address of fun2.
    pfun2 = (FUN)*(*(int**)(&b)+1);
    printf("fun2 = %p\n",pfun2);
    pfun2();
    // vptr + 2 -> slot 2 -> address of fun3.
    pfun3 = (FUN)*(*(int**)(&b)+2);
    printf("fun3 = %p\n",pfun3);
    pfun3();
    // Ordinary member call; the compiler passes &b as the hidden `this`.
    b.fun4();
    return 0;
}
gcc -S vtable.cpp 生成 vtable.s 


其中的注释已经很详细了,只需要了解几条简单的汇编指令就能看懂(mov call ret push pop)


#后面是注释

	.file	"vtable.cpp"
	.section	.rodata # read-only data section
.LC0:
	.string	"Base()"  # NUL-terminated string, roughly: const char LC0[] = "Base()";
	.section	.text._ZN4BaseC1Ev,"axG",@progbits,_ZN4BaseC1Ev,comdat
	.align 2
	.weak	_ZN4BaseC1Ev
	.type	_ZN4BaseC1Ev, @function
# Base::Base() -- installs the vptr in the object, then prints "Base()".
# The only argument is the implicit `this` pointer, passed on the stack.
_ZN4BaseC1Ev:   # constructor of Base
.LFB2:
	.cfi_startproc
	.cfi_personality 0x0,__gxx_personality_v0
	pushl	%ebp
	.cfi_def_cfa_offset 8
	movl	%esp, %ebp 
	.cfi_offset 5, -8
	.cfi_def_cfa_register 5
	subl	$24, %esp
	movl	8(%ebp), %eax # %eax = this. Why offset 8 from %ebp:
                                  #   0(%ebp) is the caller's %ebp saved by the pushl above,
                                  #   4(%ebp) is the return address pushed by the caller's `call`
                                  #   (stack-smashing attacks work by overwriting that saved address),
                                  #   so the first stack argument -- the hidden this pointer -- is at 8(%ebp).
	movl	$_ZTV4Base+8, (%eax) # the key step: store the vptr into the first word of *this.
                                  # _ZTV4Base+8 is the address of the vtable slot that holds fun1's
                                  # address (the symbol plus two 4-byte header entries), not fun1 itself.
	movl	$.LC0, (%esp) # argument for puts
	call	puts          # puts("Base()")
	leave
	.cfi_restore 5
	.cfi_def_cfa 4, 4
	ret
	.cfi_endproc
.LFE2:
	.size	_ZN4BaseC1Ev, .-_ZN4BaseC1Ev
	.section	.rodata
.LC1:
	.string	"fun1()"
	.section	.text._ZN4Base4fun1Ev,"axG",@progbits,_ZN4Base4fun1Ev,comdat
	.align 2
	.weak	_ZN4Base4fun1Ev
	.type	_ZN4Base4fun1Ev, @function
# Base::fun1() -- virtual; its address is the first function entry of _ZTV4Base.
# Only prints "fun1()". The hidden `this` argument would be at 8(%ebp), but
# this body never reads it, which is why calling it without `this` still works.
_ZN4Base4fun1Ev:
.LFB3:
	.cfi_startproc
	.cfi_personality 0x0,__gxx_personality_v0
	pushl	%ebp
	.cfi_def_cfa_offset 8
	movl	%esp, %ebp
	.cfi_offset 5, -8
	.cfi_def_cfa_register 5
	subl	$24, %esp
	movl	$.LC1, (%esp)	# argument for puts
	call	puts		# puts("fun1()")
	leave
	.cfi_restore 5
	.cfi_def_cfa 4, 4
	ret
	.cfi_endproc
.LFE3:
	.size	_ZN4Base4fun1Ev, .-_ZN4Base4fun1Ev
	.section	.rodata
.LC2:
	.string	"fun2()"
	.section	.text._ZN4Base4fun2Ev,"axG",@progbits,_ZN4Base4fun2Ev,comdat
	.align 2
	.weak	_ZN4Base4fun2Ev
	.type	_ZN4Base4fun2Ev, @function
# Base::fun2() -- virtual; its address is the second function entry of _ZTV4Base.
# Only prints "fun2()"; in this first version the hidden `this` argument
# at 8(%ebp) is never read.
_ZN4Base4fun2Ev:
.LFB4:
	.cfi_startproc
	.cfi_personality 0x0,__gxx_personality_v0
	pushl	%ebp
	.cfi_def_cfa_offset 8
	movl	%esp, %ebp
	.cfi_offset 5, -8
	.cfi_def_cfa_register 5
	subl	$24, %esp
	movl	$.LC2, (%esp)	# argument for puts
	call	puts		# puts("fun2()")
	leave
	.cfi_restore 5
	.cfi_def_cfa 4, 4
	ret
	.cfi_endproc
.LFE4:
	.size	_ZN4Base4fun2Ev, .-_ZN4Base4fun2Ev
	.section	.rodata
.LC3:
	.string	"fun3()"
	.section	.text._ZN4Base4fun3Ev,"axG",@progbits,_ZN4Base4fun3Ev,comdat
	.align 2
	.weak	_ZN4Base4fun3Ev
	.type	_ZN4Base4fun3Ev, @function
# Base::fun3() -- virtual; its address is the third function entry of _ZTV4Base.
# Only prints "fun3()"; the hidden `this` argument at 8(%ebp) is never read.
_ZN4Base4fun3Ev:
.LFB5:
	.cfi_startproc
	.cfi_personality 0x0,__gxx_personality_v0
	pushl	%ebp
	.cfi_def_cfa_offset 8
	movl	%esp, %ebp
	.cfi_offset 5, -8
	.cfi_def_cfa_register 5
	subl	$24, %esp
	movl	$.LC3, (%esp)	# argument for puts
	call	puts		# puts("fun3()")
	leave
	.cfi_restore 5
	.cfi_def_cfa 4, 4
	ret
	.cfi_endproc
.LFE5:
	.size	_ZN4Base4fun3Ev, .-_ZN4Base4fun3Ev
	.section	.rodata
.LC4:
	.string	"Normal fun4()"
	.section	.text._ZN4Base4fun4Ev,"axG",@progbits,_ZN4Base4fun4Ev,comdat
	.align 2
	.weak	_ZN4Base4fun4Ev
	.type	_ZN4Base4fun4Ev, @function
# Base::fun4() -- non-virtual: it has no entry in _ZTV4Base and main calls it
# directly by symbol. Only prints "Normal fun4()"; the hidden `this` argument
# at 8(%ebp) is never read.
_ZN4Base4fun4Ev:
.LFB6:
	.cfi_startproc
	.cfi_personality 0x0,__gxx_personality_v0
	pushl	%ebp
	.cfi_def_cfa_offset 8
	movl	%esp, %ebp
	.cfi_offset 5, -8
	.cfi_def_cfa_register 5
	subl	$24, %esp
	movl	$.LC4, (%esp)	# argument for puts
	call	puts		# puts("Normal fun4()")
	leave
	.cfi_restore 5
	.cfi_def_cfa 4, 4
	ret
	.cfi_endproc
.LFE6:
	.size	_ZN4Base4fun4Ev, .-_ZN4Base4fun4Ev
	.section	.rodata
.LC5:
	.string	"~Base()"
	.section	.text._ZN4BaseD1Ev,"axG",@progbits,_ZN4BaseD1Ev,comdat
	.align 2
	.weak	_ZN4BaseD1Ev
	.type	_ZN4BaseD1Ev, @function
# Base::~Base() -- re-installs Base's vptr in the object, then prints "~Base()".
# The only argument is the implicit `this` pointer at 8(%ebp).
_ZN4BaseD1Ev:
.LFB9:
	.cfi_startproc
	.cfi_personality 0x0,__gxx_personality_v0
	pushl	%ebp
	.cfi_def_cfa_offset 8
	movl	%esp, %ebp
	.cfi_offset 5, -8
	.cfi_def_cfa_register 5
	subl	$24, %esp
	movl	8(%ebp), %eax	# %eax = this
	movl	$_ZTV4Base+8, (%eax)	# first word of *this = address of Base's first vtable slot, same store as in the constructor
	movl	$.LC5, (%esp)	# argument for puts
	call	puts		# puts("~Base()")
	leave
	.cfi_restore 5
	.cfi_def_cfa 4, 4
	ret
	.cfi_endproc
.LFE9:
	.size	_ZN4BaseD1Ev, .-_ZN4BaseD1Ev
	.section	.rodata
.LC6:
	.string	"&b = %p\n"
.LC7:
	.string	"vtable = %p\n"
.LC8:
	.string	"fun1 = %p\n"
.LC9:
	.string	"fun2 = %p\n"
.LC10:
	.string	"fun3 = %p\n"
.globl _Unwind_Resume
	.text
.globl main
	.type	main, @function
# int main() -- builds a Base on the stack, reads its three vtable slots by
# hand, prints and calls them, calls fun4(), destroys b and returns 0.
# Stack layout after the prologue (offsets from %esp):
#   b     at 16(%esp)  (one word: the vptr)
#   pfun1 at 20(%esp), pfun2 at 24(%esp), pfun3 at 28(%esp)
#   outgoing call arguments at 0(%esp) and 4(%esp)
# NOTE(review): the author's original comment on the label below is cut off
# mid-sentence; it appears to say that some bookkeeping lines were removed
# from main for readability. Consistent with that, this listing has a
# .cfi_endproc for main but no matching .cfi_startproc.
main:               # see the note above
.LFB10:
	pushl	%ebp
	movl	%esp, %ebp
	andl	$-16, %esp
	pushl	%esi
	pushl	%ebx
	subl	$40, %esp
	movl	$0, 20(%esp) # pfun1 = NULL
	movl	$0, 24(%esp) # pfun2 = NULL
	movl	$0, 28(%esp) # pfun3 = NULL
	leal	16(%esp), %eax # %eax = %esp + 16 = &b (leal computes the address, it does not load memory)
	movl	%eax, (%esp)   # store &b as the first stack argument: this is the hidden this pointer for the constructor
.LEHB0:
	.cfi_escape 0x10,0x3,0x8,0x75,0x0,0x9,0xf0,0x1a,0x9,0xf8,0x22
	.cfi_escape 0x10,0x6,0x8,0x75,0x0,0x9,0xf0,0x1a,0x9,0xfc,0x22 # .cfi_escape only appends raw bytes to the unwind (call-frame) information; it emits no instructions
                                # and is not a stack-overflow guard. Presumably these two byte strings are DWARF expressions
                                # locating the saved %ebx (register 3) and %esi (register 6) -- TODO confirm.
	call	_ZN4BaseC1Ev # Base::Base(&b). On return the first word of b holds $_ZTV4Base+8 (the vptr);
                                # %eax is NOT the vtable address here.
                                # In C terms: b's first word points to an array of function addresses, hence the int** cast in the source.
.LEHE0:
	leal	16(%esp), %eax # %eax = &b
	movl	%eax, 4(%esp)  # second printf argument: &b
	movl	$.LC6, (%esp)  # first printf argument: format string
.LEHB1:
	call	printf        # printf("&b = %p\n", &b)
	leal	16(%esp), %eax # %eax = &b again -- the source passes (int *)(&b), which is the same address
	movl	%eax, 4(%esp)
	movl	$.LC7, (%esp)
	call	printf       # printf("vtable = %p\n", (int *)(&b)) -- prints &b, not the vptr value
	leal	16(%esp), %eax # %eax = &b
	movl	(%eax), %eax   # %eax = first word of b = vptr
	movl	(%eax), %eax   # %eax = vtable slot 0 = address of fun1
	movl	%eax, 20(%esp) # pfun1 = (FUN)**(int**)(&b);
	movl	20(%esp), %eax 
	movl	%eax, 4(%esp) # second printf argument: pfun1
	movl	$.LC8, (%esp) # first printf argument: format string
	call	printf         # printf("fun1 = %p\n",pfun1);
	movl	20(%esp), %eax # reload pfun1: %eax was overwritten by printf's return value
	call	*%eax          # pfun1(); no this is stored first -- (%esp) still holds $.LC8 from the printf call
	leal	16(%esp), %eax
	movl	(%eax), %eax   # %eax = vptr
	addl	$4, %eax       # *(int**)(&b)+1 : advance one 4-byte slot
	movl	(%eax), %eax   # %eax = vtable slot 1 = address of fun2
	movl	%eax, 24(%esp) # pfun2 = (FUN)*(*(int**)(&b)+1);
	movl	24(%esp), %eax
	movl	%eax, 4(%esp)
	movl	$.LC9, (%esp)
	call	printf         # printf("fun2 = %p\n",pfun2);
	movl	24(%esp), %eax
	call	*%eax          # pfun2(); again without a this argument
	leal	16(%esp), %eax
	movl	(%eax), %eax   # %eax = vptr
	addl	$8, %eax       # *(int**)(&b)+2 : advance two 4-byte slots
	movl	(%eax), %eax   # %eax = vtable slot 2 = address of fun3
	movl	%eax, 28(%esp) # pfun3 = (FUN)*(*(int**)(&b)+2);
	movl	28(%esp), %eax
	movl	%eax, 4(%esp)
	movl	$.LC10, (%esp)
	call	printf         # printf("fun3 = %p\n",pfun3);
	movl	28(%esp), %eax
	call	*%eax          # pfun3(); again without a this argument
	leal	16(%esp), %eax
	movl	%eax, (%esp)        # this = &b
	call	_ZN4Base4fun4Ev     # b.fun4(); direct call by symbol, no vtable lookup
.LEHE1:
	movl	$0, %ebx      # return value 0, parked in %ebx across the destructor call
	leal	16(%esp), %eax
	movl	%eax, (%esp)  # this = &b
.LEHB2:
	call	_ZN4BaseD1Ev # b.~Base()
.LEHE2:
	movl	%ebx, %eax    # %eax = return value; what follows restores the stack and saved registers
	addl	$40, %esp
	popl	%ebx
	.cfi_remember_state
	.cfi_restore 3
	popl	%esi
	.cfi_restore 6
	movl	%ebp, %esp
	.cfi_def_cfa_register 4
	popl	%ebp
	.cfi_restore 5
	.cfi_def_cfa_offset 4
	ret
# Cleanup path: .L17 is the landing pad that the exception table lists for the
# .LEHB1-.LEHE1 region. It destroys b and then resumes unwinding.
.L17:
	.cfi_restore_state
.L15:
	movl	%edx, %ebx    # keep %edx and %eax safe across the destructor call
	movl	%eax, %esi
	leal	16(%esp), %eax
	movl	%eax, (%esp)  # this = &b
	call	_ZN4BaseD1Ev  # b.~Base()
	movl	%esi, %eax
	movl	%ebx, %edx
	movl	%eax, (%esp)  # the value that arrived in %eax becomes _Unwind_Resume's argument
.LEHB3:
	call	_Unwind_Resume
.LEHE3:
	.cfi_endproc
.LFE10:
	.size	main, .-main
.globl __gxx_personality_v0
	.section	.gcc_except_table,"a",@progbits
# Exception-handling table for main (placed in .gcc_except_table).
# NOTE(review): field meanings below follow the usual GCC LSDA layout and are
# not established by this listing alone -- confirm against the ABI documents.
.LLSDA10:
	.byte	0xff	# presumably: landing-pad base encoding omitted
	.byte	0xff	# presumably: type-table encoding omitted
	.byte	0x1	# presumably: call-site entries are uleb128-encoded
	.uleb128 .LLSDACSE10-.LLSDACSB10	# byte length of the call-site table that follows
.LLSDACSB10:
# Four records of four values each; presumably (region start, region length,
# landing pad, action), with offsets relative to .LFB10.
	.uleb128 .LEHB0-.LFB10	# region around the constructor call
	.uleb128 .LEHE0-.LEHB0
	.uleb128 0x0
	.uleb128 0x0
	.uleb128 .LEHB1-.LFB10	# region from the first printf through b.fun4()
	.uleb128 .LEHE1-.LEHB1
	.uleb128 .L17-.LFB10	# the only non-zero third field: points at .L17, the cleanup code that destroys b
	.uleb128 0x0
	.uleb128 .LEHB2-.LFB10	# region around the destructor call
	.uleb128 .LEHE2-.LEHB2
	.uleb128 0x0
	.uleb128 0x0
	.uleb128 .LEHB3-.LFB10	# region around the _Unwind_Resume call
	.uleb128 .LEHE3-.LEHB3
	.uleb128 0x0
	.uleb128 0x0
.LLSDACSE10:
	.text
	.weak	_ZTV4Base
	.section	.rodata._ZTV4Base,"aG",@progbits,_ZTV4Base,comdat
	.align 8
	.type	_ZTV4Base, @object
	.size	_ZTV4Base, 20
# Vtable for Base: five 4-byte words (20 bytes). The constructor and destructor
# store _ZTV4Base+8 into the object, so an object's vptr points at the fun1
# entry, skipping the two header words. fun4 and ~Base are non-virtual and
# therefore do not appear here.
_ZTV4Base:
	.long	0	# header word; presumably the ABI's "offset to top" field -- TODO confirm
	.long	_ZTI4Base	# header word: address of Base's type-info object (defined below)
	.long	_ZN4Base4fun1Ev  # located at _ZTV4Base+8, the address stored as the vptr; slot 0 = &Base::fun1
	.long	_ZN4Base4fun2Ev	# slot 1 = &Base::fun2
	.long	_ZN4Base4fun3Ev	# slot 2 = &Base::fun3
	.weak	_ZTS4Base
	.section	.rodata._ZTS4Base,"aG",@progbits,_ZTS4Base,comdat
	.type	_ZTS4Base, @object
	.size	_ZTS4Base, 6
# Type-name string referenced by the type-info object: "4Base" plus the NUL (6 bytes).
_ZTS4Base:
	.string	"4Base"
	.weak	_ZTI4Base
	.section	.rodata._ZTI4Base,"aG",@progbits,_ZTI4Base,comdat
	.align 4
	.type	_ZTI4Base, @object
	.size	_ZTI4Base, 8
# Type-info object for Base: two words.
_ZTI4Base:
	.long	_ZTVN10__cxxabiv117__class_type_infoE+8	# address inside the runtime's __class_type_info vtable (same symbol+8 pattern as above)
	.long	_ZTS4Base	# pointer to the name string
	.ident	"GCC: (GNU) 4.4.2 20091027 (Red Hat 4.4.2-7)"
	.section	.note.GNU-stack,"",@progbits

为了加深理解隐含参数this,将fun2改为:
    virtual void fun2()

    {

        // Virtual call: the generated code loads the vptr from *this and calls
        // slot 0, so fun2 now genuinely depends on a valid `this`.
        fun1();

        puts("fun2()");

    }

对应的汇编代码
     	.section	.rodata
.LC2:
	.string	"fun2()"
	.section	.text._ZN4Base4fun2Ev,"axG",@progbits,_ZN4Base4fun2Ev,comdat
	.align 2
	.weak	_ZN4Base4fun2Ev
	.type	_ZN4Base4fun2Ev, @function
# Base::fun2(), modified version: calls fun1() virtually through the vtable,
# then prints "fun2()". Unlike the first version it reads the hidden `this`
# argument at 8(%ebp), so it must be called with a valid object pointer.
_ZN4Base4fun2Ev:
.LFB4:
	.cfi_startproc
	.cfi_personality 0x0,__gxx_personality_v0
	pushl	%ebp
	.cfi_def_cfa_offset 8
	movl	%esp, %ebp
	.cfi_offset 5, -8
	.cfi_def_cfa_register 5
	subl	$24, %esp
	movl	8(%ebp), %eax # %eax = this (the hidden first argument)
	movl	(%eax), %eax  # %eax = *this = vptr
	movl	(%eax), %edx  # %edx = vtable slot 0 = address of fun1
	movl	8(%ebp), %eax # %eax = this again
	movl	%eax, (%esp) # forward this as the hidden argument of the callee
	call	*%edx        # fun1(); the first word of the object (*this, not this itself) is the vptr, and the first slot it points to is fun1
	movl	$.LC2, (%esp) # argument for puts
	call	puts          # puts("fun2()")
	leave
	.cfi_restore 5
	.cfi_def_cfa 4, 4
	ret
	.cfi_endproc
.LFE4:
	.size	_ZN4Base4fun2Ev, .-_ZN4Base4fun2Ev

此时如果仍用原来的方式调用(pfun2(),即不传递this),fun2 从 8(%ebp) 取到的就不是有效的this指针,在通过它读取虚函数表指针或执行 call *%edx 时就可能发生段错误。
修改调用代码
    //pfun2();

    // Ordinary member call: the compiler passes &b as the hidden `this`.
    b.fun2();
对应的汇编代码改为
	leal	16(%esp), %eax  # %eax = &b, i.e. the this pointer
	movl	%eax, (%esp)    # store this as the first stack argument
	call	_ZN4Base4fun2Ev # b.fun2(): called directly by symbol, not through the vtable
如果把 fun1改为fun4
   virtual void fun2()

    {

        // fun4 is non-virtual, so this call needs no vtable lookup through
        // `this`, and fun4's own body never reads `this` either.
        fun4();

        puts("fun2()");

    }
则最开始的调用方式(不隐式传递this指针的 pfun2())不会出问题:fun4 不是虚函数,调用它不需要通过this去查虚函数表,而且 fun4 内部既没有调用虚函数,也没有访问成员变量,相当于没有用到this指针。

Base的内存模型如下图




评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值