一个C++程序被编译为目标程序的过程中经历了四个部分,分别是预处理、编译、汇编、链接。
下面将通过一段简单的C++代码,分别讲解执行预处理、编译、汇编、链接四个步骤后的结果和基本原理。
注意:博主是在Ubuntu 20.04下编译和运行,g++版本是9.3.0
一、一个简单的C++代码
下面是一段简单的C++代码
Test1.h
#ifndef TEST1_H_
#define TEST1_H_
// Minimal example type used to demonstrate each build stage.
struct Test1
{
// Constructor; only declared here, defined in Test1.cpp.
Test1();
// Read-only accessor for the private member `var`; defined in Test1.cpp.
int getVar() const;
private:
int var; // initialized to 100 by the constructor in Test1.cpp
};
#endif /* TEST1_H_ */
Test1.cpp
#include "Test1.h"
// Constructor: sets the member `var` to 100 through the member initializer list;
// the body itself is empty.
Test1::Test1()
: var(100)
{
}
// Returns the current value of `var`. Declared const, so it does not modify the object.
int Test1::getVar() const
{
return var;
}
main.cpp
#include "Test1.h"
// Entry point: constructs a temporary Test1 and calls getVar() on it.
// The returned value is discarded; the program always exits with status 0.
int main() {
Test1{}.getVar();
return 0;
}
1.1预处理
预处理主要是将Test1.cpp、main.cpp及其相关的头文件分别预编译成Test1.ii、main.ii文件。该步骤的主要操作是展开宏定义,并将#include指令替换为被包含头文件的内容。下面是在Ubuntu 20.04下通过g++ -E命令生成预处理后的文件(注意:一般C++预处理后的文件扩展名为*.ii,C语言为*.i)
g++ -E main.cpp -o main.ii
文件main.ii
# 1 "main.cpp"
# 1 "<built-in>"
# 1 "<command-line>"
# 1 "/usr/include/stdc-predef.h" 1 3 4
# 1 "<command-line>" 2
# 1 "main.cpp"
# 1 "Test1.h" 1
struct Test1
{
Test1();
int getVar() const;
private:
int var;
};
# 2 "main.cpp" 2
int main() {
Test1{}.getVar();
return 0;
}
g++ -E Test1.cpp -o Test1.ii
文件Test1.ii
# 1 "Test1.cpp"
# 1 "<built-in>"
# 1 "<command-line>"
# 1 "/usr/include/stdc-predef.h" 1 3 4
# 1 "<command-line>" 2
# 1 "Test1.cpp"
# 1 "Test1.h" 1
struct Test1
{
Test1();
int getVar() const;
private:
int var;
};
# 2 "Test1.cpp" 2
Test1::Test1()
: var(100)
{
}
int Test1::getVar() const
{
return var;
}
由上面的Test1.ii和main.ii两个文件可以看出,经过预处理后,*.ii文件不再包含任何宏定义,因为所有宏都已经被展开,并且被包含的头文件也已经被插入到相应的*.ii文件中。
1.1.1预处理阶段功能总结
预编译过程主要处理那些源代码文件中的以“#”开始的预编译指令。比如“#include”、“#define”等,处理规则如下:
- 将所有的“#define”删除,并且展开所有的宏定义
- 处理所有条件预编译指令,比如“#if”、“#ifdef”、“#elif”、“#else”、“#endif”
- 处理“#include”预编译指令,将被包含的文件插入到该预编译指令的位置。注意,这个过程是递归进行的,也就是说被包含的文件可能还包含其它文件
- 删除所有的注释“//”和“/* */”
- 添加行号和文件名标识,比如:“ # 1 "Test1.h" 1 ”,以便编译器在编译时生成调试用的行号信息,并在产生编译错误或警告时能够显示行号
- 保留所有的#pragma编译器指令,因为编译器需要使用它们
1.2编译
编译就是对预处理后的文件进行一系列词法分析、语法分析、语义分析及优化,然后生成相应的汇编代码文件。
一般将预处理和编译合并成一个步骤,Linux下GCC使用cc1plus程序来完成这两个步骤。
下面是编译生成main.s文件的命令
g++ -S main.ii -o main.s
main.s
.file "main.cpp"
.text
.globl main
.type main, @function
main:
.LFB0:
.cfi_startproc
endbr64
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movq %fs:40, %rax
movq %rax, -8(%rbp)
xorl %eax, %eax
leaq -12(%rbp), %rax
movq %rax, %rdi
call _ZN5Test1C1Ev@PLT
leaq -12(%rbp), %rax
movq %rax, %rdi
call _ZNK5Test16getVarEv@PLT
movl $0, %eax
movq -8(%rbp), %rdx
xorq %fs:40, %rdx
je .L3
call __stack_chk_fail@PLT
.L3:
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE0:
.size main, .-main
.ident "GCC: (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0"
.section .note.GNU-stack,"",@progbits
.section .note.gnu.property,"a"
.align 8
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.string "GNU"
1:
.align 8
.long 0xc0000002
.long 3f - 2f
2:
.long 0x3
3:
.align 8
4:
Test1.s
g++ -S Test1.ii -o Test1.s
.file "Test1.cpp"
.text
.align 2
.globl _ZN5Test1C2Ev
.type _ZN5Test1C2Ev, @function
_ZN5Test1C2Ev:
.LFB1:
.cfi_startproc
endbr64
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movl $100, (%rax)
nop
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1:
.size _ZN5Test1C2Ev, .-_ZN5Test1C2Ev
.globl _ZN5Test1C1Ev
.set _ZN5Test1C1Ev,_ZN5Test1C2Ev
.align 2
.globl _ZNK5Test16getVarEv
.type _ZNK5Test16getVarEv, @function
_ZNK5Test16getVarEv:
.LFB3:
.cfi_startproc
endbr64
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
movl (%rax), %eax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE3:
.size _ZNK5Test16getVarEv, .-_ZNK5Test16getVarEv
.ident "GCC: (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0"
.section .note.GNU-stack,"",@progbits
.section .note.gnu.property,"a"
.align 8
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.string "GNU"
1:
.align 8
.long 0xc0000002
.long 3f - 2f
2:
.long 0x3
3:
.align 8
4:
1.2.1编译总结
由上面main.s和Test1.s两个汇编代码文件可知,编译阶段生成的是汇编代码文件。g++本质上只是对后台程序的包装,它会根据不同的参数(-E、-S、-c)调用预处理和编译程序cc1plus、汇编器as、链接器ld。
1.3汇编
汇编器将汇编代码转化为机器可以识别和执行的指令(二进制代码),几乎每条汇编语句都对应一条机器指令。
main.o的16进制代码分析可参考这篇博客:小湿妹问沃什么是可执行程序?(全网最简洁,必看,错过后悔终生!!!)
下面是生成目标文件main.o的命令
g++ -c main.s -o main.o
# 或者直接使用汇编器as:
# $ as main.s -o main.o
main.o文件(注意:下面这份十六进制内容与上文的main.s并不完全对应——其中main函数的机器码不含endbr64和栈保护(__stack_chk_fail)相关指令,应是在不同的编译选项下生成的)
tjq@ubuntu:~/eclipse-workspace/Test1/src$ hexdump main.o -C
00000000 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 |.ELF............|
00000010 01 00 3e 00 01 00 00 00 00 00 00 00 00 00 00 00 |..>.............|
00000020 00 00 00 00 00 00 00 00 e0 02 00 00 00 00 00 00 |................|
00000030 00 00 00 00 40 00 00 00 00 00 40 00 0c 00 0b 00 |....@.....@.....|
00000040 55 48 89 e5 48 83 ec 10 48 8d 45 fc 48 89 c7 e8 |UH..H...H.E.H...|
00000050 00 00 00 00 48 8d 45 fc 48 89 c7 e8 00 00 00 00 |....H.E.H.......|
00000060 b8 00 00 00 00 c9 c3 00 47 43 43 3a 20 28 55 62 |........GCC: (Ub|
00000070 75 6e 74 75 20 39 2e 33 2e 30 2d 31 37 75 62 75 |untu 9.3.0-17ubu|
00000080 6e 74 75 31 7e 32 30 2e 30 34 29 20 39 2e 33 2e |ntu1~20.04) 9.3.|
00000090 30 00 00 00 00 00 00 00 14 00 00 00 00 00 00 00 |0...............|
000000a0 01 7a 52 00 01 78 10 01 1b 0c 07 08 90 01 00 00 |.zR..x..........|
000000b0 1c 00 00 00 1c 00 00 00 00 00 00 00 27 00 00 00 |............'...|
000000c0 00 41 0e 10 86 02 43 0d 06 62 0c 07 08 00 00 00 |.A....C..b......|
000000d0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
000000e0 00 00 00 00 00 00 00 00 01 00 00 00 04 00 f1 ff |................|
000000f0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000100 00 00 00 00 03 00 01 00 00 00 00 00 00 00 00 00 |................|
00000110 00 00 00 00 00 00 00 00 00 00 00 00 03 00 03 00 |................|
00000120 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000130 00 00 00 00 03 00 04 00 00 00 00 00 00 00 00 00 |................|
00000140 00 00 00 00 00 00 00 00 00 00 00 00 03 00 06 00 |................|
00000150 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000160 00 00 00 00 03 00 07 00 00 00 00 00 00 00 00 00 |................|
00000170 00 00 00 00 00 00 00 00 00 00 00 00 03 00 05 00 |................|
00000180 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000190 0a 00 00 00 12 00 01 00 00 00 00 00 00 00 00 00 |................|
000001a0 27 00 00 00 00 00 00 00 0f 00 00 00 10 00 00 00 |'...............|
000001b0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
000001c0 25 00 00 00 10 00 00 00 00 00 00 00 00 00 00 00 |%...............|
000001d0 00 00 00 00 00 00 00 00 33 00 00 00 10 00 00 00 |........3.......|
000001e0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
000001f0 00 6d 61 69 6e 2e 63 70 70 00 6d 61 69 6e 00 5f |.main.cpp.main._|
00000200 47 4c 4f 42 41 4c 5f 4f 46 46 53 45 54 5f 54 41 |GLOBAL_OFFSET_TA|
00000210 42 4c 45 5f 00 5f 5a 4e 35 54 65 73 74 31 43 31 |BLE_._ZN5Test1C1|
00000220 45 76 00 5f 5a 4e 4b 35 54 65 73 74 31 36 67 65 |Ev._ZNK5Test16ge|
00000230 74 56 61 72 45 76 00 00 10 00 00 00 00 00 00 00 |tVarEv..........|
00000240 04 00 00 00 0a 00 00 00 fc ff ff ff ff ff ff ff |................|
00000250 1c 00 00 00 00 00 00 00 04 00 00 00 0b 00 00 00 |................|
00000260 fc ff ff ff ff ff ff ff 20 00 00 00 00 00 00 00 |........ .......|
00000270 02 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 |................|
00000280 00 2e 73 79 6d 74 61 62 00 2e 73 74 72 74 61 62 |..symtab..strtab|
00000290 00 2e 73 68 73 74 72 74 61 62 00 2e 72 65 6c 61 |..shstrtab..rela|
000002a0 2e 74 65 78 74 00 2e 64 61 74 61 00 2e 62 73 73 |.text..data..bss|
000002b0 00 2e 63 6f 6d 6d 65 6e 74 00 2e 6e 6f 74 65 2e |..comment..note.|
000002c0 47 4e 55 2d 73 74 61 63 6b 00 2e 72 65 6c 61 2e |GNU-stack..rela.|
000002d0 65 68 5f 66 72 61 6d 65 00 00 00 00 00 00 00 00 |eh_frame........|
000002e0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
00000320 20 00 00 00 01 00 00 00 06 00 00 00 00 00 00 00 | ...............|
00000330 00 00 00 00 00 00 00 00 40 00 00 00 00 00 00 00 |........@.......|
00000340 27 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |'...............|
00000350 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000360 1b 00 00 00 04 00 00 00 40 00 00 00 00 00 00 00 |........@.......|
00000370 00 00 00 00 00 00 00 00 38 02 00 00 00 00 00 00 |........8.......|
00000380 30 00 00 00 00 00 00 00 09 00 00 00 01 00 00 00 |0...............|
00000390 08 00 00 00 00 00 00 00 18 00 00 00 00 00 00 00 |................|
000003a0 26 00 00 00 01 00 00 00 03 00 00 00 00 00 00 00 |&...............|
000003b0 00 00 00 00 00 00 00 00 67 00 00 00 00 00 00 00 |........g.......|
000003c0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |........