编译过程
当我们的源代码如下:
#include <stdio.h>
/*
 * Demo program: makes a single printf call whose extra argument is the
 * address of printf itself, then returns 0. argc and argv are unused.
 */
int main(int argc, char **argv)
{
/*
 * NOTE(review): the format string ends with "/n" (slash followed by 'n'),
 * which is printed literally; the newline escape "\n" was presumably
 * intended -- confirm before changing, since the listings derived from
 * this source show the same text.
 * NOTE(review): %x expects an unsigned int, but a function pointer is
 * passed here; presumably acceptable only for this 32-bit demonstration.
 */
printf("Hello World! via %x/n", printf);
return 0;
}
编译后的exe(可执行文件)是什么样子?用 objdump -d 反汇编后可以看到:
hello: file format elf32-i386
Disassembly of section .init:
08048298 <_init>:
8048298: 55 push %ebp
8048299: 89 e5 mov %esp,%ebp
804829b: 83 ec 08 sub $0x8,%esp
804829e: e8 71 00 00 00 call 8048314 <call_gmon_start>
80482a3: e8 f8 00 00 00 call 80483a0 <frame_dummy>
80482a8: e8 d3 01 00 00 call 8048480 <__do_global_ctors_aux>
80482ad: c9 leave
80482ae: c3 ret
Disassembly of section .plt:
080482b0 <__gmon_start__@plt-0x10>:
80482b0: ff 35 40 96 04 08 pushl 0x8049640
80482b6: ff 25 44 96 04 08 jmp *0x8049644
80482bc: 00 00 add %al,(%eax)
...
080482c0 <__gmon_start__@plt>:
80482c0: ff 25 48 96 04 08 jmp *0x8049648
80482c6: 68 00 00 00 00 push $0x0
80482cb: e9 e0 ff ff ff jmp 80482b0 <_init+0x18>
080482d0 <__libc_start_main@plt>:
80482d0: ff 25 4c 96 04 08 jmp *0x804964c
80482d6: 68 08 00 00 00 push $0x8
80482db: e9 d0 ff ff ff jmp 80482b0 <_init+0x18>
080482e0 <printf@plt>:
80482e0: ff 25 50 96 04 08 jmp *0x8049650
80482e6: 68 10 00 00 00 push $0x10
80482eb: e9 c0 ff ff ff jmp 80482b0 <_init+0x18>
...
080483c4 <main>:
80483c4: 8d 4c 24 04 lea 0x4(%esp),%ecx
80483c8: 83 e4 f0 and $0xfffffff0,%esp
80483cb: ff 71 fc pushl -0x4(%ecx)
80483ce: 55 push %ebp
80483cf: 89 e5 mov %esp,%ebp
80483d1: 51 push %ecx
80483d2: 83 ec 14 sub $0x14,%esp
80483d5: c7 44 24 04 e0 82 04 movl $0x80482e0,0x4(%esp)
80483dc: 08
80483dd: c7 04 24 d0 84 04 08 movl $0x80484d0,(%esp)
80483e4: e8 f7 fe ff ff call 80482e0 <printf@plt>
80483e9: b8 00 00 00 00 mov $0x0,%eax
80483ee: 83 c4 14 add $0x14,%esp
80483f1: 59 pop %ecx
80483f2: 5d pop %ebp
80483f3: 8d 61 fc lea -0x4(%ecx),%esp
80483f6: c3 ret
80483f7: 90 nop
...
它是如何从.c变成exe的?
预处理(preprocessing)->编译(compilation)->汇编(assembly)->链接(linking)(生成exe)
先来看编译阶段:编译器把 hello.c 翻译成如下的汇编代码。
# Compiler-generated assembly (AT&T syntax, 32-bit x86 registers) for main() of hello.c.
.file "hello.c"
# Read-only data section: holds the format string passed to printf.
.section .rodata
.LC0:
.string "Hello World! via %x/n"
.text
.globl main
.type main, @function
main:
# ecx = esp + 4, i.e. the address just above the return-address slot at entry.
leal 4(%esp), %ecx
# Round esp down to a multiple of 16 (mask with -16).
andl $-16, %esp
# Push a copy of the word at ecx-4 (the entry-time top of stack) onto the realigned stack.
pushl -4(%ecx)
# Save the caller's ebp and make ebp the base of this frame.
pushl %ebp
movl %esp, %ebp
# Keep ecx on the stack; it is reloaded before returning to recover the original esp.
pushl %ecx
# Reserve 20 bytes of stack; the two printf arguments are stored at its low end.
subl $20, %esp
# Second argument (at esp+4): the address of printf.
movl $printf, 4(%esp)
# First argument (at esp): the address of the format string .LC0.
movl $.LC0, (%esp)
call printf
# Return value of main: 0.
movl $0, %eax
# Release the 20 reserved bytes, then restore ecx and ebp in reverse push order.
addl $20, %esp
popl %ecx
popl %ebp
# esp = ecx - 4: back to the entry-time stack pointer, so that ret finds the return address.
leal -4(%ecx), %esp
ret
# Record the size of main as (current location - main).
.size main, .-main
.ident "GCC: (GNU) 4.1.2 20070925 (Red Hat 4.1.2-33)"
.section .note.GNU-stack,"",@progbits
再来看汇编阶段:汇编器把汇编代码翻译成目标文件 hello.o,其反汇编结果如下(注意其中 printf 和字符串的地址还只是占位值,要到链接阶段才会填入真实地址)。
# objdump -d hello.o
hello.o: file format elf32-i386
Disassembly of section .text:
00000000 <main>:
0: 8d 4c 24 04 lea 0x4(%esp),%ecx
4: 83 e4 f0 and $0xfffffff0,%esp
7: ff 71 fc pushl -0x4(%ecx)
a: 55 push %ebp
b: 89 e5 mov %esp,%ebp
d: 51 push %ecx
e: 83 ec 14 sub $0x14,%esp
11: c7 44 24 04 00 00 00 movl $0x0,0x4(%esp)
18: 00
19: c7 04 24 00 00 00 00 movl $0x0,(%esp)
20: e8 fc ff ff ff call 21 <main+0x21>
25: b8 00 00 00 00 mov $0x0,%eax
2a: 83 c4 14 add $0x14,%esp
2d: 59 pop %ecx
2e: 5d pop %ebp
2f: 8d 61 fc lea -0x4(%ecx),%esp
32: c3 ret
整个过程,实际上是将高级语言翻译成0101的二进制,最后将其按exe格式,依次写入文件头、二进制数据和文件尾。
其中高级语言到汇编的过程是研究的重点,因为汇编到二进制相对固定。
汇编到二进制,是根据当前CPU的指令集,把每条汇编指令翻译成对应的01代码(机器码)。比如:
83 c4 14 add $0x14,%esp
高级语言到汇编,又会经过词法分析、语法分析、优化三个阶段。以下面这行代码为例:
printf("Hello World! via %x/n", printf);
经过词法分析后的结果:
printf "Hello World! via %x/n"printf
词法分析的实际用途,是把源码切分成一个个词(记号),并将意义相同的不同写法替换为一个确定的词,以便后面对其进行计算。
例如,0.5、0.50、0.500 通过词法分析后,都会变为 0.5。
语法分析,是对源码逻辑的一个确定。
例如 0.5 + 0.3:对于“+”,我们可以让它对应ADD,也可以让它对应SUB,那么最后的结果就可能是0.8,也可能是0.2。
优化是对逻辑的一个简化处理