编译基础-从hello.c到hello可执行文件的过程


编译基础 从hello.c到hello可执行文件的过程

编译的步骤

可以分为 预处理->编译->汇编->链接 四个阶段

预处理:加入头文件,替换宏。
编译:包含预处理,将 C 程序转换成汇编程序。
汇编:包含预处理和编译,将汇编程序转换成可链接的二进制程序。
链接:包含以上所有操作,将可链接的二进制程序和其它别的库链接在一起,形成可执行的程序文件。

一步一步编译

预处理-源文件生成预处理文件: 							       gcc -E HelloWorld.c -o HelloWorld.i
编译器编译-预处理文件生成汇编代码文件: 					    gcc -S HelloWorld.i -o HelloWorld.s
汇编器编译-汇编代码文件生成不可执行二进制文件: 		    gcc -c HelloWorld.s -o HelloWorld.o
链接-不可执行二进制文件生成可执行二进制文件:          gcc HelloWorld.o -o HelloWorld

说明:不可执行二进制文件(目标文件)为什么不可以执行?因为它还没有经过链接器链接:其中对 printf 等外部符号的引用尚未解析,也还没有生成可供操作系统加载运行的可执行文件结构

指定编译到某个阶段

编译生成-->预处理文件:    		gcc -E HelloWorld.c -o HelloWorld.i
编译到-->汇编代码文件:					gcc -S HelloWorld.c -o HelloWorld.s
编译到-->不可执行文件	         gcc -c HelloWorld.c -o HelloWorld.o
编译到-->可执行文件				    gcc HelloWorld.o -o HelloWorld    生成可执行二进制文件

以下是编译的图:
在这里插入图片描述

gcc -E -S -c

-E                      Only run the preprocessor
-S                      Only run preprocess and compilation steps
-c                      Only run preprocess, compile, and assemble steps

HelloWorld.i HelloWorld.s HelloWorld.o HelloWorld 每个文件中内容是什么?

接下来用下面这段程序HelloWorld.c 做为源文件

#include "stdio.h"
int main(int argc, char const *argv[])
{
    int a=1;
    int b=2;
    int c=3;
  printf("Hello World!\n");
  return 0;
}

HelloWorld.i 预处理文件

# 1 "HelloWorld.c"
# 1 "<built-in>" 1
# 1 "<built-in>" 3
# 361 "<built-in>" 3
# 1 "<command line>" 1
# 1 "<built-in>" 2
# 1 "HelloWorld.c" 2

# 1 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/stdio.h" 1 3 4
# 64 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/stdio.h" 3 4
# 1 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/_stdio.h" 1 3 4
# 68 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/_stdio.h" 3 4
# 1 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/sys/cdefs.h" 1 3 4
# 608 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/sys/cdefs.h" 3 4
# 1 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/sys/_symbol_aliasing.h" 1 3 4
# 609 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/sys/cdefs.h" 2 3 4
# 674 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/sys/cdefs.h" 3 4
# 1 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/sys/_posix_availability.h" 1 3 4
# 675 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/sys/cdefs.h" 2 3 4
# 69 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/_stdio.h" 2 3 4
# 1 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/Availability.h" 1 3 4
# 242 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/Availability.h" 3 4
# 1 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/AvailabilityInternal.h" 1 3 4
# 243 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/Availability.h" 2 3 4
# 70 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/_stdio.h" 2 3 4

# 1 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/_types.h" 1 3 4
# 27 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/_types.h" 3 4
# 1 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/sys/_types.h" 1 3 4
# 33 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/sys/_types.h" 3 4
# 1 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/machine/_types.h" 1 3 4
# 32 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/machine/_types.h" 3 4
# 1 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/i386/_types.h" 1 3 4
# 37 "/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/include/i386/_types.h" 3 4
... 省略了很多信息

__attribute__((__availability__(swift, unavailable, message="Use mkstemp(3) instead.")))

__attribute__((deprecated("This function is provided for compatibility reasons only.  Due to security concerns inherent in the design of tempnam(3), it is highly recommended that you use mkstemp(3) instead.")))

char *tempnam(const char *__dir, const char *__prefix) __asm("_" "tempnam" );

int main(int argc, char const *argv[])
{
    int a=1;
    int b=2;
    int c=3;
  printf("Hello World!\n");
  return 0;
}

根据观察,HelloWorld.i 文件中加入了头文件(.h)的内容

HelloWorld.s 汇编代码文件

	.section	__TEXT,__text,regular,pure_instructions
	.build_version macos, 10, 14	sdk_version 10, 14
	.globl	_main                   ## -- Begin function main
	.p2align	4, 0x90
_main:                                  ## @main
	.cfi_startproc
## %bb.0:
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
	.cfi_def_cfa_register %rbp
	subq	$32, %rsp
	movl	$0, -4(%rbp)
	movl	%edi, -8(%rbp)
	movq	%rsi, -16(%rbp)
	movl	$1, -20(%rbp)
	movl	$2, -24(%rbp)
	movl	$3, -28(%rbp)
	leaq	L_.str(%rip), %rdi
	movb	$0, %al
	callq	_printf
	xorl	%ecx, %ecx
	movl	%eax, -32(%rbp)         ## 4-byte Spill
	movl	%ecx, %eax
	addq	$32, %rsp
	popq	%rbp
	retq
	.cfi_endproc
                                        ## -- End function
	.section	__TEXT,__cstring,cstring_literals
L_.str:                                 ## @.str
	.asciz	"Hello World!\n"


.subsections_via_symbols

这是 AT&T 格式的汇编代码

HelloWorld.o 不可执行二进制文件

  Offset: 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 	
00000000: CF FA ED FE 07 00 00 01 03 00 00 00 01 00 00 00    Ozm~............
00000010: 04 00 00 00 08 02 00 00 00 20 00 00 00 00 00 00    ................
00000020: 19 00 00 00 88 01 00 00 00 00 00 00 00 00 00 00    ................

...

00000300: 00 00 00 00 00 00 00 00 07 00 00 00 01 00 00 00    ................
00000310: 00 00 00 00 00 00 00 00 00 5F 6D 61 69 6E 00 5F    ........._main._
00000320: 70 72 69 6E 74 66 00 00                            printf..

这个文件里包含的就是机器指令(另外还有 Mach-O 文件头、符号表等信息,例如末尾的 _main、_printf),CPU 读取并执行的就是其中的机器指令

HelloWorld 可执行二进制文件

  Offset: 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 	
00000000: CF FA ED FE 07 00 00 01 03 00 00 80 02 00 00 00    Ozm~............
00000010: 0F 00 00 00 C0 04 00 00 85 00 20 00 00 00 00 00    ....@...........
00000020: 19 00 00 00 48 00 00 00 5F 5F 50 41 47 45 5A 45    ....H...__PAGEZE
00000030: 52 4F 00 00 00 00 00 00 00 00 00 00 00 00 00 00    RO..............
....
00001fd0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00    ................
00001fe0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00    ................
00001ff0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00    ................
00002000: 11 22 10 51 00 00 00 00 11 40 64 79 6C 64 5F 73    .".Q.....@dyld_s
00002010: 74 75 62 5F 62 69 6E 64 65 72 00 51 72 00 90 00    tub_binder.Qr...
00002020: 72 10 11 40 5F 70 72 69 6E 74 66 00 90 00 00 00    r..@_printf.....
00002030: 00 01 5F 00 05 00 02 5F 6D 68 5F 65 78 65 63 75    .._...._mh_execu
00002040: 74 65 5F 68 65 61 64 65 72 00 21 6D 61 69 6E 00    te_header.!main.
00002050: 25 02 00 00 00 03 00 C0 1E 00 00 00 00 00 00 00    %......@........
00002060: C0 1E 00 00 00 00 00 00 02 00 00 00 0F 01 10 00    @...............
00002070: 00 00 00 00 01 00 00 00 16 00 00 00 0F 01 00 00    ................
00002080: 40 0F 00 00 01 00 00 00 1C 00 00 00 01 00 00 01    @...............
00002090: 00 00 00 00 00 00 00 00 24 00 00 00 01 00 00 01    ........$.......
000020a0: 00 00 00 00 00 00 00 00 02 00 00 00 03 00 00 00    ................
000020b0: 00 00 00 40 02 00 00 00 20 00 5F 5F 6D 68 5F 65    ...@......__mh_e
000020c0: 78 65 63 75 74 65 5F 68 65 61 64 65 72 00 5F 6D    xecute_header._m
000020d0: 61 69 6E 00 5F 70 72 69 6E 74 66 00 64 79 6C 64    ain._printf.dyld
000020e0: 5F 73 74 75 62 5F 62 69 6E 64 65 72 00 00 00 00    _stub_binder....

上面 HelloWorld.o(不可执行文件)的最后一行偏移为 00000320,而 HelloWorld(可执行文件)的最后一行偏移为 000020e0

显然可执行文件比 HelloWorld.o 大得多:链接器在其中加入了加载命令(如 __PAGEZERO)、段对齐用的填充(大片的 00)以及动态链接信息(如 dyld_stub_binder、_printf 等符号),所以大得多

好的,到此整个从HelloWorld.c到HelloWorld可执行文件的过程分析完了,其实还是挺有趣,感觉很充实

接下来我们玩一玩反汇编,

可能会用到的 gcc 选项:-g、-masm

这两个-g,-masm是无意间发现的

gcc -masm 指定汇编风格

$ gcc -S -masm=intel HelloWorld.c -o HelloWorld.s

	.section	__TEXT,__text,regular,pure_instructions
	.build_version macos, 10, 14	sdk_version 10, 14
	.intel_syntax noprefix
	.globl	_main                   ## -- Begin function main
	.p2align	4, 0x90
_main:                                  ## @main
	.cfi_startproc
## %bb.0:
	push	rbp
	.cfi_def_cfa_offset 16
	.cfi_offset rbp, -16
	mov	rbp, rsp
	.cfi_def_cfa_register rbp
	sub	rsp, 32
	mov	dword ptr [rbp - 4], 0
	mov	dword ptr [rbp - 8], edi
	mov	qword ptr [rbp - 16], rsi
	mov	dword ptr [rbp - 20], 1
	mov	dword ptr [rbp - 24], 2
	mov	dword ptr [rbp - 28], 3
	lea	rdi, [rip + L_.str]
	mov	al, 0
	call	_printf
	xor	ecx, ecx
	mov	dword ptr [rbp - 32], eax ## 4-byte Spill
	mov	eax, ecx
	add	rsp, 32
	pop	rbp
	ret
	.cfi_endproc
                                        ## -- End function
	.section	__TEXT,__cstring,cstring_literals
L_.str:                                 ## @.str
	.asciz	"Hello World!\n"


.subsections_via_symbols

gcc -g 在生成的目标文件(或可执行文件)中加入调试信息
softwaredeMacBook-Pro:gcc software$ gcc -c -g HelloWorld.c -o HelloWorld.o

反汇编工具 objdump

在MacOS 下objdump很不友好,浪费了我两个小时时间在这个上,最后把辛酸路程总结在下文,供大家参考

MacOS下的objdump是LLVM平台的,其他Windows、Linux的objdump是GNU的

LLVM 平台的objdump文档地址:https://llvm.org/docs/CommandGuide/llvm-objdump.html

GNU平台的objdump文档地址:https://sourceware.org/binutils/docs/binutils/objdump.html

首先看一下MacOS下的objdump --version

softwaredeMacBook-Pro:~ software$ objdump --version
Apple LLVM version 10.0.1 (clang-1001.0.46.4)
  Optimized build.
  Default target: x86_64-apple-darwin18.7.0
  Host CPU: skylake

  Registered Targets:
    aarch64    - AArch64 (little endian)
    aarch64_be - AArch64 (big endian)
    arm        - ARM
    arm64      - ARM64 (little endian)
    armeb      - ARM (big endian)
    thumb      - Thumb
    thumbeb    - Thumb (big endian)
    x86        - 32-bit X86: Pentium-Pro and above
    x86-64     - 64-bit X86: EM64T and AMD64

目前我们可以用objdump把二进制文件HelloWorld.o(或HelloWorld)反汇编至汇编代码

softwaredeMacBook-Pro:gcc software$ objdump -d HelloWorld.o

HelloWorld.o:   file format Mach-O 64-bit x86-64

Disassembly of section __TEXT,__text:
_main:
       0:       55      pushq   %rbp
       1:       48 89 e5        movq    %rsp, %rbp
       4:       48 83 ec 20     subq    $32, %rsp
       8:       c7 45 fc 00 00 00 00    movl    $0, -4(%rbp)
       f:       89 7d f8        movl    %edi, -8(%rbp)
      12:       48 89 75 f0     movq    %rsi, -16(%rbp)
      16:       c7 45 ec 01 00 00 00    movl    $1, -20(%rbp)
      1d:       c7 45 e8 02 00 00 00    movl    $2, -24(%rbp)
      24:       c7 45 e4 03 00 00 00    movl    $3, -28(%rbp)
      2b:       48 8d 3d 14 00 00 00    leaq    20(%rip), %rdi
      32:       b0 00   movb    $0, %al
      34:       e8 00 00 00 00  callq   0 <_main+0x39>
      39:       31 c9   xorl    %ecx, %ecx
      3b:       89 45 e0        movl    %eax, -32(%rbp)
      3e:       89 c8   movl    %ecx, %eax
      40:       48 83 c4 20     addq    $32, %rsp
      44:       5d      popq    %rbp
      45:       c3      retq

看起来是不是很辣眼睛?是的,这就是 LLVM 的 objdump。好的,坑已踩好,这时我们就想办法跳出来

先解释一下,从左到右:
_main:标号
0,1,4,8:指令在该节中的偏移地址(十六进制)
55:机器码(该指令的十六进制编码)
pushq %rbp:汇编指令(助记符和操作数)

然后我决定对这个难以阅读的输出进行优化:1. 把汇编风格改为 Intel;2. 解决输出内容未对齐的问题

MacOS 对objdump的输出进行优化

执行以下指令:

objdump -d --no-show-raw-insn -S  -x86-asm-syntax=intel  hello.o 

输出:


hello.o:        file format Mach-O 64-bit x86-64

Disassembly of section __TEXT,__text:
_main:
; {
       0:       push    rbp
       1:       mov     rbp, rsp
       4:       sub     rsp, 32
       8:       mov     dword ptr [rbp - 4], 0
       f:       mov     dword ptr [rbp - 8], edi
      12:       mov     qword ptr [rbp - 16], rsi
; int a=1;
      16:       mov     dword ptr [rbp - 20], 1
; int b=2;
      1d:       mov     dword ptr [rbp - 24], 2
; int c=3;
      24:       mov     dword ptr [rbp - 28], 3
; printf("Hello World!\n");
      2b:       lea     rdi, [rip + 20]
      32:       mov     al, 0
      34:       call    0 <_main+0x39>
      39:       xor     ecx, ecx
; return 0;
      3b:       mov     dword ptr [rbp - 32], eax
      3e:       mov     eax, ecx
      40:       add     rsp, 32
      44:       pop     rbp
      45:       ret

这样是不是就清爽了很多,哈哈

hello.o: file format Mach-O 64-bit x86-64,表示该二进制文件为 Mach-O 64-bit 格式

注意:

gcc hello.c -g -c -o hello.o

要加上 -g 把调试信息放到 hello.o 中,这样 objdump 的 -S 选项才能把源代码和反汇编结果对应显示出来

总结完毕,感觉思路更清晰了,离自己写出操作系统又近了一步:

好的,我的分享到此结束,如果大家对自己动手写操作系统有兴趣,可以访问下面贴的专栏,我们大家一起学习进步:

在这里插入图片描述

  • 6
    点赞
  • 19
    收藏
    觉得还不错? 一键收藏
  • 5
    评论
评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值