写在前面
前置阅读
ARM汇编[0]hello world
ARM汇编[1]格式化字符串打印
在能够编写简单的汇编程序之后,我的疑问就是:“这完全是自己瞎摸索的啊……这样写合适吗?别人是怎么做的呢?”
那我们不妨来反汇编看一下GNU的编译器(GCC)是把C源代码怎样翻译成汇编的,看一下规范的做法,并学习一下我们自己写汇编程序时没有注意到的地方
C源代码
这是一个随便写的链表程序,能编译正常运行
#include<stdio.h>
#include<stdlib.h>
/* One element of the doubly linked list: an int payload plus a link in each direction. */
typedef struct node {
int val; /* payload stored in this node */
struct node* next; /* following node; create_node() initialises it to NULL */
struct node* before; /* preceding node; NOT initialised by create_node() */
} Node;
/* List header: two sentinel nodes bracketing the data nodes, plus an element count. */
typedef struct list {
Node* head; /* front sentinel; the first data node is head->next */
Node* end; /* back sentinel; append_node() inserts just before it */
int length; /* number of data nodes (sentinels are not counted) */
} List;
/*
 * Allocate a new Node on the heap and store `value` in it.
 *
 * Only `val` and `next` are written here; `before` keeps whatever bytes
 * malloc handed back, so callers must assign it before reading it.
 *
 * NOTE(review): the malloc() result is dereferenced without a NULL check.
 *
 * Returns the pointer obtained from malloc().
 */
Node* create_node(int value)
{
Node* node = malloc(sizeof(Node));
node->val = value;
node->next = NULL;
return node;
}
/*
 * Build an empty list: a List header plus two sentinel nodes (both with
 * val 0) that are linked to each other, and a length of 0.
 *
 * After this call head->next == end and end->before == head.
 * end->next is NULL (set by create_node); head->before is never assigned.
 *
 * NOTE(review): the malloc() result is dereferenced without a NULL check.
 *
 * Returns the newly allocated List.
 */
List* create_list()
{
List* list = malloc(sizeof(List));
list->head = create_node(0);
list->end = create_node(0);
/* Link the two sentinels so the empty list is head <-> end. */
list->head->next = list->end;
list->end->before = list->head;
list->length = 0;
return list;
}
/*
 * Append a new node holding `value` at the tail, i.e. between the node
 * currently preceding the `end` sentinel and the sentinel itself.
 * Increments list->length.
 *
 * NOTE(review): n->before is never assigned (create_node leaves it
 * uninitialised too), so only the forward `next` chain and end->before
 * are valid; walking backwards past the last node is not safe.
 */
void append_node(List* list, int value)
{
Node* n = create_node(value);
/* p is the current last node, or the head sentinel when the list is empty. */
Node* p = list->end->before;
p->next = n;
n->next = list->end;
list->end->before = n;
list->length++;
}
/*
 * Print every data node's value as "%d " on one line, followed by a newline.
 *
 * Iteration starts at head->next (skipping the front sentinel) and runs
 * for exactly list->length steps, following the `next` links.
 */
void print_list(List* list)
{
int i;
Node* s = list->head; /* front sentinel, used only to reach the first data node */
Node* p = s->next; /* cursor over the data nodes */
for (i = 0; i < list->length; i++) {
printf("%d ", p->val);
p = p->next;
}
printf("\n");
}
/*
 * Demo driver: create a list, append 1, 2 and 3, then print it.
 *
 * The return value 2726 (0xaa6) is an arbitrary, distinctive constant
 * that is easy to locate in the disassembly of main.
 */
int main()
{
List* list = create_list();
append_node(list, 1);
append_node(list, 2);
append_node(list, 3);
print_list(list);
return 2726;
}
调用链如图所示:(CLion选中函数之后按ctrl+alt+H显示Hierarchy)
等下从main函数开始分析。完整的反汇编结果还包括了C运行时库等几个目标文件的代码,这里我们暂且不看(全部的反汇编结果在最后一章,294行)
反汇编代码分析
反汇编指令
objdump -d list > main.s
main函数
函数的调用、传参、保存和恢复调用栈
00000000000009c0 <main>:
9c0: a9be7bfd stp x29, x30, [sp, #-32]!
// x29、x30寄存器存入栈
9c4: 910003fd mov x29, sp
// 把当前栈指针sp的值复制到x29, 作为本函数的帧指针
9c8: 97ffffa1 bl 84c <create_list>
// 调用函数create_list,无参数
9cc: f9000fe0 str x0, [sp, #24]
// 返回值(链表指针)保存到栈里
9d0: 52800021 mov w1, #0x1 // #1
// 用1作为第2个参数(节点的值), 放在w1里
9d4: f9400fe0 ldr x0, [sp, #24]
// 从栈中把保存的链表指针取出,并且作为第1个参数
9d8: 97ffffbb bl 8c4 <append_node>
// 调用append_node函数(参数1是链表指针,参数2是节点的值)
9dc: 52800041 mov w1, #0x2 // #2
9e0: f9400fe0 ldr x0, [sp, #24]
9e4: 97ffffb8 bl 8c4 <append_node>
9e8: 52800061 mov w1, #0x3 // #3
9ec: f9400fe0 ldr x0, [sp, #24]
9f0: 97ffffb5 bl 8c4 <append_node>
// 上面几行同理, 是创建节点2和3
9f4: f9400fe0 ldr x0, [sp, #24]
// 从栈中取出链表指针作为参数1
9f8: 97ffffd1 bl 93c <print_list>
// 调用print_list函数
9fc: 528154c0 mov w0, #0xaa6 // #2726
// 以2726作为返回值(把return 2726改成其他值这里就会变更)
a00: a8c27bfd ldp x29, x30, [sp], #32
a04: d65f03c0 ret
// 上面两行是恢复调用栈的标准操作
create_node函数
结构体数据的操作、指针的使用
0000000000000814 <create_node>:
814: a9bd7bfd stp x29, x30, [sp, #-48]!
818: 910003fd mov x29, sp
81c: b9001fe0 str w0, [sp, #28]
820: d2800300 mov x0, #0x18 // #24
// 这个要被mov到x0的0x18是malloc函数的参数1,关于0x18是怎么来的:
// 1个int + 2个指针, 4字节的int发生了内存对齐,具体见下图:地址820处指令的解释
824: 97ffff9b bl 690 <malloc@plt>
// 调用c库函数malloc
828: f90017e0 str x0, [sp, #40]
// x0这时是malloc函数返回的指针,也就是Node指针
82c: f94017e0 ldr x0, [sp, #40]
// 上一条指令已经把指针存进栈, 这里再从栈中把它取回到x0
830: b9401fe1 ldr w1, [sp, #28]
// 取出保存的参数1 (节点的val)
834: b9000001 str w1, [x0]
// [x0]表示以x0的值作为地址, 即把val存到x0这个地址处, 也就是malloc返回的指针指向的地方
// c源代码是node->val = value; 可以看到汇编层面直接是按offset处理的, 并不会管什么变量名, val是Node结构体的第一个成员, 所以这里偏移是0
838: f94017e0 ldr x0, [sp, #40]
83c: f900041f str xzr, [x0, #8]
// xzr是ARM里的全零寄存器, 始终返回0, 所以这里就是给malloc返回的指针指向的地方+8字节的偏移处赋值0
// 对应源码node->next = NULL;
840: f94017e0 ldr x0, [sp, #40]
// 此函数的返回值是Node*, 所以要把那个指针作为返回值再返回回去
844: a8c37bfd ldp x29, x30, [sp], #48
848: d65f03c0 ret
图:地址820处指令的解释
全部的反汇编结果
list: file format elf64-littleaarch64
Disassembly of section .init:
0000000000000630 <_init>:
630: d503201f nop
634: a9bf7bfd stp x29, x30, [sp, #-16]!
638: 910003fd mov x29, sp
63c: 9400003e bl 734 <call_weak_fn>
640: a8c17bfd ldp x29, x30, [sp], #16
644: d65f03c0 ret
Disassembly of section .plt:
0000000000000650 <.plt>:
650: a9bf7bf0 stp x16, x30, [sp, #-16]!
654: 90000090 adrp x16, 10000 <__FRAME_END__+0xf450>
658: f947ca11 ldr x17, [x16, #3984]
65c: 913e4210 add x16, x16, #0xf90
660: d61f0220 br x17
664: d503201f nop
668: d503201f nop
66c: d503201f nop
0000000000000670 <__libc_start_main@plt>:
670: 90000090 adrp x16, 10000 <__FRAME_END__+0xf450>
674: f947ce11 ldr x17, [x16, #3992]
678: 913e6210 add x16, x16, #0xf98
67c: d61f0220 br x17
0000000000000680 <__cxa_finalize@plt>:
680: 90000090 adrp x16, 10000 <__FRAME_END__+0xf450>
684: f947d211 ldr x17, [x16, #4000]
688: 913e8210 add x16, x16, #0xfa0
68c: d61f0220 br x17
0000000000000690 <malloc@plt>:
690: 90000090 adrp x16, 10000 <__FRAME_END__+0xf450>
694: f947d611 ldr x17, [x16, #4008]
698: 913ea210 add x16, x16, #0xfa8
69c: d61f0220 br x17
00000000000006a0 <__gmon_start__@plt>:
6a0: 90000090 adrp x16, 10000 <__FRAME_END__+0xf450>
6a4: f947da11 ldr x17, [x16, #4016]
6a8: 913ec210 add x16, x16, #0xfb0
6ac: d61f0220 br x17
00000000000006b0 <abort@plt>:
6b0: 90000090 adrp x16, 10000 <__FRAME_END__+0xf450>
6b4: f947de11 ldr x17, [x16, #4024]
6b8: 913ee210 add x16, x16, #0xfb8
6bc: d61f0220 br x17
00000000000006c0 <printf@plt>:
6c0: 90000090 adrp x16, 10000 <__FRAME_END__+0xf450>
6c4: f947e211 ldr x17, [x16, #4032]
6c8: 913f0210 add x16, x16, #0xfc0
6cc: d61f0220 br x17
00000000000006d0 <putchar@plt>:
6d0: 90000090 adrp x16, 10000 <__FRAME_END__+0xf450>
6d4: f947e611 ldr x17, [x16, #4040]
6d8: 913f2210 add x16, x16, #0xfc8
6dc: d61f0220 br x17
Disassembly of section .text:
0000000000000700 <_start>:
700: d503201f nop
704: d280001d mov x29, #0x0 // #0
708: d280001e mov x30, #0x0 // #0
70c: aa0003e5 mov x5, x0
710: f94003e1 ldr x1, [sp]
714: 910023e2 add x2, sp, #0x8
718: 910003e6 mov x6, sp
71c: 90000080 adrp x0, 10000 <__FRAME_END__+0xf450>
720: f947f800 ldr x0, [x0, #4080]
724: d2800003 mov x3, #0x0 // #0
728: d2800004 mov x4, #0x0 // #0
72c: 97ffffd1 bl 670 <__libc_start_main@plt>
730: 97ffffe0 bl 6b0 <abort@plt>
0000000000000734 <call_weak_fn>:
734: 90000080 adrp x0, 10000 <__FRAME_END__+0xf450>
738: f947f400 ldr x0, [x0, #4072]
73c: b4000040 cbz x0, 744 <call_weak_fn+0x10>
740: 17ffffd8 b 6a0 <__gmon_start__@plt>
744: d65f03c0 ret
748: d503201f nop
74c: d503201f nop
0000000000000750 <deregister_tm_clones>:
750: b0000080 adrp x0, 11000 <__data_start>
754: 91004000 add x0, x0, #0x10
758: b0000081 adrp x1, 11000 <__data_start>
75c: 91004021 add x1, x1, #0x10
760: eb00003f cmp x1, x0
764: 540000c0 b.eq 77c <deregister_tm_clones+0x2c> // b.none
768: 90000081 adrp x1, 10000 <__FRAME_END__+0xf450>
76c: f947ec21 ldr x1, [x1, #4056]
770: b4000061 cbz x1, 77c <deregister_tm_clones+0x2c>
774: aa0103f0 mov x16, x1
778: d61f0200 br x16
77c: d65f03c0 ret
0000000000000780 <register_tm_clones>:
780: b0000080 adrp x0, 11000 <__data_start>
784: 91004000 add x0, x0, #0x10
788: b0000081 adrp x1, 11000 <__data_start>
78c: 91004021 add x1, x1, #0x10
790: cb000021 sub x1, x1, x0
794: d37ffc22 lsr x2, x1, #63
798: 8b810c41 add x1, x2, x1, asr #3
79c: 9341fc21 asr x1, x1, #1
7a0: b40000c1 cbz x1, 7b8 <register_tm_clones+0x38>
7a4: 90000082 adrp x2, 10000 <__FRAME_END__+0xf450>
7a8: f947fc42 ldr x2, [x2, #4088]
7ac: b4000062 cbz x2, 7b8 <register_tm_clones+0x38>
7b0: aa0203f0 mov x16, x2
7b4: d61f0200 br x16
7b8: d65f03c0 ret
7bc: d503201f nop
00000000000007c0 <__do_global_dtors_aux>:
7c0: a9be7bfd stp x29, x30, [sp, #-32]!
7c4: 910003fd mov x29, sp
7c8: f9000bf3 str x19, [sp, #16]
7cc: b0000093 adrp x19, 11000 <__data_start>
7d0: 39404260 ldrb w0, [x19, #16]
7d4: 35000140 cbnz w0, 7fc <__do_global_dtors_aux+0x3c>
7d8: 90000080 adrp x0, 10000 <__FRAME_END__+0xf450>
7dc: f947f000 ldr x0, [x0, #4064]
7e0: b4000080 cbz x0, 7f0 <__do_global_dtors_aux+0x30>
7e4: b0000080 adrp x0, 11000 <__data_start>
7e8: f9400400 ldr x0, [x0, #8]
7ec: 97ffffa5 bl 680 <__cxa_finalize@plt>
7f0: 97ffffd8 bl 750 <deregister_tm_clones>
7f4: 52800020 mov w0, #0x1 // #1
7f8: 39004260 strb w0, [x19, #16]
7fc: f9400bf3 ldr x19, [sp, #16]
800: a8c27bfd ldp x29, x30, [sp], #32
804: d65f03c0 ret
808: d503201f nop
80c: d503201f nop
0000000000000810 <frame_dummy>:
810: 17ffffdc b 780 <register_tm_clones>
0000000000000814 <create_node>:
814: a9bd7bfd stp x29, x30, [sp, #-48]!
818: 910003fd mov x29, sp
81c: b9001fe0 str w0, [sp, #28]
820: d2800300 mov x0, #0x18 // #24
824: 97ffff9b bl 690 <malloc@plt>
828: f90017e0 str x0, [sp, #40]
82c: f94017e0 ldr x0, [sp, #40]
830: b9401fe1 ldr w1, [sp, #28]
834: b9000001 str w1, [x0]
838: f94017e0 ldr x0, [sp, #40]
83c: f900041f str xzr, [x0, #8]
840: f94017e0 ldr x0, [sp, #40]
844: a8c37bfd ldp x29, x30, [sp], #48
848: d65f03c0 ret
000000000000084c <create_list>:
84c: a9be7bfd stp x29, x30, [sp, #-32]!
850: 910003fd mov x29, sp
854: d2800300 mov x0, #0x18 // #24
858: 97ffff8e bl 690 <malloc@plt>
85c: f9000fe0 str x0, [sp, #24]
860: 52800000 mov w0, #0x0 // #0
864: 97ffffec bl 814 <create_node>
868: aa0003e1 mov x1, x0
86c: f9400fe0 ldr x0, [sp, #24]
870: f9000001 str x1, [x0]
874: 52800000 mov w0, #0x0 // #0
878: 97ffffe7 bl 814 <create_node>
87c: aa0003e1 mov x1, x0
880: f9400fe0 ldr x0, [sp, #24]
884: f9000401 str x1, [x0, #8]
888: f9400fe0 ldr x0, [sp, #24]
88c: f9400000 ldr x0, [x0]
890: f9400fe1 ldr x1, [sp, #24]
894: f9400421 ldr x1, [x1, #8]
898: f9000401 str x1, [x0, #8]
89c: f9400fe0 ldr x0, [sp, #24]
8a0: f9400400 ldr x0, [x0, #8]
8a4: f9400fe1 ldr x1, [sp, #24]
8a8: f9400021 ldr x1, [x1]
8ac: f9000801 str x1, [x0, #16]
8b0: f9400fe0 ldr x0, [sp, #24]
8b4: b900101f str wzr, [x0, #16]
8b8: f9400fe0 ldr x0, [sp, #24]
8bc: a8c27bfd ldp x29, x30, [sp], #32
8c0: d65f03c0 ret
00000000000008c4 <append_node>:
8c4: a9bd7bfd stp x29, x30, [sp, #-48]!
8c8: 910003fd mov x29, sp
8cc: f9000fe0 str x0, [sp, #24]
8d0: b90017e1 str w1, [sp, #20]
8d4: b94017e0 ldr w0, [sp, #20]
8d8: 97ffffcf bl 814 <create_node>
8dc: f90013e0 str x0, [sp, #32]
8e0: f9400fe0 ldr x0, [sp, #24]
8e4: f9400400 ldr x0, [x0, #8]
8e8: f9400800 ldr x0, [x0, #16]
8ec: f90017e0 str x0, [sp, #40]
8f0: f94017e0 ldr x0, [sp, #40]
8f4: f94013e1 ldr x1, [sp, #32]
8f8: f9000401 str x1, [x0, #8]
8fc: f9400fe0 ldr x0, [sp, #24]
900: f9400401 ldr x1, [x0, #8]
904: f94013e0 ldr x0, [sp, #32]
908: f9000401 str x1, [x0, #8]
90c: f9400fe0 ldr x0, [sp, #24]
910: f9400400 ldr x0, [x0, #8]
914: f94013e1 ldr x1, [sp, #32]
918: f9000801 str x1, [x0, #16]
91c: f9400fe0 ldr x0, [sp, #24]
920: b9401000 ldr w0, [x0, #16]
924: 11000401 add w1, w0, #0x1
928: f9400fe0 ldr x0, [sp, #24]
92c: b9001001 str w1, [x0, #16]
930: d503201f nop
934: a8c37bfd ldp x29, x30, [sp], #48
938: d65f03c0 ret
000000000000093c <print_list>:
93c: a9bc7bfd stp x29, x30, [sp, #-64]!
940: 910003fd mov x29, sp
944: f9000fe0 str x0, [sp, #24]
948: f9400fe0 ldr x0, [sp, #24]
94c: f9400000 ldr x0, [x0]
950: f9001fe0 str x0, [sp, #56]
954: f9401fe0 ldr x0, [sp, #56]
958: f9400400 ldr x0, [x0, #8]
95c: f9001be0 str x0, [sp, #48]
960: b9002fff str wzr, [sp, #44]
964: 1400000d b 998 <print_list+0x5c>
968: f9401be0 ldr x0, [sp, #48]
96c: b9400000 ldr w0, [x0]
970: 2a0003e1 mov w1, w0
974: 90000000 adrp x0, 0 <__abi_tag-0x278>
978: 9128a000 add x0, x0, #0xa28
97c: 97ffff51 bl 6c0 <printf@plt>
980: f9401be0 ldr x0, [sp, #48]
984: f9400400 ldr x0, [x0, #8]
988: f9001be0 str x0, [sp, #48]
98c: b9402fe0 ldr w0, [sp, #44]
990: 11000400 add w0, w0, #0x1
994: b9002fe0 str w0, [sp, #44]
998: f9400fe0 ldr x0, [sp, #24]
99c: b9401000 ldr w0, [x0, #16]
9a0: b9402fe1 ldr w1, [sp, #44]
9a4: 6b00003f cmp w1, w0
9a8: 54fffe0b b.lt 968 <print_list+0x2c> // b.tstop
9ac: 52800140 mov w0, #0xa // #10
9b0: 97ffff48 bl 6d0 <putchar@plt>
9b4: d503201f nop
9b8: a8c47bfd ldp x29, x30, [sp], #64
9bc: d65f03c0 ret
00000000000009c0 <main>:
9c0: a9be7bfd stp x29, x30, [sp, #-32]!
9c4: 910003fd mov x29, sp
9c8: 97ffffa1 bl 84c <create_list>
9cc: f9000fe0 str x0, [sp, #24]
9d0: 52800021 mov w1, #0x1 // #1
9d4: f9400fe0 ldr x0, [sp, #24]
9d8: 97ffffbb bl 8c4 <append_node>
9dc: 52800041 mov w1, #0x2 // #2
9e0: f9400fe0 ldr x0, [sp, #24]
9e4: 97ffffb8 bl 8c4 <append_node>
9e8: 52800061 mov w1, #0x3 // #3
9ec: f9400fe0 ldr x0, [sp, #24]
9f0: 97ffffb5 bl 8c4 <append_node>
9f4: f9400fe0 ldr x0, [sp, #24]
9f8: 97ffffd1 bl 93c <print_list>
9fc: 528154c0 mov w0, #0xaa6 // #2726
a00: a8c27bfd ldp x29, x30, [sp], #32
a04: d65f03c0 ret
Disassembly of section .fini:
0000000000000a08 <_fini>:
a08: d503201f nop
a0c: a9bf7bfd stp x29, x30, [sp, #-16]!
a10: 910003fd mov x29, sp
a14: a8c17bfd ldp x29, x30, [sp], #16
a18: d65f03c0 ret