Before main() 分析

41 篇文章 0 订阅
导读:
  本文分析了在main()之前的ELF程序流程,试图让您更清楚的把握程序的流程的脉络走向。
  从而更深入的了解ELF。不正确之处,还请斧正。
  ★ 综述
  ELF的可执行文件与共享库在结构上非常类似,它们具有一张程序段表,用来描述这些段如何映射到进程空间.
  对于可执行文件来说,段的加载位置是固定的,程序段表中如实反映了段的加载地址.对于共享库来说,段的加
  载位置是浮动的,位置无关的,程序段表反映的是以0作为基准地址的相对加载地址.尽管共享库的连接是不
  充分的,为了便于测试动态链接器,Linux允许直接加载共享库运行.如果应用程序具有动态链接器的描述段,
  内核在完成程序段加载后,紧接着加载动态链接器,并且启动动态链接器的入口.如果没有动态链接器的描述段,
  就直接交给用户程序入口。
  上述这部分请参考:linuxforum论坛上opera写的《分析ELF的加载过程》
  在控制权交给动态链接器的入口后,首先调用_dl_start函数获得真实的程序入口(注:该入口地址
  不是main的地址,也就是说一般程序的入口不是main),然后循环调用每个共享object的初始化函数,
  接着跳转到真实的程序入口,一般为_start(程序中的_start)的一个例程,该例程压入一些参数到堆栈,
  就直接调用__libc_start_main函数。在__libc_start_main函数中替动态连接器和自己程序安排
  destructor,并运行程序的初始化函数。然后才把控制权交给main()函数。
  ★ main()之前流程
  下面就是动态链接器的入口。
  /* Initial entry point code for the dynamic linker.
     The C function `_dl_start' is the real entry point;
     its return value is the user program's entry point.

     Register roles in _dl_start_user, as set up by the code below:
       %edi  user entry point address returned by _dl_start in %eax
       %ebx  address of the GOT
       %esi  search list of the main object; copied to %eax before each
             call to _dl_init_next

     Every line inside the string ends in "\n\": the "\n" terminates the
     assembler statement, and the trailing backslash continues both the
     string literal and the #define onto the next source line.  Remarks
     are therefore written as `#' assembler comments inside the string,
     never as C comments after the backslash.  */
  #define RTLD_START asm ("\
  .text\n\
  .globl _start\n\
  .globl _dl_start_user\n\
  _start:\n\
  pushl %esp\n\
  call _dl_start  # on return, %eax holds the user entry point address\n\
  popl %ebx  # %ebx receives the %esp value pushed above\n\
  _dl_start_user:\n\
  # Save the user entry point address in %edi.\n\
  movl %eax, %edi\n\
  # Point %ebx at the GOT.\n\
  call 0f\n\
  0: popl %ebx\n\
  addl $_GLOBAL_OFFSET_TABLE_+[.-0b], %ebx\n\
  # Store the highest stack address\n\
  movl __libc_stack_end@GOT(%ebx), %eax\n\
  movl %esp, (%eax)  # store %esp into __libc_stack_end, located via the GOT\n\
  # See if we were run as a command with the executable file\n\
  # name as an extra leading argument.\n\
  movl _dl_skip_args@GOT(%ebx), %eax\n\
  movl (%eax), %eax\n\
  # Pop the original argument count.\n\
  popl %ecx\n\
  # Subtract _dl_skip_args from it.\n\
  subl %eax, %ecx\n\
  # Adjust the stack pointer to skip _dl_skip_args words.\n\
  leal (%esp,%eax,4), %esp\n\
  # Push back the modified argument count.\n\
  pushl %ecx\n\
  # Push the searchlist of the main object as argument in\n\
  # _dl_init_next call below.\n\
  movl _dl_main_searchlist@GOT(%ebx), %eax\n\
  movl (%eax), %esi\n\
  0: movl %esi,%eax\n\
  # Call _dl_init_next to return the address of an initializer\n\
  # function to run.\n\
  call _dl_init_next@PLT  # the initializer address comes back in %eax\n\
  # Check for zero return, when out of initializers.\n\
  testl %eax, %eax\n\
  jz 1f\n\
  # Call the shared object initializer function.\n\
  # NOTE: We depend only on the registers (%ebx, %esi and %edi)\n\
  # and the return address pushed by this call;\n\
  # the initializer is called with the stack just\n\
  # as it appears on entry, and it is free to move\n\
  # the stack around, as long as it winds up jumping to\n\
  # the return address on the top of the stack.\n\
  call *%eax  # run the shared object's initializer\n\
  # Loop to call _dl_init_next for the next initializer.\n\
  jmp 0b\n\
  1: # Clear the startup flag.\n\
  movl _dl_starting_up@GOT(%ebx), %eax\n\
  movl $0, (%eax)\n\
  # Pass our finalizer function to the user in %edx, as per ELF ABI.\n\
  movl _dl_fini@GOT(%ebx), %edx\n\
  # Jump to the user's entry point.\n\
  jmp *%edi\n\
  .previous\n\
  ");
  sysdeps/i386/start.s中
  user's entry也就是下面的_start例程
  /* This is the canonical entry point, usually the first thing in the text
  segment. The SVR4/i386 ABI (pages 3-31, 3-32) says that when the entry
  point runs, most registers' values are unspecified, except for:
  %edx Contains a function pointer to be registered with `atexit'.
  This is how the dynamic linker arranges to have DT_FINI
  functions called for shared libraries that have been loaded
  before this code runs.
  %esp The stack contains the arguments and environment:
  0(%esp) argc
  4(%esp) argv[0]
  ...
  (4*argc)(%esp) NULL
  (4*(argc+1))(%esp) envp[0]
  ...
  NULL

  Register use in the code below:
  %esi receives argc (popped from the stack)
  %ecx receives the address of argv[0]
  %edx is pushed unchanged as one of the values for __libc_start_main
  */
  .text
  .globl _start
  _start:
  /* Clear the frame pointer. The ABI suggests this be done, to mark
  the outermost frame obviously. */
  xorl %ebp, %ebp
  /* Extract the arguments as encoded on the stack and set up
  the arguments for `main': argc, argv. envp will be determined
  later in __libc_start_main. */
  popl %esi /* Pop the argument count. */
  movl %esp, %ecx /* argv starts just at the current stack top.*/
  /* Before pushing the arguments align the stack to a double word
  boundary to avoid penalties from misaligned accesses. Thanks
  to Edward Seidl <seidl@janed.com> for pointing this out. */
  andl $0xfffffff8, %esp
  /* The seven pushes after this one account for the 28 bytes mentioned
  below; together with this 4-byte filler word that makes 32 bytes, a
  multiple of the 8-byte alignment established above. */
  pushl %eax /* Push garbage because we allocate
  28 more bytes. */
  /* Provide the highest stack address to the user code (for stacks
  which grow downwards). */
  pushl %esp
  pushl %edx /* Push address of the shared library
  termination function. */
  /* Push address of our own entry points to .fini and .init. */
  pushl $_fini
  pushl $_init
  pushl %ecx /* Push second argument: argv. */
  pushl %esi /* Push first argument: argc. */
  /* Push the address of `main' last, so that it sits on top of the stack
  at the call. From the top down the stack then holds: address of main,
  argc, argv, address of _init, address of _fini, the %edx value, the
  pushed stack pointer value, and the filler word. */
  pushl $main
  /* Call the user's main function, and exit with its value.
  But let the libc call main. */
  call __libc_start_main
  hlt /* Crash if somehow `exit' does return. */
  __libc_start_main在sysdeps/generic/libc_start.c中
  假设定义的是PIC的代码。
  /* Start-up information handed to __libc_start_main through its
     `stinfo' parameter.  */
  struct startup_info
  {
  /* Not referenced by the __libc_start_main shown in this listing.
     NOTE(review): presumably a small-data-area base -- confirm.  */
  void *sda_base;
  /* Program entry; called as main (argc, argv, __environ, auxvec) and its
     return value is passed to exit.  */
  int (*main) (int, char **, char **, void *);
  /* Program initializer; when non-null it is called with the same four
     arguments before main.  */
  int (*init) (int, char **, char **, void *);
  /* Program finalizer; when non-null it is registered with atexit.  */
  void (*fini) (void);
  };
  /* Set up the C library state, register the finalizers, run the program's
     initializer and finally call the program's main function.

     ARGC, ARGV, ENVP, AUXVEC  arguments as supplied by the caller; all four
                     are recomputed from STACK_ON_ENTRY when the word it
                     points at is not NULL.
     RTLD_FINI       finalizer of the dynamic linker, or NULL; it is also
                     forced to NULL when the arguments are recomputed.
     STINFO          supplies the main, init and fini function pointers;
                     init and fini may be NULL.
     STACK_ON_ENTRY  the stack pointer as it was at process entry.

     This function does not return: the result of main is passed to exit.  */
  int
  __libc_start_main (int argc, char **argv, char **envp,
                     void *auxvec, void (*rtld_fini) (void),
                     struct startup_info *stinfo,
                     char **stack_on_entry)
  {
    /* the PPC SVR4 ABI says that the top thing on the stack will
       be a NULL pointer, so if not we assume that we're being called
       as a statically-linked program by Linux... */
    if (*stack_on_entry != NULL)
      {
        char **cursor;

        /* ...in which case, we have argc as the top thing on the
           stack, followed by argv (NULL-terminated), envp (likewise),
           and the auxiliary vector. */
        argc = *(int *) stack_on_entry;
        argv = stack_on_entry + 1;
        envp = argv + argc + 1;

        /* Walk envp with a typed pointer so that every step advances by
           one pointer slot.  Incrementing the `void *' auxvec directly
           would advance by a single byte (GNU void-pointer arithmetic).  */
        cursor = envp;
        while (*cursor != NULL)
          ++cursor;
        /* The auxiliary vector starts right after envp's terminating NULL.  */
        auxvec = cursor + 1;

        rtld_fini = NULL;
      }

    /* Store something that has some relationship to the end of the
       stack, for backtraces. This variable should be thread-specific. */
    __libc_stack_end = stack_on_entry + 4;

    /* Set the global _environ variable correctly. */
    __environ = envp;

    /* Register the destructor of the dynamic linker if there is any. */
    if (rtld_fini != NULL)
      atexit (rtld_fini);

    /* Call the initializer of the libc (an empty function in this listing). */
    __libc_init_first (argc, argv, envp);

    /* Register the destructor of the program, if any. */
    if (stinfo->fini)
      atexit (stinfo->fini);

    /* Call the initializer of the program, if any. */
    if (stinfo->init)
      stinfo->init (argc, argv, __environ, auxvec);

    /* Only now does control reach what is usually called the program's
       entry point: run main and exit with its return value.  */
    exit (stinfo->main (argc, argv, __environ, auxvec));
  }
  /* Initializer of the libc as called from __libc_start_main.
     Empty body: the arguments are accepted and ignored.  */
  void
  __libc_init_first (int argc __attribute__ ((unused)), ...)
  {
  }
  /* Register FUNC as an exit handler.
     Obtains a handler slot from __new_exitfn and fills it in.
     Returns 0 on success, or -1 when no slot could be obtained.  */
  int
  atexit (void (*func) (void))
  {
    struct exit_function *slot = __new_exitfn ();

    if (slot != NULL)
      {
        /* Mark the slot as holding a plain atexit-style handler.  */
        slot->flavor = ef_at;
        slot->func.at = func;
        return 0;
      }

    return -1;
  }
  /* Run initializers for MAP and its dependencies, in inverse dependency
     order (that is, leaf nodes first).

     Each call returns the address of the next initializer function the
     caller should run, or 0 once every object in SEARCHLIST has been
     handled.  Progress between calls is kept in each object's
     l_init_running and l_init_called flags.  */
  ElfW(Addr)
  internal_function
  _dl_init_next (struct r_scope_elem *searchlist)
  {
    unsigned int i;

    /* The search list for symbol lookup is a flat list in top-down
       dependency order, so processing that list from back to front gets us
       breadth-first leaf-to-root order. */
    i = searchlist->r_nlist;
    while (i-- > 0)
      {
        struct link_map *l = searchlist->r_list[i];

        if (l->l_init_called)
          /* This object is all done. */
          continue;

        if (l->l_init_running)
          {
            /* This object's initializer was just running.
               Now mark it as having run, so this object
               will be skipped in the future. */
            l->l_init_running = 0;
            l->l_init_called = 1;
            continue;
          }

        /* An object with an empty name and type lt_executable is skipped
           here and falls through to be marked as done below.  */
        if (l->l_info[DT_INIT]
            && (l->l_name[0] != '\0' || l->l_type != lt_executable))
          {
            /* Run this object's initializer. */
            l->l_init_running = 1;

            /* Print a debug message if wanted. */
            if (_dl_debug_impcalls)
              _dl_debug_message (1, "\ncalling init: ",
                                 l->l_name[0] ? l->l_name : _dl_argv[0],
                                 "\n\n", NULL);

            /* Load address of the object plus the DT_INIT value.  */
            return l->l_addr + l->l_info[DT_INIT]->d_un.d_ptr;
          }

        /* No initializer for this object.
           Mark it so we will skip it in the future. */
        l->l_init_called = 1;
      }

    /* Notify the debugger all new objects are now ready to go. */
    _r_debug.r_state = RT_CONSISTENT;
    _dl_debug_state ();

    return 0;
  }
  在main()之前的程序流程看似有点简单,但真正运行的时候还是比较复杂的
  (自己用GDB跟踪下就知道了),因为一般的程序都需要涉及到PLT,GOT标号的
  重定位。弄清楚这个对ELF尤为重要,以后有机会再补上一篇吧。
  ★ 手动确定程序和动态连接器的入口
  [alert7@redhat62 alert7]$ cat helo.c
  #include <stdio.h>
  /* Print "hello" followed by a newline and exit with status 0. */
  int main(int argc,char **argv)
  {
  printf("hello\n");
  return 0;
  }
  [alert7@redhat62 alert7]$ gcc -o helo helo.c
  [alert7@redhat62 alert7]$ readelf -h helo
  ELF Header:
  Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
  Class: ELF32
  Data: 2's complement, little endian
  Version: 1 (current)
  OS/ABI: UNIX - System V
  ABI Version: 0
  Type: EXEC (Executable file)
  Machine: Intel 80386
  Version: 0x1
  Entry point address: 0x8048320
  Start of program headers: 52 (bytes into file)
  Start of section headers: 8848 (bytes into file)
  Flags: 0x0
  Size of this header: 52 (bytes)
  Size of program headers: 32 (bytes)
  Number of program headers: 6
  Size of section headers: 40 (bytes)
  Number of section headers: 29
  Section header string table index: 26
  在这里我们看到程序的入口为0x8048320,可以看看是否为main函数。
  [alert7@redhat62 alert7]$ gdb -q helo
  (gdb) disass 0x8048320
  Dump of assembler code for function _start:
  0x8048320 <_start>: xor %ebp,%ebp
  0x8048322 <_start+2>: pop %esi
  0x8048323 <_start+3>: mov %esp,%ecx
  0x8048325 <_start+5>: and $0xfffffff8,%esp
  0x8048328 <_start+8>: push %eax
  0x8048329 <_start+9>: push %esp
  0x804832a <_start+10>: push %edx
  0x804832b <_start+11>: push $0x804841c
  0x8048330 <_start+16>: push $0x8048298
  0x8048335 <_start+21>: push %ecx
  0x8048336 <_start+22>: push %esi
  0x8048337 <_start+23>: push $0x80483d0
  0x804833c <_start+28>: call 0x80482f8 <__libc_start_main>
  0x8048341 <_start+33>: hlt
  0x8048342 <_start+34>: nop
  End of assembler dump.
  呵呵,不是main吧,程序的入口是个_start例程。
  再来看动态连接器的入口是多少
  [alert7@redhat62 alert7]$ ldd helo
  libc.so.6 => /lib/libc.so.6 (0x40018000)
  /lib/ld-linux.so.2 => /lib/ld-linux.so.2 (0x40000000)
  动态连接器ld-linux.so.2加载到进程地址空间0x40000000。
  [alert7@redhat62 alert7]$ readelf -h /lib/ld-linux.so.2
  ELF Header:
  Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
  Class: ELF32
  Data: 2's complement, little endian
  Version: 1 (current)
  OS/ABI: UNIX - System V
  ABI Version: 0
  Type: DYN (Shared object file)
  Machine: Intel 80386
  Version: 0x1
  Entry point address: 0x1990
  Start of program headers: 52 (bytes into file)
  Start of section headers: 328916 (bytes into file)
  Flags: 0x0
  Size of this header: 52 (bytes)
  Size of program headers: 32 (bytes)
  Number of program headers: 3
  Size of section headers: 40 (bytes)
  Number of section headers: 23
  Section header string table index: 20
  共享object入口地址为0x1990。加上整个ld-linux.so.2被加载到进程地址空间0x40000000。
  那么动态连接器的入口地址为0x1990+0x40000000=0x40001990。
  用户空间执行的第一条指令地址就是0x40001990,即上面#define RTLD_START的开始。

本文转自
http://www.xfocus.net/articles/200109/269.html  
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值