查看elf结构中函数的偏移量

目标

实现目标:查看某个readline函数在/bin/bash中的偏移量。

方法一:使用readelf命令,从符号表中查看偏移量。可以看到偏移量为00000000000b8520

➜  readelf --symbols /bin/bash | grep readline
882: 00000000000b8520   154 FUNC    GLOBAL DEFAULT   16 readline

2022/1/6:借助popen函数,使用代码,实现方法一。代码见附录。

方法二:使用libelf解析elf文件,从符号表中查看偏移量。可以看到偏移量为754976=0Xb8520代码来源uprobe_helpers.c。本篇运行代码见仓库,或者本篇附录。

make
➜  ./get_elf_func_offset 
754976

背景介绍

参考一:计算机那些事(4)——ELF文件结构ELF文件格式简介readelf命令

参考二:elf – FreeBSD Manual Pageself(5) — Linux manual page

参考三:elfutils – source code

在这里插入图片描述


代码分析

头文件中需要关注的是<gelf.h>。该头文件定义了基本的ELF 类型,结构和宏。<gelf.h>包含了<libelf.h>,其为libelf的接口。

#include <stdio.h>
#include <gelf.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>

宏定义了一个将相关信息输出到错误输出。

#define warn(...) fprintf(stderr, __VA_ARGS__)

打开关闭elf文件

elf相关操作的第一步是调用elf_version()函数 – retrieve or set ELF library operating version。

elf_begin()函数用以打开ELF 文件。ELF_C_READ表示打开方式为只读。

elf_kind()函数用来检查打开的ELF文件类型。ELF_K_ELF表示为ELF文件。

/*
 * Opens an elf at `path` of kind ELF_K_ELF.  Returns NULL on failure.  On
 * success, close with close_elf(e, fd_close).
 */
Elf *open_elf(const char *path, int *fd_close)
{
	int fd;
	Elf *e;

	if (elf_version(EV_CURRENT) == EV_NONE) {
		warn("elf init failed\n");
		return NULL;
	}
	fd = open(path, O_RDONLY);
	if (fd < 0) {
		warn("Could not open %s\n", path);
		return NULL;
	}
	e = elf_begin(fd, ELF_C_READ, NULL);
	if (!e) {
		warn("elf_begin failed: %s\n", elf_errmsg(-1));
		close(fd);
		return NULL;
	}
	if (elf_kind(e) != ELF_K_ELF) {
		warn("elf kind %d is not ELF_K_ELF\n", elf_kind(e));
		elf_end(e);
		close(fd);
		return NULL;
	}
	*fd_close = fd;
	return e;
}

elf_end()函数,释放ELF descriptor。

void close_elf(Elf *e, int fd_close)
{
	elf_end(e);
	close(fd_close);
}

获取函数在elf中的偏移量

思路:遍历sections以找到符号表 – 借助string table,从符号表中找到偏移量 – 检查是否合法。

获取elf文件头

gelf_getehdr()函数从Elf文件描述符e中提取出文件头信息。

	Elf *e;
	GElf_Ehdr ehdr;
	e = open_elf(path, &fd);

	if (!gelf_getehdr(e, &ehdr))
		goto out;

elf文件头信息的存储结构体GElf_Ehdr如下所示:

typedef struct
{
  unsigned char	e_ident[EI_NIDENT];	/* Magic number and other info */
  Elf64_Half	e_type;			/* Object file type */
  Elf64_Half	e_machine;		/* Architecture */
  Elf64_Word	e_version;		/* Object file version */
  Elf64_Addr	e_entry;		/* Entry point virtual address */
  Elf64_Off	e_phoff;		/* Program header table file offset */
  Elf64_Off	e_shoff;		/* Section header table file offset */
  Elf64_Word	e_flags;		/* Processor-specific flags */
  Elf64_Half	e_ehsize;		/* ELF header size in bytes */
  Elf64_Half	e_phentsize;		/* Program header table entry size */
  Elf64_Half	e_phnum;		/* Program header table entry count */
  Elf64_Half	e_shentsize;		/* Section header table entry size */
  Elf64_Half	e_shnum;		/* Section header table entry count */
  Elf64_Half	e_shstrndx;		/* Section header string table index */
} Elf64_Ehdr;

文件头信息使用如下面命令查看:

➜ readelf -h /bin/bash
ELF 头:
  Magic:   7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 
  类别:                              ELF64
  数据:                              2 补码,小端序 (little endian)
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI 版本:                          0
  类型:                              DYN (共享目标文件)
  系统架构:                          Advanced Micro Devices X86-64
  版本:                              0x1
  入口点地址:               0x30430
  程序头起点:          64 (bytes into file)
  Start of section headers:          1181528 (bytes into file)
  标志:             0x0
  Size of this header:               64 (bytes)
  Size of program headers:           56 (bytes)
  Number of program headers:         13
  Size of section headers:           64 (bytes)
  Number of section headers:         30
  Section header string table index: 29

其中,e_shstrndx就表示.shstrtab在段表中的下标。.shstrtab中主要存储的是段的名称。

可以使用如下命令查看/bin/bash.shstrtab在段表中的下标:

readelf --section-details /bin/bash
...
  [29] .shstrtab
       STRTAB           0000000000000000  0000000000120638  0
       000000000000011d 0000000000000000  0                 1
       [0000000000000000]: 

读取.shstrtab段下标

elf_getshdrstrndx()函数提取段名字符串(section name string table)所在下标。

size_t shstrndx;
if (elf_getshdrstrndx(e, &shstrndx) != 0)
		goto out;

遍历sections->section头信息->找到符号表类型的section->遍历section中(data)->遍历data->从string table中查找符号

elf_nextscn()函数,以迭代的方式取得指向section descriptor的指针。

gelf_getshdr()函数,提取section头信息。头信息的结构体如下:

typedef struct
{
  Elf64_Word	sh_name;		/* Section name (string tbl index) */
  Elf64_Word	sh_type;		/* Section type */
  Elf64_Xword	sh_flags;		/* Section flags */
  Elf64_Addr	sh_addr;		/* Section virtual addr at execution */
  Elf64_Off	sh_offset;		/* Section file offset */
  Elf64_Xword	sh_size;		/* Section size in bytes */
  Elf64_Word	sh_link;		/* Link to another section */
  Elf64_Word	sh_info;		/* Additional section information */
  Elf64_Xword	sh_addralign;		/* Section alignment */
  Elf64_Xword	sh_entsize;		/* Entry size if section holds table */
} Elf64_Shdr;

查找shdr->sh_type类型为SHT_SYMTAB符号表,或SHT_DYNSYM动态符号表。符号表存储在.symtab节中,是一个ElfN_Sym的数组,保存了符号信息。动态符号表存储在.dynsym节,保存在text段中。其保存了从共享库导入的动态符号表。.dynsym保存了引用来自外部文件符号的全局符号。如printf库函数。.dynsym保存的符号是.symtab所保存符合的子集,.symtab中还保存了可执行文件的本地符号。如全局变量,代码中定义的本地函数等。

elf_getdata()函数,迭代遍历section中的data。

gelf_getsym()函数,从data中提取符号信息(symbol information)。符号信息的结构体如下:

typedef struct
{
  Elf64_Word	st_name;		/* Symbol name (string tbl index) */
  unsigned char	st_info;		/* Symbol type and binding */
  unsigned char st_other;		/* Symbol visibility */
  Elf64_Section	st_shndx;		/* Section index */
  Elf64_Addr	st_value;		/* Symbol value */
  Elf64_Xword	st_size;		/* Symbol size */
} Elf64_Sym;

可以看到,符号的名称存储在string table的sym->st_name下标位置。且当section类型为符号表类型时,shdr->sh_link指向string table。

elf_strptr()函数,从string table中,提取string pointer。GELF_ST_TYPE用以提取符号的类型,判断是否为STT_FUNC(The symbol is associated with a function or other executable code.) sym->st_value中存储着code的偏移量。

	Elf *e;
	Elf_Scn *scn;
	Elf_Data *data;
	GElf_Ehdr ehdr;
	GElf_Shdr shdr[1];
	GElf_Phdr phdr;
	GElf_Sym sym[1];
	size_t shstrndx, nhdrs;
	char *n;

	scn = NULL;
	while ((scn = elf_nextscn(e, scn))) {
		if (!gelf_getshdr(scn, shdr))
			continue;
		if (!(shdr->sh_type == SHT_SYMTAB || shdr->sh_type == SHT_DYNSYM))
			continue;
		data = NULL;
		while ((data = elf_getdata(scn, data))) {
			for (i = 0; gelf_getsym(data, i, sym); i++) {
				n = elf_strptr(e, shdr->sh_link, sym->st_name);
				if (!n)
					continue;
				if (GELF_ST_TYPE(sym->st_info) != STT_FUNC)
					continue;
				if (!strcmp(n, func)) {
					ret = sym->st_value;
					goto check;
				}
			}
		}
	}

可以使用如下命令查看sections信息:

readelf --section-details /bin/bash

可以使用如下命令查看sections header信息:

readelf --section-headers /bin/bash
There are 30 section headers, starting at offset 0x120758:

节头:
  [] 名称              类型             地址              偏移量
       大小              全体大小          旗标   链接   信息   对齐
  [ 0]                   NULL             0000000000000000  00000000
       0000000000000000  0000000000000000           0     0     0
  [ 1] .interp           PROGBITS         0000000000000318  00000318
       000000000000001c  0000000000000000   A       0     0     1
  [ 2] .note.gnu.propert NOTE             0000000000000338  00000338
       0000000000000020  0000000000000000   A       0     0     8
  [ 3] .note.gnu.build-i NOTE             0000000000000358  00000358
       0000000000000024  0000000000000000   A       0     0     4
  [ 4] .note.ABI-tag     NOTE             000000000000037c  0000037c
       0000000000000020  0000000000000000   A       0     0     4
  [ 5] .gnu.hash         GNU_HASH         00000000000003a0  000003a0
       0000000000004aac  0000000000000000   A       6     0     8
  [ 6] .dynsym           DYNSYM           0000000000004e50  00004e50
       000000000000e418  0000000000000018   A       7     1     8
  [ 7] .dynstr           STRTAB           0000000000013268  00013268
       0000000000009740  0000000000000000   A       0     0     1
  [ 8] .gnu.version      VERSYM           000000000001c9a8  0001c9a8
       0000000000001302  0000000000000002   A       6     0     2
  [ 9] .gnu.version_r    VERNEED          000000000001dcb0  0001dcb0
       00000000000000d0  0000000000000000   A       7     3     8
  [10] .rela.dyn         RELA             000000000001dd80  0001dd80
       000000000000dc80  0000000000000018   A       6     0     8
  [11] .rela.plt         RELA             000000000002ba00  0002ba00
       0000000000001470  0000000000000018  AI       6    25     8
  [12] .init             PROGBITS         000000000002d000  0002d000
       000000000000001b  0000000000000000  AX       0     0     4
  [13] .plt              PROGBITS         000000000002d020  0002d020
       0000000000000db0  0000000000000010  AX       0     0     16
  [14] .plt.got          PROGBITS         000000000002ddd0  0002ddd0
       0000000000000030  0000000000000010  AX       0     0     16
  [15] .plt.sec          PROGBITS         000000000002de00  0002de00
       0000000000000da0  0000000000000010  AX       0     0     16
  [16] .text             PROGBITS         000000000002eba0  0002eba0
       00000000000aeb55  0000000000000000  AX       0     0     16
  [17] .fini             PROGBITS         00000000000dd6f8  000dd6f8
       000000000000000d  0000000000000000  AX       0     0     4
  [18] .rodata           PROGBITS         00000000000de000  000de000
       000000000001a094  0000000000000000   A       0     0     32
  [19] .eh_frame_hdr     PROGBITS         00000000000f8094  000f8094
       00000000000044dc  0000000000000000   A       0     0     4
  [20] .eh_frame         PROGBITS         00000000000fc570  000fc570
       0000000000017c28  0000000000000000   A       0     0     8
  [21] .init_array       INIT_ARRAY       0000000000115cf0  00114cf0
       0000000000000008  0000000000000008  WA       0     0     8
  [22] .fini_array       FINI_ARRAY       0000000000115cf8  00114cf8
       0000000000000008  0000000000000008  WA       0     0     8
  [23] .data.rel.ro      PROGBITS         0000000000115d00  00114d00
       00000000000028f0  0000000000000000  WA       0     0     32
  [24] .dynamic          DYNAMIC          00000000001185f0  001175f0
       0000000000000210  0000000000000010  WA       7     0     8
  [25] .got              PROGBITS         0000000000118800  00117800
       00000000000007e8  0000000000000008  WA       0     0     8
  [26] .data             PROGBITS         0000000000119000  00118000
       0000000000008604  0000000000000000  WA       0     0     32
  [27] .bss              NOBITS           0000000000121620  00120604
       0000000000009c78  0000000000000000  WA       0     0     32
  [28] .gnu_debuglink    PROGBITS         0000000000000000  00120604
       0000000000000034  0000000000000000           0     0     4
  [29] .shstrtab         STRTAB           0000000000000000  00120638
       000000000000011d  0000000000000000           0     0     1
Key to Flags:
  W (write), A (alloc), X (execute), M (merge), S (strings), I (info),
  L (link order), O (extra OS processing required), G (group), T (TLS),
  C (compressed), x (unknown), o (OS specific), E (exclude),
  l (large), p (processor specific)

下面对其中的一些进行简单的介绍:

  • .interp保存了解释器的文件名,这是一个ASCII字符串
  • .data保存初始化的数据,这是普通程序数据一部分,可以再程序运行时修改
  • .rodata保存了只读数据,可以读取但不能修改。例如,编译器将出现在printf语句中的所有静态字符串封装到该节
  • .init和.fini保存了进程初始化和结束所用的代码,这两个节通常都是由编译器自动添加
  • .gnu.hash是一个散列表,允许在不对全表元素进行线性搜索的情况下,快速访问所有的符号表项

可以使用下面命令查看符号信息:

➜  readelf --symbols /bin/bash | grep readline
...
882: 00000000000b8520   154 FUNC    GLOBAL DEFAULT   16 readline
...

遍历program headers

最后需要检查下该偏移位置在程序段中。

elf_getphdrnum()函数返回program headers的数量。使用gelf_getphdr()函数,获取程序头。检查类型为PT_LOAD(a loadable segment),flag为PF_X(An executable segment)。偏移量位于program header中间。

check:
	if (ehdr.e_type == ET_EXEC || ehdr.e_type == ET_DYN) {
		if (elf_getphdrnum(e, &nhdrs) != 0) {
			ret = -1;
			goto out;
		}
		for (i = 0; i < (int)nhdrs; i++) {
			if (!gelf_getphdr(e, i, &phdr))
				continue;
			if (phdr.p_type != PT_LOAD || !(phdr.p_flags & PF_X))
				continue;
			if (phdr.p_vaddr <= ret && ret < (phdr.p_vaddr + phdr.p_memsz)) {
				ret = ret - phdr.p_vaddr + phdr.p_offset;
				goto out;
			}
		}
		ret = -1;
	}
out:
	close_elf(e, fd);
	return ret;
}

可以使用下面命令查看program headers

➜  ~ readelf --program-headers   /bin/bash  

Elf 文件类型为 DYN (共享目标文件)
Entry point 0x30430
There are 13 program headers, starting at offset 64

程序头:
  Type           Offset             VirtAddr           PhysAddr
                 FileSiz            MemSiz              Flags  Align
  PHDR           0x0000000000000040 0x0000000000000040 0x0000000000000040
                 0x00000000000002d8 0x00000000000002d8  R      0x8
  INTERP         0x0000000000000318 0x0000000000000318 0x0000000000000318
                 0x000000000000001c 0x000000000000001c  R      0x1
      [Requesting program interpreter: /lib64/ld-linux-x86-64.so.2]
  LOAD           0x0000000000000000 0x0000000000000000 0x0000000000000000
                 0x000000000002ce70 0x000000000002ce70  R      0x1000
  LOAD           0x000000000002d000 0x000000000002d000 0x000000000002d000
                 0x00000000000b0705 0x00000000000b0705  R E    0x1000
  LOAD           0x00000000000de000 0x00000000000de000 0x00000000000de000
                 0x0000000000036198 0x0000000000036198  R      0x1000
  LOAD           0x0000000000114cf0 0x0000000000115cf0 0x0000000000115cf0
                 0x000000000000b914 0x00000000000155a8  RW     0x1000
  DYNAMIC        0x00000000001175f0 0x00000000001185f0 0x00000000001185f0
                 0x0000000000000210 0x0000000000000210  RW     0x8
  NOTE           0x0000000000000338 0x0000000000000338 0x0000000000000338
                 0x0000000000000020 0x0000000000000020  R      0x8
  NOTE           0x0000000000000358 0x0000000000000358 0x0000000000000358
                 0x0000000000000044 0x0000000000000044  R      0x4
  GNU_PROPERTY   0x0000000000000338 0x0000000000000338 0x0000000000000338
                 0x0000000000000020 0x0000000000000020  R      0x8
  GNU_EH_FRAME   0x00000000000f8094 0x00000000000f8094 0x00000000000f8094
                 0x00000000000044dc 0x00000000000044dc  R      0x4
  GNU_STACK      0x0000000000000000 0x0000000000000000 0x0000000000000000
                 0x0000000000000000 0x0000000000000000  RW     0x10
  GNU_RELRO      0x0000000000114cf0 0x0000000000115cf0 0x0000000000115cf0
                 0x0000000000003310 0x0000000000003310  R      0x1

附录

查看elf结构中函数偏移量的完整代码

// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/* Copyright (c) 2021 Google LLC. */
#include <stdio.h>
#include <gelf.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>

#define warn(...) fprintf(stderr, __VA_ARGS__)
/*
 * Opens an elf at `path` of kind ELF_K_ELF.  Returns NULL on failure.  On
 * success, close with close_elf(e, fd_close).
 */
Elf *open_elf(const char *path, int *fd_close)
{
	int fd;
	Elf *e;

	if (elf_version(EV_CURRENT) == EV_NONE) {
		warn("elf init failed\n");
		return NULL;
	}
	fd = open(path, O_RDONLY);
	if (fd < 0) {
		warn("Could not open %s\n", path);
		return NULL;
	}
	e = elf_begin(fd, ELF_C_READ, NULL);
	if (!e) {
		warn("elf_begin failed: %s\n", elf_errmsg(-1));
		close(fd);
		return NULL;
	}
	if (elf_kind(e) != ELF_K_ELF) {
		warn("elf kind %d is not ELF_K_ELF\n", elf_kind(e));
		elf_end(e);
		close(fd);
		return NULL;
	}
	*fd_close = fd;
	return e;
}


void close_elf(Elf *e, int fd_close)
{
	elf_end(e);
	close(fd_close);
}


/* Returns the offset of a function in the elf file `path`, or -1 on failure. */
off_t get_elf_func_offset(const char *path, const char *func)
{
	off_t ret = -1;
	int i, fd = -1;
	Elf *e;
	Elf_Scn *scn;
	Elf_Data *data;
	GElf_Ehdr ehdr;
	GElf_Shdr shdr[1];
	GElf_Phdr phdr;
	GElf_Sym sym[1];
	size_t shstrndx, nhdrs;
	char *n;

	e = open_elf(path, &fd);

	if (!gelf_getehdr(e, &ehdr))
		goto out;

	if (elf_getshdrstrndx(e, &shstrndx) != 0)
		goto out;

	scn = NULL;
	while ((scn = elf_nextscn(e, scn))) {
		if (!gelf_getshdr(scn, shdr))
			continue;
		if (!(shdr->sh_type == SHT_SYMTAB || shdr->sh_type == SHT_DYNSYM))
			continue;
		data = NULL;
		while ((data = elf_getdata(scn, data))) {
			for (i = 0; gelf_getsym(data, i, sym); i++) {
				n = elf_strptr(e, shdr->sh_link, sym->st_name);
				if (!n)
					continue;
				if (GELF_ST_TYPE(sym->st_info) != STT_FUNC)
					continue;
				if (!strcmp(n, func)) {
					ret = sym->st_value;
					goto check;
				}
			}
		}
	}

check:
	if (ehdr.e_type == ET_EXEC || ehdr.e_type == ET_DYN) {
		if (elf_getphdrnum(e, &nhdrs) != 0) {
			ret = -1;
			goto out;
		}
		for (i = 0; i < (int)nhdrs; i++) {
			if (!gelf_getphdr(e, i, &phdr))
				continue;
			if (phdr.p_type != PT_LOAD || !(phdr.p_flags & PF_X))
				continue;
			if (phdr.p_vaddr <= ret && ret < (phdr.p_vaddr + phdr.p_memsz)) {
				ret = ret - phdr.p_vaddr + phdr.p_offset;
				goto out;
			}
		}
		ret = -1;
	}
out:
	close_elf(e, fd);
	return ret;
}

int main(void){
    const char *bash_path = "/bin/bash";
    off_t func_off;
    func_off = get_elf_func_offset(bash_path, "readline");
    if(func_off != -1)
        printf("%ld\n",func_off);
    return 0;
}

编译过程如下:

.PHONY:all

all:get_elf_func_offset

get_elf_func_offset:get_elf_func_offset.c
	gcc  $^ -lelf -o $@

.PHONY:clean

clean:
	rm -rf get_elf_func_offset

popen+readelf实现相同功能

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <string.h>

// What are the meanings of the columns of the symbol table displayed by readelf?:
// https://stackoverflow.com/questions/3065535/what-are-the-meanings-of-the-columns-of-the-symbol-table-displayed-by-readelf
struct elf_sym {
    int sym_num;
    off_t offset;
    int size;
    char type[10];
    char bind[10];
    char visibility[10];
    int sec_num;
    char name[30];
};

off_t get_elf_func_offset_by_readelf(const char *path, const char *func){
    char cmomand[100];
    FILE *fp;
    sprintf(cmomand,"readelf --symbols %s | grep %s",path,func);
    if((fp = popen(cmomand,"r")) == NULL){
        fprintf(stderr,"popen fail...");
        exit(1);
    }

    char *line=NULL;
    size_t line_sz = 0;
    off_t result=-1;
    while(getline(&line,&line_sz,fp) >= 0){
        struct elf_sym sym;
        if(sscanf(line, "%d: %16lx %d %s %s %s %d %20s",&sym.sym_num,&sym.offset,&sym.size,sym.type,sym.bind,sym.visibility,&sym.sec_num,sym.name) < 1)
            continue;
        if(strcmp(sym.name,func) == 0){
            result = sym.offset;
            break;
        }
    }

    pclose(fp);
    return result;
}

int main(void){
    char *path = "/bin/bash";
    char *func = "readline";
    off_t result = get_elf_func_offset_by_readelf(path,func);
    printf("%ld\n",result);
}

编译过程如下:

APPS = get_elf_func_offset  get_elf_func_offset_by_readelf

.PHONY:all

all:$(APPS)

$(APPS): %: %.c
	gcc  $< -lelf -o $@

.PHONY:clean

clean:
	rm -rf $(APPS)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

da1234cao

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值