Linux 采用 ELF 作为其可链接可执行文件的格式,并提供诸如 nm 之类的工具进行 ELF 符号表的解析。比如,一个简单的 Hello World 程序:
#include <iostream>
using namespace std;
// Minimal example program: writes a greeting line to standard output.
int main()
{
    std::cout << "Hello World!" << std::endl;
    return 0;
}
我们可以利用 nm 命令查看 Hello World 程序依赖的动态链接符号,以及各个符号所对应的版本:
[Linux]$ g++ helloworld.cc
[Linux]$ nm -g a.out
0000000000400978 R _IO_stdin_used
w _Jv_RegisterClasses
U _ZNSolsEPFRSoS_E@@GLIBCXX_3.4
U _ZNSt8ios_base4InitC1Ev@@GLIBCXX_3.4
U _ZNSt8ios_base4InitD1Ev@@GLIBCXX_3.4
0000000000600d20 B _ZSt4cout@@GLIBCXX_3.4
U _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_@@GLIBCXX_3.4
U _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@@GLIBCXX_3.4
0000000000600ad0 D __DTOR_END__
0000000000600d04 A __bss_start
U __cxa_atexit@@GLIBC_2.2.5
0000000000600d00 D __data_start
0000000000400980 R __dso_handle
w __gmon_start__
U __gxx_personality_v0@@CXXABI_1.3
0000000000400890 T __libc_csu_fini
00000000004008a0 T __libc_csu_init
U __libc_start_main@@GLIBC_2.2.5
0000000000600d04 A _edata
0000000000600e48 A _end
0000000000400968 T _fini
0000000000400680 T _init
0000000000400730 T _start
0000000000600d00 W data_start
0000000000400814 T main
那么,在程序运行过程中,有没有办法获取当前进程的符号信息和符号版本信息,达到类似于 nm 工具的效果呢?Glibc 提供了一个名为 dl_iterate_phdr 的函数,它会依次遍历当前进程已加载的各个 ELF 对象(可执行文件本身及其依赖的动态库),并把每个对象的程序头(即 Program Header,后文简称“phdr”)交给回调函数处理。phdr 驻留于内存,因此可以在运行时使用。笔者参考了 ELF man page 以及一些大神的文章后,找到了正确的方法。我们修改一下 helloworld.cc:
#include <link.h>
#include <string>
#include <map>
#include <iostream>
using namespace std;
/* Derives the number of dynamic symbol table entries from a DT_GNU_HASH table.
 *
 * The GNU hash table does not record the symbol count, so it is reconstructed:
 * take the largest symbol index stored in any bucket, then follow that
 * bucket's chain until the entry whose low bit marks the end of the chain.
 *
 * gnuHashAddress: in-memory address of the GNU hash table.
 * Returns one past the index of the last hashed symbol, or the table's
 * symoffset when no bucket holds an index >= symoffset.
 *
 * See https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ and
 * https://sourceware.org/ml/binutils/2006-10/msg00377.html
 */
static uint32_t GetNumberOfSymbolsFromGnuHash(Elf64_Addr gnuHashAddress)
{
    typedef struct {
        uint32_t nbuckets;
        uint32_t symoffset;
        uint32_t bloom_size;
        uint32_t bloom_shift;
    } Header;

    /* Go through uintptr_t so the integer-to-pointer conversion is also
     * well-formed where pointers are narrower than Elf64_Addr. */
    const char *tableBase =
        reinterpret_cast<const char*>(static_cast<uintptr_t>(gnuHashAddress));
    const Header *header = reinterpret_cast<const Header*>(tableBase);

    /* Bloom filter words are native-word sized (ElfW(Addr)), not always
     * 64 bits; the bucket array starts right after them. */
    const uint32_t *buckets = reinterpret_cast<const uint32_t*>(
        tableBase + sizeof(Header) + sizeof(ElfW(Addr)) * header->bloom_size);

    /* Locate the chain that handles the largest index bucket. */
    uint32_t lastSymbol = 0;
    for (uint32_t i = 0; i < header->nbuckets; ++i) {
        if (lastSymbol < buckets[i]) {
            lastSymbol = buckets[i];
        }
    }
    if (lastSymbol < header->symoffset) {
        /* No bucket references a hashed symbol. */
        return header->symoffset;
    }

    /* The chain array follows the buckets; chain[k] belongs to symbol
     * (symoffset + k). Walk until the end-of-chain bit is seen. */
    const uint32_t *chain = buckets + header->nbuckets;
    for (;;) {
        const uint32_t chainEntry = chain[lastSymbol - header->symoffset];
        ++lastSymbol;
        if (chainEntry & 1) {
            break;
        }
    }
    return lastSymbol;
}
/* Converts an address taken from a dynamic-section entry into a pointer.
 *
 * Returns a null pointer when value is 0 (tag absent). A value below the
 * object's load base is treated as still relative and gets the base added;
 * any other value is used as-is.
 *
 * NOTE(review): this assumes the loader rebases only some entries in place
 * (glibc appears to rebase DT_STRTAB/DT_SYMTAB/DT_VERSYM but not DT_VERNEED);
 * confirm against the target libc. For a non-PIE executable the base is 0 and
 * nothing is changed.
 */
static const char* ResolveDynamicPointer(const struct dl_phdr_info *info,
                                         ElfW(Addr) value)
{
    if (value == 0) {
        return 0;
    }
    const ElfW(Addr) resolved =
        value < info->dlpi_addr ? value + info->dlpi_addr : value;
    return reinterpret_cast<const char*>(static_cast<uintptr_t>(resolved));
}

/* Callback for dl_iterate_phdr.
 * Is called by dl_iterate_phdr for every loaded object until one call of this
 * function returns something other than 0.
 *
 * For each PT_DYNAMIC segment of the object it first records all relevant
 * dynamic entries, then (if DT_VERSYM is present) prints the version
 * requirements and every dynamic symbol with its version, and stores each
 * symbol name -> version pair in the caller's map.
 *
 * info:                description of the loaded object being visited.
 * info_size:           size of *info (unused).
 * symbol_names_vector: must point to a std::map<std::string, std::string>
 *                      that receives the name -> version pairs.
 * Returns 1, which stops the iteration after the first object.
 */
int retrieve_symbolnames(struct dl_phdr_info *info,
                         size_t info_size,
                         void *symbol_names_vector)
{
    (void)info_size;

    /* The void pointer (3rd argument) is a map<string, string> in this
     * example -> cast it to make it usable. */
    std::map<std::string, std::string>* symbol_names =
        static_cast<std::map<std::string, std::string>*>(symbol_names_vector);

    /* Iterate over all program headers of the current object. */
    for (size_t header_index = 0; header_index < info->dlpi_phnum; header_index++) {
        /* Further processing is only needed for the dynamic section. */
        if (info->dlpi_phdr[header_index].p_type != PT_DYNAMIC) {
            continue;
        }

        /* ElfW is a macro that creates proper typenames for the used system
         * architecture (e.g. on a 32 bit system, ElfW(Dyn) becomes Elf32_Dyn).
         * The dynamic section lives at load base + segment virtual address. */
        const ElfW(Dyn)* dyn_begin = reinterpret_cast<const ElfW(Dyn)*>(
            static_cast<uintptr_t>(info->dlpi_addr +
                                   info->dlpi_phdr[header_index].p_vaddr));

        /* Pass 1: record every entry of interest. Nothing is dereferenced
         * here, so the result does not depend on the order of the entries. */
        ElfW(Addr) hash_addr = 0;
        ElfW(Addr) gnu_hash_addr = 0;
        ElfW(Addr) strtab_addr = 0;
        ElfW(Addr) symtab_addr = 0;
        ElfW(Addr) verneed_addr = 0;
        ElfW(Addr) versym_addr = 0;
        ElfW(Word) verneed_num = 0;
        bool has_verneed_num = false;
        bool has_versym = false;

        for (const ElfW(Dyn)* dyn = dyn_begin; dyn->d_tag != DT_NULL; ++dyn) {
            switch (dyn->d_tag) {
            case DT_HASH:       hash_addr = dyn->d_un.d_ptr; break;
            case DT_GNU_HASH:   gnu_hash_addr = dyn->d_un.d_ptr; break;
            case DT_STRTAB:     strtab_addr = dyn->d_un.d_ptr; break;
            case DT_SYMTAB:     symtab_addr = dyn->d_un.d_ptr; break;
            case DT_VERNEED:    verneed_addr = dyn->d_un.d_ptr; break;
            case DT_VERNEEDNUM:
                verneed_num = static_cast<ElfW(Word)>(dyn->d_un.d_val);
                has_verneed_num = true;
                break;
            case DT_VERSYM:
                versym_addr = dyn->d_un.d_ptr;
                has_versym = true;
                break;
            default:
                break;
            }
        }

        if (has_verneed_num) {
            std::cout << "DT_VERNEEDNUM: " << verneed_num << std::endl;
        }
        if (!has_versym) {
            continue;
        }

        const char* strtab = ResolveDynamicPointer(info, strtab_addr);
        const ElfW(Sym)* symtab = reinterpret_cast<const ElfW(Sym)*>(
            ResolveDynamicPointer(info, symtab_addr));
        const ElfW(Versym)* versym = reinterpret_cast<const ElfW(Versym)*>(
            ResolveDynamicPointer(info, versym_addr));
        if (strtab == 0 || symtab == 0 || versym == 0) {
            /* Names cannot be resolved without these tables. */
            continue;
        }

        /* Symbol count: the 2nd word of DT_HASH is the number of symbols;
         * fall back to DT_GNU_HASH when that yields nothing. */
        ElfW(Word) sym_cnt = 0;
        const char* hash = ResolveDynamicPointer(info, hash_addr);
        if (hash != 0) {
            sym_cnt = reinterpret_cast<const ElfW(Word)*>(hash)[1];
        }
        const char* gnu_hash = ResolveDynamicPointer(info, gnu_hash_addr);
        if (sym_cnt == 0 && gnu_hash != 0) {
            sym_cnt = GetNumberOfSymbolsFromGnuHash(
                reinterpret_cast<uintptr_t>(gnu_hash));
        }

        /* Pass 2a: build version index -> version name from DT_VERNEED.
         * Records are linked by byte offsets (vn_next / vna_next). */
        std::map<unsigned, std::string> vermap;
        const char* vn_bytes = ResolveDynamicPointer(info, verneed_addr);
        for (ElfW(Word) i = 0; vn_bytes != 0 && i < verneed_num; i++) {
            const ElfW(Verneed)* vn = reinterpret_cast<const ElfW(Verneed)*>(vn_bytes);
            std::cout << "DT_VERNEED " << i << ": " << reinterpret_cast<unsigned long>(vn)
                      << " vn_version " << vn->vn_version
                      << " vn_cnt " << vn->vn_cnt
                      << " vn_file " << std::string(&strtab[vn->vn_file])
                      << " vn_aux " << vn->vn_aux
                      << " vn_next " << vn->vn_next
                      << std::endl;

            const char* vna_bytes = vn_bytes + vn->vn_aux;
            for (ElfW(Half) j = 0; j < vn->vn_cnt; j++) {
                const ElfW(Vernaux)* vna =
                    reinterpret_cast<const ElfW(Vernaux)*>(vna_bytes);
                const std::string version_name(&strtab[vna->vna_name]);
                std::cout << " aux " << j << ": "
                          << " vna_name " << version_name
                          << " vna_other " << vna->vna_other
                          << std::endl;
                /* No explicit template arguments: make_pair<T1, T2>(...) with
                 * lvalue arguments is ill-formed from C++11 on. */
                vermap.insert(std::make_pair(static_cast<unsigned>(vna->vna_other),
                                             version_name));
                vna_bytes += vna->vna_next;
            }
            vn_bytes += vn->vn_next;
        }

        /* Pass 2b: versym[i] is the version index of symbol i. */
        std::cout << "DT_VERSYM: " << versym_addr << std::endl;
        for (ElfW(Word) sym_index = 0; sym_index < sym_cnt; sym_index++) {
            std::string version;
            if (versym[sym_index] == 0) {
                version = "NONE";
            } else {
                /* Unknown indices keep an empty version string; find() avoids
                 * inserting placeholder entries into vermap. */
                const std::map<unsigned, std::string>::const_iterator found =
                    vermap.find(versym[sym_index]);
                if (found != vermap.end()) {
                    version = found->second;
                }
            }
            /* st_name is an offset into the string table. */
            const char* sym_name = &strtab[symtab[sym_index].st_name];
            std::cout << " symbol " << sym_index
                      << " name: " << sym_name
                      << " version: " << version
                      << std::endl;
            symbol_names->insert(std::make_pair(std::string(sym_name), version));
        }
    }

    /* Returning something != 0 stops further iterations; the original author
     * notes the first entry is the executable itself, which is all that is
     * needed here.
     *
     * If the symbols of all loaded dynamic libs shall be found,
     * the return value has to be changed to 0.
     */
    return 1;
}
/* Entry point: prints a greeting, then runs retrieve_symbolnames through
 * dl_iterate_phdr, handing it a map to fill with symbol name -> version. */
int main()
{
    std::map<std::string, std::string> versionBySymbol;

    std::cout << "Hello World!" << std::endl;
    dl_iterate_phdr(retrieve_symbolnames, &versionBySymbol);

    return 0;
}
编译运行:
[Linux]$ g++ helloworld.cc
[Linux]$ ./a.out
Hello World!
DT_VERNEEDNUM: 3
DT_VERNEED 0: 4196824 vn_version 1 vn_cnt 1 vn_file libgcc_s.so.1 vn_aux 16 vn_next 32
aux 0: vna_name GCC_3.0 vna_other 5
DT_VERNEED 1: 4196856 vn_version 1 vn_cnt 1 vn_file libc.so.6 vn_aux 16 vn_next 32
aux 0: vna_name GLIBC_2.2.5 vna_other 4
DT_VERNEED 2: 4196888 vn_version 1 vn_cnt 2 vn_file libstdc++.so.6 vn_aux 16 vn_next 0
aux 0: vna_name CXXABI_1.3 vna_other 3
aux 1: vna_name GLIBCXX_3.4 vna_other 2
DT_VERSYM: 4196750
symbol 0 name: version: NONE
symbol 1 name: _ZNSsC1Ev version: GLIBCXX_3.4
symbol 2 name: _ZSt18_Rb_tree_decrementPKSt18_Rb_tree_node_base version: GLIBCXX_3.4
symbol 3 name: __gmon_start__ version: NONE
symbol 4 name: _Jv_RegisterClasses version: NONE
symbol 5 name: _ZdlPv version: GLIBCXX_3.4
symbol 6 name: __cxa_rethrow version: CXXABI_1.3
symbol 7 name: _ZNSt8ios_base4InitC1Ev version: GLIBCXX_3.4
symbol 8 name: __libc_start_main version: GLIBC_2.2.5
symbol 9 name: _ZSt18_Rb_tree_decrementPSt18_Rb_tree_node_base version: GLIBCXX_3.4
symbol 10 name: _ZNKSs7compareERKSs version: GLIBCXX_3.4
symbol 11 name: _ZNSsC1ERKSs version: GLIBCXX_3.4
symbol 12 name: __cxa_atexit version: GLIBC_2.2.5
symbol 13 name: _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc version: GLIBCXX_3.4
symbol 14 name: _ZNSsD1Ev version: GLIBCXX_3.4
symbol 15 name: _ZNSolsEm version: GLIBCXX_3.4
symbol 16 name: _ZSt18_Rb_tree_incrementPKSt18_Rb_tree_node_base version: GLIBCXX_3.4
symbol 17 name: _ZNSsC1EPKcRKSaIcE version: GLIBCXX_3.4
symbol 18 name: _ZNSolsEj version: GLIBCXX_3.4
symbol 19 name: dl_iterate_phdr version: GLIBC_2.2.5
symbol 20 name: _ZSt29_Rb_tree_insert_and_rebalancebPSt18_Rb_tree_node_baseS0_RS_ version: GLIBCXX_3.4
symbol 21 name: _ZStlsIcSt11char_traitsIcESaIcEERSt13basic_ostreamIT_T0_ES7_RKSbIS4_S5_T1_E version: GLIBCXX_3.4
symbol 22 name: _ZNSaIcED1Ev version: GLIBCXX_3.4
symbol 23 name: pthread_cancel version: NONE
symbol 24 name: _ZNSolsEPFRSoS_E version: GLIBCXX_3.4
symbol 25 name: __cxa_end_catch version: CXXABI_1.3
symbol 26 name: _ZSt17__throw_bad_allocv version: GLIBCXX_3.4
symbol 27 name: __cxa_begin_catch version: CXXABI_1.3
symbol 28 name: _ZNSolsEt version: GLIBCXX_3.4
symbol 29 name: _ZNSaIcEC1Ev version: GLIBCXX_3.4
symbol 30 name: _Znwm version: GLIBCXX_3.4
symbol 31 name: _Unwind_Resume version: GCC_3.0
symbol 32 name: _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_ version: GLIBCXX_3.4
symbol 33 name: _ZNSt8ios_base4InitD1Ev version: GLIBCXX_3.4
symbol 34 name: _ZSt4cout version: GLIBCXX_3.4
symbol 35 name: __gxx_personality_v0 version: CXXABI_1.3
如上,当前进程的可执行文件从其依赖的动态库中引用的符号,以及各个符号所对应的版本信息,都可以在运行时被正确获取了。
注:遍历 ELF phdr 的代码参考了 stackoverflow 上两位大神 Kanalpiroge 和 Andrey Belykh 的实现,原文请参考:https://stackoverflow.com/questions/15779185/how-to-list-on-the-fly-all-the-functions-symbols-available-in-c-code-on-a-linux