VerifyElfHeader
elf加载过程其实最主要的部分就是创建程序的段:
ElfReader::ElfReader(const char* name, int fd, off64_t file_offset, off64_t file_size)
: name_(name), fd_(fd), file_offset_(file_offset), file_size_(file_size),
phdr_num_(0), phdr_mmap_(nullptr), phdr_table_(nullptr), phdr_size_(0),
load_start_(nullptr), load_size_(0), load_bias_(0),
loaded_phdr_(nullptr) {
}
在Android中ElfReader用于加载elf文件,创建程序段,构造函数如上,这里name就是elf文件路径,fd为包含elf格式的文件描述符。 offset则为elf格式在整个文件中的偏移。
加载函数为Load函数。
// Runs the complete load sequence for the ELF image. The stages execute in a
// fixed order and the first one that fails aborts the sequence, so later
// stages never run on top of a failed earlier one.
//
// Returns true only when every stage succeeded.
bool ElfReader::Load(const android_dlextinfo* extinfo) {
  // 1. Read the ELF header from the file.
  if (!ReadElfHeader()) {
    return false;
  }
  // 2. Validate the header that was just read.
  if (!VerifyElfHeader()) {
    return false;
  }
  // 3. Map the program header table.
  if (!ReadProgramHeader()) {
    return false;
  }
  // 4. Reserve address space for the loadable segments.
  if (!ReserveAddressSpace(extinfo)) {
    return false;
  }
  // 5. Map the loadable segments.
  if (!LoadSegments()) {
    return false;
  }
  // 6. Locate the program header table inside the loaded image.
  return FindPhdr();
}
加载过程分为6个步骤:
1、ReadElfHeader() 读取elf头。
2、VerifyElfHeader() 对elf头进行验证。
3、ReadProgramHeader() 读取程序头表。
4、ReserveAddressSpace() 创建加载段的内存空间。
5、LoadSegments() 映射elf到相应的段。
6、FindPhdr() 在已加载的段中定位程序头表:优先使用类型为PT_PHDR的表项(一般是程序头表的第一项);若不存在,则通过文件偏移为0的第一个PT_LOAD段中的ELF头来定位,并以此确认elf文件加载正确。
// Reads the ELF header from fd_ at file_offset_ into header_.
//
// Returns false (after reporting through DL_ERR) when the read fails or when
// fewer than sizeof(header_) bytes are available at that offset.
bool ElfReader::ReadElfHeader() {
  // pread64 takes an explicit offset; TEMP_FAILURE_RETRY wraps the call.
  ssize_t rc = TEMP_FAILURE_RETRY(pread64(fd_, &header_, sizeof(header_), file_offset_));
  if (rc < 0) {
    DL_ERR("can't read file \"%s\": %s", name_, strerror(errno));
    return false;
  }

  // rc is known to be non-negative here, so the conversion to size_t is exact
  // and the comparison is no longer an implicit signed/unsigned one.
  if (static_cast<size_t>(rc) != sizeof(header_)) {
    // rc is an ssize_t, which is exactly what %zd expects; no cast needed.
    DL_ERR("\"%s\" is too small to be an ELF executable: only found %zd bytes", name_, rc);
    return false;
  }
  return true;
}
这个函数主要使用pread64线程安全地读取Elf头,并检查实际读取到的字节数是否等于Elf头的大小。
// Validates the ELF header previously stored in header_.
//
// Checks, in order: magic bytes, ELF class (must match the bitness this code
// was compiled for), byte order, object type, ELF version and target machine.
// The first failing check reports through DL_ERR and returns false.
bool ElfReader::VerifyElfHeader() {
  // The file has to begin with the ELF magic number.
  const bool magic_ok = (memcmp(header_.e_ident, ELFMAG, SELFMAG) == 0);
  if (!magic_ok) {
    DL_ERR("\"%s\" has bad ELF magic", name_);
    return false;
  }

  // Try to give a clear diagnostic for ELF class mismatches, since they're
  // an easy mistake to make during the 32-bit/64-bit transition period.
  // A 64-bit build only accepts ELFCLASS64 files and a 32-bit build only
  // accepts ELFCLASS32 files.
  const int elf_class = header_.e_ident[EI_CLASS];
#if defined(__LP64__)
  if (elf_class == ELFCLASS32) {
    DL_ERR("\"%s\" is 32-bit instead of 64-bit", name_);
    return false;
  }
  if (elf_class != ELFCLASS64) {
    DL_ERR("\"%s\" has unknown ELF class: %d", name_, elf_class);
    return false;
  }
#else
  if (elf_class == ELFCLASS64) {
    DL_ERR("\"%s\" is 64-bit instead of 32-bit", name_);
    return false;
  }
  if (elf_class != ELFCLASS32) {
    DL_ERR("\"%s\" has unknown ELF class: %d", name_, elf_class);
    return false;
  }
#endif

  // Big-endian images are not supported.
  const bool little_endian = (header_.e_ident[EI_DATA] == ELFDATA2LSB);
  if (!little_endian) {
    DL_ERR("\"%s\" not little-endian: %d", name_, header_.e_ident[EI_DATA]);
    return false;
  }

  // Only ET_DYN objects (shared objects / position-independent images) pass.
  const bool is_dyn = (header_.e_type == ET_DYN);
  if (!is_dyn) {
    DL_ERR("\"%s\" has unexpected e_type: %d", name_, header_.e_type);
    return false;
  }

  // Only the current ELF version is accepted.
  const bool version_ok = (header_.e_version == EV_CURRENT);
  if (!version_ok) {
    DL_ERR("\"%s\" has unexpected e_version: %d", name_, header_.e_version);
    return false;
  }

  // The image must target the machine reported by GetTargetElfMachine().
  const bool machine_ok = (header_.e_machine == GetTargetElfMachine());
  if (!machine_ok) {
    DL_ERR("\"%s\" has unexpected e_machine: %d", name_, header_.e_machine);
    return false;
  }

  return true;
}
// Loads the program header table from an ELF file into a read-only private
// anonymous mmap-ed block.
bool ElfReader::ReadProgramHeader() {
phdr_num_ = header_.e_phnum;
// Like the kernel, we only accept program header tables that
// are smaller than 64KiB.
if (phdr_num_ < 1 || phdr_num_ > 65536/sizeof(ElfW(Phdr))) {
DL_ERR("\"%s\" has invalid e_phnum: %zd", name_, phdr_num_);
return false;
}
ElfW(Addr) page_min = PAGE_START(header_.e_phoff);
ElfW(Addr) page_max = PAGE_END(header_.e_phoff + (phdr_num_ * sizeof(ElfW(Phdr))));
ElfW(Addr) page_offset = PAGE_OFFSET(header_.e_phoff);
phdr_size_ = page_max - page_min;
void* mmap_result =
mmap64(nullptr, phdr_size_, PROT_READ, MAP_PRIVATE, fd_, file_offset_ + page_min);
if (mmap_result == MAP_FAILED) {
DL_ERR("\"%s\" phdr mmap failed: %s", name_, strerror(errno));
return false;
}
phdr_mmap_ = mmap_result;
phdr_table_ = reinterpret_cast<ElfW(Phdr)*>(reinterpret_cast<char*>(mmap_result) + page_offset);
return true;
}
以页面对齐的方式将程序头表映射为只读内存。phdr_table_为程序头表新映射到的地址。
ReserveAddressSpace为加载段创建空间。
extinfo是Android定义的操作,这里不分析。
// Reserve a virtual address range big enough to hold all loadable
// segments of a program header table. This is done by creating a
// private anonymous mmap() with PROT_NONE, unless the caller supplied a
// sufficiently large reserved range through extinfo.
//
// On success load_start_ holds the start of the range and load_bias_ the
// difference between that start and the lowest requested virtual address.
// Returns false (after reporting through DL_ERR) when there is nothing to
// load, when a mandatory reserved range is too small, or when mmap fails.
bool ElfReader::ReserveAddressSpace(const android_dlextinfo* extinfo) {
  ElfW(Addr) min_vaddr;
  // Total size needed for the loadable segments; min_vaddr receives the
  // lowest address they request. NOTE(review): presumably both are
  // page-aligned by phdr_table_get_load_size (defined elsewhere) - confirm.
  load_size_ = phdr_table_get_load_size(phdr_table_, phdr_num_, &min_vaddr);
  if (load_size_ == 0) {
    DL_ERR("\"%s\" has no loadable segments", name_);
    return false;
  }

  uint8_t* addr = reinterpret_cast<uint8_t*>(min_vaddr);
  void* start;
  size_t reserved_size = 0;
  // True when a caller-provided reservation is only a hint, i.e. falling back
  // to a fresh mapping is allowed if it turns out to be too small.
  bool reserved_hint = true;
  // Assume position independent executable by default.
  uint8_t* mmap_hint = nullptr;

  if (extinfo != nullptr) {
    if (extinfo->flags & ANDROID_DLEXT_RESERVED_ADDRESS) {
      reserved_size = extinfo->reserved_size;
      reserved_hint = false;
    } else if (extinfo->flags & ANDROID_DLEXT_RESERVED_ADDRESS_HINT) {
      reserved_size = extinfo->reserved_size;
    }

    if ((extinfo->flags & ANDROID_DLEXT_FORCE_FIXED_VADDR) != 0) {
      mmap_hint = addr;
    }
  }

  if (load_size_ > reserved_size) {
    if (!reserved_hint) {
      // Report the size that was actually reserved. The difference
      // reserved_size - load_size_ would underflow in this branch because
      // load_size_ is the larger of the two unsigned values.
      DL_ERR("reserved address space %zd smaller than %zd bytes needed for \"%s\"",
             reserved_size, load_size_, name_);
      return false;
    }
    // Reserve the range with an inaccessible private anonymous mapping; the
    // segments are mapped on top of it later.
    int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS;
    start = mmap(mmap_hint, load_size_, PROT_NONE, mmap_flags, -1, 0);
    if (start == MAP_FAILED) {
      DL_ERR("couldn't reserve %zd bytes of address space for \"%s\"", load_size_, name_);
      return false;
    }
  } else {
    // Only reachable with a non-null extinfo: without one reserved_size stays
    // 0 and load_size_ (non-zero here) is always larger.
    start = extinfo->reserved_addr;
  }

  // Start of the range the segments will be loaded into.
  load_start_ = start;
  // Offset between where the segments really live and the addresses the ELF
  // file asks for.
  load_bias_ = reinterpret_cast<uint8_t*>(start) - addr;
  return true;
}
下面就是最关键的函数,段加载函数的分析。
bool ElfReader::LoadSegments() {
for (size_t i = 0; i < phdr_num_; ++i) {
const ElfW(Phdr)* phdr = &phdr_table_[i];
if (phdr->p_type != PT_LOAD) { //非PT_LOAD段不需要加载到内存中。
continue;
}
// Segment addresses in memory.
ElfW(Addr) seg_start = phdr->p_vaddr + load_bias_; //段起始地址
ElfW(Addr) seg_end = seg_start + phdr->p_memsz; //段结束地址
ElfW(Addr) seg_page_start = PAGE_START(seg_start); //段起始所在的内存页起始地址
ElfW(Addr) seg_page_end = PAGE_END(seg_end); //段结束所在的内存页面结束地址。
ElfW(Addr) seg_file_end = seg_start + phdr->p_filesz; //映射的文件区域在段内存的结束地址。
// File offsets.
ElfW(Addr) file_start = phdr->p_offset; //该段对应文件的开始地址
ElfW(Addr) file_end = file_start + phdr->p_filesz; //段对应文件的结束地址
ElfW(Addr) file_page_start = PAGE_START(file_start); //对应文件开始地址的页面对齐地址
ElfW(Addr) file_length = file_end - file_page_start; //要映射的文件区域的长度
if (file_size_ <= 0) {
DL_ERR("\"%s\" invalid file size: %" PRId64, name_, file_size_);
return false;
}
if (file_end >= static_cast<size_t>(file_size_)) {
DL_ERR("invalid ELF file \"%s\" load segment[%zd]:"
" p_offset (%p) + p_filesz (%p) ( = %p) past end of file (0x%" PRIx64 ")",
name_, i, reinterpret_cast<void*>(phdr->p_offset),
reinterpret_cast<void*>(phdr->p_filesz),
reinterpret_cast<void*>(file_end), file_size_);
return false;
}
if (file_length != 0) {
//映射elf文件区域到段的内存区域。该区域的读写属性通过PFLAGS_TO_PROT获取。这里一定使用MAP_PRIVATE来映射,使用页面对齐的方式映射,段之间有部分区域重合。
void* seg_addr = mmap64(reinterpret_cast<void*>(seg_page_start),
file_length,
PFLAGS_TO_PROT(phdr->p_flags),
MAP_FIXED|MAP_PRIVATE,
fd_,
file_offset_ + file_page_start);
if (seg_addr == MAP_FAILED) {
DL_ERR("couldn't map \"%s\" segment %zd: %s", name_, i, strerror(errno));
return false;
}
}
// if the segment is writable, and does not end on a page boundary,
// zero-fill it until the page limit.
if ((phdr->p_flags & PF_W) != 0 && PAGE_OFFSET(seg_file_end) > 0) { 映射文件页面未对齐的区域写0
memset(reinterpret_cast<void*>(seg_file_end), 0, PAGE_SIZE - PAGE_OFFSET(seg_file_end));
}
seg_file_end = PAGE_END(seg_file_end);
// seg_file_end is now the first page address after the file
// content. If seg_end is larger, we need to zero anything
// between them. This is done by using a private anonymous
// map for all extra pages.
if (seg_page_end > seg_file_end) {//为对应文件的部分映射,这部分一般为数据段或者bss段。
void* zeromap = mmap(reinterpret_cast<void*>(seg_file_end),
seg_page_end - seg_file_end,
PFLAGS_TO_PROT(phdr->p_flags),
MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE,
-1,
0);
if (zeromap == MAP_FAILED) {
DL_ERR("couldn't zero fill \"%s\" gap: %s", name_, strerror(errno));
return false;
}
}
}
return true;
}
// Sets loaded_phdr_ to the address of the program header table as it appears
// in the loaded segments in memory. This is in contrast with phdr_table_,
// which is temporary and will be released before the library is relocated.
bool ElfReader::FindPhdr() {
const ElfW(Phdr)* phdr_limit = phdr_table_ + phdr_num_;
// If there is a PT_PHDR, use it directly.
for (const ElfW(Phdr)* phdr = phdr_table_; phdr < phdr_limit; ++phdr) {
if (phdr->p_type == PT_PHDR) {
return CheckPhdr(load_bias_ + phdr->p_vaddr);
}
}
// Otherwise, check the first loadable segment. If its file offset
// is 0, it starts with the ELF header, and we can trivially find the
// loaded program header from it.
for (const ElfW(Phdr)* phdr = phdr_table_; phdr < phdr_limit; ++phdr) {
if (phdr->p_type == PT_LOAD) {
if (phdr->p_offset == 0) {
ElfW(Addr) elf_addr = load_bias_ + phdr->p_vaddr;
const ElfW(Ehdr)* ehdr = reinterpret_cast<const ElfW(Ehdr)*>(elf_addr);
ElfW(Addr) offset = ehdr->e_phoff;
return CheckPhdr(reinterpret_cast<ElfW(Addr)>(ehdr) + offset);
}
break;
}
}
DL_ERR("can't find loaded phdr for \"%s\"", name_);
return false;
}
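这里就是对phdr的验证:如果程序头表中存在类型为PT_PHDR的表项(一般为程序头表的第一项),就用它的p_vaddr加上load_bias_得到程序头表在内存中的地址;否则通过文件偏移为0的第一个PT_LOAD段开头的ELF头中的e_phoff来计算该地址。最后调用CheckPhdr进行验证,这里的验证应该就是确认段的位置有没有加载正确,CheckPhdr的实现这里不再分析。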