前两天在网上投递了简历,面试了一家做C++开发的公司,对方的负责人给我发了一份笔试题,题目是:
请写出一个ELF文件解析器, 需要能打印出所有segments和sections,并列出每个section和segment的映射关系。
首先了解elf是什么,它的结构是怎么样的,然后去读一下别人的源码,读懂之后,自己开始编码。
源码如下("elf.h" 头文件见后文)。编译时会报warning,但似乎不太影响最后的输出结果:
#include<stdlib.h>
#include<stdio.h>
#include "elf.h"
int main(int argc, char* argv[])
{
// 参数错误
if(argc < 2)
{
printf("invalid arguments\n");
exit(0);
}
// 打开文件
FILE *fp;
fp = fopen(argv[1], "r");
if (NULL == fp)
{
printf("fail to open the file");
exit(0);
}
// 解析head
Elf64_Ehdr elf_head;
int shnum, a;
// 读取 head 到elf_head
a = fread(&elf_head, sizeof(Elf64_Ehdr), 1, fp);
if (0 == a)
{
printf("fail to read head\n");
exit(0);
}
// 判断elf文件类型
if(elf_head.e_ident[0] != 0x7F ||
elf_head.e_ident[1] != 'E' ||
elf_head.e_ident[2] != 'L' ||
elf_head.e_ident[3] != 'F')
{
printf("Not a ELF file\n");
exit(0);
}
// 解析section 分配内存 section * 数量
Elf64_Shdr *shdr = (Elf64_Shdr*)malloc(sizeof(Elf64_Shdr) * elf_head.e_shnum);
if (NULL == shdr)
{
printf("shdr malloc failed\n");
exit(0);
}
// 设置fp偏移量 offset
a = fseek(fp, elf_head.e_shoff, SEEK_SET);
if(0 != a)
{
printf("\nfaile to fseek\n");
exit(0);
}
// 读取section 到 shdr, 大小为shdr * 数量
a = fread(shdr, sizeof(Elf64_Shdr) * elf_head.e_shnum, 1, fp);
if (0 == a)
{
printf("\nfail to read section\n");
exit(0);
}
// 重置指针位置
rewind(fp);
// 将fp指针移到 字符串表偏移位置处
fseek(fp, shdr[elf_head.e_shstrndx].sh_offset, SEEK_SET);
// 第e_shstrndx项是字符串表 定义 字节 长度 char类型 数组
char shstrtab[shdr[elf_head.e_shstrndx].sh_size];
char *temp = shstrtab;
// 读取内容
a = fread(shstrtab, shdr[elf_head.e_shstrndx].sh_size, 1, fp);
if (0 == a)
{
printf("\nfaile to read\n");
}
// printf("\n\节的信息: \n");
// 遍历
// for (int i = 0; i < elf_head.e_shnum; i++)
// {
// temp = shstrtab;
// temp = temp + shdr[i].sh_name;
// printf("节的名称: %s\n", temp);
// printf("节首的偏移: %x\n", shdr[i].sh_offset);
// printf("节的大小: %x\n", shdr[i].sh_size);
// printf("节尾的地址: %x\n", shdr[i].sh_offset + shdr[i].sh_size);
// printf("\n");
// }
// 解析 segment
Elf64_Phdr *phdr = (Elf64_Phdr*)malloc(sizeof(Elf64_Phdr) * elf_head.e_phnum);
a = fseek(fp, elf_head.e_phoff, SEEK_SET);
a = fread (phdr, sizeof(Elf64_Phdr) * elf_head.e_phnum, 1, fp);
rewind(fp);
fseek(fp, phdr[elf_head.e_shentsize].p_offset, SEEK_SET);
char phstrtab[phdr[elf_head.e_shentsize].p_filesz];
a = fread(phstrtab, phdr[elf_head.e_shentsize].p_filesz, 1, fp);
printf("\n\n段的信息:\n");
for (int i = 0; i < elf_head.e_phnum; i++)
{
printf("%d: \n", i);
printf(" 该段首相对偏移: %x \n", phdr[i].p_offset);
printf(" 该段的大小: %x \n", phdr[i].p_memsz);
printf(" 该段尾相对偏移: %x \n", phdr[i].p_memsz + phdr[i].p_offset);
printf(" *该段包含的节有:\n");
for (int j = 0;j < elf_head.e_shnum; j++)
{
if (
(shdr[j].sh_offset > phdr[i].p_offset) &&
( (shdr[j].sh_offset + shdr[j].sh_size) < (phdr[i].p_offset + phdr[i].p_memsz) )
)
{
temp = shstrtab;
temp = temp + shdr[j].sh_name;
printf(" 节的名称: %s\n", temp);
printf(" 节首的偏移: %x\n", shdr[j].sh_offset);
printf(" 节的大小: %x\n", shdr[j].sh_size);
printf(" 节尾的地址: %x\n", shdr[j].sh_offset + shdr[j].sh_size);
printf("\n");
}
}
printf("\n");
}
printf("\n");
return 0;
}
#include<stdlib.h>
#include<stdio.h>
#include "elf.h"
int main(int argc, char* argv[])
{
// 参数错误
if(argc < 2)
{
printf("invalid arguments\n");
exit(0);
}
// 打开文件
FILE *fp;
fp = fopen(argv[1], "r");
if (NULL == fp)
{
printf("fail to open the file");
exit(0);
}
// 解析head
Elf64_Ehdr elf_head;
int shnum, a;
// 读取 head 到elf_head
a = fread(&elf_head, sizeof(Elf64_Ehdr), 1, fp);
if (0 == a)
{
printf("fail to read head\n");
exit(0);
}
// 判断elf文件类型
if(elf_head.e_ident[0] != 0x7F ||
elf_head.e_ident[1] != 'E' ||
elf_head.e_ident[2] != 'L' ||
elf_head.e_ident[3] != 'F')
{
printf("Not a ELF file\n");
exit(0);
}
// 解析section 分配内存 section * 数量
Elf64_Shdr *shdr = (Elf64_Shdr*)malloc(sizeof(Elf64_Shdr) * elf_head.e_shnum);
if (NULL == shdr)
{
printf("shdr malloc failed\n");
exit(0);
}
// 设置fp偏移量 offset
a = fseek(fp, elf_head.e_shoff, SEEK_SET);
if(0 != a)
{
printf("\nfaile to fseek\n");
exit(0);
}
// 读取section 到 shdr, 大小为shdr * 数量
a = fread(shdr, sizeof(Elf64_Shdr) * elf_head.e_shnum, 1, fp);
if (0 == a)
{
printf("\nfail to read section\n");
exit(0);
}
// 重置指针位置
rewind(fp);
// 将fp指针移到 字符串表偏移位置处
fseek(fp, shdr[elf_head.e_shstrndx].sh_offset, SEEK_SET);
// 第e_shstrndx项是字符串表 定义 字节 长度 char类型 数组
char shstrtab[shdr[elf_head.e_shstrndx].sh_size];
char *temp = shstrtab;
// 读取内容
a = fread(shstrtab, shdr[elf_head.e_shstrndx].sh_size, 1, fp);
if (0 == a)
{
printf("\nfaile to read\n");
}
// printf("\n\节的信息: \n");
// 遍历
// for (int i = 0; i < elf_head.e_shnum; i++)
// {
// temp = shstrtab;
// temp = temp + shdr[i].sh_name;
// printf("节的名称: %s\n", temp);
// printf("节首的偏移: %x\n", shdr[i].sh_offset);
// printf("节的大小: %x\n", shdr[i].sh_size);
// printf("节尾的地址: %x\n", shdr[i].sh_offset + shdr[i].sh_size);
// printf("\n");
// }
// 解析 segment
Elf64_Phdr *phdr = (Elf64_Phdr*)malloc(sizeof(Elf64_Phdr) * elf_head.e_phnum);
a = fseek(fp, elf_head.e_phoff, SEEK_SET);
a = fread (phdr, sizeof(Elf64_Phdr) * elf_head.e_phnum, 1, fp);
rewind(fp);
fseek(fp, phdr[elf_head.e_shentsize].p_offset, SEEK_SET);
char phstrtab[phdr[elf_head.e_shentsize].p_filesz];
a = fread(phstrtab, phdr[elf_head.e_shentsize].p_filesz, 1, fp);
printf("\n\n段的信息:\n");
for (int i = 0; i < elf_head.e_phnum; i++)
{
printf("%d: \n", i);
printf(" 该段首相对偏移: %x \n", phdr[i].p_offset);
printf(" 该段的大小: %x \n", phdr[i].p_memsz);
printf(" 该段尾相对偏移: %x \n", phdr[i].p_memsz + phdr[i].p_offset);
printf(" *该段包含的节有:\n");
for (int j = 0;j < elf_head.e_shnum; j++)
{
if ((shdr[j].sh_offset > phdr[i].p_offset) && ((shdr[j].sh_offset + shdr[j].sh_size) < (phdr[i].p_offset + phdr[i].p_memsz)))
{
temp = shstrtab;
temp = temp + shdr[j].sh_name;
printf(" 节的名称: %s\n", temp);
printf(" 节首的偏移: %x\n", shdr[j].sh_offset);
printf(" 节的大小: %x\n", shdr[j].sh_size);
printf(" 节尾的地址: %x\n", shdr[j].sh_offset + shdr[j].sh_size);
printf("\n");
}
}
printf("\n");
}
printf("\n");
return 0;
}
这里用到了一个头文件 "elf.h",里面定义了ELF文件结构用到的各种数据结构,能够使解析ELF的过程更加方便:
#ifndef _QEMU_ELF_H
#define _QEMU_ELF_H
#include <inttypes.h>
/* 32-bit ELF base types. */
/* Size reference for the fixed-width integer types used below:
1 byte uint8_t
2 bytes uint16_t
4 bytes uint32_t
8 bytes uint64_t */
typedef uint32_t Elf32_Addr; /* unsigned, 4 bytes */
typedef uint16_t Elf32_Half; /* unsigned, 2 bytes */
typedef uint32_t Elf32_Off; /* unsigned, 4 bytes */
typedef int32_t Elf32_Sword; /* signed, 4 bytes */
typedef uint32_t Elf32_Word; /* unsigned, 4 bytes */
/* 64-bit ELF base types. */
typedef uint64_t Elf64_Addr; /* unsigned, 8 bytes */
typedef uint16_t Elf64_Half; /* unsigned, 2 bytes */
typedef int16_t Elf64_SHalf; /* signed, 2 bytes */
typedef uint64_t Elf64_Off; /* unsigned, 8 bytes */
typedef int32_t Elf64_Sword; /* signed, 4 bytes */
typedef uint32_t Elf64_Word; /* unsigned, 4 bytes */
typedef uint64_t Elf64_Xword; /* unsigned, 8 bytes */
typedef int64_t Elf64_Sxword; /* signed, 8 bytes */
/* These constants are for the segment types stored in the image headers */
#define PT_NULL 0
#define PT_LOAD 1
#define PT_DYNAMIC 2
#define PT_INTERP 3
#define PT_NOTE 4
#define PT_SHLIB 5
#define PT_PHDR 6
/* PT_LOPROC..PT_HIPROC bound a numeric range; the PT_MIPS_* values below fall inside it. */
#define PT_LOPROC 0x70000000
#define PT_HIPROC 0x7fffffff
#define PT_MIPS_REGINFO 0x70000000
#define PT_MIPS_OPTIONS 0x70000001
/* Flags in the e_flags field of the header */
/* MIPS architecture level. */
/* Each level below is a distinct value in the top four bits (see EF_MIPS_ARCH). */
#define EF_MIPS_ARCH_1 0x00000000 /* -mips1 code. */
#define EF_MIPS_ARCH_2 0x10000000 /* -mips2 code. */
#define EF_MIPS_ARCH_3 0x20000000 /* -mips3 code. */
#define EF_MIPS_ARCH_4 0x30000000 /* -mips4 code. */
#define EF_MIPS_ARCH_5 0x40000000 /* -mips5 code. */
#define EF_MIPS_ARCH_32 0x50000000 /* MIPS32 code. */
#define EF_MIPS_ARCH_64 0x60000000 /* MIPS64 code. */
/* The ABI of a file. */
#define EF_MIPS_ABI_O32 0x00001000 /* O32 ABI. */
#define EF_MIPS_ABI_O64 0x00002000 /* O32 extended for 64 bit. */
/* Individual flag bits; each value below has exactly one bit set. */
#define EF_MIPS_NOREORDER 0x00000001
#define EF_MIPS_PIC 0x00000002
#define EF_MIPS_CPIC 0x00000004
#define EF_MIPS_ABI2 0x00000020
#define EF_MIPS_OPTIONS_FIRST 0x00000080
#define EF_MIPS_32BITMODE 0x00000100
/* NOTE(review): these two look like masks covering the EF_MIPS_ABI_* and
   EF_MIPS_ARCH_* values above (bits 12-15 and 28-31) - confirm against users. */
#define EF_MIPS_ABI 0x0000f000
#define EF_MIPS_ARCH 0xf0000000
/* These constants define the different elf file types */
#define ET_NONE 0
#define ET_REL 1
#define ET_EXEC 2
#define ET_DYN 3
#define ET_CORE 4
#d