eh_frame
栈回溯(unwind)用到的dwarf调用帧信息(CFI)都保存在 .eh_frame 节当中,具体格式如下:
一般一个cie对应多个fde,一个fde对应相应函数的寄存器恢复信息,不仅仅是栈信息。
当我们拿到eh_frame后,就需要解析cie和fde.
官方文档 (http://dwarfstd.org/doc/DWARF4.pdf) 的内容非常详细,但感觉不太适合入门。下面根据该文档结尾的例子来分别讲解cie和fde。
common information entry encoding
R0 =0;
R1 保存返回地址
R2-R3 临时寄存器
R4-R6 保存在栈上
R7 栈顶指针 (rsp)
table D.4中
1. R8 是返回地址
2. s 表示 same value 规则(寄存器的值保持不变)
3. u 表示 undefined 规则
4. rN 表示 register(N) 规则
5. cN 表示 offset(N) 规则
6. a 表示 architectural(架构相关)规则
cie : common information entry encoding
cie length:如果值是0xffffffff,则后面还会有一个 extended length 字段,表示这是64位dwarf格式;但我看到的一般都是32位dwarf格式。这里的位数指的是dwarf格式本身的位数,和可执行文件的位数并不对应。长度字段后面是直到cie结束的全部内容,共36字节。
augmentation 是扩展字符串(例如下文的 "zR"),用来说明该cie及其对应的fde带有哪些扩展数据。
这里有详细解释。
https://refspecs.linuxfoundation.org/LSB_3.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
readelf -wF
Contents of the .eh_frame section:
00000000 00000014 00000000 CIE "zR" cf=1 df=-8 ra=16
LOC CFA ra
0000000000000000 rsp+8 c-8
00000018 0000001c 0000001c FDE cie=00000000 pc=35fb01ea60..35fb01ea88
LOC CFA rbx ra
00000035fb01ea60 rsp+8 u c-8
00000035fb01ea68 rsp+16 c-16 c-8
00000035fb01ea87 rsp+8 c-16 c-8
readelf -wf
Contents of the .eh_frame section:
00000000 00000014 00000000 CIE
Version: 1
Augmentation: "zR"
Code alignment factor: 1
Data alignment factor: -8
Return address column: 16
Augmentation data: 1b
DW_CFA_def_cfa: r7 (rsp) ofs 8
DW_CFA_offset: r16 (rip) at cfa-8
DW_CFA_nop
DW_CFA_nop
00000018 0000001c 0000001c FDE cie=00000000 pc=35fb01ea60..35fb01ea88
DW_CFA_advance_loc: 8 to 35fb01ea68
DW_CFA_def_cfa_offset: 16
DW_CFA_offset: r3 (rbx) at cfa-16
DW_CFA_advance_loc: 31 to 35fb01ea87
DW_CFA_def_cfa_offset: 8
DW_CFA_nop
DW_CFA_nop
DW_CFA_nop
DW_CFA_nop
DW_CFA_nop
DW_CFA_nop
DW_CFA_nop
通过对比得知: cfa = rsp + DW_CFA_def_cfa_offset 的操作数; DW_CFA_offset 表示 rbx 保存在地址 cfa-16 处,即 rbx = *(cfa - 16)。
frame description entry encoding
augmentation_data_length 该字段仅在CIE扩展字符串以“z”开头的情况下出现。对应一个无符号LEB128,这是FDE扩展数据的总大小。这可用于跳过与无法识别的扩展字符关联的数据。
augmentation_data 如果CIE未将LSDA编码设置为DW_EH_PE_omit,则Augmentation Data中包含指向LSDA的指针,该指针由CIE指定编码(LSDA encoding)。
这里显示的没有扩展字符z的fde。
之后就是对instruction进行解码,然后状态恢复。
instruction | Description |
---|---|
DW_CFA_advance_loc | Location += (delta * code_alignment_factor) |
DW_CFA_advance_loc1 | Location += (delta * code_alignment_factor) DW_CFA_advance_loc1指令采用一个表示常量增量的单个ubyte操作数。除了增量操作数的编码和大小外,该指令与DW_CFA_advance_loc相同。 |
DW_CFA_def_cfa | register = new register num; offset = new offset; CFA = register + offset |
DW_CFA_def_cfa_offset | offset = new offset ; CFA = old register + offset ;DW_CFA_def_cfa_offset指令采用单个无符号LEB128操作数表示一个(未分解的)偏移量。 所需的操作是定义当前的CFA规则以使用提供的偏移量(但保留旧寄存器)。仅当当前CFA规则定义为使用寄存器和偏移量时,此操作才有效。 |
DW_CFA_offset offset(N) | offset = (factored offset * data_alignment_factor); reg num = *(CFA + offset) ;DW_CFA_offset指令采用两个操作数:一个寄存器号(用操作码编码)和一个无符号的LEB128常量,表示因数偏移量。 所需的操作是将由寄存器编号指示的寄存器的规则更改为offset(N)规则,其中N的值是 factored offset * data_alignment_factor |
00000000 00000014 00000000 CIE "zR" cf=1 df=-8 ra=16
LOC CFA ra
0000000000000000 rsp+8 c-8
LOC CFA rbx ra
00000035fb01ea60 rsp+8 u c-8
00000035fb01ea68 rsp+16 c-16 c-8
00000035fb01ea87 rsp+8 c-16 c-8
DW_CFA_def_cfa: r7 (rsp) ofs 8
DW_CFA_offset: r16 (rip) at cfa-8
DW_CFA_advance_loc: 8 to 35fb01ea68
DW_CFA_def_cfa_offset: 16
DW_CFA_offset: r3 (rbx) at cfa-16
DW_CFA_advance_loc: 31 to 35fb01ea87
DW_CFA_def_cfa_offset: 8
R8 是返回地址
R7 栈顶指针 (rsp)
代码实现
对比了gcc gdb 和readelf,其中gcc的代码较友好。
gcc 4.8.5
/* Compute the unwind state FS for the frame described by CONTEXT.
   Finds the FDE for CONTEXT->ra, runs the CFA program of its CIE, then
   runs the CFA program of the FDE itself.

   Returns _URC_END_OF_STACK when CONTEXT->ra is 0 or when no FDE is
   found and no MD_FALLBACK_FRAME_STATE_FOR is defined,
   _URC_FATAL_PHASE1_ERROR when extract_cie_info returns NULL, and
   _URC_NO_REASON otherwise. */
static _Unwind_Reason_Code
uw_frame_state_for (struct _Unwind_Context *context, _Unwind_FrameState *fs)
{
const struct dwarf_fde *fde;
const struct dwarf_cie *cie;
const unsigned char *aug, *insn, *end;
/* Start from a zeroed frame state and clear the per-frame fields of
   CONTEXT that this function may fill in. */
memset (fs, 0, sizeof (*fs));
context->args_size = 0;
context->lsda = 0;
/* A zero return address means there is nothing further to unwind. */
if (context->ra == 0)
return _URC_END_OF_STACK;
/* The lookup address is ra - 1, plus the value of
   _Unwind_IsSignalFrame (presumably 0 or 1 -- confirm against its
   definition). */
fde = _Unwind_Find_FDE (context->ra + _Unwind_IsSignalFrame (context) - 1,
&context->bases);
if (fde == NULL)
{
#ifdef MD_FALLBACK_FRAME_STATE_FOR
/* Couldn't find frame unwind info for this function. Try a
target-specific fallback mechanism. This will necessarily
not provide a personality routine or LSDA. */
return MD_FALLBACK_FRAME_STATE_FOR (context, fs);
#else
return _URC_END_OF_STACK;
#endif
}
/* The CFA programs are interpreted starting from bases.func
   (presumably the function start filled in by _Unwind_Find_FDE). */
fs->pc = context->bases.func;
cie = get_cie (fde);
/* extract_cie_info returns the first CIE instruction, or NULL. */
insn = extract_cie_info (cie, context, fs);
if (insn == NULL)
/* CIE contained unknown augmentation. */
return _URC_FATAL_PHASE1_ERROR;
/* First decode all the insns in the CIE. */
end = (const unsigned char *) next_fde ((const struct dwarf_fde *) cie);
execute_cfa_program (insn, end, context, fs);
/* Locate augmentation for the fde. */
/* Skip the fixed-size FDE header and two values encoded with
   fde_encoding (presumably pc_begin and pc_range -- confirm against
   the FDE layout). */
aug = (const unsigned char *) fde + sizeof (*fde);
aug += 2 * size_of_encoded_value (fs->fde_encoding);
insn = NULL;
/* When saw_z is set, a ULEB128 length precedes the augmentation data;
   the instructions begin right after that many bytes. */
if (fs->saw_z)
{
_uleb128_t i;
aug = read_uleb128 (aug, &i);
insn = aug + i;
}
/* Read the LSDA pointer unless the CIE marked it as omitted. */
if (fs->lsda_encoding != DW_EH_PE_omit)
{
_Unwind_Ptr lsda;
aug = read_encoded_value (context, fs->lsda_encoding, aug, &lsda);
context->lsda = (void *) lsda;
}
/* Then the insns in the FDE up to our target PC. */
/* Without a length field, the instructions start wherever the
   augmentation parsing above stopped. */
if (insn == NULL)
insn = aug;
end = (const unsigned char *) next_fde (fde);
execute_cfa_program (insn, end, context, fs);
return _URC_NO_REASON;
}
/* Decode DWARF 2 call frame information.  Takes pointers the
   instruction sequence to decode, current register information and
   CIE info, and the PC range to evaluate.

   INSN_PTR and INSN_END delimit the CFA instruction bytes.  Decoding
   stops at INSN_END, or earlier once FS->pc is no longer below
   CONTEXT->ra plus the signal-frame adjustment.  The decoded register
   rules and the CFA rule are stored in FS->regs; DW_CFA_GNU_args_size
   additionally updates CONTEXT->args_size.  */
static void
execute_cfa_program (const unsigned char *insn_ptr,
                     const unsigned char *insn_end,
                     struct _Unwind_Context *context,
                     _Unwind_FrameState *fs)
{
  /* Free list of state nodes released by DW_CFA_restore_state, reused
     by later DW_CFA_remember_state before calling alloca again.  */
  struct frame_state_reg_info *unused_rs = NULL;

  /* Don't allow remember/restore between CIE and FDE programs.  */
  fs->regs.prev = NULL;

  /* The comparison with the return address uses < rather than <= because
     we are only interested in the effects of code before the call; for a
     noreturn function, the return address may point to unrelated code with
     a different stack configuration that we are not interested in.  We
     assume that the call itself is unwind info-neutral; if not, or if
     there are delay instructions that adjust the stack, these must be
     reflected at the point immediately before the call insn.
     In signal frames, return address is after last completed instruction,
     so we add 1 to return address to make the comparison <=.  */
  while (insn_ptr < insn_end
         && fs->pc < context->ra + _Unwind_IsSignalFrame (context))
    {
      unsigned char insn = *insn_ptr++;
      _uleb128_t reg, utmp;
      _sleb128_t offset, stmp;

      /* The first three opcodes are selected by the top two bits and
         carry their operand in the low six bits.  */
      if ((insn & 0xc0) == DW_CFA_advance_loc)
        fs->pc += (insn & 0x3f) * fs->code_align;
      else if ((insn & 0xc0) == DW_CFA_offset)
        {
          reg = insn & 0x3f;
          insn_ptr = read_uleb128 (insn_ptr, &utmp);
          /* The operand is a factored offset; scale it by data_align.  */
          offset = (_Unwind_Sword) utmp * fs->data_align;
          reg = DWARF_REG_TO_UNWIND_COLUMN (reg);
          if (UNWIND_COLUMN_IN_RANGE (reg))
            {
              fs->regs.reg[reg].how = REG_SAVED_OFFSET;
              fs->regs.reg[reg].loc.offset = offset;
            }
        }
      else if ((insn & 0xc0) == DW_CFA_restore)
        {
          reg = insn & 0x3f;
          reg = DWARF_REG_TO_UNWIND_COLUMN (reg);
          if (UNWIND_COLUMN_IN_RANGE (reg))
            fs->regs.reg[reg].how = REG_UNSAVED;
        }
      else switch (insn)
        {
        case DW_CFA_set_loc:
          {
            _Unwind_Ptr pc;

            insn_ptr = read_encoded_value (context, fs->fde_encoding,
                                           insn_ptr, &pc);
            fs->pc = (void *) pc;
          }
          break;

        case DW_CFA_advance_loc1:
          fs->pc += read_1u (insn_ptr) * fs->code_align;
          insn_ptr += 1;
          break;
        case DW_CFA_advance_loc2:
          fs->pc += read_2u (insn_ptr) * fs->code_align;
          insn_ptr += 2;
          break;
        case DW_CFA_advance_loc4:
          fs->pc += read_4u (insn_ptr) * fs->code_align;
          insn_ptr += 4;
          break;

        case DW_CFA_offset_extended:
          insn_ptr = read_uleb128 (insn_ptr, &reg);
          insn_ptr = read_uleb128 (insn_ptr, &utmp);
          offset = (_Unwind_Sword) utmp * fs->data_align;
          reg = DWARF_REG_TO_UNWIND_COLUMN (reg);
          if (UNWIND_COLUMN_IN_RANGE (reg))
            {
              fs->regs.reg[reg].how = REG_SAVED_OFFSET;
              fs->regs.reg[reg].loc.offset = offset;
            }
          break;

        case DW_CFA_restore_extended:
          insn_ptr = read_uleb128 (insn_ptr, &reg);
          /* FIXME, this is wrong; the CIE might have said that the
             register was saved somewhere.  */
          reg = DWARF_REG_TO_UNWIND_COLUMN (reg);
          if (UNWIND_COLUMN_IN_RANGE (reg))
            fs->regs.reg[reg].how = REG_UNSAVED;
          break;

        case DW_CFA_same_value:
          insn_ptr = read_uleb128 (insn_ptr, &reg);
          reg = DWARF_REG_TO_UNWIND_COLUMN (reg);
          if (UNWIND_COLUMN_IN_RANGE (reg))
            fs->regs.reg[reg].how = REG_UNSAVED;
          break;

        case DW_CFA_undefined:
          insn_ptr = read_uleb128 (insn_ptr, &reg);
          reg = DWARF_REG_TO_UNWIND_COLUMN (reg);
          if (UNWIND_COLUMN_IN_RANGE (reg))
            fs->regs.reg[reg].how = REG_UNDEFINED;
          break;

        case DW_CFA_nop:
          break;

        case DW_CFA_register:
          {
            _uleb128_t reg2;

            insn_ptr = read_uleb128 (insn_ptr, &reg);
            insn_ptr = read_uleb128 (insn_ptr, &reg2);
            reg = DWARF_REG_TO_UNWIND_COLUMN (reg);
            if (UNWIND_COLUMN_IN_RANGE (reg))
              {
                fs->regs.reg[reg].how = REG_SAVED_REG;
                fs->regs.reg[reg].loc.reg = (_Unwind_Word)reg2;
              }
          }
          break;

        case DW_CFA_remember_state:
          {
            struct frame_state_reg_info *new_rs;

            /* Take a node from the free list if one is available.  */
            if (unused_rs)
              {
                new_rs = unused_rs;
                unused_rs = unused_rs->prev;
              }
            else
              new_rs = alloca (sizeof (struct frame_state_reg_info));

            /* Push a copy of the current rules onto the prev chain.  */
            *new_rs = fs->regs;
            fs->regs.prev = new_rs;
          }
          break;

        case DW_CFA_restore_state:
          {
            struct frame_state_reg_info *old_rs = fs->regs.prev;

            /* Pop the saved rules, then put the node on the free list.  */
            fs->regs = *old_rs;
            old_rs->prev = unused_rs;
            unused_rs = old_rs;
          }
          break;

        case DW_CFA_def_cfa:
          insn_ptr = read_uleb128 (insn_ptr, &utmp);
          fs->regs.cfa_reg = (_Unwind_Word)utmp;
          insn_ptr = read_uleb128 (insn_ptr, &utmp);
          fs->regs.cfa_offset = (_Unwind_Word)utmp;
          fs->regs.cfa_how = CFA_REG_OFFSET;
          break;

        case DW_CFA_def_cfa_register:
          insn_ptr = read_uleb128 (insn_ptr, &utmp);
          fs->regs.cfa_reg = (_Unwind_Word)utmp;
          fs->regs.cfa_how = CFA_REG_OFFSET;
          break;

        case DW_CFA_def_cfa_offset:
          insn_ptr = read_uleb128 (insn_ptr, &utmp);
          fs->regs.cfa_offset = utmp;
          /* cfa_how deliberately not set.  */
          break;

        case DW_CFA_def_cfa_expression:
          /* Record where the expression block starts, then skip over
             its ULEB128 length and that many bytes.  */
          fs->regs.cfa_exp = insn_ptr;
          fs->regs.cfa_how = CFA_EXP;
          insn_ptr = read_uleb128 (insn_ptr, &utmp);
          insn_ptr += utmp;
          break;

        case DW_CFA_expression:
          insn_ptr = read_uleb128 (insn_ptr, &reg);
          reg = DWARF_REG_TO_UNWIND_COLUMN (reg);
          if (UNWIND_COLUMN_IN_RANGE (reg))
            {
              fs->regs.reg[reg].how = REG_SAVED_EXP;
              fs->regs.reg[reg].loc.exp = insn_ptr;
            }
          insn_ptr = read_uleb128 (insn_ptr, &utmp);
          insn_ptr += utmp;
          break;

          /* Dwarf3.  */
        case DW_CFA_offset_extended_sf:
          insn_ptr = read_uleb128 (insn_ptr, &reg);
          insn_ptr = read_sleb128 (insn_ptr, &stmp);
          offset = stmp * fs->data_align;
          reg = DWARF_REG_TO_UNWIND_COLUMN (reg);
          if (UNWIND_COLUMN_IN_RANGE (reg))
            {
              fs->regs.reg[reg].how = REG_SAVED_OFFSET;
              fs->regs.reg[reg].loc.offset = offset;
            }
          break;

        case DW_CFA_def_cfa_sf:
          insn_ptr = read_uleb128 (insn_ptr, &utmp);
          fs->regs.cfa_reg = (_Unwind_Word)utmp;
          insn_ptr = read_sleb128 (insn_ptr, &stmp);
          fs->regs.cfa_offset = (_Unwind_Sword)stmp;
          fs->regs.cfa_how = CFA_REG_OFFSET;
          fs->regs.cfa_offset *= fs->data_align;
          break;

        case DW_CFA_def_cfa_offset_sf:
          insn_ptr = read_sleb128 (insn_ptr, &stmp);
          fs->regs.cfa_offset = (_Unwind_Sword)stmp;
          fs->regs.cfa_offset *= fs->data_align;
          /* cfa_how deliberately not set.  */
          break;

        case DW_CFA_val_offset:
          insn_ptr = read_uleb128 (insn_ptr, &reg);
          insn_ptr = read_uleb128 (insn_ptr, &utmp);
          offset = (_Unwind_Sword) utmp * fs->data_align;
          reg = DWARF_REG_TO_UNWIND_COLUMN (reg);
          if (UNWIND_COLUMN_IN_RANGE (reg))
            {
              fs->regs.reg[reg].how = REG_SAVED_VAL_OFFSET;
              fs->regs.reg[reg].loc.offset = offset;
            }
          break;

        case DW_CFA_val_offset_sf:
          insn_ptr = read_uleb128 (insn_ptr, &reg);
          insn_ptr = read_sleb128 (insn_ptr, &stmp);
          offset = stmp * fs->data_align;
          reg = DWARF_REG_TO_UNWIND_COLUMN (reg);
          if (UNWIND_COLUMN_IN_RANGE (reg))
            {
              fs->regs.reg[reg].how = REG_SAVED_VAL_OFFSET;
              fs->regs.reg[reg].loc.offset = offset;
            }
          break;

        case DW_CFA_val_expression:
          insn_ptr = read_uleb128 (insn_ptr, &reg);
          reg = DWARF_REG_TO_UNWIND_COLUMN (reg);
          if (UNWIND_COLUMN_IN_RANGE (reg))
            {
              fs->regs.reg[reg].how = REG_SAVED_VAL_EXP;
              fs->regs.reg[reg].loc.exp = insn_ptr;
            }
          insn_ptr = read_uleb128 (insn_ptr, &utmp);
          insn_ptr += utmp;
          break;

        case DW_CFA_GNU_window_save:
          /* ??? Hardcoded for SPARC register window configuration.  */
          if (DWARF_FRAME_REGISTERS >= 32)
            for (reg = 16; reg < 32; ++reg)
              {
                fs->regs.reg[reg].how = REG_SAVED_OFFSET;
                fs->regs.reg[reg].loc.offset = (reg - 16) * sizeof (void *);
              }
          break;

        case DW_CFA_GNU_args_size:
          insn_ptr = read_uleb128 (insn_ptr, &utmp);
          context->args_size = (_Unwind_Word)utmp;
          break;

        case DW_CFA_GNU_negative_offset_extended:
          /* Obsoleted by DW_CFA_offset_extended_sf, but used by
             older PowerPC code.  */
          insn_ptr = read_uleb128 (insn_ptr, &reg);
          insn_ptr = read_uleb128 (insn_ptr, &utmp);
          offset = (_Unwind_Word) utmp * fs->data_align;
          reg = DWARF_REG_TO_UNWIND_COLUMN (reg);
          if (UNWIND_COLUMN_IN_RANGE (reg))
            {
              fs->regs.reg[reg].how = REG_SAVED_OFFSET;
              fs->regs.reg[reg].loc.offset = -offset;
            }
          break;

        default:
          gcc_unreachable ();
        }
    }
}
关于文件rva 和 内存rva
这两个值因为对齐会产生差异, unwind信息里面记录的是内存rva, 所以利用文件rva进行查找会导致错误。以上是两个rva的关系图。
案例
Contents of the .eh_frame section:
00000000 00000010 00000000 CIE
Version: 1
Augmentation: "zRS"
Code alignment factor: 1
Data alignment factor: -4
Return address column: 8
Augmentation data: 1b
DW_CFA_nop
DW_CFA_nop
.......
000000c8 00000014 00000000 CIE
Version: 1
Augmentation: "zR"
Code alignment factor: 1
Data alignment factor: -4
Return address column: 8
Augmentation data: 1b
DW_CFA_def_cfa: r4 (esp) ofs 4
DW_CFA_offset: r8 (eip) at cfa-4
DW_CFA_nop
DW_CFA_nop
000000e0 00000024 0000001c FDE cie=000000c8 pc=ffffe420..ffffe434
DW_CFA_advance_loc: 1 to ffffe421
DW_CFA_def_cfa_offset: 8
DW_CFA_advance_loc: 1 to ffffe422
DW_CFA_def_cfa_offset: 12
DW_CFA_advance_loc: 1 to ffffe423
DW_CFA_def_cfa_offset: 16
DW_CFA_offset: r5 (ebp) at cfa-16
DW_CFA_advance_loc: 14 to ffffe431
DW_CFA_def_cfa_offset: 12
DW_CFA_restore: r5 (ebp)
DW_CFA_advance_loc: 1 to ffffe432
DW_CFA_def_cfa_offset: 8
DW_CFA_advance_loc: 1 to ffffe433
DW_CFA_def_cfa_offset: 4
DW_CFA_nop
DW_CFA_nop
Contents of the .eh_frame section:
00000000 00000010 00000000 CIE "zRS" cf=1 df=-4 ra=8
LOC CFA
00000000 eax+0
00000014 00000068 00000018 FDE cie=00000000 pc=ffffe3ff..ffffe408
LOC CFA eax ecx edx ebx ebp esi edi ra
ffffe3ff exp exp exp exp exp exp exp exp exp
ffffe401 exp exp exp exp exp exp exp exp exp
00000080 00000044 00000084 FDE cie=00000000 pc=ffffe40f..ffffe417
LOC CFA eax ecx edx ebx ebp esi edi ra
ffffe40f exp exp exp exp exp exp exp exp exp
000000c8 00000014 00000000 CIE "zR" cf=1 df=-4 ra=8
LOC CFA ra
00000000 esp+4 c-4
000000e0 00000024 0000001c FDE cie=000000c8 pc=ffffe420..ffffe434
LOC CFA ebp ra
ffffe420 esp+4 u c-4
ffffe421 esp+8 u c-4
ffffe422 esp+12 u c-4
ffffe423 esp+16 c-16 c-4
ffffe431 esp+12 c-4
ffffe432 esp+8 c-4
ffffe433 esp+4 c-4
DW_CFA_def_cfa cfa_register 4 cfa_offset 4
linux-vdso insert unwind ip: 0xffffe420 length: 0x1 end: 0xffffe421 hash a515582b
case DW_CFA_def_cfa: {
op1 = parse_eh_frame_uleb(cfa, cfa_length, &cursor);
op2 = parse_eh_frame_uleb(cfa, cfa_length, &cursor);
//info("DW_CFA_def_cfa cfa_register %lx cfa_offset %lx \n", op1, op2);
state->cfa_register = op1;
state->cfa_offset = op2;
break;
}
state->cfa_register == QAX_REG_SP
cfa 的值 = 所定义寄存器(cfa_register)的值 + 相应的偏移;例如 cfa = context->sp + state->cfa_offset
state->saved_registers[QAX_REG_IP].from == REG_CFA cfa + offset
off = state->saved_registers[QAX_REG_BP].value ;
addr = cfa + off ;
state->saved_registers[QAX_REG_IP].from == REG_REG 指定寄存器
context->ip = context->di ;
QAX_REG_IP 16
QAX_REG_BP 6
case DW_CFA_offset: {
op2 = parse_eh_frame_uleb(cfa, cfa_length, &cursor);
state->saved_registers[op1].from = REG_CFA;
state->saved_registers[op1].value = op2 * df;
break;
}
自己的工具
[INFO] pc: ffffc900047b0960, base: 000000004254ec40, length: 00000000000001b7
[INFO] DW_CFA_def_cfa cfa_register 4 cfa_offset 4
[INFO] DW_CFA_offset from 8 cfa_offset 1 value fffffffffffffffc
[INFO] DW_CFA_offset from 5 cfa_offset 2 value fffffffffffffff8
[INFO] DW_CFA_offset from 3 cfa_offset 5 value ffffffffffffffec
[INFO] DW_CFA_offset from 6 cfa_offset 4 value fffffffffffffff0
[INFO] DW_CFA_offset from 7 cfa_offset 3 value fffffffffffffff4
readelf -wf
000000e8 0000001c 000000ec FDE cie=00000000 pc=4254ec40..4254edf7
DW_CFA_advance_loc: 1 to 4254ec41
DW_CFA_def_cfa_offset: 8
DW_CFA_offset: r5 (ebp) at cfa-8
DW_CFA_advance_loc: 2 to 4254ec43
DW_CFA_def_cfa_register: r5 (ebp)
DW_CFA_advance_loc: 3 to 4254ec46
DW_CFA_offset: r3 (ebx) at cfa-20
DW_CFA_offset: r6 (esi) at cfa-16
DW_CFA_offset: r7 (edi) at cfa-12
出问题的地址0x000000004254ed28,自己工具显示这个地址找不到cfa。
通过readelf发现,cfa 位于 DW_CFA_def_cfa_register: r5 (ebp)。所以只要找到ebp就能找到cfa.
继续跟踪发现,工具中用 #define REG_BP (6) 来判断 cfa_register 是否为ebp,但对自己工具的日志进行过滤后发现,此时 cfa_register 的值却是5。
说明在32位程序的dwarf寄存器编号中,ebp 的编号是5(即 readelf 中的 r5 (ebp)),此时 cfa_register == 5 同样表示以ebp来计算cfa。增加 REG_BP_O 标志,对新字节码进行解析,成功获取到cfa。之后就是根据cfa计算其他各个寄存器。
32位中 sp 不能从 old_rsp 中获取,只能从 task_pt_regs(current)->sp 中得到。
整个过程的核心其实就是对cfa值的获取。之后就是根据偏移计算其他寄存器。
修复逻辑的重点在于字节码的日志过滤,获取到相应地址的cfa。