DWARF简析


1.需求

通过elf获取到源文件中的相关数据定义,例如,c语言结构体,enum,union等。

调查后可通过elf中的DWARF信息获取, DWARF信息在ELF的.debug_info段中。


2.DWARF简介

DWARF 是一种广泛使用的标准调试信息格式,最初DWARF的设计初衷是配合ELF格式使用,不过DWARF与具体的文件格式是没有依赖关系的。DWARF这个词是中世纪幻想小说中的用语,也没有什么官方含义,后来才提出 “Debugging With Attributed Record Formats” 这个术语来作为DWARF的另一种定义。

DWARF使用DIE(Debugging Information Entry)来描述变量、数据类型、代码等,DIE中包含了标签(Tag)和一系列属性(Attributes)。

DWARF还定义了一些关键的数据结构,如行号表(Line Number Table)、调用栈信息(Call Frame Information)等,有了这些关键数据结构之后,开发者就可以在源码级别动态添加断点、显示完整的调用栈信息、查看调用栈中指定栈帧的信息。

CU - Compilation Unit

DIE - Debugging Information Entry

  • DW_TAG_xxxx: DIE的类型
  • DW_AT_xxxx: DIE的属性
  • DIE也有父子关系,兄弟关系,例如:一个structure的成员变量就是structure DIE的子DIE.

3.如何解析

1.通过readelf tool

readelf -w xxx.elf

2. 通过python lib - pyelftools

使用示例:获取linux kernel module的各种structure定义。

import argparse
import json
import os
from collections import defaultdict
from typing import Optional

from elftools.dwarf.die import DIE
from elftools.elf.elffile import ELFFile
from loguru import logger

# Register 'test.log' as an additional loguru sink, so the debug output
# produced below is also written to that file in the working directory.
logger.add('test.log')

# Prefix placed in front of a *named* type, keyed by the DWARF tag of the
# DIE that carries the name (base types are printed without any prefix).
Map_TypePrefix = dict(
    DW_TAG_base_type='',
    DW_TAG_structure_type='struct ',
    DW_TAG_union_type='union ',
    DW_TAG_pointer_type='pointer ',
)

# Placeholder used for types that have no DW_AT_name at all, keyed by the
# DWARF tag of the anonymous DIE.
Map_AnonTypes = dict(
    DW_TAG_subroutine_type='subroutine',
    DW_TAG_pointer_type='pointer',
    DW_TAG_union_type='union',
)


# recursive function to get type of a DIE node
def die_type_rec(die: DIE, prev: Optional[DIE]):
    """Return a printable type name for ``die`` by following DW_AT_type links.

    The function recurses through the chain of ``DW_AT_type`` references
    (member -> typedef -> pointer -> struct ...) until it reaches a DIE that
    has no ``DW_AT_type`` attribute, and builds the name from that last DIE.

    Args:
        die: The DIE whose type should be resolved.
        prev: The DIE that referenced ``die`` in the previous recursion step,
            or ``None`` for the outermost call.

    Returns:
        A string describing the type. It starts with ``'* '`` when the final
        DIE was reached directly through a ``DW_TAG_pointer_type`` DIE.
        Tags missing from ``Map_TypePrefix`` / ``Map_AnonTypes`` yield an
        ``'unknown: <tag>'`` placeholder.
    """
    type_attr = die.attributes.get("DW_AT_type")
    if type_attr is not None:
        # Let the DIE resolve its own reference: this takes the attribute
        # *name* and deals with the reference form and CU offset itself, so
        # neither a global DWARFInfo object nor a DW_FORM_ref4-only check is
        # needed here.
        ref_die = die.get_DIE_from_attribute("DW_AT_type")
        return die_type_rec(ref_die, die)

    # End of the chain. ``prev`` is None when the outermost DIE itself has no
    # DW_AT_type, so guard before looking at its tag.
    prev_tag = prev.tag if prev is not None else None
    prefix = '* ' if prev_tag == 'DW_TAG_pointer_type' else ''

    name_attr = die.attributes.get("DW_AT_name")
    if name_attr:
        # common named type with prefix
        return prefix + Map_TypePrefix.get(die.tag, f'unknown: {die.tag}') \
            + name_attr.value.decode()

    if die.tag == 'DW_TAG_structure_type' and prev_tag == 'DW_TAG_typedef':
        # typedef-ed anonymous struct: borrow the typedef's name
        return prefix + 'struct ' + prev.attributes.get("DW_AT_name").value.decode()

    # no name types
    return prefix + Map_AnonTypes.get(die.tag, f'unknown: {die.tag}')


# recursive function to get all struct members
def die_info_rec(die: DIE, name=''):
    """Log a struct DIE and record its named members in ``struct_data``.

    For a named ``DW_TAG_structure_type`` DIE the struct name and byte size
    are logged and the function recurses into every child DIE. For a named
    ``DW_TAG_member`` DIE the member's resolved type is stored under
    ``struct_data[name]``; pointer members are stored with a leading ``'*'``
    on the key instead of on the type string.

    Args:
        die: The DIE to inspect.
        name: Key of the enclosing struct (``'struct <name>'``); set by the
            recursive call made for each child of a struct DIE.
    """
    attrs = die.attributes
    name_attr = attrs.get("DW_AT_name")

    if die.tag == 'DW_TAG_member' and name_attr:
        member_name = name_attr.value.decode()
        member_type = die_type_rec(die, None)
        if member_type is None:
            # The type could not be resolved; keep going with a placeholder
            # instead of crashing on ``startswith`` below.
            member_type = 'unknown'

        location = attrs.get("DW_AT_data_member_location")
        bit_size = attrs.get("DW_AT_bit_size")
        bit_offset = attrs.get("DW_AT_data_bit_offset")
        if location:
            logger.debug('  > .{}, type: {}, offset: {}'.format(member_name, member_type, location.value))
        elif bit_size and bit_offset:
            # Argument order follows the labels in the template
            # (bit_offset first, then bit_size).
            logger.debug('  > .{}, type: {}, bit_offset: {}, bit_size: {}'.format(member_name,
                                                                                  member_type,
                                                                                  bit_offset.value,
                                                                                  bit_size.value))

        # save to return data
        if member_type.startswith('*'):
            # pointer member, change to *name -> type
            struct_data[name]['*' + member_name] = member_type[1:]
        else:
            struct_data[name][member_name] = member_type

    if die.tag == 'DW_TAG_structure_type' and name_attr:
        name = 'struct ' + name_attr.value.decode()
        declaration = attrs.get("DW_AT_declaration")
        if declaration and declaration.value:
            # ``name`` already carries the 'struct ' prefix.
            logger.debug("{}: just a declaration".format(name))
            return

        # Tolerate a struct DIE without DW_AT_byte_size rather than raising
        # AttributeError on ``None.value``.
        size_attr = attrs.get("DW_AT_byte_size")
        size = size_attr.value if size_attr else None
        logger.debug("{}, size:{}".format(name, size))

        # recursion into all children DIE
        for child in die.iter_children():
            die_info_rec(child, name)


def parse_top_die_by_cu(dwarfinfo):
    """Walk every compile unit and hand each child of its top DIE to die_info_rec.

    Compile units and the children of each top DIE are numbered from 1 in the
    debug log; the numbers are used for logging only.

    Args:
        dwarfinfo: Object providing ``iter_CUs()`` (here the result of
            ``ELFFile.get_dwarf_info()``).
    """
    banner_start = "------------------------Top Die[{}] start-----------------------------------------"
    banner_end = "------------------------Top Die[{}] end-----------------------------------------"

    for cu_index, unit in enumerate(dwarfinfo.iter_CUs(), start=1):
        logger.debug('  Found a compile unit at offset %s, length %s' % (unit.cu_offset, unit['unit_length']))

        # The top DIE is the root of this compile unit's DIE tree.
        root = unit.get_top_DIE()

        logger.debug(banner_start.format(cu_index))
        logger.debug(root)

        # Only the direct children of the root are visited here;
        # die_info_rec descends further on its own where needed.
        for child_index, child in enumerate(root.iter_children(), start=1):
            logger.debug("Top Die[{}]->child[{}]:", cu_index, child_index)
            die_info_rec(child)

        logger.debug(banner_end.format(cu_index))

# dict for all struct members: 'struct <name>' -> {member name -> type string}
struct_data = defaultdict(dict)

# Input ELF file (a kernel module), relative to the current working directory.
elf_file = ".\\test.ko"

print('Processing file:', elf_file)
# The context manager closes the file on every exit path, including the
# early exit below and any exception raised while parsing.
with open(elf_file, 'rb') as f:
    elffile = ELFFile(f)

    if not elffile.has_dwarf_info():
        print(f'ERROR: input file {elf_file} has no DWARF info')
        # SystemExit instead of the site-provided ``exit`` helper, which is
        # not guaranteed to exist in every interpreter configuration.
        raise SystemExit(1)

    # Bound at module level (a ``with`` block opens no new scope), so code
    # that looks up ``dwarfinfo`` as a global keeps working.
    dwarfinfo = elffile.get_dwarf_info()

    # Parsing must happen while the file is still open; the ELF and DWARF
    # objects are built on top of the open stream ``f``.
    parse_top_die_by_cu(dwarfinfo)

运行结果

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值