GDB内存调试初探七

非main_arena的内存分配

初探六中提到,正常情况下创建一个新的线程时,会为新线程创建独立的arena,其类型为struct malloc_state。当一个线程退出时,其动态分配的堆内存不会被释放,而是通过malloc_state结构体中的next_free指针链接保存起来(那么当再次创建新线程时可复用):

/* malloc/malloc.c */
struct malloc_state {
    ...
    struct malloc_state * next;
    struct malloc_state * next_free;
    ...
};

当一个arena现有空闲内存不能满足应用的内存分配要求时,ptmalloc内存分配模块会向Linux内核申请内存。根据是否为main_arena,内存分配的实现不同:

/* malloc/malloc.c */
static void * sysmalloc (INTERNAL_SIZE_T nb, mstate av) {
    ...
      if (av != &main_arena)
    {    
      heap_info *old_heap, *heap;
      size_t old_heap_size;

      /* First try to extend the current heap. */
      old_heap = heap_for_ptr (old_top);
      old_heap_size = old_heap->size;
      if ((long) (MINSIZE + nb - old_size) > 0
          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
        {    
          av->system_mem += old_heap->size - old_heap_size;
          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
                    | PREV_INUSE);
        }    
      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
        {    
        ...
      else     /* av == main_arena */
    { /* Request enough space for nb + pad + overhead */
      size = nb + mp_.top_pad + MINSIZE;

      /*
         If contiguous, we can subtract out existing space that we hope to
         combine with new space. We add it back later only if
         we don't actually get contiguous space.
       */

      if (contiguous (av))
        size -= old_size;

      /*
         Round to a multiple of page size.
         If MORECORE is not contiguous, this ensures that we only call it
         with whole-page arguments.  And if MORECORE is contiguous and
         this is not first time through, this preserves page-alignment of
         previous calls. Otherwise, we correct to page-align below.
       */
      size = ALIGN_UP (size, pagesize);
      /*
         Don't try to call MORECORE if argument is so big as to appear
         negative. Note that since mmap takes size_t arg, it may succeed
         below even if we cannot call MORECORE.
       */
      if (size > 0)
        {
          brk = (char *) (MORECORE (size));
          LIBC_PROBE (memory_sbrk_more, 2, brk, size);
        }
    ...
}

对于非main_arena的内存分配(通常用于进程的子线程),ptmalloc会调用grow_heap、new_heap等函数来分配内存,这些函数与堆空间结构体struct _heap_info相关:

/* malloc/arena.c */
/* A heap is a single contiguous memory region holding (coalesceable)
   malloc_chunks.  It is allocated with mmap() and always starts at an
   address aligned to HEAP_MAX_SIZE.  */
typedef struct _heap_info {
  mstate ar_ptr; /* Arena for this heap. */
  struct _heap_info *prev; /* Previous heap. */
  size_t size;   /* Current size in bytes. */
  size_t mprotect_size; /* Size in bytes that has been mprotected
                           PROT_READ|PROT_WRITE.  */
  /* Make sure the following data is properly aligned, particularly
     that sizeof (heap_info) + 2 * SIZE_SZ is a multiple of
     MALLOC_ALIGNMENT. */
  char pad[-6 * SIZE_SZ & MALLOC_ALIGN_MASK];
} heap_info;

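堆空间的最大大小(HEAP_MAX_SIZE)是固定的,对于32位的glibc而言该值通常为1MB;对于64位而言为64MB。非main_arena可以包含多个heap_info。不过malloc_state结构体中没有指向heap_info的指针;由于每个堆的起始地址都按HEAP_MAX_SIZE对齐,将堆内任意地址(例如arena的top chunk地址)的低位清零即可得到其所在的heap_info,再通过prev指针可以遍历该arena的其他堆。参考以下宏定义及代码可以了解从非main_arena获取堆指针的操作: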

/* malloc/arena.c: */
/* find the heap and corresponding arena for a given ptr */
#define heap_for_ptr(ptr) \
  ((heap_info *) ((unsigned long) (ptr) & ~(HEAP_MAX_SIZE - 1)))
#define arena_for_chunk(ptr) \
  (chunk_main_arena (ptr) ? &main_arena : heap_for_ptr (ptr)->ar_ptr)

/* malloc/malloc.c: */
  /* Record incoming configuration of top */
  old_top = av->top;
  ...
  if (av != &main_arena)
    {
      heap_info *old_heap, *heap;
      size_t old_heap_size;

      /* First try to extend the current heap. */
      old_heap = heap_for_ptr (old_top);
      old_heap_size = old_heap->size;

对于main_arena的内存分配,会调用brk、sbrk等系统调用以分配内存。这样的一个结果是,main_arena所分配的内存是连续的(除大片内存通过mmap系统调用直接分配外):这是main_arena与非main_arena的重要区别。对于后者,可以轻易获得其所有的堆内存,解析其分配的内存(这正是内存初探六中的代码实现);对于前者,需要通过特定的计算才能获得分配内存的起始地址,暂时不讨论。

解析内存的分配

在内存初探六的代码基础上,笔者增加了解析已分配内存的功能。ptmalloc为了尽量减少内存碎片,并减少已分配或释放的内存信息的大小,使用malloc_chunk结构体表示一段内存:

/*
  This struct declaration is misleading (but accurate and necessary).
  It declares a "view" into memory allowing access to necessary
  fields at known offsets from a given base. See explanation below.
*/
struct malloc_chunk {
  INTERNAL_SIZE_T      mchunk_prev_size;  /* Size of previous chunk (if free).  */
  INTERNAL_SIZE_T      mchunk_size;       /* Size in bytes, including overhead. */
  struct malloc_chunk* fd;         /* double links -- used only if free. */
  struct malloc_chunk* bk;
  /* Only used for large blocks: pointer to next larger size.  */
  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
  struct malloc_chunk* bk_nextsize;
};

对于一个malloc_chunk,在任何情况下mchunk_size都是有效的;其保存了整段内存的大小(包括该结构体的头部),其最低3位则用于保存该段内存相关的标志信息(PREV_INUSE、IS_MMAPPED、NON_MAIN_ARENA)。为应用分配的内存地址为chunk起始地址偏移两个INTERNAL_SIZE_T大小:

/* conversion from malloc headers to user pointers, and back */
#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))

不过,应用可访问的内存空间大小却不是mchunk_size - 2 * SIZE_SZ,而是mchunk_size - SIZE_SZ。当一段内存在使用时,紧随其后的下一个chunk的mchunk_prev_size字段是无效的;其占用的内存空间可以被应用正常读写。由此可见,ptmalloc内存的管理是相当高效的。笔者对ptmalloc.py解析脚本的修改如下:

diff --git a/ptmalloc.py b/ptmalloc.py
index d16cb0f..0c13e83 100644
--- a/ptmalloc.py
+++ b/ptmalloc.py
@@ -178,6 +178,47 @@ class ptmalloc(gdb.Command):
         gdb.write('\tfree chunk size:  {:#x} ({})\n'.format(mtotal, self.dump_size(mtotal)))
         return True
 
+    def dump_chunks(self, first_chunk, total_size, verbose=False):
+        cur_size = 0
+        num_chunks = 0
+        prev_chunk = None
+        chunk = first_chunk.cast(self.type_mchunk_ptr)
+        while True:
+            mchunk_size = int(chunk['mchunk_size'].cast(self.type_ulong))
+            chunk_size = mchunk_size & ~0x7
+            if chunk_size == 0:
+                break
+            cursize = cur_size + chunk_size
+            chunk_addr = int(chunk.cast(self.type_ulong))
+            if cursize > total_size:
+                gdb.write('\t\tError, invalid chunk at {:#x}, size: {:#x}\n'.format(
+                    chunk_addr, chunk_size), gdb.STDERR)
+                gdb.flush(gdb.STDERR)
+                break
+            if verbose:
+                prev_inuse = (mchunk_size & 0x1) != 0
+                is_mmapped = (mchunk_size & 0x2) != 0
+                non_main = (mchunk_size & 0x4) != 0
+                gdb.write('\t\tchunk[{:#x}], size: {:#010x}, prev_inuse: {}'.format(
+                    chunk_addr, chunk_size, prev_inuse), gdb.STDERR)
+                gdb.write(', mapped: {}, non_main: {}\n'.format(
+                    is_mmapped, non_main), gdb.STDERR)
+                gdb.flush(gdb.STDERR)
+            num_chunks += 1
+            cur_size = cursize; prev_chunk = chunk
+            chunk_addr = chunk.cast(self.type_ulong) + chunk_size
+            chunk = chunk_addr.cast(self.type_mchunk_ptr)
+        if prev_chunk is not None:
+            msize = int(prev_chunk['mchunk_size'].cast(self.type_ulong))
+            msize &= ~0x7
+            gdb.write('\t\tPrev chunk at {:#x}, size: {:#x}\n'.format(
+                int(prev_chunk.cast(self.type_ulong)), msize), gdb.STDERR)
+            gdb.flush(gdb.STDERR)
+        gdb.write('\t\tHeap: number of chunks: {}, total size: {:#x}, {:#x}\n'.format(
+            num_chunks, total_size, cur_size), gdb.STDERR)
+        gdb.flush(gdb.STDERR)
+        return True
+
     def dump_heaps(self, mstate, maddr):
         if self.is_main_arena(mstate):
             # main_arena does not have associated heap structure ?
@@ -202,6 +243,7 @@ class ptmalloc(gdb.Command):
             gdb.write('\t\tarena for heap:    {:#x}\n'.format(m_addr))
             gdb.write('\t\tsize for heap:     {:#x} ({})\n'.format(
                 heap_size, self.dump_size(heap_size)))
+            self.dump_chunks(chunk_addr, heap_size)

笔者在内存初探五中用Lua脚本写过类似的功能:需要通过gdb调试器手动将main_arena内存导出为文件;对于多线程、大量内存的应用,这种先导出文件再解析的方法并不适用。

多线程的内存检测

笔者编写了简单的多线程内存分配调试应用,它存在一个内存访问越界的缺陷:

if (prev != NULL) {
    unsigned long * badptr;
    badptr = (unsigned long *) (prev->mptr + prev->mlen);
    fprintf(stdout, "Corrupting buffer: %p\n", badptr);
    fflush(stdout);
    *badptr = 0x20210907;
    mbad = 0;
}

通过以上的内存检测,可以检测到相关的内存错误:

Program terminated with signal SIGABRT, Aborted.
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
50	../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
[Current thread is 1 (Thread 0x7ffff5593700 (LWP 6148))]
(gdb) bt
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
#1  0x00007ffff7dc1859 in __GI_abort () at abort.c:79
#2  0x00007ffff7e2c3ee in __libc_message (action=action@entry=do_abort, fmt=fmt@entry=0x7ffff7f56285 "%s\n") at ../sysdeps/posix/libc_fatal.c:155
#3  0x00007ffff7e3447c in malloc_printerr (str=str@entry=0x7ffff7f586b8 "free(): invalid next size (normal)") at malloc.c:5347
#4  0x00007ffff7e35fbc in _int_free (av=0x7fffd8000020, p=0x7fffd9081210, have_lock=<optimized out>) at malloc.c:4322
#5  0x0000555555555336 in memblock_free (pmb=<optimized out>) at multi-thread-memory.c:158
#6  memblock_remove (head=0x7fffda5168f0, where=<optimized out>) at multi-thread-memory.c:158
#7  0x0000555555555649 in thread_func (tharg=0x7fffffffdd28) at multi-thread-memory.c:246
#8  0x00007ffff7f97609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#9  0x00007ffff7ebe293 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(gdb) source ptmalloc.py
(gdb) ptmalloc
     ......
Arena at address 0x7fffd8000020 ->
	main_arena:       False
	mutex:            1
	flags:            0x2
	have_fastchunks:  0
	attached_threads: 1
	system_mem:       0x4162000 (65 MB, 392.00 KB)
	max_mem:          0x4162000 (65 MB, 392.00 KB)
	heap[0] at address: 0x7fffb0000000
		chunk for heap:    0x7fffb0000020
		arena for heap:    0x7fffd8000020
		size for heap:     0x165000 (1 MB, 404.00 KB)
		Prev chunk at 0x7fffb0164880, size: 0x780
		Heap: number of chunks: 15, total size: 0x165000, 0x164fe0
	heap[1] at address: 0x7fffd8000000
		chunk for heap:    0x7fffd80008c0
		arena for heap:    0x7fffd8000020
		size for heap:     0x3ffd000 (63 MB, 1012.00 KB)
		Error, invalid chunk at 0x7fffd909ae20, size: 0x20210900
		Prev chunk at 0x7fffd9081210, size: 0x19c10
		Heap: number of chunks: 312, total size: 0x3ffd000, 0x109a560

通过gdb可以查看异常内存及上面内存访问越界写入的0x20210907:

(gdb) x/8xg 0x7fffd9081210
0x7fffd9081210:	0x0000000000000000	0x0000000000019c15
0x7fffd9081220:	0x00007fffd81bc290	0x00007fffd91f94a0
0x7fffd9081230:	0x00007fffd9081240	0x0000000000019be8
0x7fffd9081240:	0xbd280999ec1c10bc	0x6cb8b1209928da29
(gdb) x/8xg 0x7fffd9081210+0x19c10
0x7fffd909ae20:	0x6d0aa59e0a5a3d7c	0x0000000020210907
0x7fffd909ae30:	0x00007fffdbee0220	0x00007fffdbb64210
0x7fffd909ae40:	0x00007fffd909ae50	0x00000000000052e0
0x7fffd909ae50:	0x255404a6d65fdbf0	0xc88ea18416a0218d

被踩的内存大小为8字节,对应malloc_chunk结构体的mchunk_size字段。注意,0x7fffd909ae20地址处的8字节可能是有效的,这取决于chunk[0x7fffd9081210]有没有被应用释放。

内存访问越界问题的定位

本文主要记录了笔者在前一篇文章的基础上增加的多线程内存解析的gdb扩展代码及其内存解析功能,它能够检测到异常的内存。对于内存访问越界的问题,笔者此前提出两种解决方法,分别注重不同的关键点:通过mprotect直接定位到内存访问越界的代码;以malloc_chunk为基础,通过解析内存定位到异常内存及异常内存的特点,之后获取分配该内存的返回地址,从而得到异常内存段的分配代码。异常内存通常存在一定的规律,例如笔者的调试应用在越界处写入了固定的值0x20210907;异常内存的大小有时候是固定的,也可以帮助我们逼近有问题的应用代码。此外,发生内存访问越界的问题时,修改内存的可能不是应用,还有可能是内核、DMA驱动等;这些内存异常的差异在实际的定位过程中都需要考虑到。ptmalloc内存管理模块提供了malloc_hook,但笔者从未使用过。参考其手册的代码,可以认为其不适合多线程的调试,这与ptmalloc定义的全局钩子变量相关:

       void *(*volatile __malloc_hook)(size_t size, const void *caller);
       void *(*volatile __realloc_hook)(void *ptr, size_t size, const void *caller);
       void *(*volatile __memalign_hook)(size_t alignment, size_t size, const void *caller);
       void (*volatile __free_hook)(void *ptr, const void *caller);

多线程访问这些变量是不可靠的,而加入互斥机制又会降低软件的性能,因此不推荐使用。

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值