前言
主线程的内存管理比较简单:一个main_arena加上一块连续的大内存,里面存放许多chunk即可;子线程则复杂一些,多了一个结构体heap_info,用来描述一块大内存(即一个heap,默认大小为HEAP_MAX_SIZE,64M),一个heap不够用时就再申请一个。
上图
此图着重描述的是子线程,一个heap(由heap_info结构体描述)用完,需要另一个的情况。
子线程内存特点+代码
1. 第一个heap物理内存上从低地址到高地址依次是:heap_info+malloc_state(arena)+chunks
/*
arena.c
#0 new_heap (size=size@entry=6328, top_pad=131072) at arena.c:528
#1 0x00007ffff7895c2a in _int_new_arena (size=4096) at arena.c:720
#2 arena_get2 (a_tsd=a_tsd@entry=0x0, size=size@entry=4096, avoid_arena=avoid_arena@entry=0x0) at arena.c:871
#3 0x00007ffff78963b6 in __GI___libc_malloc (bytes=4096) at malloc.c:2856
*/
/* Create a new arena with initial size "size". */
static mstate
_int_new_arena(size_t size)
{
mstate a;
heap_info *h;
char *ptr;
unsigned long misalign;
h = new_heap(size + (sizeof(*h) + sizeof(*a) + MALLOC_ALIGNMENT),
mp_.top_pad);
if(!h) {
/* Maybe size is too large to fit in a single heap. So, just try
to create a minimally-sized arena and let _int_malloc() attempt
to deal with the large request via mmap_chunk(). */
h = new_heap(sizeof(*h) + sizeof(*a) + MALLOC_ALIGNMENT, mp_.top_pad);
if(!h)
return 0;
}
a = h->ar_ptr = (mstate)(h+1);
2. 第二个heap没有malloc_state(arena),其heap_info.ar_ptr指向第一个heap里的arena
/*
#0 new_heap (size=size@entry=4176, top_pad=131072) at arena.c:528
#1 0x00007ffff7894ad1 in sysmalloc (av=0x7ffff0000020, nb=4112) at malloc.c:2390
#2 _int_malloc (av=av@entry=0x7ffff0000020, bytes=bytes@entry=4096) at malloc.c:3718
#3 0x00007ffff78963d2 in __GI___libc_malloc (bytes=4096) at malloc.c:2859
*/
static void *sysmalloc(INTERNAL_SIZE_T nb, mstate av)
...
else if ((heap = new_heap(nb + (MINSIZE + sizeof(*heap)), mp_.top_pad)))
{
/* Use a newly allocated heap. */
heap->ar_ptr = av;
heap->prev = old_heap;
av->system_mem += heap->size;
arena_mem += heap->size;
/* Set up the new top. */
top(av) = chunk_at_offset(heap, sizeof(*heap));
set_head(top(av), (heap->size - sizeof(*heap)) | PREV_INUSE);
3. 每个heap都是调用mmap分配的内存,大小为HEAP_MAX_SIZE,初始权限为PROT_NONE。随后使用mprotect使得只有开头的一小部分可读可写(大小约为请求大小加上top_pad,默认top_pad为128KB;与参数M_TOP_PAD有关,可通过mallopt设置),以后不够用时再用mprotect多开放一块,使得更多内存对用户可用(grow_heap)。
//new_heap(size_t size, size_t top_pad)
if(aligned_heap_area) {
p2 = (char *)MMAP(aligned_heap_area, HEAP_MAX_SIZE, PROT_NONE,
MAP_NORESERVE);
aligned_heap_area = NULL;
if (p2 != MAP_FAILED && ((unsigned long)p2 & (HEAP_MAX_SIZE-1))) {
__munmap(p2, HEAP_MAX_SIZE);
p2 = MAP_FAILED;
}
}
if(p2 == MAP_FAILED) {
p1 = (char *)MMAP(0, HEAP_MAX_SIZE<<1, PROT_NONE, MAP_NORESERVE);
...
if(__mprotect(p2, size, PROT_READ|PROT_WRITE) != 0)
4. 每个heap的起始地址与HEAP_MAX_SIZE对齐(64M, 地址的后26bit全是0)
# define HEAP_MAX_SIZE (2 * DEFAULT_MMAP_THRESHOLD_MAX) //64M
#define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long))
两个问题
1. 如何由任意要free的地址找到其对应的arena?
arena是线程相关的,可以通过线程找到arena。但是malloc可能发生在线程A,而free不一定发生在同一个线程A,所以不能用执行free的线程来找arena。那free时如何找到对应的arena呢?
ar_ptr = arena_for_chunk(ptr);
free(ptr)
-> get chunk pointer chunkptr by ptr-0x10
-> get heap_info by chunkptr & ~(HEAP_MAX_SIZE-1)
-> get arena by heap_info.ar_ptr
(以上路径只适用于chunk的size字段带有NON_MAIN_ARENA标志位的情况;没有该标志位的chunk属于主线程,直接使用main_arena。)
2. 如何确保“每个heap的起始地址与HEAP_MAX_SIZE对齐”?
向mmap申请分配两倍的HEAP_MAX_SIZE,必然有个中间部分满足对齐条件,两头munmap回系统。
arena.c
p1 = (char *)MMAP(0, HEAP_MAX_SIZE<<1, PROT_NONE, MAP_NORESERVE);
if(p1 != MAP_FAILED) {
p2 = (char *)(((unsigned long)p1 + (HEAP_MAX_SIZE-1))
& ~(HEAP_MAX_SIZE-1));
ul = p2 - p1;
if (ul)
__munmap(p1, ul);
else
aligned_heap_area = p2 + HEAP_MAX_SIZE;
__munmap(p2 + HEAP_MAX_SIZE, HEAP_MAX_SIZE - ul);
}
读者可通过cat /proc/[pid]/maps查看内存的变化。
如果申请的地址恰巧HEAP_MAX_SIZE对齐,第二块HEAP_MAX_SIZE内存也被释放但记录其地址到变量aligned_heap_area,下次再new_heap时直接在aligned_heap_area处创建mapping,如果失败则走上面的2倍逻辑。
MMAP(aligned_heap_area, HEAP_MAX_SIZE
一个例子
提供一个C程序例子,帮助读者调试(编译时加上-g -pthread)。你可以给new_heap、grow_heap或者free下断点。
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Sub-thread entry point for the debugging demo.
 *
 * Performs 16 * 1024 allocations of 4096 bytes each (64M / 4096 requests).
 * Only the block returned by the final malloc() is freed; the earlier blocks
 * are deliberately kept allocated so that the requests accumulate.
 *
 * arg is unused. Returns NULL; on allocation failure an error is reported
 * with perror() and the calling thread is terminated via pthread_exit().
 */
void* thread_function(void* arg) {
    enum { BLOCK_SIZE = 4096, BLOCK_COUNT = 16 * 1024 };
    void *last_block = NULL;

    for (int done = 0; done < BLOCK_COUNT; done++) {
        last_block = malloc(BLOCK_SIZE);
        if (!last_block) {
            perror("Memory allocation failed");
            pthread_exit(NULL);
        }
    }

    /* Release only the most recent block, exactly one free() call. */
    free(last_block);
    return NULL;
}
/*
 * Demo driver: runs thread_function in a sub-thread, waits for it to finish,
 * then performs one small allocation on the main thread.
 *
 * Returns 0 on success, 1 if the thread could not be created or joined.
 */
int main(void) {
    pthread_t tid;
    int ret;

    /* Create the sub-thread. */
    ret = pthread_create(&tid, NULL, thread_function, NULL);
    if (ret != 0) {
        /* pthread_* functions return the error number and do not set errno,
           so perror() would print an unrelated message here. */
        fprintf(stderr, "pthread_create failed: %s\n", strerror(ret));
        return 1;
    }

    /* Wait for the sub-thread to finish. */
    ret = pthread_join(tid, NULL);
    if (ret != 0) {
        fprintf(stderr, "pthread_join failed: %s\n", strerror(ret));
        return 1;
    }

    /* One allocation made from the main thread, for comparison in the
       debugger with the allocations made by the sub-thread. */
    void *main_block = malloc(20);
    free(main_block);
    return 0;
}