先分析一个结构体arena_bin_s,它隶属于一个arena,也可以理解成,它代表了这个arena中某一个size class对应的bin
struct arena_bin_s {
/*
* All operations on runcur, runs, and stats require that lock be
* locked. Run allocation/deallocation are protected by the arena lock,
* which may be acquired while holding one or more bin locks, but not
 * vice versa.
*/ 在当前运行的run(对应一个misc)上,runs树,统计的所有信息,都需要用到这个锁
malloc_mutex_t lock;
/*
* Current run being used to service allocations of this bin's size
* class.
*/当前正在用来为bin的size class内存分配提供服务的run
arena_run_t *runcur;
/*
* Tree of non-full runs. This tree is used when looking for an
* existing run when runcur is no longer usable. We choose the
* non-full run that is lowest in memory; this policy tends to keep
* objects packed well, and it can also help reduce the number of
* almost-empty chunks.
*/还有剩余的runs组成的树,当当前的run不能再分配,需要寻找一个已经存在的run时,这棵树被使用
我们选择在内存中地址最低的可用run,这个策略有利于对象打包(packing),并且能减少接近全空(almost-empty)的chunk的数量
arena_run_tree_t runs;
/* Bin statistics. */
malloc_bin_stats_t stats; 统计信息
};
#10 je_imalloc (size=1, tsd=0x7ffff7fd8730) at include/jemalloc/internal/jemalloc_internal.h:906
#11 imalloc_body (usize=0x7fffffffc888, tsd=0x7fffffffc880, size=1) at src/jemalloc.c:1412
#12 je_malloc (size=1) at src/jemalloc.c:1427
#13 0x00000000004316b3 in zmalloc (size=1) at zmalloc.c:125
#14 0x000000000043197c in zstrdup (s=0x6cf8c5 "") at zmalloc.c:215
#15 0x000000000042824c in initServerConfig () at server.c:1476
#16 0x000000000042e55a in main (argc=1, argv=0x7fffffffe3f8) at server.c:3984
tcache分配完毕,会退回到je_imalloc函数,调用
iallocztm(tsd, size, false, tcache_get(tsd, true), false, NULL)
先研究tcache_alloc_small(tsd, arena, tcache, size, zero)
binind = size2index(size); 由于每个bin都是固定大小的,size要和这些大小对齐,分配一个不小于size的内存块
tbin = &tcache->tbins[binind]; 得到tcache对应的这个size所属的tcache_bin_s,即在这个tcache_bin_s中分配这块内存
usize = index2size(binind); 实际要分配的大小
ret = tcache_alloc_easy(tbin);
第一次tbin->ncached == 0,令tbin->low_water = -1; return (NULL);
下次tbin->ncached不为0了,tbin->ncached--;返回tbin->avail[tbin->ncached];也就是先分配索引大的,再分配索引小的
如果分配后tbin->ncached < tbin->low_water,tbin->low_water = tbin->ncached; low_water总是小于等于ncached
tcache_alloc_easy分配失败,就要调用tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind);分配
arena_tcache_fill_small(arena, tbin, binind, config_prof ? tcache->prof_accumbytes : 0);先填充
arena_bin_t *bin = &arena->bins[binind]; 得到对应的arena_bin,规定了某个size应该分配多大内存
ret = tcache_alloc_easy(tbin);再分配
填充的过程,填充前先研究bitmap
malloc_mutex_lock(&bin->lock);
for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> tbin->lg_fill_div); i < nfill; i++)
je_tcache_bin_info[binind].ncached_max = 200,tbin->lg_fill_div = 1
初始化i = 0, nfill = 200 >> 1 = 100,连续填充100个
arena_run_t *run;
void *ptr;
if ((run = bin->runcur) != NULL && run->nfree > 0)
ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]);第二次填充就会调用,第一次下面也会调
else
ptr = arena_bin_malloc_hard(arena, bin); 第一次分配走这里
binind = arena_bin_index(arena, bin);
szind_t binind = bin - arena->bins;带类型指针相减,结果是偏移,0
bin_info = &arena_bin_info[binind]; 全局的arena_bin_info,在arena_boot的bin_info_init中初始化
bin->runcur = NULL;
run = arena_bin_nonfull_run_get(arena, bin);
run = arena_bin_nonfull_run_tryget(bin);
arena_run_t *run = arena_bin_runs_first(bin);
arena_chunk_map_misc_t *miscelm = arena_run_tree_first(&bin->runs);
bin->runs是个rb树,树中节点是arena_chunk_map_misc_t,
最小的元素就是地址最小的arena_chunk_map_misc_t
第一次为NULL
if (run != NULL) {
arena_bin_runs_remove(bin, run);
if (config_stats)
bin->stats.reruns++;
}
return (run)第一次返回NULL
binind = arena_bin_index(arena, bin);
bin_info = &arena_bin_info[binind];
malloc_mutex_unlock(&bin->lock); 解释是下面的代码不需要arena的bin的lock保护了,需要的时候再上锁
malloc_mutex_lock(&arena->lock);
run = arena_run_alloc_small(arena, bin_info->run_size, binind);
从arena中获取一个可用的run,能找到一个还有490页的run(misc)(13页chunk元信息,9页cache元信息)
找到后空闲的第一页和最后一页记录剩余总大小,
arena_mapbits_small_set(chunk, run_ind+i, i, binind, flag_unzeroed);
对于small的run分配完后,没有设置大小,只设置了标志(large有大小)
说明对small的run,mapbits中原本表示大小的位改为记录该页在run内的页偏移(run page offset);注意small run的实际大小由bin_info->run_size决定,并不限于1页(这里binind=0恰好run_size=4096),和map bits结构体里的注释一致
binind = 0, bin_info->run_size = 4096
得到run后初始化run
/* Initialize run internals. */
run->binind = binind; 0
run->nfree = bin_info->nregs; 512
bitmap_init(run->bitmap, &bin_info->bitmap_info);
初始化位图
malloc_mutex_unlock(&arena->lock);
malloc_mutex_lock(&bin->lock);
bin->runcur = run;
bin->runcur->nfree = 512
return (arena_run_reg_alloc(bin->runcur, bin_info));
regind = bitmap_sfu(run->bitmap, &bin_info->bitmap_info);得到一个空闲的region
在位图中分配
miscelm = arena_run_to_miscelm(run);
rpages = arena_miscelm_to_rpages(miscelm);
return ((void *)((uintptr_t)chunk + (pageind << LG_PAGE))); 22
ret = (void *)(
(uintptr_t)rpages +
(uintptr_t)bin_info->reg0_offset + 0
(uintptr_t)(bin_info->reg_interval * regind) 8 * 0,第二次8 * 1
);
run->nfree--;
return (ret);
end arena_bin_malloc_hard
end else
tbin->avail[nfill - 1 - i] = ptr; 100 - 1 - 0,
地址小的放在索引大的地方,分配时也是先分配索引大的
end for
malloc_mutex_unlock(&bin->lock);
tbin->ncached = i; 100
分配内存先从tcache的tbin的avail数组分配,再从位图分配
10k以内(SMALL_MAXCLASS)都是走tcache_alloc_small
10k以上到1792k(448页,large_maxclass)
在arena_boot函数中计算的,large_maxclass = index2size(size2index(chunksize)-1);
size2index(chunksize) = 67,large_maxclass = index2size(66)
根据SIZE_CLASS表推算
36-39是一组,公差是4,则36 + 4 * 7 = 64
64-67是一组,36的lg_grp是13,lg_delta是11,那64的是20,18
67的最大大小是2M,66的是2^20 + 3*2^18 = 1835008 = 1792k
tcache_alloc_large(tsd, arena, tcache, size, zero)
或者arena_malloc_large(arena, size, zero)
小于8页(32k)的tcache_alloc_large分配,大于的arena_malloc_large分配
tcache_alloc_large直接调用arena_malloc_large分配内存,并不会像small路径那样预填充tcache的avail数组;large对象只有在释放时才会被缓存进tcache
以下是上述内容的重复记录(与开头部分相同)
struct arena_bin_s {
/*
* All operations on runcur, runs, and stats require that lock be
* locked. Run allocation/deallocation are protected by the arena lock,
* which may be acquired while holding one or more bin locks, but not
 * vice versa.
*/ 在当前运行的run(对应一个misc)上,runs树,统计的所有信息,都需要用到这个锁
malloc_mutex_t lock;
/*
* Current run being used to service allocations of this bin's size
* class.
*/当前正在用来为bin的size class内存分配提供服务的run
arena_run_t *runcur;
/*
* Tree of non-full runs. This tree is used when looking for an
* existing run when runcur is no longer usable. We choose the
* non-full run that is lowest in memory; this policy tends to keep
* objects packed well, and it can also help reduce the number of
* almost-empty chunks.
*/还有剩余的runs组成的树,当当前的run不能再分配,需要寻找一个已经存在的run时,这棵树被使用
我们选择在内存中地址最低的可用run,这个策略有利于对象打包(packing),并且能减少接近全空(almost-empty)的chunk的数量
arena_run_tree_t runs;
/* Bin statistics. */
malloc_bin_stats_t stats; 统计信息
};
#10 je_imalloc (size=1, tsd=0x7ffff7fd8730) at include/jemalloc/internal/jemalloc_internal.h:906
#11 imalloc_body (usize=0x7fffffffc888, tsd=0x7fffffffc880, size=1) at src/jemalloc.c:1412
#12 je_malloc (size=1) at src/jemalloc.c:1427
#13 0x00000000004316b3 in zmalloc (size=1) at zmalloc.c:125
#14 0x000000000043197c in zstrdup (s=0x6cf8c5 "") at zmalloc.c:215
#15 0x000000000042824c in initServerConfig () at server.c:1476
#16 0x000000000042e55a in main (argc=1, argv=0x7fffffffe3f8) at server.c:3984
tcache分配完毕,会退回到je_imalloc函数,调用
iallocztm(tsd, size, false, tcache_get(tsd, true), false, NULL)
先研究tcache_alloc_small(tsd, arena, tcache, size, zero)
binind = size2index(size); 由于每个bin都是固定大小的,size要和这些大小对齐,分配一个不小于size的内存块
tbin = &tcache->tbins[binind]; 得到tcache对应的这个size所属的tcache_bin_s,即在这个tcache_bin_s中分配这块内存
usize = index2size(binind); 实际要分配的大小
ret = tcache_alloc_easy(tbin);
第一次tbin->ncached == 0,令tbin->low_water = -1; return (NULL);
下次tbin->ncached不为0了,tbin->ncached--;返回tbin->avail[tbin->ncached];也就是先分配索引大的,再分配索引小的
如果分配后tbin->ncached < tbin->low_water,tbin->low_water = tbin->ncached; low_water总是小于等于ncached
tcache_alloc_easy分配失败,就要调用tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind);分配
arena_tcache_fill_small(arena, tbin, binind, config_prof ? tcache->prof_accumbytes : 0);先填充
arena_bin_t *bin = &arena->bins[binind]; 得到对应的arena_bin,规定了某个size应该分配多大内存
ret = tcache_alloc_easy(tbin);再分配
填充的过程,填充前先研究bitmap
malloc_mutex_lock(&bin->lock);
for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> tbin->lg_fill_div); i < nfill; i++)
je_tcache_bin_info[binind].ncached_max = 200,tbin->lg_fill_div = 1
初始化i = 0, nfill = 200 >> 1 = 100,连续填充100个
arena_run_t *run;
void *ptr;
if ((run = bin->runcur) != NULL && run->nfree > 0)
ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]);第二次填充就会调用,第一次下面也会调
else
ptr = arena_bin_malloc_hard(arena, bin); 第一次分配走这里
binind = arena_bin_index(arena, bin);
szind_t binind = bin - arena->bins;带类型指针相减,结果是偏移,0
bin_info = &arena_bin_info[binind]; 全局的arena_bin_info,在arena_boot的bin_info_init中初始化
bin->runcur = NULL;
run = arena_bin_nonfull_run_get(arena, bin);
run = arena_bin_nonfull_run_tryget(bin);
arena_run_t *run = arena_bin_runs_first(bin);
arena_chunk_map_misc_t *miscelm = arena_run_tree_first(&bin->runs);
bin->runs是个rb树,树中节点是arena_chunk_map_misc_t,
最小的元素就是地址最小的arena_chunk_map_misc_t
第一次为NULL
if (run != NULL) {
arena_bin_runs_remove(bin, run);
if (config_stats)
bin->stats.reruns++;
}
return (run)第一次返回NULL
binind = arena_bin_index(arena, bin);
bin_info = &arena_bin_info[binind];
malloc_mutex_unlock(&bin->lock); 解释是下面的代码不需要arena的bin的lock保护了,需要的时候再上锁
malloc_mutex_lock(&arena->lock);
run = arena_run_alloc_small(arena, bin_info->run_size, binind);
从arena中获取一个可用的run,能找到一个还有490页的run(misc)(13页chunk元信息,9页cache元信息)
找到后空闲的第一页和最后一页记录剩余总大小,
arena_mapbits_small_set(chunk, run_ind+i, i, binind, flag_unzeroed);
对于small的run分配完后,没有设置大小,只设置了标志(large有大小)
说明对small的run,mapbits中原本表示大小的位改为记录该页在run内的页偏移(run page offset);注意small run的实际大小由bin_info->run_size决定,并不限于1页(这里binind=0恰好run_size=4096),和map bits结构体里的注释一致
binind = 0, bin_info->run_size = 4096
得到run后初始化run
/* Initialize run internals. */
run->binind = binind; 0
run->nfree = bin_info->nregs; 512
bitmap_init(run->bitmap, &bin_info->bitmap_info);
初始化位图
malloc_mutex_unlock(&arena->lock);
malloc_mutex_lock(&bin->lock);
bin->runcur = run;
bin->runcur->nfree = 512
return (arena_run_reg_alloc(bin->runcur, bin_info));
regind = bitmap_sfu(run->bitmap, &bin_info->bitmap_info);得到一个空闲的region
在位图中分配
miscelm = arena_run_to_miscelm(run);
rpages = arena_miscelm_to_rpages(miscelm);
return ((void *)((uintptr_t)chunk + (pageind << LG_PAGE))); 22
ret = (void *)(
(uintptr_t)rpages +
(uintptr_t)bin_info->reg0_offset + 0
(uintptr_t)(bin_info->reg_interval * regind) 8 * 0,第二次8 * 1
);
run->nfree--;
return (ret);
end arena_bin_malloc_hard
end else
tbin->avail[nfill - 1 - i] = ptr; 100 - 1 - 0,
地址小的放在索引大的地方,分配时也是先分配索引大的
end for
malloc_mutex_unlock(&bin->lock);
tbin->ncached = i; 100
分配内存先从tcache的tbin的avail数组分配,再从位图分配
10k以内(SMALL_MAXCLASS)都是走tcache_alloc_small
10k以上到1792k(448页,large_maxclass)
在arena_boot函数中计算的,large_maxclass = index2size(size2index(chunksize)-1);
size2index(chunksize) = 67,large_maxclass = index2size(66)
根据SIZE_CLASS表推算
36-39是一组,公差是4,则36 + 4 * 7 = 64
64-67是一组,36的lg_grp是13,lg_delta是11,那64的是20,18
67的最大大小是2M,66的是2^20 + 3*2^18 = 1835008 = 1792k
tcache_alloc_large(tsd, arena, tcache, size, zero)
或者arena_malloc_large(arena, size, zero)
小于8页(32k)的tcache_alloc_large分配,大于的arena_malloc_large分配
tcache_alloc_large直接调用arena_malloc_large分配内存,并不会像small路径那样预填充tcache的avail数组;large对象只有在释放时才会被缓存进tcache