问题描述
场景:
libmem.so提供一套新的malloc/free内存接口,用以取代glibc的内存管理库。
内存库libmem.so和业务库libtest.so都在构造函数里面注册了pthread_atfork,但是libtest.so通过pthread_atfork(prepare, parent, child)注册的child函数中调用了malloc/free函数(内部需要加锁)。
若进程的x线程正在调用malloc(此时持有malloc锁),同时y线程执行了fork(此时会锁住libtest.so注册的锁),那么在子进程中,libtest.so通过pthread_atfork注册的child函数会优先执行(此时malloc锁尚未被重新init)。该child函数调用了malloc/free函数,而持有malloc锁的x线程在子进程中并不存在,这把锁永远不会被释放,于是就会出现ABBA死锁。
构造:
1, libmem.so、libtest.so在构造函数中注册pthread_atfork。在libtest.so注册pthread_atfork的prepare函数里面申请内存,在child释放内存。
2,app在ld时需要 -lmem -ltest,先链接libmem.so,再链接libtest.so,这样会优先执行libtest.so的构造函数。
glibc如何解决这种需求?
glibc也有这样的需求,那么为什么glibc内存管理的锁不会出现这种现象?查看fork函数的实现,可以看到其在调用pthread_atfork接口之前,会先调用glibc自己的如下三个函数。保证其他so万一需要使用malloc/free接口不至于出现死锁。
/* glibc-internal malloc fork hooks.  In the __libc_fork listing that
   follows they are called directly by fork itself, not through the
   pthread_atfork handler list.  */

/* Called in the parent before _Fork, in the multi-threaded case only,
   after the pthread_atfork prepare handlers have already run.  */
void
__malloc_fork_lock_parent (void);
/* Called in the parent after _Fork, in the multi-threaded case only,
   before the pthread_atfork parent handlers run.  */
void
__malloc_fork_unlock_parent (void);
/* Called in the child after _Fork, in the multi-threaded case only,
   before the pthread_atfork child handlers run.  */
void
__malloc_fork_unlock_child (void);
之后在fork时,glibc会调用所有用pthread_atfork注册的接口,那么我们只需要在执行这些回调函数之前优先调用libmem.so的回调函数,就可以保证不会死锁。
/* glibc's fork implementation, quoted to show where the malloc locks
   are handled relative to the pthread_atfork handlers.

   Order of operations visible in this listing:
     1. The prepare handlers run.
     2. Multi-threaded case only: NSS database state is prepared, the
	libio list lock is taken, and the malloc locks are taken last.
     3. _Fork is called.
     4. Child (pid == 0): in the multi-threaded case the malloc locks
	are released before the child handlers run.
     5. Parent (or failed _Fork): in the multi-threaded case the malloc
	locks are released before the parent handlers run.

   Returns the value returned by _Fork.  If _Fork failed (pid < 0) its
   errno value is restored after the parent handlers have run.  */
pid_t
__libc_fork (void)
{
/* Determine if we are running multiple threads. We skip some fork
handlers in the single-thread case, to make fork safer to use in
signal handlers. Although POSIX has dropped async-signal-safe
requirement for fork (Austin Group tracker issue #62) this is
best effort to make is async-signal-safe at least for single-thread
case. */
bool multiple_threads = __libc_single_threaded == 0;
__run_fork_handlers (atfork_run_prepare, multiple_threads);
struct nss_database_data nss_database_data;
/* If we are not running multiple threads, we do not have to
preserve lock state. If fork runs from a signal handler, only
async-signal-safe functions can be used in the child. These data
structures are only used by unsafe functions, so their state does
not matter if fork was called from a signal handler. */
if (multiple_threads)
{
call_function_static_weak (__nss_database_fork_prepare_parent,
&nss_database_data);
_IO_list_lock ();
/* Acquire malloc locks. This needs to come last because fork
handlers may use malloc, and the libio list lock has an
indirect malloc dependency as well (via the getdelim
function). */
call_function_static_weak (__malloc_fork_lock_parent);
}
pid_t pid = _Fork ();
/* Child process.  */
if (pid == 0)
{
fork_system_setup ();
/* Reset the lock state in the multi-threaded case. */
if (multiple_threads)
{
__libc_unwind_link_after_fork ();
fork_system_setup_after_fork ();
/* Release malloc locks. */
call_function_static_weak (__malloc_fork_unlock_child);
/* Reset the file list. These are recursive mutexes. */
fresetlockfiles ();
/* Reset locks in the I/O code. */
_IO_list_resetlock ();
call_function_static_weak (__nss_database_fork_subprocess,
&nss_database_data);
}
/* Reset the lock the dynamic loader uses to protect its data. */
__rtld_lock_initialize (GL(dl_load_lock));
/* Reset the lock protecting dynamic TLS related data. */
__rtld_lock_initialize (GL(dl_load_tls_lock));
reclaim_stacks ();
/* Run the handlers registered for the child. */
__run_fork_handlers (atfork_run_child, multiple_threads);
}
/* Parent process, or _Fork failed (pid < 0).  */
else
{
/* If _Fork failed, preserve its errno value. */
int save_errno = errno;
/* Release acquired locks in the multi-threaded case. */
if (multiple_threads)
{
/* Release malloc locks, parent process variant. */
call_function_static_weak (__malloc_fork_unlock_parent);
/* We execute this even if the 'fork' call failed. */
_IO_list_unlock ();
}
/* Run the handlers registered for the parent. */
__run_fork_handlers (atfork_run_parent, multiple_threads);
if (pid < 0)
__set_errno (save_errno);
}
return pid;
}
解决方法:
这里提供一个简单的方法,可以再完善一下。
由glibc提供一个代替pthread_atfork的接口供libmem.so调用,主要作用是记录prepare、parent、child三个回调函数。之后在fork时,glibc会调用所有用pthread_atfork注册的回调函数,我们只需要在执行这些回调函数之前优先调用libmem.so的回调函数,就可以保证不会死锁。
第一步:修改glibc register_atfork.c中的__run_fork_handlers 函数,在执行各个so注册的pthread_atfork的接口函数之前运行libmem.so的函数。如下begin,end之间的为新增代码。
/****************************begin**********************************/
/* The single privileged set of fork handlers, kept separate from the
   regular pthread_atfork handler list.  It has static storage duration
   and is therefore zero-initialized: every handler is NULL until
   __register_user_atfork is called.  */
static struct fork_handler __register_user_handle;

/* Record PREPARE, PARENT and CHILD as the privileged fork handlers,
   together with DSO_HANDLE.  Any of the handlers may be NULL.

   Only one set of handlers is kept; calling this function again
   overwrites the previous registration.  No locking is performed
   here.  NOTE(review): this is presumably meant to be called once
   from a library constructor, before any thread can fork -- confirm.

   Returns 0; the registration cannot fail.  */
int
__register_user_atfork (void (*prepare) (void), void (*parent) (void),
                        void (*child) (void), void *dso_handle)
{
  __register_user_handle.prepare_handler = prepare;
  __register_user_handle.parent_handler = parent;
  __register_user_handle.child_handler = child;
  __register_user_handle.dso_handle = dso_handle;

  /* The function is declared to return int; falling off the end
     without a value is undefined behaviour as soon as a caller reads
     the result.  */
  return 0;
}
/*****************************end*********************************/
/* Run the fork handlers for phase WHO.

   WHO selects the phase: atfork_run_prepare, atfork_run_parent or
   atfork_run_child.  If DO_LOCKING is true, atfork_lock is acquired at
   the start of the prepare phase and released at the end of the
   parent/child phase, so it stays held across the fork itself.

   Besides the handlers registered through pthread_atfork, the single
   privileged handler set stored in __register_user_handle is run so
   that it behaves as if it had been registered before everything
   else:
     - prepare: the regular handlers run in reverse registration
       order, then the privileged prepare handler runs LAST;
     - parent/child: the privileged handler runs FIRST, then the
       regular handlers in registration order.
   Running the privileged prepare handler last means regular prepare
   handlers still execute before it; running the privileged
   parent/child handler first means it has completed before any
   regular parent/child handler executes.  */
void
__run_fork_handlers (enum __run_fork_handler_type who, _Bool do_locking)
{
  struct fork_handler *runp;
  struct fork_handler *runup = &__register_user_handle;

  if (who == atfork_run_prepare)
    {
      if (do_locking)
        lll_lock (atfork_lock, LLL_PRIVATE);

      /* Regular prepare handlers: last registered runs first.  */
      size_t sl = fork_handler_list_size (&fork_handlers);
      for (size_t i = sl; i > 0; i--)
        {
          runp = fork_handler_list_at (&fork_handlers, i - 1);
          if (runp->prepare_handler != NULL)
            runp->prepare_handler ();
        }

      /****************************begin**********************************/
      /* Privileged prepare handler.  This must call prepare_handler
         (not parent_handler), and it runs after the regular prepare
         handlers so that those handlers complete before it does.  */
      if (runup->prepare_handler != NULL)
        runup->prepare_handler ();
      /*****************************end*********************************/
    }
  else
    {
      /****************************begin**********************************/
      /* Privileged parent/child handler runs before any handler
         registered through pthread_atfork.  */
      if (who == atfork_run_child && runup->child_handler)
        runup->child_handler ();
      else if (who == atfork_run_parent && runup->parent_handler)
        runup->parent_handler ();
      /*****************************end*********************************/

      /* Regular parent/child handlers: registration order.  */
      size_t sl = fork_handler_list_size (&fork_handlers);
      for (size_t i = 0; i < sl; i++)
        {
          runp = fork_handler_list_at (&fork_handlers, i);
          if (who == atfork_run_child && runp->child_handler)
            runp->child_handler ();
          else if (who == atfork_run_parent && runp->parent_handler)
            runp->parent_handler ();
        }

      if (do_locking)
        lll_unlock (atfork_lock, LLL_PRIVATE);
    }
}
第二步:将libmem.so构造函数中的pthread_atfork替换为如下代码,主要是通过dlsym来查找__register_user_atfork函数符号,然后调用即可,注意该so需要添加-D_GNU_SOURCE宏。
/* Type of a single fork handler (prepare, parent or child).  */
typedef void (*_reg_para_func)(void);
/* Type of __register_user_atfork: three handlers plus a DSO handle.  */
typedef int (*_reg_func)(_reg_para_func, _reg_para_func, _reg_para_func, void *);
/* Address of __register_user_atfork, resolved at load time by xx_Init.  */
_reg_func reg_atfork_handle = NULL;

/* Library constructor: look up __register_user_atfork with dlsym and
   register this library's prepare/parent/child fork handlers through
   it instead of through pthread_atfork.

   RTLD_NEXT requires _GNU_SOURCE to be defined before <dlfcn.h> is
   included.  The assert stops the process if the symbol cannot be
   found.  NOTE(review): with NDEBUG defined the assert vanishes and a
   missing symbol becomes a call through a NULL pointer -- confirm the
   library is never built with NDEBUG, or add an explicit fallback.  */
void __attribute__((constructor)) xx_Init(void)
{
    reg_atfork_handle = (_reg_func) dlsym(RTLD_NEXT, "__register_user_atfork");
    assert(reg_atfork_handle);
    /* The registration function takes four arguments; the last one is
       the DSO handle.  NULL is passed here.  NOTE(review): pass
       __dso_handle instead if the glibc side ever needs to identify
       the registering library.  */
    reg_atfork_handle(prepare, parent, child, NULL); /* register the handlers */
}