我们来考察下pthread中锁的实现。
首先看下初始化宏:PTHREAD_MUTEX_INITIALIZER。
# define PTHREAD_MUTEX_INITIALIZER \
{ { 0, 0, 0, 0, 0, __PTHREAD_SPINS, { 0, 0 } } }
/* Data structures for mutex handling. The structure of the attribute
type is not exposed on purpose. */
/*删减了32位的代码*/
typedef union
{
struct __pthread_mutex_s
{
int __lock;
unsigned int __count;
int __owner;
#ifdef __x86_64__
unsigned int __nusers;
#endif
/* KIND must stay at this position in the structure to maintain
binary compatibility with static initializers. */
int __kind;
#ifdef __x86_64__
short __spins;
short __elision;
__pthread_list_t __list;
# define __PTHREAD_MUTEX_HAVE_PREV 1
/* Mutex __spins initializer used by PTHREAD_MUTEX_INITIALIZER. */
# define __PTHREAD_SPINS 0, 0
#else
#endif
} __data;
char __size[__SIZEOF_PTHREAD_MUTEX_T];
long int __align;
} pthread_mutex_t;
注意PTHREAD_MUTEX_INITIALIZER 是8个成员的结构体,与pthread_mutex_t定义相符。并且所有成员都初始化为0。
初始化之后,我们接着看看pthread_mutex_lock操作:
#ifndef __pthread_mutex_lock
strong_alias (__pthread_mutex_lock, pthread_mutex_lock)
hidden_def (__pthread_mutex_lock)
#endif
/* Lock MUTEX, dispatching on the type bits read from __data.__kind.
   Timed (normal), recursive, adaptive and error-checking mutexes are
   handled inline; any type with bits outside the kind mask and elision
   flags is delegated to __pthread_mutex_lock_full.
   Returns 0 on success, EAGAIN when the recursive count would overflow,
   EDEADLK when an error-checking mutex is relocked by its owner, or
   whatever the delegated / elision lock call returns.  */
int
__pthread_mutex_lock (pthread_mutex_t *mutex)
{
/* The opaque __size array must be large enough to hold __data.  */
assert (sizeof (mutex->__size) >= sizeof (mutex->__data));
unsigned int type = PTHREAD_MUTEX_TYPE_ELISION (mutex);
LIBC_PROBE (mutex_entry, 1, mutex);
/* Bits outside the kind mask and the elision flags select the
   out-of-line slow path.  */
if (__builtin_expect (type & ~(PTHREAD_MUTEX_KIND_MASK_NP
| PTHREAD_MUTEX_ELISION_FLAGS_NP), 0))
return __pthread_mutex_lock_full (mutex);
if (__glibc_likely (type == PTHREAD_MUTEX_TIMED_NP))
{
FORCE_ELISION (mutex, goto elision);
/* Also the jump target of the non-SMP adaptive case and of the
   error-checking case below.  */
simple:
/* Normal mutex. */
LLL_MUTEX_LOCK (mutex);
assert (mutex->__data.__owner == 0);
}
#ifdef HAVE_ELISION
else if (__glibc_likely (type == PTHREAD_MUTEX_TIMED_ELISION_NP))
{
elision: __attribute__((unused))
/* This case can never happen on a system without elision,
as the mutex type initialization functions will not
allow to set the elision flags. */
/* Don't record owner or users for elision case. This is a
tail call. */
return LLL_MUTEX_LOCK_ELISION (mutex);
}
#endif
else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex)
== PTHREAD_MUTEX_RECURSIVE_NP, 1))
{
/* Recursive mutex. */
pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
/* Check whether we already hold the mutex. */
if (mutex->__data.__owner == id)
{
/* Just bump the counter. */
if (__glibc_unlikely (mutex->__data.__count + 1 == 0))
/* Overflow of the counter. */
return EAGAIN;
++mutex->__data.__count;
return 0;
}
/* We have to get the mutex. */
LLL_MUTEX_LOCK (mutex);
assert (mutex->__data.__owner == 0);
mutex->__data.__count = 1;
}
else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex)
== PTHREAD_MUTEX_ADAPTIVE_NP, 1))
{
/* Without SMP the adaptive mutex takes the plain blocking path.  */
if (! __is_smp)
goto simple;
if (LLL_MUTEX_TRYLOCK (mutex) != 0)
{
/* Retry the trylock up to max_cnt times, then fall back to the
   blocking LLL_MUTEX_LOCK.  */
int cnt = 0;
int max_cnt = MIN (MAX_ADAPTIVE_COUNT,
mutex->__data.__spins * 2 + 10);
do
{
if (cnt++ >= max_cnt)
{
LLL_MUTEX_LOCK (mutex);
break;
}
atomic_spin_nop ();
}
while (LLL_MUTEX_TRYLOCK (mutex) != 0);
/* Move the stored spin estimate one eighth of the way toward the
   count observed this time.  */
mutex->__data.__spins += (cnt - mutex->__data.__spins) / 8;
}
assert (mutex->__data.__owner == 0);
}
else
{
pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
assert (PTHREAD_MUTEX_TYPE (mutex) == PTHREAD_MUTEX_ERRORCHECK_NP);
/* Check whether we already hold the mutex. */
if (__glibc_unlikely (mutex->__data.__owner == id))
return EDEADLK;
goto simple;
}
/* Common tail for every path that acquired the lock without
   returning early.  */
pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
/* Record the ownership. */
mutex->__data.__owner = id;
#ifndef NO_INCR
++mutex->__data.__nusers;
#endif
LIBC_PROBE (mutex_acquired, 1, mutex);
return 0;
}
首先看下第一句
assert (sizeof (mutex->__size) >= sizeof (mutex->__data));
这句的意思是成员__size所占内存不小于__data所占内存(即__size足以容纳__data),我们来验证下。
char __size[__SIZEOF_PTHREAD_MUTEX_T]的字节数:40.
1 #ifdef __x86_64__
2 # if __WORDSIZE == 64
3 # define __SIZEOF_PTHREAD_ATTR_T 56
4 # define __SIZEOF_PTHREAD_MUTEX_T 40
另一方面_data中的字节数是int、short、unsigned、__pthread_list_t这些个加起来,刚好为40字节.
所以这个union在64位计算机上最大的空间为40个字节。
接着是:
1 unsigned int type = PTHREAD_MUTEX_TYPE_ELISION (mutex);
1 #define PTHREAD_MUTEX_TYPE_ELISION(m) \
2 ((m)->__data.__kind & (127|PTHREAD_MUTEX_ELISION_NP))
因为__kind为0,所以这里的type显然为0;
1 if (__builtin_expect (type & ~(PTHREAD_MUTEX_KIND_MASK_NP
2 | PTHREAD_MUTEX_ELISION_FLAGS_NP), 0))
3 return __pthread_mutex_lock_full (mutex);
这里的结果为0,所以显然不走这个分支。
PTHREAD_MUTEX_TIMED_NP值为0,所以我们的代码显然是进入如下第一行的分支。
根据注释/* Normal mutex. */,很可能是通过这里得到锁。我们继续探索下,
/* Mutex types. */
enum
{
PTHREAD_MUTEX_TIMED_NP,
PTHREAD_MUTEX_RECURSIVE_NP,
PTHREAD_MUTEX_ERRORCHECK_NP,
PTHREAD_MUTEX_ADAPTIVE_NP
#if defined __USE_UNIX98 || defined __USE_XOPEN2K8
if (__glibc_likely (type == PTHREAD_MUTEX_TIMED_NP))
{
FORCE_ELISION (mutex, goto elision);
simple:
/* Normal mutex. */
LLL_MUTEX_LOCK (mutex);
assert (mutex->__data.__owner == 0);
}
#ifdef HAVE_ELISION
else if (__glibc_likely (type == PTHREAD_MUTEX_TIMED_ELISION_NP))
{
elision: __attribute__((unused))
/* This case can never happen on a system without elision,
as the mutex type initialization functions will not
allow to set the elision flags. */
/* Don't record owner or users for elision case. This is a
tail call. */
return LLL_MUTEX_LOCK_ELISION (mutex);
}
#endif
else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex)
== PTHREAD_MUTEX_RECURSIVE_NP, 1))
{
/* Recursive mutex. */
pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
/* Check whether we already hold the mutex. */
if (mutex->__data.__owner == id)
{
/* Just bump the counter. */
if (__glibc_unlikely (mutex->__data.__count + 1 == 0))
/* Overflow of the counter. */
return EAGAIN;
++mutex->__data.__count;
return 0;
}
/* We have to get the mutex. */
LLL_MUTEX_LOCK (mutex);
assert (mutex->__data.__owner == 0);
mutex->__data.__count = 1;
}
else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex)
== PTHREAD_MUTEX_ADAPTIVE_NP, 1))
{
if (! __is_smp)
goto simple;
if (LLL_MUTEX_TRYLOCK (mutex) != 0)
{
int cnt = 0;
int max_cnt = MIN (MAX_ADAPTIVE_COUNT,
mutex->__data.__spins * 2 + 10);
do
{
if (cnt++ >= max_cnt)
{
LLL_MUTEX_LOCK (mutex);
break;
}
atomic_spin_nop ();
}
while (LLL_MUTEX_TRYLOCK (mutex) != 0);
mutex->__data.__spins += (cnt - mutex->__data.__spins) / 8;
}
assert (mutex->__data.__owner == 0);
}
else
{
pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
assert (PTHREAD_MUTEX_TYPE (mutex) == PTHREAD_MUTEX_ERRORCHECK_NP);
/* Check whether we already hold the mutex. */
if (__glibc_unlikely (mutex->__data.__owner == id))
return EDEADLK;
goto simple;
}
这里的意思:将_data中的__lock作为参数填入lll_lock,注意,这里是宏定义。
#ifndef LLL_MUTEX_LOCK
# define LLL_MUTEX_LOCK(mutex) \
lll_lock ((mutex) ->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex))
1 #if LLL_PRIVATE == 0 && LLL_SHARED == 128
2 # define PTHREAD_MUTEX_PSHARED(m) \
3 ((m)->__data.__kind & 128)
4 #else
这里的PTHREAD_MUTEX_PSHARED将__kind字段和128做&操作,推测是第8个标志位用来标识该锁是否共享。
既然如此, 我们这里两者填入的都是0,但是第一个__lock在后续使用中有取地址的可能。
我们接着看看lll_lock:
1 #define lll_lock(futex, private) \
2 __lll_lock (&(futex), private)
取了地址, 那么这里就是原mutex中__lock字段的地址和数值0.
/* Lock fast path on the int that FUTEX points to.
   atomic_compare_and_exchange_bool_acq (__futex, 1, 0) is tried first;
   a non-zero result is treated as failure and the macro then calls
   __lll_lock_wait_private (__futex) when PRIVATE is the compile-time
   constant LLL_PRIVATE, or __lll_lock_wait (__futex, private) otherwise.
   The whole expression is cast to void, so it yields no value.
   NOTE(review): the compare-and-exchange helper is defined elsewhere;
   presumably it stores 1 only if *__futex is 0 -- confirm there.  */
#define __lll_lock(futex, private) \
((void) \
({ \
int *__futex = (futex); \
if (__glibc_unlikely \
(atomic_compare_and_exchange_bool_acq (__futex, 1, 0))) \
{ \
if (__builtin_constant_p (private) && (private) == LLL_PRIVATE) \
__lll_lock_wait_private (__futex); \
else \
__lll_lock_wait (__futex, private); \
} \
}))
此处atomic_compare_and_exchange_bool_acq 用于在__futex的值为0时将其原子地变为1,成功则返回0,从而获得锁退出。
失败则返回非0值(此时__futex的旧值是1或者2,说明锁已被占用),然后继续走分支。
根据值, 走__lll_lock_wait:
/* Note that we need no lock prefix. */
#define atomic_exchange_acq(mem, newvalue) \
({ __typeof (*mem) result; \
if (sizeof (*mem) == 1) \
__asm __volatile ("xchgb %b0, %1" \
: "=q" (result), "=m" (*mem) \
: "0" (newvalue), "m" (*mem)); \
else if (sizeof (*mem) == 2) \
__asm __volatile ("xchgw %w0, %1" \
: "=r" (result), "=m" (*mem) \
: "0" (newvalue), "m" (*mem)); \
else if (sizeof (*mem) == 4) \
__asm __volatile ("xchgl %0, %1" \
: "=r" (result), "=m" (*mem) \
: "0" (newvalue), "m" (*mem)); \
else \
__asm __volatile ("xchgq %q0, %1" \
: "=r" (result), "=m" (*mem) \
: "0" ((atomic64_t) cast_to_integer (newvalue)), \
"m" (*mem)); \
result; })
/* This function doesn't get included in libc. */
#if IS_IN (libpthread)
/* Contended lock path for the futex word FUTEX.
   If the word already reads 2, sleep in lll_futex_wait first.  Then
   keep exchanging 2 into the word: a previous value of 0 means the lock
   was free and now belongs to the caller (left at 2); any other
   previous value means it is still held, so wait again.  PRIVATE is
   forwarded unchanged to lll_futex_wait.  */
void
__lll_lock_wait (int *futex, int private)
{
if (*futex == 2)
lll_futex_wait (futex, 2, private); /* Wait if *futex == 2. */
while (atomic_exchange_acq (futex, 2) != 0)
lll_futex_wait (futex, 2, private); /* Wait if *futex == 2. */
}
#endif
所以到了关键的地方, 这里是用atomic_exchange_acq把futex(&__lock)的值原子地置为2并取回旧值,旧值为0就说明成功拿到了锁。否则调用lll_futex_wait,阻塞。这里的atomic_exchange_acq是一个返回旧值的原子操作,直接采用了内联汇编(xchg)的方式,并且根据操作数的字节数(1、2、4、8)选取不同的xchg指令(xchgb/xchgw/xchgl/xchgq)。
到了这里,只要这个原子xchg的是正确的,并且阻塞与唤醒(wake up)之间的协议是正确的,那么这个mutex的语义就得到保证了。
我们接着看看lll_futex_wait是怎么样的(val = 2, private = 0):
1 /* Wait while *FUTEXP == VAL for an lll_futex_wake call on FUTEXP. */
2 #define lll_futex_wait(futexp, val, private) \
3 lll_futex_timed_wait (futexp, val, NULL, private)
参数多了个NULL(val = 2, timeout = NULL, private = 0),
1 #define lll_futex_timed_wait(futexp, val, timeout, private) \
2 lll_futex_syscall (4, futexp, \
3 __lll_private_flag (FUTEX_WAIT, private), \
4 val, timeout)
展开__lll_private_flag
1 # else
2 # define __lll_private_flag(fl, private) \
3 ((fl) | THREAD_GETMEM (THREAD_SELF, header.private_futex))
4 # endif
1 # define THREAD_SELF \
2 ({ struct pthread *__self; \
3 asm ("mov %%fs:%c1,%0" : "=r" (__self) \
4 : "i" (offsetof (struct pthread, header.self))); \
5 __self;})
这里是从struct pthread中取得private_futex来计算的,值为0。这里实际上只保留了FUTEX_WAIT的值,同样为0.
#define FUTEX_WAIT 0
#define FUTEX_WAKE 1
#define FUTEX_REQUEUE 3
#define FUTEX_CMP_REQUEUE 4
#define FUTEX_WAKE_OP 5
#define FUTEX_OP_CLEAR_WAKE_IF_GT_ONE ((4 << 24) | 1)
#define FUTEX_LOCK_PI 6
#define FUTEX_UNLOCK_PI 7
#define FUTEX_TRYLOCK_PI 8
#define FUTEX_WAIT_BITSET 9
#define FUTEX_WAKE_BITSET 10
#define FUTEX_WAIT_REQUEUE_PI 11
#define FUTEX_CMP_REQUEUE_PI 12
#define FUTEX_PRIVATE_FLAG 128
#define FUTEX_CLOCK_REALTIME 256
#define FUTEX_BITSET_MATCH_ANY 0xffffffff
所以这里的lll_futex_syscall调用简化为:
lll_futex_syscall (4, futexp, 0, 2, NULL)
我们接着看:
#define lll_futex_syscall(nargs, futexp, op, ...) \
({ \
INTERNAL_SYSCALL_DECL (__err); \
long int __ret = INTERNAL_SYSCALL (futex, __err, nargs, futexp, op, \
__VA_ARGS__); \
(__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (__ret, __err)) \
? -INTERNAL_SYSCALL_ERRNO (__ret, __err) : 0); \
})
这里的futex并不是字符串字面量,而是一个标识符片段:后续在INTERNAL_SYSCALL中通过##拼接成__NR_futex;__VA_ARGS__指代了2和NULL。
我们看一下INTERNAL_SYSCALL:
# define INTERNAL_SYSCALL(name, err, nr, args...) \
INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, ##args)
# define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
({ \
unsigned long int resultvar; \
LOAD_ARGS_##nr (args) \
LOAD_REGS_##nr \
asm volatile ( \
"syscall\n\t" \
: "=a" (resultvar) \
: "0" (name) ASM_ARGS_##nr : "memory", REGISTERS_CLOBBERED_BY_SYSCALL); \
(long int) resultvar; })
那么这里的INTERNAL_SYSCALL_NCS调用, 参数为( __NR_futex,err,4, futexp,0, 2, NULL)。第四个参数开始为futexp,0, 2, NULL。
# define LOAD_ARGS_4(a1, a2, a3, a4) \
LOAD_ARGS_TYPES_4 (long int, a1, long int, a2, long int, a3, \
long int, a4)
# define LOAD_REGS_4 \
LOAD_REGS_TYPES_4 (long int, a1, long int, a2, long int, a3, \
long int, a4)
# define ASM_ARGS_4 ASM_ARGS_3, "r" (_a4)
将LOAD_ARGS_##nr (args)、LOAD_REGS_##nr、ASM_ARGS_##nr、REGISTERS_CLOBBERED_BY_SYSCALL展开带入,之后可将INTERNAL_SYSCALL_NCS转换为如下:
unsigned long long int resultvar;
long int __arg4 = (long int) (NULL); \
long int __arg3 = (long int) (2); \
long int __arg2 = (long int) (0); \
long int __arg1 = (long int) (futexp); \
register long int _a4 asm ("r10") = __arg4; \
register long int _a3 asm ("rdx") = __arg3; \
register long int _a2 asm ("rsi") = __arg2; \
register long int _a1 asm ("rdi") = __arg1; \
asm volatile ( \
"syscall\n\t" \
: "=a" (resultvar) \
: "0" (__NR_futex), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) : "memory", "cc", "r11", "cx"); \
(long long int) resultvar; })
这里的__NR_futex的定义没有找到,这应该是个linux系统定义的系统调用号,并且由它来定义SYS_futex的值。
#define SYS_futex __NR_futex
那么上面的那段代码真的确定是使用(FUTEX_WAIT)futex来陷入了阻塞吗?
让我尝试将之前写的一段直接采用futex做同步区块的代码修改下做检验。
原代码:
#include <stdio.h>
#include <pthread.h>
#include <linux/futex.h>
#include <syscall.h>
#include <unistd.h>
#include <sys/time.h>
#define NUM 1000
int num = 0;
int futex_addr = 0;
/* Put the caller to sleep on addr via FUTEX_WAIT with no timeout; the
   kernel only blocks while the word at addr still equals val.
   Returns the syscall() result narrowed to int. */
int futex_wait(void* addr, int val){
    long rc = syscall(SYS_futex, addr, FUTEX_WAIT, val, NULL, NULL, 0);
    return (int) rc;
}
/* Issue FUTEX_WAKE on addr, asking the kernel to wake at most val
   waiters.  Returns the syscall() result narrowed to int. */
int futex_wake(void* addr, int val){
    long rc = syscall(SYS_futex, addr, FUTEX_WAKE, val, NULL, NULL, 0);
    return (int) rc;
}
/*
 * Worker thread: increments the shared counter `num` 1000 times, using
 * `futex_addr` as a two-state lock (0 = free, 1 = held).
 *
 * par: thread index handed over by main(); not used by the worker.
 * Returns NULL.
 */
void* thread_f(void* par){
    (void) par;
    for(int i = 0; i < 1000; ++i){
        /* Acquire: the CAS yields the previous value, so 1 means another
           thread holds the lock; sleep until the word stops being 1. */
        while(1 == __sync_val_compare_and_swap(&futex_addr, 0, 1) ){
            futex_wait(&futex_addr,1);
        }
        ++num;
        /* Release: write 0 with release ordering so the update to num
           cannot be reordered after the unlock (a plain store gives no
           such guarantee). */
        __sync_lock_release(&futex_addr);
        futex_wake(&futex_addr, NUM);
    }
    return NULL;
}
/*
 * Start NUM worker threads running thread_f, wait for all of them, then
 * print the final value of `num` and the elapsed wall-clock time.
 *
 * Returns 0 on success, 1 if a thread could not be created.
 */
int main(){
    pthread_t threads[NUM];
    int created = 0;    /* how many threads were actually started */
    int status = 0;
    struct timeval start;
    struct timeval end;

    printf("Everyone go...\n");
    gettimeofday(&start,NULL);
    for (int i=0;i<NUM;i++){
        /* Convert through long so the int -> pointer cast is between
           same-width types on x86_64 Linux. */
        int rc = pthread_create(&threads[i],NULL,thread_f,(void *)(long)i);
        if (rc != 0){
            fprintf(stderr, "pthread_create failed for thread %d: %d\n", i, rc);
            status = 1;
            break;
        }
        ++created;
    }
    /* Join only threads that exist; joining an uninitialized pthread_t
       is undefined. */
    for (int i=0;i<created;i++){
        pthread_join(threads[i], NULL);
    }
    printf("Main is quitting...\n");
    printf("and num is %d\n", num);
    gettimeofday(&end,NULL);
    /* Elapsed time in seconds; tv_usec is in microseconds, hence / 1e6. */
    double time_use=(end.tv_sec-start.tv_sec)+(end.tv_usec-start.tv_usec) / 1000000.0;
    printf("time_use is %f \n",time_use);
    return status;
}
执行输出为:
Everyone go...
Main is quitting...
and num is 1000000
time_use is 0.283753
1000个线程执行1000次+1,答案为1000000正确。
我们尝试将futex_wait中sys_call做一下修改:
int futex_wait(void* addr, int val){
// return syscall(SYS_futex, addr, FUTEX_WAIT, val, NULL, NULL, 0);
return INTERNAL_SYSCALL_NCS(addr, FUTEX_WAIT, val, NULL);
}
然后添加宏INTERNAL_SYSCALL_NCS:
#define INTERNAL_SYSCALL_NCS(a1, a2, a3, a4) \
({ \
unsigned long long int resultvar; \
long int __arg4 = (long int) (a4); \
long int __arg3 = (long int) (a3); \
long int __arg2 = (long int) (a2); \
long int __arg1 = (long int) (a1); \
register long int _a4 asm ("r10") = __arg4; \
register long int _a3 asm ("rdx") = __arg3; \
register long int _a2 asm ("rsi") = __arg2; \
register long int _a1 asm ("rdi") = __arg1; \
asm volatile ( \
"syscall\n\t" \
: "=a" (resultvar) \
: "0" (SYS_futex), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) : "memory", "cc", "r11", "cx"); \
(long long int) resultvar; })
得到如下代码:
#include <stdio.h>
#include <pthread.h>
#include <linux/futex.h>
#include <syscall.h>
#include <unistd.h>
#include <sys/time.h>
#define NUM 1000
/* Hand-expanded four-argument futex system call (x86_64, GNU C statement
   expression).  a1..a4 are converted to long int and pinned to rdi, rsi,
   rdx and r10 through register asm variables.  SYS_futex goes in through
   the "0" constraint, i.e. the same register as the "=a" output, which
   receives the result.  "memory", "cc", "r11" and "cx" are declared
   clobbered.  The macro evaluates to resultvar cast to long long int and
   does not touch errno. */
#define INTERNAL_SYSCALL_NCS(a1, a2, a3, a4) \
({ \
unsigned long long int resultvar; \
long int __arg4 = (long int) (a4); \
long int __arg3 = (long int) (a3); \
long int __arg2 = (long int) (a2); \
long int __arg1 = (long int) (a1); \
register long int _a4 asm ("r10") = __arg4; \
register long int _a3 asm ("rdx") = __arg3; \
register long int _a2 asm ("rsi") = __arg2; \
register long int _a1 asm ("rdi") = __arg1; \
asm volatile ( \
"syscall\n\t" \
: "=a" (resultvar) \
: "0" (SYS_futex), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4) : "memory", "cc", "r11", "cx"); \
(long long int) resultvar; })
int num = 0;
int futex_addr = 0;
/* FUTEX_WAIT on addr with expected value val and a NULL timeout, issued
   through the inline-asm INTERNAL_SYSCALL_NCS macro.  The commented-out
   line below is the syscall() form that this call replaces.  The macro's
   long long result is narrowed to int on return. */
int futex_wait(void* addr, int val){
// return syscall(SYS_futex, addr, FUTEX_WAIT, val, NULL, NULL, 0);
return INTERNAL_SYSCALL_NCS(addr, FUTEX_WAIT, val, NULL);
}
/* Issue FUTEX_WAKE on addr, asking the kernel to wake at most val
   waiters.  Returns the syscall() result narrowed to int. */
int futex_wake(void* addr, int val){
    long rc = syscall(SYS_futex, addr, FUTEX_WAKE, val, NULL, NULL, 0);
    return (int) rc;
}
/*
 * Worker thread: increments the shared counter `num` 1000 times, using
 * `futex_addr` as a two-state lock (0 = free, 1 = held).
 *
 * par: thread index handed over by main(); not used by the worker.
 * Returns NULL.
 */
void* thread_f(void* par){
    (void) par;
    for(int i = 0; i < 1000; ++i){
        /* Acquire: the CAS yields the previous value, so 1 means another
           thread holds the lock; sleep until the word stops being 1. */
        while(1 == __sync_val_compare_and_swap(&futex_addr, 0, 1) ){
            futex_wait(&futex_addr,1);
        }
        ++num;
        /* Release: write 0 with release ordering so the update to num
           cannot be reordered after the unlock (a plain store gives no
           such guarantee). */
        __sync_lock_release(&futex_addr);
        futex_wake(&futex_addr, NUM);
    }
    return NULL;
}
/*
 * Start NUM worker threads running thread_f, wait for all of them, then
 * print the final value of `num` and the elapsed wall-clock time.
 *
 * Returns 0 on success, 1 if a thread could not be created.
 */
int main(){
    pthread_t threads[NUM];
    int created = 0;    /* how many threads were actually started */
    int status = 0;
    struct timeval start;
    struct timeval end;

    printf("Everyone go...\n");
    gettimeofday(&start,NULL);
    for (int i=0;i<NUM;i++){
        /* Convert through long so the int -> pointer cast is between
           same-width types on x86_64 Linux. */
        int rc = pthread_create(&threads[i],NULL,thread_f,(void *)(long)i);
        if (rc != 0){
            fprintf(stderr, "pthread_create failed for thread %d: %d\n", i, rc);
            status = 1;
            break;
        }
        ++created;
    }
    /* Join only threads that exist; joining an uninitialized pthread_t
       is undefined. */
    for (int i=0;i<created;i++){
        pthread_join(threads[i], NULL);
    }
    printf("Main is quitting...\n");
    printf("and num is %d\n", num);
    gettimeofday(&end,NULL);
    /* Elapsed time in seconds; tv_usec is in microseconds, hence / 1e6. */
    double time_use=(end.tv_sec-start.tv_sec)+(end.tv_usec-start.tv_usec) / 1000000.0;
    printf("time_use is %f \n",time_use);
    return status;
}
注意到我们这里与pthread不一样的地方在于
1 == __sync_val_compare_and_swap(&futex_addr, 0, 1)
注意到我们这里的和pthread_mutex不一样的地方在于我们只是原子地将值futex_addr从0改为1(只使用0和1两个状态,没有pthread实现中在竞争路径上使用的状态2)。
执行如上代码,输出为:
Everyone go...
Main is quitting...
and num is 1000000
time_use is 0.254833
答案同样是1000000,所以这个采用汇编形式的调用符合了我们的预期,应该是和系统调用一致的。
最后我们看假如已经获得了锁,需要做什么:
pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
/* Record the ownership. */
mutex->__data.__owner = id;
#ifndef NO_INCR
++mutex->__data.__nusers;
#endif
只是简单地把__data中的__owner设置为id,以及++__nusers。从而代表这个锁的使用者人数+1,并且当前拥有者为该id的线程。
我们之后接着来看看pthread_mutex_unlock的实现。