一、概述
在Linux系统里,假设有两处代码(比如不同线程的两个函数F1和F2)都要获取两个锁(分别为L1和L2),如果F1持有L1后再去获取L2,而此时恰好由F2持有L2且它也正在尝试获取L1,那么此时就是处于死锁的状态,这是一个最简单的死锁例子,也即所谓的AB-BA死锁。
死锁导致的最终结果无需多说,关于如何避免死锁在教科书上也有提到,最简单直观的做法就是按顺序上锁,以破坏死锁的环形等待条件。但对于拥有成千上万个锁的整个系统来说,完全定义它们之间的顺序是非常困难的,所以一种更可行的办法就是尽量提前发现这其中潜在的死锁风险,而不是等到最后真正出现死锁时给用户带来切实的困惑。
已有很多工具用于发现可能的死锁风险,而本文介绍的调试/检测模块lockdep,即是属于这一类工具的一种。调试模块lockdep从2006年引入内核,经过实践验证,其对提前发现死锁起到了巨大的作用。
官方文档有介绍调试模块lockdep的设计原理,这里按照我自己的理解描述一下。
1,lockdep操作的基本单元并非单个的锁实例,而是锁类(lock-class)。比如,struct inode结构体中的自旋锁i_lock字段就代表了这一类锁,而具体每个inode节点的锁只是该类锁中的一个实例。对所有这些实例,lockdep会把它们当作一个整体做处理,即把判断粒度放大,否则对可能有成千上万个的实例进行逐一判断,那处理难度可想而知,而且也没有必要。当然,在具体的处理中,可能会记录某些特定情况下的实例的部分相关信息,以便事后排查问题。
2,lockdep跟踪每个锁类的自身状态,也跟踪各个锁类之间的依赖关系,通过一系列的验证规则,以确保锁类状态和锁类之间的依赖总是正确的。另外,锁类一旦在初次使用时被注册,那么后续就会一直存在,所有它的具体实例都会关联到它。
lockdep是linux内核的一个调试模块,用来检查内核互斥机制尤其是自旋锁潜在的死锁问题。自旋锁由于是查询方式等待,不释放处理器,比一般的互斥机制更容易死锁,故引入lockdep检查以下几种情况可能的死锁。
1.同一个进程递归地加锁同一把锁.
2.一把锁既在中断(或中断下半部)使能的情况下执行过加锁操作,又在中断(或中断下半部)里执行过加锁操作。这样该锁有可能在锁定时由于中断发生又试图在同一处理器上加锁,加锁后导致依赖图形成闭环,这是典型的死锁现象。
二、 lockdep验证规则
(1)单锁状态规则(Single-lock state rules)
1,一个软中断不安全(softirq-unsafe)的锁类同样也是硬中断不安全(hardirq-unsafe)的。
2,对于任何一个锁类,它不可能同时是hardirq-safe和hardirq-unsafe,也不可能同时是softirq-safe和softirq-unsafe,即这两对对应状态是互斥的。
上面这两条就是lockdep判断单锁是否会发生死锁的检测规则。
(2)多锁依赖规则(Multi-lock dependency rules)
1,同一个锁类不能被获取两次,因为这会导致递归死锁。
2,不能以不同的顺序获取两个锁类,即如此这样:
<L1> -> <L2>
<L2> -> <L1>
是不行的。因为这会非常容易的导致本文最先提到的AB-BA死锁。当然,下面这样的情况也不行:
<L1> -> <L3> -> <L4> -> <L2>
<L2> -> <L3> -> <L4> -> <L1>
即在中间插入了其它正常顺序的锁也能被lockdep检测出来。
3,同一个锁实例在任何两个锁类之间不能出现这样的情况:
<hardirq-safe> -> <hardirq-unsafe>
<softirq-safe> -> <softirq-unsafe>
这意味着,如果同一个锁实例,在某些地方是hardirq-safe(即采用spin_lock_irqsave(…)),而在某些地方又是hardirq-unsafe(即采用spin_lock(…)),那么就存在死锁的风险。这应该容易理解,比如在进程上下文中持有锁A,并且锁A是hardirq-unsafe,如果此时触发硬中断,而硬中断处理函数又要去获取锁A,那么就导致了死锁。
在锁类状态发生变化时,进行如下几个规则检测,判断是否存在潜在死锁。比较简单,就是判断hardirq-safe和hardirq-unsafe以及softirq-safe和softirq-unsafe是否发生了碰撞.
三、相关结构体
1.struct held_lock
在每个进程的task_struct结构体中定义了struct held_lock held_locks[MAX_LOCK_DEPTH]成员,用来记录锁。
struct held_lock {
215 /*
216 * One-way hash of the dependency chain up to this point. We
217 * hash the hashes step by step as the dependency chain grows.
218 *
219 * We use it for dependency-caching and we skip detection
220 * passes and dependency-updates if there is a cache-hit, so
221 * it is absolutely critical for 100% coverage of the validator
222 * to have a unique key value for every unique dependency path
223 * that can occur in the system, to make a unique hash value
224 * as likely as possible - hence the 64-bit width.
225 *
226 * The task struct holds the current hash value (initialized
227 * with zero), here we store the previous hash value:
228 */
u64 prev_chain_key;
unsigned long acquire_ip;
struct lockdep_map *instance;
struct lockdep_map *nest_lock;
#ifdef CONFIG_LOCK_STAT
u64 waittime_stamp;
u64 holdtime_stamp;
#endif
unsigned int class_idx:MAX_LOCKDEP_KEYS_BITS;
238 /*
239 * The lock-stack is unified in that the lock chains of interrupt
240 * contexts nest ontop of process context chains, but we 'separate'
241 * the hashes by starting with 0 if we cross into an interrupt
242 * context, and we also keep do not add cross-context lock
243 * dependencies - the lock usage graph walking covers that area
244 * anyway, and we'd just unnecessarily increase the number of
245 * dependencies otherwise. [Note: hardirq and softirq contexts
246 * are separated from each other too.]
247 *
248 * The following field is used to detect when we cross into an
249 * interrupt context:
250 */
unsigned int irq_context:2; /* bit 0 - soft, bit 1 - hard */
unsigned int trylock:1; /* 16 bits */
unsigned int read:2; /* see lock_acquire() comment */
unsigned int check:2; /* see lock_acquire() comment */
unsigned int hardirqs_off:1;
unsigned int references:11; /* 32 bits */
};
2.lockdep_map
各种锁结构体中如mutex、rawspinlock、semaphore内嵌该结构体,用于对锁检测。
/*
 * Lockdep bookkeeping embedded in a lock object.
 * lockdep_init_map() initializes it: class_cache is cleared to NULL, then
 * name and key are stored after validation.
 */
struct lockdep_map {
/* Lock-class key; lockdep_init_map() only stores it if it passes static_obj(). */
struct lock_class_key *key;
/* Cached class pointers; all entries start out NULL. */
struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
/* Lock name; set to the literal "NULL" if the caller passed no name. */
const char *name;
#ifdef CONFIG_LOCK_STAT
int cpu; // CPU number at the time the map was initialized (raw_smp_processor_id())
/* NOTE(review): presumably an instruction address recorded for lock statistics - confirm. */
unsigned long ip;
#endif
};
3.lock_class
/*
 * Per-class lockdep state; held_lock.class_idx selects an entry of the
 * lock_classes[] array of these. Only fields whose use is visible in this
 * file are described; the others are deliberately left without a guess.
 */
struct lock_class {
struct list_head hash_entry;
struct list_head lock_entry;
/* Handed to __get_key_name() by __print_lock_name() when name is NULL. */
struct lockdep_subclass_key *key;
/* Printed as "/<subclass>" after the class name when non-zero. */
unsigned int subclass;
unsigned int dep_gen_id;
unsigned long usage_mask;
struct stack_trace usage_traces[XXX_LOCK_USAGE_STATES];
/* NOTE(review): presumably lists of classes acquired after/before this one - confirm. */
struct list_head locks_after, locks_before;
unsigned int version;
unsigned long ops;
/* Class name; may be NULL, in which case a name is derived from key. */
const char *name;
/* Printed as "#<n>" after the class name when greater than 1. */
int name_version;
#ifdef CONFIG_LOCK_STAT
unsigned long contention_point[LOCKSTAT_POINTS];
unsigned long contending_point[LOCKSTAT_POINTS];
#endif
};
4.lock_class_key
/*
 * Key object passed to lockdep_init_map() for a lock; it carries one
 * sub-key slot per subclass (MAX_LOCKDEP_SUBCLASSES in total).
 * lockdep_init_map() rejects keys that fail static_obj(); sema_init()
 * shows the usual pattern of a function-local static key.
 */
struct lock_class_key {
struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
};
5.lockdep_subclass_key
/*
 * One packed byte per subclass slot of a lock_class_key.
 * NOTE(review): presumably only the address of this byte is used, as an
 * identity - confirm against the class lookup code.
 */
struct lockdep_subclass_key {
char __one_byte;
} __attribute__ ((__packed__));
三、lockdep初始化
建立两个散列表classhash_table和chainhash_table,并初始化全局变量lockdep_initialized,标志已初始化完成。
/*
 * Hash tables of list heads, one bucket per array slot.
 * lockdep_init() runs INIT_LIST_HEAD() on every bucket of both tables.
 */
static struct list_head classhash_table[CLASSHASH_SIZE];
static struct list_head chainhash_table[CHAINHASH_SIZE];
/*
 * lockdep_init - one-time setup of the class and chain hash tables.
 *
 * Every bucket of classhash_table and chainhash_table becomes an empty
 * list, then lockdep_initialized is set so later calls do nothing.
 */
void lockdep_init(void)
{
	int bucket;

	if (!lockdep_initialized) {
		for (bucket = 0; bucket < CLASSHASH_SIZE; bucket++)
			INIT_LIST_HEAD(&classhash_table[bucket]);

		for (bucket = 0; bucket < CHAINHASH_SIZE; bucket++)
			INIT_LIST_HEAD(&chainhash_table[bucket]);

		lockdep_initialized = 1;
	}
}
四、提供接口
1. lockdep_init_map
用于初始化锁内嵌的lockdep_map结构体
/*
 * sema_init - initialize a semaphore and its lockdep map.
 * @sem: semaphore to initialize
 * @val: value passed through to __SEMAPHORE_INITIALIZER
 *       (presumably the initial count - confirm)
 *
 * The function-local static __key serves as the lock-class key for
 * sem->lock; being static, it satisfies the persistence check that
 * lockdep_init_map() applies to keys.
 */
static inline void sema_init(struct semaphore *sem, int val)
{
static struct lock_class_key __key;
*sem = (struct semaphore) __SEMAPHORE_INITIALIZER(*sem, val);
/* Subclass 0: lockdep_init_map() does not register the class eagerly. */
lockdep_init_map(&sem->lock.dep_map, "semaphore->lock", &__key, 0);
}
/*
 * lockdep_init_map - initialize the lockdep_map embedded in a lock.
 * @lock:     map to initialize
 * @name:     name to record; must not be NULL
 * @key:      lock-class key; must not be NULL and must pass static_obj()
 * @subclass: when non-zero (and lock debugging is still on), the class is
 *            registered immediately
 *
 * The checks run in a fixed order and each failure returns early, leaving
 * whatever fields were already assigned in place.
 */
void lockdep_init_map(struct lockdep_map *lock, const char *name,
		      struct lock_class_key *key, int subclass)
{
	int slot = 0;

	/* Per the original author's note, this is an empty function on ARM. */
	kmemcheck_mark_initialized(lock, sizeof(*lock));

	/* Start with an empty class cache in the lockdep_map. */
	while (slot < NR_LOCKDEP_CACHING_CLASSES)
		lock->class_cache[slot++] = NULL;

#ifdef CONFIG_LOCK_STAT
	lock->cpu = raw_smp_processor_id();
#endif

	/* A missing name is warned about and recorded as the literal "NULL". */
	if (DEBUG_LOCKS_WARN_ON(!name)) {
		lock->name = "NULL";
		return;
	}
	lock->name = name;

	/* A key is mandatory. */
	if (DEBUG_LOCKS_WARN_ON(!key))
		return;

	/*
	 * Sanity-check the key's address. Per the original note it has to
	 * lie in kernel .data, per-cpu space or module space.
	 */
	if (!static_obj(key)) {
		printk("BUG: key %p not in .data!\n", key);
		DEBUG_LOCKS_WARN_ON(1);
		return;
	}
	lock->key = key;

	/* Eager registration only for a non-zero subclass while debugging is on. */
	if (unlikely(!debug_locks) || !subclass)
		return;
	register_lock_class(lock, subclass, 1);
}
2. lock_acquire
/*
 * lock_acquire - lockdep entry point for acquiring a lock.
 *
 * Does nothing when the current task is already inside lockdep
 * (current->lockdep_recursion set). Otherwise interrupts are saved and
 * disabled, the recursion flag is held at 1 around __lock_acquire(), and
 * the saved interrupt state is restored on the way out.
 *
 * All arguments are forwarded unchanged to __lock_acquire(), together with
 * whether interrupts were disabled in the saved state and a trailing 0.
 */
void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
		  int trylock, int read, int check,
		  struct lockdep_map *nest_lock, unsigned long ip)
{
	unsigned long irq_state;
	int hardirqs_off;

	if (unlikely(current->lockdep_recursion))
		return;

	raw_local_irq_save(irq_state);
	check_flags(irq_state);

	current->lockdep_recursion = 1;
	/* Per the original author's note, this trace hook is an empty function. */
	trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
	hardirqs_off = irqs_disabled_flags(irq_state);
	__lock_acquire(lock, subclass, trylock, read, check, hardirqs_off,
		       nest_lock, ip, 0);
	current->lockdep_recursion = 0;

	raw_local_irq_restore(irq_state);
}
3. debug_check_no_locks_freed
用于检测一个锁是不是被多次初始化,或者一块内存在释放时还持有锁。
/*
 * debug_check_no_locks_freed - complain if a held lock lives in a memory range.
 * @mem_from: start of the range
 * @mem_len:  length of the range in bytes
 *
 * Walks the current task's held locks with interrupts disabled. The first
 * lock whose lockdep_map overlaps [mem_from, mem_from + mem_len) is
 * reported through print_freed_lock_bug() and the walk stops. Does nothing
 * when lock debugging is off.
 */
void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
{
	struct task_struct *curr = current;
	unsigned long flags;
	int pos;

	if (unlikely(!debug_locks))
		return;

	local_irq_save(flags);
	for (pos = 0; pos < curr->lockdep_depth; pos++) {
		struct held_lock *held = &curr->held_locks[pos];

		/* Report only a lock whose map overlaps the given range. */
		if (!not_in_range(mem_from, mem_len, held->instance,
				  sizeof(*held->instance))) {
			print_freed_lock_bug(curr, mem_from, mem_from + mem_len, held);
			break;
		}
	}
	local_irq_restore(flags);
}
/*
 * not_in_range - test whether two byte ranges are disjoint.
 * @mem_from:  start of the first range
 * @mem_len:   length of the first range in bytes
 * @lock_from: start of the second range
 * @lock_len:  length of the second range in bytes
 *
 * Returns 1 when the second range ends at or before the start of the
 * first, or starts at or after its end; returns 0 when they overlap.
 */
static inline int not_in_range(const void* mem_from, unsigned long mem_len,
		const void* lock_from, unsigned long lock_len)
{
	const char *mem_start = mem_from;
	const char *lock_start = lock_from;

	if (lock_start + lock_len <= mem_start)
		return 1;
	if (mem_start + mem_len <= lock_start)
		return 1;
	return 0;
}
/*
 * print_freed_lock_bug - report that memory holding a held lock is being freed.
 * @curr:     task doing the free
 * @mem_from: first byte of the range
 * @mem_to:   one past the last byte of the range
 * @hlock:    the held lock found inside the range
 *
 * Prints only when debug_locks_off() returns non-zero and
 * debug_locks_silent is clear; otherwise returns without printing.
 */
static void print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
				 const void *mem_to, struct held_lock *hlock)
{
	const void *last_byte = mem_to - 1;

	/* debug_locks_off() is evaluated first, so its side effect always happens. */
	if (!debug_locks_off() || debug_locks_silent)
		return;

	printk("\n");
	printk("=========================\n");
	printk("[ BUG: held lock freed! ]\n");
	print_kernel_ident();
	printk("-------------------------\n");
	printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
		curr->comm, task_pid_nr(curr), mem_from, last_byte);

	/* The offending lock, followed by everything the task holds. */
	print_lock(hlock);
	lockdep_print_held_locks(curr);

	printk("\nstack backtrace:\n");
	dump_stack();
}
/*
 * Generic 'turn off all lock debugging' function.
 *
 * Returns 1 only when __debug_locks_off() returned non-zero and
 * debug_locks_silent is clear; console_verbose() is called in that case.
 * Returns 0 otherwise.
 */
int debug_locks_off(void)
{
	if (!__debug_locks_off())
		return 0;

	if (debug_locks_silent)
		return 0;

	console_verbose();
	return 1;
}
/*
 * debug_locks == 1 means lock debugging is on, 0 means all lock debugging
 * is off. This stores 0 through xchg() and returns the value that was
 * there before.
 */
static inline int __debug_locks_off(void)
{
	int previous = xchg(&debug_locks, 0);

	return previous;
}
/*
 * print_kernel_ident - print one line identifying the running kernel:
 * the release string, the version string up to its first space, and the
 * taint string.
 */
static void print_kernel_ident(void)
{
	const char *version = init_utsname()->version;
	int first_word_len = (int)strcspn(version, " ");

	printk("%s %.*s %s\n", init_utsname()->release,
		first_word_len, version, print_tainted());
}
/*
 * print_lock - print one held lock: its class name, then ", at: " and the
 * acquire site.
 */
static void print_lock(struct held_lock *hlock)
{
	struct lock_class *class = hlock_class(hlock);
	unsigned long site = hlock->acquire_ip;

	print_lock_name(class);
	printk(", at: ");
	print_ip_sym(site);
}
/*
 * hlock_class - map a held lock to its lock_class.
 *
 * class_idx is 1-based. An index of 0 raises a debug warning and yields
 * NULL; otherwise the result is entry (class_idx - 1) of lock_classes[].
 */
static inline struct lock_class *hlock_class(struct held_lock *hlock)
{
	if (hlock->class_idx)
		return &lock_classes[hlock->class_idx - 1];

	DEBUG_LOCKS_WARN_ON(1);
	return NULL;
}
static void print_lock_name(struct lock_class *class)
{
529 char usage[LOCK_USAGE_CHARS];
530
531 get_usage_chars(class, usage);
532
533 printk(" (");
534 __print_lock_name(class);
535 printk("){%s}", usage);
}
static void __print_lock_name(struct lock_class *class)
{
511 char str[KSYM_NAME_LEN];
512 const char *name;
513
514 name = class->name;
515 if (!name) {
516 name = __get_key_name(class->key, str);
517 printk("%s", name);
518 } else {
519 printk("%s", name);
520 if (class->name_version > 1)
521 printk("#%d", class->name_version);
522 if (class->subclass)
523 printk("/%d", class->subclass);
524 }
}
/*
 * print_ip_sym - print an instruction address as "[<address>] symbol".
 * @ip: address to print
 *
 * The address is passed twice on purpose: once for the raw %p value
 * inside the brackets and once for the %pS symbolic form.
 */
static inline void print_ip_sym(unsigned long ip)
{
	printk("[<%p>] %pS\n", (void *) ip, (void *) ip);
}
/*
 * lockdep_print_held_locks - list every lock a task currently holds.
 * @curr: task to report on
 *
 * Prints a "no locks held" line when the task's lockdep_depth is zero.
 * Otherwise prints a count header followed by one numbered print_lock()
 * entry per held lock, in array order.
 */
static void lockdep_print_held_locks(struct task_struct *curr)
{
	int depth = curr->lockdep_depth;
	int pos;

	if (depth == 0) {
		printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr));
		return;
	}

	printk("%d lock%s held by %s/%d:\n",
		depth, (depth > 1) ? "s" : "", curr->comm, task_pid_nr(curr));

	pos = 0;
	while (pos < depth) {
		printk(" #%d: ", pos);
		print_lock(&curr->held_locks[pos]);
		pos++;
	}
}
2.
参考http://www.lenky.info/archives/2013/04/2253