module_init(hung_task_init); //Hung_task.c (c:\国嵌\code\linux-ok6410\kernel)
=>watchdog_task = kthread_run(watchdog, NULL, "khungtaskd"); //创建khungtaskd检测线程
=>static int watchdog(void *dummy)
=>for ( ; ; ) {
unsigned long timeout = sysctl_hung_task_timeout_secs; //默认120s,也就是2分钟
while (schedule_timeout_interruptible(timeout_jiffies(timeout))) //阻塞等待2分钟,khungtaskd线程处于TASK_INTERRUPTIBLE状态
timeout = sysctl_hung_task_timeout_secs;
check_hung_uninterruptible_tasks(timeout);
=>do_each_thread(g, t) {//遍历进程链表
if (t->state == TASK_UNINTERRUPTIBLE) //检测进程是否处于TASK_UNINTERRUPTIBLE
check_hung_task(t, timeout);
=>switch_count = t->nvcsw + t->nivcsw; //获取进程切换的计数
=>if (switch_count != t->last_switch_count) {//如果120s内进程切换计数有变化,那么证明进程120s内有调度,没有挂死,返回
t->last_switch_count = switch_count;
return;
}
=>sched_show_task(t);//打印各种异常信息,包括挂死进程的调用栈
}while_each_thread(g, t); //不检测khungtaskd线程自己
}// for死循环结束
参考文档:
请问进程描述符中 nvcsw和nivcsw的区别是
http://bbs.chinaunix.net/thread-3688431-1-1.html
答案如下:
nvcsw: voluntary context switch
nivcsw: involuntary context switch
A voluntary context switch occurs when a thread blocks because it requires a resource that is unavailable. An involuntary context switch takes place when a thread executes for the duration of its time slice or when the system identifies a higher-priority thread to run.
kernel 3.10内核源码分析–hung task机制
http://blog.csdn.net/wh_19910525/article/details/50503269
案例分享:
http://lists.infradead.org/pipermail/linux-mtd-cvs/2012-November/008218.html
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/fs/jffs2/file.c?id=157078f64b8a9cd7011b6b900b2f2498df850748
如下案例A B形成死锁
1个核的进程在做如下操作
generic_file_aio_read
=>do_generic_file_read(filp, ppos, &desc, file_read_actor);
=>error = lock_page_killable(page); /* Get exclusive access to the page ... */ ////////////(+A)
=>error = mapping->a_ops->readpage(filp, page); /* Start the actual read. The read will unlock the page. */
=>static int jffs2_readpage (struct file *filp, struct page *pg)
=>mutex_lock(&f->sem); //////////////////////////////////////////(+B)
=>ret = jffs2_do_readpage_unlock(pg->mapping->host, pg);
另外一个核的进程做如下操作
jffs2_write_begin
=>mutex_lock(&f->sem); ///////////////////////////(+B)
=>pg = grab_cache_page_write_begin(mapping, index, flags);
=>page = find_lock_page(mapping, index);
=>page = find_get_page(mapping, offset);
if (page) {
lock_page(page); //(+A)
/* Has the page been truncated? */
if (unlikely(page->mapping != mapping)) {
unlock_page(page);
page_cache_release(page);
goto repeat;
}
VM_BUG_ON(page->index != offset);
}
return page;
另外一种场景 A锁和C锁形成死锁
generic_file_aio_write
=>ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
=>written_buffered = generic_file_buffered_write(iocb, iov,
nr_segs, pos, ppos, count,
written);
=>status = generic_perform_write(file, &i, pos);
=>status = a_ops->write_begin(file, mapping, pos, bytes, flags,
&page, &fsdata);
=>jffs2_write_begin
=>ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
=>pg = grab_cache_page_write_begin(mapping, index, flags);
=>page = find_lock_page(mapping, index);
=>page = find_get_page(mapping, offset);
if (page) {
lock_page(page); //(+A)
/* Has the page been truncated? */
if (unlikely(page->mapping != mapping)) {
unlock_page(page);
page_cache_release(page);
goto repeat;
}
VM_BUG_ON(page->index != offset);
}
return page;
=>status = a_ops->write_end(file, mapping, pos, bytes, copied,
page, fsdata);
=>static int jffs2_write_end(struct file *filp, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *pg, void *fsdata)
=>ret = jffs2_write_inode_range(c, f, ri, page_address(pg) + aligned_start, (pg->index << PAGE_CACHE_SHIFT) + aligned_start, end - aligned_start, &writtenlen);
=>ret = jffs2_reserve_space(c, sizeof(*ri) + JFFS2_MIN_DATA_LEN, &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
=>mutex_lock(&c->alloc_sem); ///////////////////////////////////(+C)
另外一个进程做如下操作
jffs2_write_begin
=>ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
=>mutex_lock(&c->alloc_sem); ///////////////////////////////(+C)
=>mutex_lock(&f->sem); ///////////////////////////(+B)
=>pg = grab_cache_page_write_begin(mapping, index, flags);
=>page = find_lock_page(mapping, index);
=>page = find_get_page(mapping, offset);
if (page) {
lock_page(page); //(+A)
/* Has the page been truncated? */
if (unlikely(page->mapping != mapping)) {
unlock_page(page);
page_cache_release(page);
goto repeat;
}
VM_BUG_ON(page->index != offset);
}
return page;
第三个案例,还是jffs2
有如下调用栈:
第一个调用栈
__switch_to
schedule
inode_wait
__wait_on_bit
out_of_line_wait_on_bit
ifind_fast
iget_locked
jffs2_iget
jffs2_gc_fetch_inode
jffs2_garbage_collect_pass
jffs2_garbage_collect_thread
kthread
original_kernel_thread
第二个调用栈
__switch_to
schedule
__mutex_lock_slowpath
mutex_lock
jffs2_reserve_space
jffs2_write_inode_range
jffs2_write_end
generic_file_buffered_write
__generic_file_aio_write
generic_file_aio_write
generic_file_aio_write
vfs_write
sys_write
ret_from_syscall
第三个调用栈
__switch_to
schedule
__mutex_lock_interruptible_slowpath
mutex_lock_interruptible
jffs2_garbage_collect_pass
jffs2_reserve_space
jffs2_do_create
jffs2_create
vfs_create
do_last
do_filp_open
do_sys_open
ret_from_syscall
根据第一个和第二个调用栈可以找到AB锁(根据第一个和第三个调用栈也可以找到AB锁),根据第三个调用栈可以找到ABBA死锁
AB锁(注意:此路径的实际加锁顺序是先持有B即alloc_sem,再等待A即__I_NEW被清零)
jffs2_garbage_collect_thread
=>if (jffs2_garbage_collect_pass(c) == -ENOSPC)
=>if (mutex_lock_interruptible(&c->alloc_sem)) /////////////////(+B)
=>f = jffs2_gc_fetch_inode(c, inum, !nlink);
=>inode = jffs2_iget(OFNI_BS_2SFFJ(c), inum);
=>inode = iget_locked(sb, ino);
=>inode = ifind_fast(sb, head, ino);
=>wait_on_inode(inode);
=>wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE);////////////////////(+A)
注意查看里面代码和注释的内容,不要想当然:wait_on_bit - wait for a bit to be cleared,即等待__I_NEW被清零
=>return out_of_line_wait_on_bit(word, bit, action, mode);
=>wait_queue_head_t *wq = bit_waitqueue(word, bit);//通过等待队列实现
DEFINE_WAIT_BIT(wait, word, bit);
return __wait_on_bit(wq, &wait, action, mode);
BA锁(注意:此路径的实际加锁顺序是先设置A即I_NEW,再获取B即alloc_sem)
jffs2_create
=>inode = jffs2_new_inode(dir_i, mode, ri);
=>if (insert_inode_locked(inode) < 0)
=>inode->i_state |= I_NEW; //////////////////////////////(+A) 设置I_NEW
=>ret = jffs2_do_create(c, dir_f, f, ri, &dentry->d_name);
=>ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
=>mutex_lock(&c->alloc_sem); ////////////////////(+B)
=>mutex_unlock(&c->alloc_sem); ///////////////////(-B)
=>unlock_new_inode(inode);
=>inode->i_state &= ~I_NEW; ////////////////////////////////(-A)
wake_up_bit(&inode->i_state, __I_NEW);
经验总结:
把各种调用栈全部看完,画出流程图,不要看到第二个调用栈就不往下看了。
分析wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE); 要细致,看一下到底是等待__I_NEW被清除还是等待__I_NEW被设置;
确认是等待__I_NEW被清除之后,再分析__I_NEW在什么场景下会被设置,结合调用栈就可以把ABBA死锁找到