hung task机制学习

module_init(hung_task_init);     //Hung_task.c (c:\国嵌\code\linux-ok6410\kernel)
    =>watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");  //创建khungtaskd检测线程
        =>static int watchdog(void *dummy)
            =>for ( ; ; ) {
                unsigned long timeout = sysctl_hung_task_timeout_secs; //默认120s,也就是2分钟

                while (schedule_timeout_interruptible(timeout_jiffies(timeout))) //阻塞等待2分钟,khungtaskd线程处于TASK_INTERRUPTIBLE状态
                    timeout = sysctl_hung_task_timeout_secs;

                check_hung_uninterruptible_tasks(timeout);
                    =>do_each_thread(g, t)  {//遍历进程链表
                        if (t->state == TASK_UNINTERRUPTIBLE) //检测进程是否处于TASK_UNINTERRUPTIBLE
                            check_hung_task(t, timeout);
                                =>switch_count = t->nvcsw + t->nivcsw; //获取进程切换的计数
                                =>if (switch_count != t->last_switch_count) {//如果120s内进程切换计数有变化,证明进程在这120s内被调度过,没有挂死,直接返回
                                    t->last_switch_count = switch_count;
                                    return;
                                }
                                =>sched_show_task(t);//打印各种异常信息,包括挂死进程的调用栈
                    }while_each_thread(g, t);   //不检测khungtaskd线程自己
            }// for死循环结束

参考文档:
请问进程描述符中 nvcsw和nivcsw的区别是
http://bbs.chinaunix.net/thread-3688431-1-1.html
答案如下:
nvcsw: voluntary context switch
nivcsw: involuntary context switch
A voluntary context switch occurs when a thread blocks because it requires a resource that is unavailable. An involuntary context switch takes place when a thread executes for the duration of its time slice or when the system identifies a higher-priority thread to run.

kernel 3.10内核源码分析–hung task机制
http://blog.csdn.net/wh_19910525/article/details/50503269

案例分享:
http://lists.infradead.org/pipermail/linux-mtd-cvs/2012-November/008218.html
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/fs/jffs2/file.c?id=157078f64b8a9cd7011b6b900b2f2498df850748

如下案例A B形成死锁

1个核的进程在做如下操作
generic_file_aio_read
    =>do_generic_file_read(filp, ppos, &desc, file_read_actor);
        =>error = lock_page_killable(page);  /* Get exclusive access to the page ... */  ////////////(+A)
        =>error = mapping->a_ops->readpage(filp, page);  /* Start the actual read. The read will unlock the page. */
            =>static int jffs2_readpage (struct file *filp, struct page *pg)
                =>mutex_lock(&f->sem);  //////////////////////////////////////////(+B)
                =>ret = jffs2_do_readpage_unlock(pg->mapping->host, pg);

另外一个核的进程做如下操作
jffs2_write_begin
    =>mutex_lock(&f->sem);  ///////////////////////////(+B)
    =>pg = grab_cache_page_write_begin(mapping, index, flags);
        =>page = find_lock_page(mapping, index);
            =>page = find_get_page(mapping, offset);
            if (page) {
                lock_page(page);  //(+A)
                /* Has the page been truncated? */
                if (unlikely(page->mapping != mapping)) {
                    unlock_page(page);
                    page_cache_release(page);
                    goto repeat;
                }
                VM_BUG_ON(page->index != offset);
            }
            return page;

另外一种场景 A锁和C锁形成死锁

generic_file_aio_write
    =>ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
        =>written_buffered = generic_file_buffered_write(iocb, iov,
                        nr_segs, pos, ppos, count,
                        written);
            =>status = generic_perform_write(file, &i, pos);
                =>status = a_ops->write_begin(file, mapping, pos, bytes, flags,
                        &page, &fsdata);
                    =>jffs2_write_begin
                        =>ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
                        =>pg = grab_cache_page_write_begin(mapping, index, flags);
                            =>page = find_lock_page(mapping, index);
                                =>page = find_get_page(mapping, offset);
                                if (page) {
                                    lock_page(page);  //(+A)
                                    /* Has the page been truncated? */
                                    if (unlikely(page->mapping != mapping)) {
                                        unlock_page(page);
                                        page_cache_release(page);
                                        goto repeat;
                                    }
                                    VM_BUG_ON(page->index != offset);
                                }
                                return page;
                =>status = a_ops->write_end(file, mapping, pos, bytes, copied,
                        page, fsdata);
                    =>static int jffs2_write_end(struct file *filp, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *pg, void *fsdata)
                        =>ret = jffs2_write_inode_range(c, f, ri, page_address(pg) + aligned_start, (pg->index << PAGE_CACHE_SHIFT) + aligned_start, end - aligned_start, &writtenlen);
                            =>ret = jffs2_reserve_space(c, sizeof(*ri) + JFFS2_MIN_DATA_LEN, &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
                                =>mutex_lock(&c->alloc_sem);  ///////////////////////////////////(+C)





jffs2_write_begin
    =>ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
        =>mutex_lock(&c->alloc_sem);  ///////////////////////////////(+C)
    =>mutex_lock(&f->sem);  ///////////////////////////(+B)
    =>pg = grab_cache_page_write_begin(mapping, index, flags);
        =>page = find_lock_page(mapping, index);
            =>page = find_get_page(mapping, offset);
            if (page) {
                lock_page(page);  //(+A)
                /* Has the page been truncated? */
                if (unlikely(page->mapping != mapping)) {
                    unlock_page(page);
                    page_cache_release(page);
                    goto repeat;
                }
                VM_BUG_ON(page->index != offset);
            }
            return page;

第三个案例,还是jffs2
有如下调用栈:

第一个调用栈
__switch_to
schedule
inode_wait
__wait_on_bit
out_of_line_wait_on_bit
ifind_fast
iget_locked
jffs2_iget
jffs2_gc_fetch_inode
jffs2_garbage_collect_pass
jffs2_garbage_collect_thread
kthread
original_kernel_thread


第二个调用栈
__switch_to
schedule
__mutex_lock_slowpath
mutex_lock
jffs2_reserve_space
jffs2_write_inode_range
jffs2_write_end
generic_file_buffered_write
__generic_file_aio_write
generic_file_aio_write
generic_file_aio_write
vfs_write
sys_write
ret_from_syscall

第三个调用栈
__switch_to
schedule
__mutex_lock_interruptible_slowpath
mutex_lock_interruptible
jffs2_garbage_collect_pass
jffs2_reserve_space
jffs2_do_create
jffs2_create
vfs_create
do_last
do_filp_open
do_sys_open
ret_from_syscall

根据第一个和第二个调用栈可以找到AB锁(根据第一个和第三个也可以找到AB锁),根据第三个调用栈可以找到AB BA死锁

AB锁
jffs2_garbage_collect_thread
    =>if (jffs2_garbage_collect_pass(c) == -ENOSPC)
        =>if (mutex_lock_interruptible(&c->alloc_sem)) /////////////////(+B)
        =>f = jffs2_gc_fetch_inode(c, inum, !nlink);
            =>inode = jffs2_iget(OFNI_BS_2SFFJ(c), inum);
                =>inode = iget_locked(sb, ino);
                    =>inode = ifind_fast(sb, head, ino);
                        =>wait_on_inode(inode);
                            =>wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE);////////////////////(+A)
                            注意:要仔细查看里面的代码和注释,不要想当然。注释原文为 "wait_on_bit - wait for a bit to be cleared",即等待__I_NEW被清零
                                =>return out_of_line_wait_on_bit(word, bit, action, mode);
                                    =>wait_queue_head_t *wq = bit_waitqueue(word, bit);//通过等待队列实现
                                    DEFINE_WAIT_BIT(wait, word, bit);

                                    return __wait_on_bit(wq, &wait, action, mode);

BA锁
jffs2_create
    =>inode = jffs2_new_inode(dir_i, mode, ri);
        =>if (insert_inode_locked(inode) < 0)
            =>inode->i_state |= I_NEW; //////////////////////////////(+A)  设置NEW
    =>ret = jffs2_do_create(c, dir_f, f, ri, &dentry->d_name);
        =>ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
            =>mutex_lock(&c->alloc_sem); ////////////////////(+B)
            =>mutex_unlock(&c->alloc_sem); ///////////////////(-B)
    =>unlock_new_inode(inode);
        =>inode->i_state &= ~I_NEW; ////////////////////////////////(-A)
        wake_up_bit(&inode->i_state, __I_NEW);

经验总结:
把各种调用栈全部看完,画出流程图,不要看到第二个调用栈就不往下看了。
分析wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE); 要细致,看一下它到底是在等待__I_NEW被清除,还是在等待__I_NEW被设置;
弄清楚它是在等待__I_NEW被清除之后,再分析__I_NEW在什么场景下会被设置,结合调用栈就可以把ABBA锁找到

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值