引子:
运行一个tty相关的应用程序,出现softlockup,有关软件看门狗的原理,看另一文章。有解决这个问题,有些信息是必须的:
1.log 要打印出时间戳;
2.判断是否死锁在某个信号量,或者说看某个进程卡在哪里不调度,ps -l可以看出没个进程的进入和退出时间,不知为什么不支持;
另外的方法,打开ftrace让它监测sched_switch 事件也是可以的。
<idle>-0 [000] 52.214280: sched_switch: 0:120:R ==> 7:0: watchdog/0
watchdog/0-7 [000] 52.214299: sched_switch: 7:0:S ==> 0:120: swapper/0
<idle>-0 [000] 56.214278: sched_switch: 0:120:R ==> 7:0: watchdog/0
watchdog/0-7 [000] 56.214295: sched_switch: 7:0:S ==> 0:120: swapper/0
<idle>-0 [000] 60.214274: sched_switch: 0:120:R ==> 7:0: watchdog/0
watchdog/0-7 [000] 60.214290: sched_switch: 7:0:S ==> 0:120: swapper/0
<idle>-0 [000] 64.214280: sched_switch: 0:120:R ==> 7:0: watchdog/0
watchdog/0-7 [000] 64.214299: sched_switch: 7:0:S ==> 0:120: swapper/0
<idle>-0 [000] 68.214282: sched_switch: 0:120:R ==> 7:0: watchdog/0
watchdog/0-7 [000] 68.214302: sched_switch: 7:0:S ==> 0:120: swapper/0
<idle>-0 [000] 72.214282: sched_switch: 0:120:R ==> 7:0: watchdog/0
watchdog/0-7 [000] 72.214301: sched_switch: 7:0:S ==> 0:120: swapper/0
<idle>-0 [000] 76.214300: sched_switch: 0:120:R ==> 7:0: watchdog/0
watchdog/0-7 [000] 76.214315: sched_switch: 7:0:S ==> 644:120: krpcserversd
<idle>-0 [000] 80.214283: sched_switch: 0:120:R ==> 7:0: watchdog/0
watchdog/0-7 [000] 80.214302: sched_switch: 7:0:S ==> 0:120: swapper/0
<idle>-0 [000] 84.214279: sched_switch: 0:120:R ==> 7:0: watchdog/0
watchdog/0-7 [000] 84.214298: sched_switch: 7:0:S ==> 0:120: swapper/0
<idle>-0 [000] 88.214424: sched_switch: 0:120:R ==> 7:0: watchdog/0
watchdog/0-7 [000] 88.214531: sched_switch: 7:0:S ==> 0:120: swapper/0
<idle>-0 [000] 92.214418: sched_switch: 0:120:R ==> 7:0: watchdog/0
watchdog/0-7 [000] 92.214523: sched_switch: 7:0:S ==> 0:120: swapper/0
<idle>-0 [000] 96.214417: sched_switch: 0:120:R ==> 7:0: watchdog/0
watchdog/0-7 [000] 96.214515: sched_switch: 7:0:S ==> 0:120: swapper/0
tty_trans-4114 [000] 100.214588: sched_switch: 4114:120:R ==> 7:0: watchdog/0
watchdog/0-7 [000] 100.227769: sched_switch: 7:0:S ==> 765:120: kworker/0:2
tty_trans-4114 [000] 104.221093: sched_switch: 4114:120:R ==> 7:0: watchdog/0
watchdog/0-7 [000] 104.224154: sched_switch: 7:0:S ==> 765:120: kworker/0:2
看上面的 sched_switch, watchdog/0每4s被调度一次,不是因为某个进程运行时间过程导致watchdog/0没被调度。
从back trace 看:在中断处理函数smd_ch_irq_tasklet_handler运行时,软件看门狗超时函数watchdog_timer_fn被调用。
是不是在smd_ch_irq_tasklet_handler引起的超时?加入log验证一下。
打印信息的方法,是最原始的,也是最能解决问题的。
[ 132.223135:0] [<c007be2c>] (watchdog_timer_fn+0x0/0x1c8) from [<c004bfb0>] (__run_hrtimer+0x158/0x258)
[ 132.223152:0] [<c004be58>] (__run_hrtimer+0x0/0x258) from [<c004c34c>] (hrtimer_interrupt+0x11c/0x270)
[ 132.223170:0] [<c004c230>] (hrtimer_interrupt+0x0/0x270) from [<c001480c>] (twd_handler+0x40/0x4c)
[ 132.223188:0] [<c00147cc>] (twd_handler+0x0/0x4c) from [<c007f710>] (handle_percpu_devid_irq+0x108/0x1b4)
[ 132.223199:0] r4:c0727320 r3:00000000
[ 132.223216:0] [<c007f608>] (handle_percpu_devid_irq+0x0/0x1b4) from [<c007c408>] (generic_handle_irq+0x38/0x40)
[ 132.223234:0] [<c007c3d0>] (generic_handle_irq+0x0/0x40) from [<c000f4a0>] (handle_IRQ+0x88/0xc8)
[ 132.223245:0] r4:0000001d r3:00000108
[ 132.223261:0] [<c000f418>] (handle_IRQ+0x0/0xc8) from [<c00084a4>] (gic_handle_irq+0x4c/0x6c)
[ 132.223271:0] r5:ed97dce8 r4:fc040100
[ 132.223287:0] [<c0008458>] (gic_handle_irq+0x0/0x6c) from [<c000df40>] (__irq_svc+0x40/0x70)
[ 132.223299:0] Exception stack(0xed97dce8 to 0xed97dd30)
[ 132.223310:0] dce0: c08c2fb0 20000113 00000100 00000000 00000005 20000113
[ 132.223325:0] dd00: ee22d168 ee22d000 c0737658 00000000 009ad000 ed97dd3c ed97dd40 ed97dd30
[ 132.223337:0] dd20: c029d438 c0550a00 60000113 ffffffff
[ 132.223345:0] r5:60000113 r4:c0550a00
[ 132.223368:0] [<c05509d4>] (_raw_spin_unlock_irqrestore+0x0/0x54) from [<c029d438>] (smd_send_intr+0x98/0x1e0)
[ 132.223386:0] [<c029d3a0>] (smd_send_intr+0x0/0x1e0) from [<c029dc2c>] (smd_ch_irq_tasklet_handler+0x88/0xc0)
[ 132.223398:0] r5:ed541800 r4:00000002
[ 132.223420:0] [<c029dba4>] (smd_ch_irq_tasklet_handler+0x0/0xc0) from [<c002ffd8>] (tasklet_action+0xa8/0x12c)
加入log打印信息如下:
[ 110.925525:0] card was SDHCI_INT_CARD_INSERT
[ 119.074283:1] mmc1: Timeout waiting for hardware interrupt.
[ 132.214372:0] BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:2:765]
在中断处理函数的下半部smd_ch_irq_tasklet_handler阻塞了132-107=25s, 软件看门狗超时:
BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:2:765].
mvsd_timeout_timer:
pr_err("%s: Timeout waiting for hardware interrupt.\n",mmc_hostname(host->mmc));
* tty_prepare_flip_string -> tty_buffer_request_room -> tty_buffer_find -> tty_buffer_alloc
* 函数tty_buffer_alloc里判断tty_bufhead:memory_used的大小
* if (tty->buf.memory_used + size > 65536) return NULL;就是tty_prepare_flip_string返回值是0
* smd_stream_read读出的数据大小是0,当再次smd_stream_read_avail原来的大小,这样就形成了dead loop
**/
smd_ch_irq_tasklet_handler{
for (;;) {
avail = smd_stream_read_avail(tty_info->ch);
if (avail == 0)
break;
avail = tty_prepare_flip_string(tty, &ptr, avail);
smd_stream_read(tty_info->ch,ptr,avail);
smd_send_intr(tty_info->a2b_int_rx);
tty_flip_buffer_push(tty);
}
}
到这里我们终于找到了发生问题的位置和原因,下面看怎样解决这个问题。
现在的问题是看tty_bufhead:memory_used变量的变化
/*tty->buf.memory_used的生命周期*这里对应的数据结构是tty_bufhead,不是tty_buffer
**/
tty_buffer_init(struct tty_struct *tty) -> tty->buf.memory_used = 0;
struct tty_bufhead {
struct work_struct work;
spinlock_t lock;
struct tty_buffer *head; /* Queue head */
struct tty_buffer *tail; /* Active buffer */
struct tty_buffer *free; /* Free queue head */
int memory_used; /* Buffer space used excluding free queue */
};
struct tty_buffer *tty_buffer_find(struct tty_struct *tty, size_t size)
-> tty->buf.memory_used += t->size;
struct tty_buffer *tty_buffer_alloc(struct tty_struct *tty, size_t size)
-> tty->buf.memory_used += size;
/*哪里触发memory_used的增加?*/
tty_prepare_flip_string -> tty_buffer_request_room -> tty_buffer_find{
1. find a free; or
2. create a new memory: tty_buffer_alloc
}
/*哪里触发memory_used的减少?*/
tty_buffer_free(struct tty_struct *tty, struct tty_buffer *b)-> tty->buf.memory_used -= b->size;
/*问题变成了哪里调用了tty_buffer_free
*在work function:flush_to_ldisc中通过变量tty_bufhead: head遍历tty_buffer*当某个tty_buffer的commit == read时,释放 该tty_buffer and 减少memory_used
**/
void flush_to_ldisc(struct work_struct *work)
{
struct tty_buffer *head;
while ((head = tty->buf.head) != NULL) {
int count;
char *char_buf;
unsigned char *flag_buf;
/*当每个buffer的commit = read时,释放memory,memory_used减少*/
count = head->commit - head->read;
if (!count) {
if (head->next == NULL)
break;
tty->buf.head = head->next;
tty_buffer_free(tty, head);
continue;
}
}
}
/*是不是tty_buffer的 commit, read的问题?
**//*tty_buffer的commit的变化
*在分配 tty_buffer时,从free tty_buffer申请时,初始化为 0
*commit变量表示提交行规的大小,used表示buffer使用了多少,当flip到行规时 used -> commit
*且提交是总是使用 tty_bufhead: tail 指向的tty_buffer
**/
struct tty_buffer {
struct tty_buffer *next;
char *char_buf_ptr;
unsigned char *flag_buf_ptr;
int used;
int size;
int commit;
int read;
/* Data points here */
unsigned long data[0];
};
static struct tty_buffer *tty_buffer_alloc(struct tty_struct *tty, size_t size)
{
struct tty_buffer *p;
if (tty->buf.memory_used + size > 65536){
pr_err(">64K\n");
return NULL;
}
p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC);
if (p == NULL)
return NULL;
p->used = 0;
p->size = size;
p->next = NULL;
p->commit = 0;
p->read = 0;
p->char_buf_ptr = (char *)(p->data);
p->flag_buf_ptr = (unsigned char *)p->char_buf_ptr + size;
tty->buf.memory_used += size;
return p;
}
static struct tty_buffer *tty_buffer_find(struct tty_struct *tty, size_t size)
{
struct tty_buffer **tbh = &tty->buf.free;
while ((*tbh) != NULL) {
struct tty_buffer *t = *tbh;
if (t->size >= size) {
*tbh = t->next;
t->next = NULL;
t->used = 0;
t->commit = 0;
t->read = 0;
tty->buf.memory_used += t->size;
return t;
}
tbh = &((*tbh)->next);
}
/* Round the buffer size out */
size = (size + 0xFF) & ~0xFF;
return tty_buffer_alloc(tty, size);
/* Should possibly check if this fails for the largest buffer we
have queued and recycle that ? */
}
/*这里可以看出不管是从 free中寻找还是create new,不管怎样最后得到tty_bufhead:tail
* t->used = 0;
* t->commit = 0;
*/
int tty_buffer_request_room(struct tty_struct *tty, size_t size)
{
struct tty_buffer *b, *n;
/*如果当前tty_struct:tty_bufhead:tail满足要求*/
if ((b = tty->buf.tail) != NULL)
left = b->size - b->used;
else
left = 0;
/*不满足则从free中寻找或创建*/
if (left < size) {
tty_buffer_find(tty, size));
}
int tty_prepare_flip_string(struct tty_struct *tty, unsigned char **chars,
size_t size)
{
int space = tty_buffer_request_room(tty, size);
if (likely(space)) {/*tty_buffer_request_room得到一个tty_buffer*/
struct tty_buffer *tb = tty->buf.tail;
*chars = tb->char_buf_ptr + tb->used;
memset(tb->flag_buf_ptr + tb->used, TTY_NORMAL, space);
tb->used += space;/*改变了tty_buffer: used*/
}
return space;
}
/*在把tty_buffer的数据push到行规之前,used -> commit:used表示 buffer使用了多少。commit
*表示提交到行规多少。如果没有提交flip动作,两者是不等的;用途也不同。
*/
tty_flip_buffer_push(struct tty_struct *tty) ->
tty->buf.tail->commit = tty->buf.tail->used;
当调用tty_buffer_free时,把buffer释放掉或者放入free queue, 当再从free queue中取出时,就初始化为0.
/*tty_buffer: read
*tty_buffer: head初始化为0,在这里head->read += count;表示flush多少数据到行规
*哪里阻碍了head->read += count导致 [head->commit-head->read != 0]?
*/
void flush_to_ldisc(struct work_struct *work){
while ((head = tty->buf.head) != NULL) {
int count;
char *char_buf;
unsigned char *flag_buf;
count = head->commit - head->read;
if (!count) {
if (head->next == NULL)
break;
tty->buf.head = head->next;
tty_buffer_free(tty, head);
continue;
}
/* Ldisc or user is trying to flush the buffers
we are feeding to the ldisc, stop feeding the
line discipline as we want to empty the queue */
if (test_bit(TTY_FLUSHPENDING, &tty->flags))
break;
if (!tty->receive_room)
break;
if (count > tty->receive_room)
count = tty->receive_room;
char_buf = head->char_buf_ptr + head->read;
flag_buf = head->flag_buf_ptr + head->read;
head->read += count;
spin_unlock_irqrestore(&tty->buf.lock, flags);
disc->ops->receive_buf(tty, char_buf,flag_buf, count);
spin_lock_irqsave(&tty->buf.lock, flags);
}
}
/*这里有几个位置阻碍flush到行规的过程*/
tty_struct: receive_roomif (!tty->receive_room) break;
/*tty->receive_room: 赋值*/
void n_tty_set_room(struct tty_struct *tty)
{
int left = N_TTY_BUF_SIZE - tty->read_cnt - 1;
tty->receive_room = left;
}
/*tty_struct: receive_buf关联read_cnt
*read_cnt在函数n_tty_receive_buf中增大
**/
static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp,
char *fp, int count)
{
const unsigned char *p;
char *f, flags = TTY_NORMAL;
int i;
char buf[64];
if (tty->real_raw) {
spin_lock_irqsave(&tty->read_lock, cpuflags);
i = min(N_TTY_BUF_SIZE - tty->read_cnt,
N_TTY_BUF_SIZE - tty->read_head);
i = min(count, i);
memcpy(tty->read_buf + tty->read_head, cp, i);
tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1);
tty->read_cnt += i;
cp += i;
count -= i;
i = min(N_TTY_BUF_SIZE - tty->read_cnt,
N_TTY_BUF_SIZE - tty->read_head);
i = min(count, i);
memcpy(tty->read_buf + tty->read_head, cp, i);
tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1);
/*这里增加 read_head and read_cnt*/
tty->read_cnt += i;
spin_unlock_irqrestore(&tty->read_lock, cpuflags);
}
n_tty_set_room(tty);
if (tty->receive_room < TTY_THRESHOLD_THROTTLE)
tty_throttle(tty);
}
#define N_TTY_BUF_SIZE 4096
/*n_tty_read->copy_from_read_buf艰辛read_cnt*/
n_tty_read(struct tty_struct *tty, struct file *file,
unsigned char __user *buf, size_t nr)
-> copy_from_read_buf(tty, &b, &nr);
copy_from_read_buf(struct tty_struct *tty,unsigned char __user **b,size_t *nr){
copy_to_user(*b, &tty->read_buf[tty->read_tail], n);
tty->read_tail = (tty->read_tail + n) & (N_TTY_BUF_SIZE-1);
tty->read_cnt -= n;
}
结论
/*现在问题清楚了:如果没有及时调用read function,tty_struct:receive_room==0,使tty_buffer不能释放,memory_used不变小;*另外,如果需要申请的 tty_buffer很大,且所在的位置在中断上下文等,使没有机会释放memory,导致 memory_used大于规定的阀值。
**/
解决的办法:
1】把数据及时读走;
2】修改死循环的函数,使其在有问题的情况下也能跳出。