Linux内核进程详解之三:flush-x:y

上一篇文章《设备文件与设备号》当然不是突然穿插而来的自言自语,而是理解本文的前提,下面来看。flush-x:y是一类进程,这在系列的上一篇文章里已经讲到过,系统的绝大部分的bdi设备都会有对应的flush-x:y内核进程,而这个x:y是对应bdi设备的设备号。
先看一下系统当前挂载的文件系统:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
[root@localhost lenky]# cat /proc/mounts
rootfs / rootfs rw 0 0
/proc /proc proc rw,relatime 0 0
/sys /sys sysfs rw,seclabel,relatime 0 0
udev /dev devtmpfs rw,seclabel,relatime,size=502568k,nr_inodes=125642,mode=755 0 0
devpts /dev/pts devpts rw,seclabel,relatime,gid=5,mode=620,ptmxmode=000 0 0
tmpfs /dev/shm tmpfs rw,seclabel,relatime 0 0
/dev/mapper/VolGroup-lv_root / ext4 rw,seclabel,relatime,barrier=1,data=ordered 0 0
none /selinux selinuxfs rw,relatime 0 0
udev /dev devtmpfs rw,seclabel,relatime,size=502568k,nr_inodes=125642,mode=755 0 0
/proc/bus/usb /proc/bus/usb usbfs rw,relatime 0 0
/dev/sda1 /boot ext4 rw,seclabel,relatime,barrier=1,data=ordered 0 0
/dev/mapper/VolGroup-lv_home /home ext4 rw,seclabel,relatime,barrier=1,data=ordered 0 0
none /proc/sys/fs/binfmt_misc binfmt_misc rw,relatime 0 0
cgroup /cgroup/cpuset cgroup rw,relatime,cpuset 0 0
cgroup /cgroup/cpu cgroup rw,relatime,cpu 0 0
cgroup /cgroup/cpuacct cgroup rw,relatime,cpuacct 0 0
cgroup /cgroup/memory cgroup rw,relatime,memory 0 0
cgroup /cgroup/devices cgroup rw,relatime,devices 0 0
cgroup /cgroup/freezer cgroup rw,relatime,freezer 0 0
cgroup /cgroup/net_cls cgroup rw,relatime,net_cls 0 0
cgroup /cgroup/blkio cgroup rw,relatime,blkio 0 0
sunrpc /var/lib/nfs/rpc_pipefs rpc_pipefs rw,relatime 0 0
/etc/auto.misc /misc autofs rw,relatime,fd=7,pgrp=1393,timeout=300,minproto=5,maxproto=5,indirect 0 0
-hosts /net autofs rw,relatime,fd=13,pgrp=1393,timeout=300,minproto=5,maxproto=5,indirect 0 0
/dev/sdb1 /home/lenky/sdb/sdb1 ext4 rw,seclabel,relatime,barrier=1,data=ordered 0 0
/dev/sdc1 /home/lenky/sdc/sdc1 ext4 rw,seclabel,relatime,barrier=1,data=ordered 0 0
/dev/sdc2 /home/lenky/sdc/sdc2 ext4 rw,seclabel,relatime,barrier=1,data=ordered 0 0
[root@localhost lenky]#

注意需要关注的重点:

/dev/mapper/VolGroup-lv_root / ext4
/dev/mapper/VolGroup-lv_home /home ext4
/dev/sdb1 /home/lenky/sdb/sdb1 ext4
/dev/sdc1 /home/lenky/sdc/sdc1 ext4
/dev/sdc2 /home/lenky/sdc/sdc2 ext4

对应的设备号分别为:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
[root@localhost lenky]# ls -l /dev/dm-*
brw-rw----. 1 root disk 253, 0 Jan 12 06:24 /dev/dm-0
brw-rw----. 1 root disk 253, 1 Jan 12 06:24 /dev/dm-1
brw-rw----. 1 root disk 253, 2 Jan 12 06:24 /dev/dm-2
[root@localhost lenky]#
[root@localhost lenky]# ls -l /dev/mapper/*
crw-rw----. 1 root root 10, 236 Jan 12 06:24 /dev/mapper/control
lrwxrwxrwx. 1 root root       7 Jan 12 06:24 /dev/mapper/VolGroup-lv_home -> ../dm-2
lrwxrwxrwx. 1 root root       7 Jan 12 06:24 /dev/mapper/VolGroup-lv_root -> ../dm-0
lrwxrwxrwx. 1 root root       7 Jan 12 06:24 /dev/mapper/VolGroup-lv_swap -> ../dm-1
[root@localhost lenky]#
[root@localhost lenky]# ls -l /dev/sda*
brw-rw----. 1 root disk 8, 0 Jan 12 06:24 /dev/sda
brw-rw----. 1 root disk 8, 1 Jan 12 06:24 /dev/sda1
brw-rw----. 1 root disk 8, 2 Jan 12 06:24 /dev/sda2
[root@localhost lenky]#
[root@localhost lenky]# ls -l /dev/sdb*
brw-rw----. 1 root disk 8, 16 Jan 12 06:25 /dev/sdb
brw-rw----. 1 root disk 8, 17 Jan 12 06:25 /dev/sdb1
[root@localhost lenky]#
[root@localhost lenky]# ls -l /dev/sdc*
brw-rw----. 1 root disk 8, 32 Jan 12 06:29 /dev/sdc
brw-rw----. 1 root disk 8, 33 Jan 12 06:39 /dev/sdc1
brw-rw----. 1 root disk 8, 34 Jan 12 06:29 /dev/sdc2
brw-rw----. 1 root disk 8, 35 Jan 12 06:29 /dev/sdc3
[root@localhost lenky]#

在任意时刻,我们能看到的flush-x:y内核进程并不固定,原因之前已经说过:

1
2
3
4
5
6
[root@localhost lenky]# ps aux | grep flush-
root      1250  0.0  0.0      0     0 ?        S    06:24   0:00 [flush-253:0]
root      2180  0.0  0.0      0     0 ?        S    06:39   0:00 [flush-253:2]
root      2186  2.0  0.0      0     0 ?        S    06:39   0:07 [flush-8:32]
root      2329  0.0  0.0 103204   800 pts/3    S+   06:45   0:00 grep flush-
[root@localhost lenky]#

调用sync命令强制执行同步操作,会为所有对应的bdi设备创建出flush-x:y内核进程:

1
2
3
4
5
6
7
8
9
[root@localhost lenky]# sync
[root@localhost lenky]# ps aux | grep flush-
root      1250  0.0  0.0      0     0 ?        S    06:24   0:00 [flush-253:0]
root      2180  0.0  0.0      0     0 ?        S    06:39   0:00 [flush-253:2]
root      2186  2.0  0.0      0     0 ?        S    06:39   0:07 [flush-8:32]
root      2331  0.0  0.0      0     0 ?        S    06:45   0:00 [flush-8:0]
root      2332  0.0  0.0      0     0 ?        S    06:45   0:00 [flush-8:16]
root      2334  0.0  0.0 103204   800 pts/3    S+   06:45   0:00 grep flush-
[root@localhost lenky]#

可以看到flush-x:y内核进程是对应bdi整个设备的,比如这里的单个磁盘,而不是各个磁盘分区。
最后来看代码,flush-x:y内核进程的主体函数是bdi_writeback_thread(…)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
/*
  * Handle writeback of dirty data for the device backed by this bdi. Also
  * wakes up periodically and does kupdated style flushing.
  *
  * This is the main loop of a flush-x:y kernel thread: it flushes dirty
  * pages for its backing device until kthread_stop() asks it to exit.
  */
int bdi_writeback_thread( void *data)
{
     struct bdi_writeback *wb = data;
     struct backing_dev_info *bdi = wb->bdi;
     long pages_written;
 
     /* Allow writing to swap, and allow this thread to be frozen on suspend. */
     current->flags |= PF_SWAPWRITE;
     set_freezable();
     /* Mark ourselves active now so the forker thread's idle check
      * (which compares last_active) does not kill us right away. */
     wb->last_active = jiffies;
 
     /*
      * Our parent may run at a different priority, just set us to normal
      */
     set_user_nice(current, 0);
 
     trace_writeback_thread_start(bdi);
 
     while (!kthread_should_stop()) {
         /*
          * Remove own delayed wake-up timer, since we are already awake
          * and we'll take care of the periodic write-back.
          */
         del_timer(&wb->wakeup_timer);
 
         /* Do the actual flushing work for this bdi. */
         pages_written = wb_do_writeback(wb, 0);
 
         trace_writeback_pages_written(pages_written);
 
         /* Any progress counts as activity for the idle-kill heuristic. */
         if (pages_written)
             wb->last_active = jiffies;
 
         /* Prepare to sleep, but re-check for queued work / stop request
          * first to avoid missing a wakeup. */
         set_current_state(TASK_INTERRUPTIBLE);
         if (!list_empty(&bdi->work_list) || kthread_should_stop()) {
             __set_current_state(TASK_RUNNING);
             continue ;
         }
 
         /* dirty_writeback_interval is in centiseconds; * 10 converts
          * it to milliseconds for msecs_to_jiffies(). */
         if (wb_has_dirty_io(wb) && dirty_writeback_interval)
             schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
         else {
             /*
              * We have nothing to do, so can go sleep without any
              * timeout and save power. When a work is queued or
              * something is made dirty - we will be woken up.
              */
             schedule();
         }
 
         try_to_freeze();
     }
 
     /* Flush any work that raced with us exiting */
     if (!list_empty(&bdi->work_list))
         wb_do_writeback(wb, 1);
 
     trace_writeback_thread_stop(bdi);
     return 0;
}

函数主体是一个while循环,while语句调用一个判断函数决定是否该结束循环:

1
2
3
4
5
6
7
8
9
10
11
12
/**
  * kthread_should_stop - should this kthread return now?
  *
  * When someone calls kthread_stop() on your kthread, it will be woken
  * and this will return true.  You should then return, and your return
  * value will be passed through to kthread_stop().
  */
int kthread_should_stop( void )
{
     /* Simply read the should_stop flag that kthread_stop() sets on
      * this thread's struct kthread. */
     return to_kthread(current)->should_stop;
}
EXPORT_SYMBOL(kthread_should_stop);

而这个should_stop标记字段会在bdi-default内核进程的KILL_THREAD动作里进行修改(上一篇文章提到过),也就是通过这个字段实现bdi-default内核进程对flush-x:y内核进程的控制:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
         case KILL_THREAD:
             __set_current_state(TASK_RUNNING);
             kthread_stop(task);
             break ;
 
/**
  * kthread_stop - stop a thread created by kthread_create().
  * @k: thread created by kthread_create().
  *
  * Sets kthread_should_stop() for @k to return true, wakes it, and
  * waits for it to exit. This can also be called after kthread_create()
  * instead of calling wake_up_process(): the thread will exit without
  * calling threadfn().
  *
  * If threadfn() may call do_exit() itself, the caller must ensure
  * task_struct can't go away.
  *
  * Returns the result of threadfn(), or %-EINTR if wake_up_process()
  * was never called.
  */
int kthread_stop( struct task_struct *k)
{
     struct kthread *kthread;
     int ret;
 
     trace_sched_kthread_stop(k);
     /* Pin the task_struct so it cannot be freed while we wait below. */
     get_task_struct(k);
 
     kthread = to_kthread(k);
     barrier(); /* it might have exited */
     /* NOTE(review): vfork_done != NULL presumably means the thread has
      * not exited yet; only then do we set the stop flag, wake it, and
      * wait for its completion — confirm against kthread() setup. */
     if (k->vfork_done != NULL) {
         kthread->should_stop = 1;
         wake_up_process(k);
         wait_for_completion(&kthread->exited);
     }
     /* threadfn()'s return value, stashed in exit_code by do_exit(). */
     ret = k->exit_code;
 
     put_task_struct(k);
     trace_sched_kthread_stop_ret(ret);
 
     return ret;
}
EXPORT_SYMBOL(kthread_stop);

while循环内的工作,除去其它细节,值得关注的主要有三点:第一,修改最后活动时间(语句:wb->last_active = jiffies;),这样bdi-default内核进程才能通过last_active这个字段来判断flush-x:y内核进程的活动状态,如果很久没有活动(比较的就是last_active字段)则把它kill掉;第二,当然就是进程的主要工作,调用函数wb_do_writeback(…)进行同步操作;第三,如果在进行一次同步操作之后,又有新的脏数据需要同步,那么先睡眠,等间隔时间(默认5秒)后超时醒来继续工作;如果已经没有脏数据需要同步,那么直接schedule()调度其它进程,而进程本身进入可中断睡眠状态(注意前面的语句:set_current_state(TASK_INTERRUPTIBLE);),等待后续被唤醒继续工作或被kill掉。
整个bdi-default和flush-x:y内核进程讲完了,为什么会有这样的设计?在这里有很好的说明:http://lwn.net/Articles/396757/,相比以前的多个pdflush间隔醒来,改进之后只需bdi-default一个内核进程间隔醒来就行了,这在电池供电设备上明显比较省电。

转载请保留地址:http://lenky.info/2012/02/18/linux%e5%86%85%e6%a0%b8%e8%bf%9b%e7%a8%8b%e8%af%a6%e8%a7%a3%e4%b9%8b%e4%b8%89%ef%bc%9aflush-xy/ 或 http://lenky.info/?p=1138


  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值