resync:由成员磁盘计算出校验磁盘的过程,即初始化时需要同步;
recovery:当成员磁盘出现故障,由其它的磁盘和校验盘进行计算获得该故障磁盘数据的过程;
md_ioctl()函数中对SET_DISK_FAULTY的处理会调用set_disk_faulty()函数,该函数的参数:mddev:指向md设备的描述符指针;dev:要被设置为故障的成员磁盘的设备号(函数内部用它在成员磁盘链表中查找对应的成员磁盘)。从函数名可以看出,该函数是要设置该成员磁盘的故障标志位;
/*
 * set_disk_faulty - report one member disk of an MD array as failed.
 * @mddev: descriptor of the MD array
 * @dev:   device number used to look up the member disk
 *
 * Used by the SET_DISK_FAULTY handling in md_ioctl().
 *
 * Returns 0 once the member has been handed to md_error(), or -ENODEV
 * when the array has no personality or no member matches @dev.
 */
static int set_disk_faulty(mddev_t *mddev, dev_t dev)
{
	mdk_rdev_t *member;

	/*
	 * Only search the member list when a personality is attached;
	 * both "no personality" and "no such member" yield -ENODEV.
	 */
	member = mddev->pers ? find_rdev(mddev, dev) : NULL;
	if (member == NULL)
		return -ENODEV;

	/* Let the generic error path mark the disk and kick recovery. */
	md_error(mddev, member);
	return 0;
}
/*
 * md_error - generic handling of a failed member disk.
 * @mddev: descriptor of the MD array
 * @rdev:  descriptor of the member disk that failed
 *
 * Delegates to the personality's error_handler, then flags the array so
 * that any running recovery is interrupted and a new recovery check is
 * requested, and finally wakes the array's management thread.
 */
void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
{
	if (mddev == NULL) {
		MD_BUG();
		return;
	}

	/* No disk given, or the disk is already marked Faulty: nothing to do. */
	if (rdev == NULL || test_bit(Faulty, &rdev->flags))
		return;

	/*
	 * Externally managed array: set Blocked on the member. Per the
	 * original note, the disk must not be written until the flag is
	 * cleared again.
	 */
	if (mddev->external)
		set_bit(Blocked, &rdev->flags);

	/* Without a personality, or one lacking an error handler, stop here. */
	if (mddev->pers == NULL || mddev->pers->error_handler == NULL)
		return;

	/* Personality-specific failure handling. */
	mddev->pers->error_handler(mddev, rdev);

	if (mddev->degraded)
		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);

	set_bit(StateChanged, &rdev->flags);
	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);

	/* Wake the management thread (for raid5 this is raid5d). */
	md_wakeup_thread(mddev->thread);
	md_new_event_inintr(mddev);
}
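md_error()函数的参数:mddev:指向MD设备的描述符指针;rdev:指向故障成员磁盘的描述符指针。该函数首先进行检查:如果该成员磁盘已经被标识为Faulty则直接返回;如果MD设备是外部管理的,则先设置该成员磁盘的Blocked标志位。然后调用个性的错误处理函数进行处理;如果阵列处于降级状态(->degraded不为0),则设置->recovery的MD_RECOVERY_RECOVER标志位;接着设置该故障磁盘的StateChanged标志位,同时设置->recovery的MD_RECOVERY_INTR和MD_RECOVERY_NEEDED标志位,最后唤醒管理线程进行下一步处理。对于raid5,pers->error_handler被实例化为error()函数,管理线程被实例化为raid5d线程。下面分别看这两个函数。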
/*
 * error - raid5 error handler for a failed member disk.
 * @mddev: descriptor of the MD array
 * @rdev:  descriptor of the member disk that failed
 *
 * If the disk is not yet marked Faulty: records that the device set
 * changed, bumps the degraded count when the disk was In_sync (and asks
 * any running recovery to abort), marks the disk Faulty and logs the
 * failure. Does nothing beyond the debug message otherwise.
 */
static void error(mddev_t *mddev, mdk_rdev_t *rdev)
{
	raid5_conf_t *r5conf = (raid5_conf_t *) mddev->private;
	char name_buf[BDEVNAME_SIZE];

	pr_debug("raid5: error called\n");

	/* Already marked Faulty: nothing more to record. */
	if (test_bit(Faulty, &rdev->flags))
		return;

	/* Some disk in the array changed state. */
	set_bit(MD_CHANGE_DEVS, &mddev->flags);

	/* The disk was in sync with the array until now: clear that flag. */
	if (test_and_clear_bit(In_sync, &rdev->flags)) {
		unsigned long irq_state;

		/* The degraded counter is updated under device_lock. */
		spin_lock_irqsave(&r5conf->device_lock, irq_state);
		mddev->degraded++;
		spin_unlock_irqrestore(&r5conf->device_lock, irq_state);

		/* If recovery was running, make sure it aborts. */
		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
	}

	set_bit(Faulty, &rdev->flags);
	printk(KERN_ALERT
	       "raid5: Disk failure on %s, disabling device.\n"
	       "raid5: Operation continuing on %d devices.\n",
	       bdevname(rdev->bdev, name_buf),
	       r5conf->raid_disks - mddev->degraded);
}
error()函数的参数:mddev:指向MD设备的描述符指针;rdev:指向故障成员磁盘的描述符指针。该函数主要就是设置一些标志位,标志该成员磁盘故障、阵列中的磁盘状态变化、以及及时中断正在执行的修复操作。而raid5d线程则会调用md_check_recovery()函数,我们来看看这个函数:
/*该函数主要是处理同步和super-block的更新。
* This routine is regularly called by all per-raid-array threads to
* deal with generic issues like resync and super-block update.
* Raid personalities that don't have a thread (linear/raid0) do not
* need this as they never do any recovery or update the superblock.
*它其实并没有自己进行同步处理,而是通过创建其它的线程来处理
* It does not do any resync itself, but rather "forks" off other threads
* to do that as needed.
* When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
* "->recovery" and create a thread at ->sync_thread.
* When the thread finishes it sets MD_RECOVERY_DONE
* and wakes up this thread which will reap the thread and finish up.
* This thread also removes any faulty devices (with nr_pending == 0).
*
* The overall approach is:
* 1/ if the superblock needs updating, update it. 1.更新super-block
* 2/ If a recovery thread is running, don't do anything else.
* 3/ If recovery has finished, clean up, possibly marking spares active.
* 4/ If there are any faulty devices, remove them.
* 5/ If array is degraded, try to add spare devices
* 6/ If array has spares or is not in-sync, start a resync thread.
*/
void md_check_recovery(mddev_t *mddev)
{
mdk_rdev_t *rdev;
if (mddev->bitmap)
bitmap_daemon_work(mddev->bitmap); //可能是完成bitmap的冲刷进磁盘
if (mddev->ro)
return;
if (signal_pending(current)) {
if (mddev->pers->sync_request && !mddev->external) {
printk(KERN_INFO "md: %s in immediate safe mode\n",
mdname(mddev));
mddev->safemode = 2;
}
flush_signals(current);
}
if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
return;
if ( ! (
(mddev->flags && !mddev->external) ||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
(mddev->external == 0 && mddev->safemode == 1) ||
(mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
&& !mddev->in_sync && mddev->recovery_cp == MaxSector)
))
return;
if (mddev_trylock(mddev)) {
int spares = 0;
if (mddev->ro) {
/* Only thing we do on a ro array is remove
* failed devices.
*/
remove_and_add_spares(mddev);
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
goto unlock;
}