1、校验(P/Q)检查:如果校验结果不一致,则通过计算得到正确值,再写回磁盘。注意本函数只根据当前状态设置相应的标志位和请求位,为后续的操作做准备:
/*
 * handle_parity_checks6 - advance the P/Q parity-check state machine of a
 * stripe by one step.
 *
 * Dispatches on sh->check_state. Everything done here is bookkeeping:
 * sh->check_state is moved on, per-device flag bits are set or cleared,
 * the counters in @s are adjusted and request bits are set in
 * s->ops_request. STRIPE_HANDLE is always set on entry.
 *
 * @conf:  array configuration; only conf->mddev is used (mismatch counter
 *         and the MD_RECOVERY_CHECK flag).
 * @sh:    stripe being handled.
 * @s:     per-call stripe state (failed, uptodate, locked, ops_request).
 * @r6s:   supplies q_failed and failed_num[] for this stripe.
 * @disks: device count of the stripe; only used in a sanity check.
 *
 * Hits BUG() on more than two failures, on an unknown check_state, and on
 * the other consistency checks below.
 */
static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
struct stripe_head_state *s,
struct r6_state *r6s, int disks)
{
int pd_idx = sh->pd_idx;
int qd_idx = sh->qd_idx;
struct r5dev *dev;
set_bit(STRIPE_HANDLE, &sh->state);
BUG_ON(s->failed > 2);
/* Want to check and possibly repair P and Q.
 * However there could be one 'failed' device, in which
 * case we can only check one of them, possibly using the
 * other to generate missing data
 */
switch (sh->check_state) {
case check_state_idle: /* initial state */
/* start a new check operation if there are < 2 failures */
/* (assumes q_failed is 0 or 1 -- TODO confirm against struct r6_state) */
if (s->failed == r6s->q_failed) { /* no failed disk, or the only failed disk is Q */
/* The only possible failed device holds Q, so it
 * makes sense to check P (If anything else were failed,
 * we would have used P to recreate it).
 */
sh->check_state = check_state_run; /* P check selected (may be upgraded below) */
}
/* Q is intact and there are fewer than two failures */
if (!r6s->q_failed && s->failed < 2) { /* no failed disk, or the only failed disk is not Q */
/* Q is not failed, and we didn't use it to generate
 * anything, so it makes sense to check it
 */
if (sh->check_state == check_state_run) /* no failed disk */
sh->check_state = check_state_run_pq; /* check both P and Q */
else /* the only failed disk is not Q */
sh->check_state = check_state_run_q; /* check Q only */
}
/* discard potentially stale zero_sum_result */
sh->ops.zero_sum_result = 0;
/* Still check_state_run here means the Q branch above was not taken,
 * i.e. a P-only check: P is marked not up to date beforehand.
 */
if (sh->check_state == check_state_run) {
/* async_xor_zero_sum destroys the contents of P */
clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
s->uptodate--;
}
/* Any of the three "run" states: request the check operation.
 * NOTE(review): the range test relies on check_state_run,
 * check_state_run_q and check_state_run_pq being consecutive enum
 * values; the enum is not visible here -- confirm.
 */
if (sh->check_state >= check_state_run &&
sh->check_state <= check_state_run_pq) {
/* async_syndrome_zero_sum preserves P and Q, so
 * no need to mark them !uptodate here
 */
set_bit(STRIPE_OP_CHECK, &s->ops_request);
break;
}
/* we have 2-disk failure */
BUG_ON(s->failed != 2);
/* fall through */
case check_state_compute_result: /* compute finished: schedule write-back */
sh->check_state = check_state_idle;
/* check that a write has not made the stripe insync */
if (test_bit(STRIPE_INSYNC, &sh->state))
break;
/* now write out any block on a failed drive,
 * or P or Q if they were recomputed
 */
BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
if (s->failed == 2) { /* two failures: lock and schedule a write of failed_num[1] */
dev = &sh->dev[r6s->failed_num[1]];
s->locked++;
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantwrite, &dev->flags);
}
if (s->failed >= 1) { /* at least one failure: schedule failed_num[0]; with two failures failed_num[1] was handled just above */
dev = &sh->dev[r6s->failed_num[0]];
s->locked++;
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantwrite, &dev->flags);
}
if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { /* P bit set in the check result (not "P device failed"): schedule a write of P */
dev = &sh->dev[pd_idx];
s->locked++;
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantwrite, &dev->flags);
}
if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { /* Q bit set in the check result (not "Q device failed"): schedule a write of Q */
dev = &sh->dev[qd_idx];
s->locked++;
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantwrite, &dev->flags);
}
clear_bit(STRIPE_DEGRADED, &sh->state); /* stripe no longer flagged degraded */
set_bit(STRIPE_INSYNC, &sh->state); /* mark the stripe in sync */
break;
case check_state_run:
case check_state_run_q:
case check_state_run_pq: /* a check is in flight */
break; /* we will be called again upon completion */
case check_state_check_result: /* check finished: evaluate the result */
sh->check_state = check_state_idle;
/* handle a successful check operation, if parity is correct
 * we are done. Otherwise update the mismatch count and repair
 * parity if !MD_RECOVERY_CHECK
 */
if (sh->ops.zero_sum_result == 0) { /* no mismatch bit reported */
/* both parities are correct */
if (!s->failed) /* nothing failed and parity matches: stripe is in sync */
set_bit(STRIPE_INSYNC, &sh->state);
else { /* parity matches, but a failed device still needs its block written */
/* in contrast to the raid5 case we can validate
 * parity, but still have a failure to write
 * back
 */
sh->check_state = check_state_compute_result;
/* Returning at this point means that we may go
 * off and bring p and/or q uptodate again so
 * we make sure to check zero_sum_result again
 * to verify if p or q need writeback
 */
}
} else { /* at least one mismatch bit is set */
conf->mddev->resync_mismatches += STRIPE_SECTORS;
if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) /* MD_RECOVERY_CHECK set: count the mismatch only */
/* don't try to repair!! */
set_bit(STRIPE_INSYNC, &sh->state);
else { /* otherwise request recomputation of the mismatching parity block(s) */
/* 'target' walks the two compute slots: it starts at ops.target
 * and is advanced to ops.target2 once P has claimed the first.
 */
int *target = &sh->ops.target;
sh->ops.target = -1;
sh->ops.target2 = -1;
sh->check_state = check_state_compute_run;
set_bit(STRIPE_COMPUTE_RUN, &sh->state);
set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { /* compute P */
set_bit(R5_Wantcompute,
&sh->dev[pd_idx].flags);
*target = pd_idx;
target = &sh->ops.target2;
s->uptodate++;
}
if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { /* compute Q; lands in ops.target2 when P took ops.target, else in ops.target */
set_bit(R5_Wantcompute,
&sh->dev[qd_idx].flags);
*target = qd_idx;
s->uptodate++;
}
}
}
break;
case check_state_compute_run: /* a compute is in flight; nothing to do here */
break;
default:
printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
__func__, sh->check_state,
(unsigned long long) sh->sector);
BUG();
}
}
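注意:
起始状态(check_state_idle)下:1)如果无坏盘,则进行P和Q的双重校验(check_state_run_pq);
2)如果唯一的坏盘为Q,则只进行P校验(check_state_run);
3)如果只有一块坏盘且不是Q(即坏盘为P或某个数据盘),则只进行Q校验(check_state_run_q);
4)如果有两块坏盘,则不发起校验,直接进入check_state_compute_result分支,设置坏盘的写回标志。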
2、读取或是通过计算来获取该stripe中成员磁盘上的数据
/* fetch_block6 - checks the given member device to see if its data needs
* to be read or computed to satisfy a request.
*
* Returns 1 when no more member devices need to be checked, otherwise returns
* 0 to tell the lo