J版本recover机制
整体逻辑
回馈消息处理
入口函数
/**
 * Route an incoming backend message to the matching handler.
 *
 * The type of the request's message selects the handler. For sub-op and
 * sub-op-reply messages the opcode of the first entry in the message's
 * `ops` vector refines the choice; a sub-op message with no ops is handed
 * to sub_op_modify().
 *
 * @param op request whose message (op->get_req()) is inspected
 * @return true if a handler was invoked for the message; false if the
 *         message type, or the first opcode of a sub-op / sub-op reply,
 *         is not one this function dispatches
 */
bool ReplicatedBackend::handle_message(
  OpRequestRef op
  )
{
  dout(10) << __func__ << ": " << op << dendl;

  switch (op->get_req()->get_type()) {
  // Dedicated push/pull messages: one handler per message type.
  case MSG_OSD_PG_PUSH:
    do_push(op);
    return true;

  case MSG_OSD_PG_PULL:
    do_pull(op);
    return true;

  case MSG_OSD_PG_PUSH_REPLY:
    do_push_reply(op);
    return true;

  // Replicated-op messages: always handled, no opcode inspection.
  case MSG_OSD_REPOP:
    sub_op_modify(op);
    return true;

  case MSG_OSD_REPOPREPLY:
    sub_op_modify_reply(op);
    return true;

  // Sub-op messages: the first opcode decides between pull, push and
  // "not handled"; an empty op vector goes to sub_op_modify().
  case MSG_OSD_SUBOP: {
    MOSDSubOp *subop = static_cast<MOSDSubOp*>(op->get_req());
    if (subop->ops.empty()) {
      sub_op_modify(op);
      return true;
    }
    switch (subop->ops[0].op.op) {
    case CEPH_OSD_OP_PULL:
      sub_op_pull(op);
      return true;
    case CEPH_OSD_OP_PUSH:
      sub_op_push(op);
      return true;
    default:
      break;
    }
    // Any other leading opcode is not handled here.
    return false;
  }

  // Sub-op replies: only a reply whose first opcode is PUSH is handled.
  case MSG_OSD_SUBOPREPLY: {
    MOSDSubOpReply *reply = static_cast<MOSDSubOpReply*>(op->get_req());
    if (!reply->ops.empty() &&
        reply->ops[0].op.op == CEPH_OSD_OP_PUSH) {
      // continue peer recovery
      sub_op_push_reply(op);
      return true;
    }
    return false;
  }

  default:
    return false;
  }
}
故障osd主pg处理逻辑
回调函数逻辑
norecover导致io hang结论
当故障osd拥有主pg时,处理从副本发过来的pull消息后会加读锁;进行完一些操作后,会注册回调函数C_ReplicatedBackend_OnPullComplete来释放该锁。而该回调函数是在recover_tp中被调用的,设置norecover后它得不到执行,锁也就一直不会被释放,因此新的io走到do_op后无法获取锁,就会一直阻塞。
do_op卡锁函数
else if (!get_rw_locks(write_ordered, ctx)) {
dout(20) << __func__ << " waiting for rw locks " << dendl;
op->mark_delayed("waiting for rw locks");
close_op_ctx(ctx);