概述
在monitor节点中,存在着Leader和Peon两种角色。monitor采用了一种lease机制,保证副本在一定时间内可读写。同时lease机制也保证了整个集群中的monitor当前都处于可用状态。
Leader节点会定时向所有的Peon节点发送lease消息,延长各个节点的lease时间,同时收集所有节点的ack消息。只要有一个节点没有在超时时间内回复ack消息,就会重新发起选举。
同理,Peon节点一直在等待Leader向自己发送lease消息。超时之后,也会重新发起选举。
这样就保证了整个monitor集群的可用性。
源码分析
从lease的发起者extend_lease()开始讲
void Paxos::extend_lease()
{
//断言lease是由Leader节点发起
assert(mon->is_leader());
//assert(is_active());
//当前时间+5s作为租期
lease_expire = ceph_clock_now();
lease_expire += g_conf->mon_lease;
//已经收到的lease回复集合清空。将leader节点加入集合
acked_lease.clear();
acked_lease.insert(mon->rank);
dout(7) << "extend_lease now+" << g_conf->mon_lease
<< " (" << lease_expire << ")" << dendl;
// bcast
for (set<int>::const_iterator p = mon->get_quorum().begin();
p != mon->get_quorum().end(); ++p) {
//向quorum中的所有peon节点发送lease消息
if (*p == mon->rank) continue;
MMonPaxos *lease = new MMonPaxos(mon->get_epoch(), MMonPaxos::OP_LEASE,
ceph_clock_now());
lease->last_committed = last_committed;
lease->lease_timestamp = lease_expire;
lease->first_committed = first_committed;
mon->messenger->send_message(lease, mon->monmap->get_inst(*p));
}
// set timeout event.
// if old timeout is still in place, leave it.
if (!lease_ack_timeout_event) {
lease_ack_timeout_event = mon->timer.add_event_after(
//2*5=10超时时间为10s
g_conf->mon_lease_ack_timeout_factor * g_conf->mon_lease,
new C_MonContext(mon, [this](int r) {
if (r == -ECANCELED)
return;
//超时后发起选举
lease_ack_timeout();
}));
}
// set renew event
//继续发起下一轮extend_lease
utime_t at = lease_expire;
at -= g_conf->mon_lease;
at += g_conf->mon_lease_renew_interval_factor * g_conf->mon_lease;
lease_renew_event = mon->timer.add_event_at(
at, new C_MonContext(mon, [this](int r) {
if (r == -ECANCELED)
return;
lease_renew_timeout();
}));
}
<