在osd的main函数中会新建多个Messenger，下面列出其中三个，其中ms_hb_back_client和ms_hb_front_client用于heartbeat的发送和接收
C:\Users\Administrator\Desktop\source\ceph-master\ceph-master\src\ceph_osd.cc
// Messenger named "client", created with public_msgr_type and flagged
// HAS_HEAVY_TRAFFIC | HAS_MANY_CONNECTIONS. Unlike the two messengers below it
// does not carry the Messenger::HEARTBEAT flag.
Messenger *ms_public = Messenger::create(g_ceph_context, public_msgr_type,
entity_name_t::OSD(whoami), "client",
getpid(),
Messenger::HAS_HEAVY_TRAFFIC |
Messenger::HAS_MANY_CONNECTIONS);
// Heartbeat messenger named "hb_back_client", created with cluster_msgr_type
// and flagged Messenger::HEARTBEAT.
Messenger *ms_hb_back_client = Messenger::create(g_ceph_context, cluster_msgr_type,
entity_name_t::OSD(whoami), "hb_back_client",
getpid(), Messenger::HEARTBEAT);
// Heartbeat messenger named "hb_front_client", created with public_msgr_type
// and flagged Messenger::HEARTBEAT.
Messenger *ms_hb_front_client = Messenger::create(g_ceph_context, public_msgr_type,
entity_name_t::OSD(whoami), "hb_front_client",
getpid(), Messenger::HEARTBEAT);
在OSD中定义了一个内部类T_Heartbeat，用于发送heartbeat消息
// Thread subclass whose body runs the heartbeat loop of the OSD it was
// constructed with. Declared together with its instance, heartbeat_thread.
struct T_Heartbeat : public Thread {
  OSD *osd;  // OSD whose heartbeat_entry() this thread runs

  explicit T_Heartbeat(OSD *owner) : osd(owner) {}

  // Thread entry point: hands control to OSD::heartbeat_entry() and returns
  // a null result once that call returns.
  void *entry() override
  {
    osd->heartbeat_entry();
    return nullptr;
  }
} heartbeat_thread;
这个内部类继承自Thread，会单独建立一个线程，线程的入口函数entry()会调用osd的heartbeat_entry
// Body of the heartbeat thread (called from T_Heartbeat::entry()).
// Takes heartbeat_lock through a Mutex::Locker, returns at once if the OSD is
// stopping, and otherwise keeps calling heartbeat() until heartbeat_stop is set.
// NOTE(review): abridged excerpt - the function's closing brace, and anything
// that happens between loop iterations, is not shown here.
void OSD::heartbeat_entry()
{
Mutex::Locker l(heartbeat_lock);
if (is_stopping())
return;
while (!heartbeat_stop) {
#发送消息
// Send one round of heartbeat messages.
heartbeat();
}
void OSD::heartbeat()
{
dout(30) << "heartbeat" << dendl;
vector<int> hb_peers;
for (map<int,HeartbeatInfo>::iterator p = heartbeat_peers.begin();
p != heartbeat_peers.end();
++p)
hb_peers.push_back(p->first);
service.update_osd_stat(hb_peers);
dout(5) << "heartbeat: " << service.get_osd_stat() << dendl;
utime_t now = ceph_clock_now();
// send heartbeats
for (map<int,HeartbeatInfo>::iterator i = heartbeat_peers.begin();
i != heartbeat_peers.end();
++i) {
int peer = i->first;
i->second.last_tx = now;
if (i->second.first_tx == utime_t())
i->second.first_tx = now;
dout(30) << "heartbeat sending ping to osd." << peer << dendl;
#发送back和front 信息,注意这里的发送message类型是MOSDPing::PING
i->second.con_back->send_message(new MOSDPing(monc->get_fsid(),
service.get_osdmap()->get_epoch(),
MOSDPing::PING, now,
cct->_conf->osd_heartbeat_min_size));
if (i->second.con_front)
i->second.con_front->send_message(new MOSDPing(monc->get_fsid(),
service.get_osdmap()->get_epoch(),
MOSDPing::PING, now,
cct->_conf->osd_heartbeat_min_size));
}
}
而对应的接收函数也是在OSD中定义的：在OSD::init()中把heartbeat_dispatcher注册到各个heartbeat messenger上
// Registers heartbeat_dispatcher on each of the four heartbeat messengers
// (front/back x client/server) via add_dispatcher_head().
// NOTE(review): abridged excerpt of OSD::init() - the rest of the body,
// including its return statement, is not shown here.
int OSD::init()
{
hb_front_client_messenger->add_dispatcher_head(&heartbeat_dispatcher);
hb_back_client_messenger->add_dispatcher_head(&heartbeat_dispatcher);
hb_front_server_messenger->add_dispatcher_head(&heartbeat_dispatcher);
hb_back_server_messenger->add_dispatcher_head(&heartbeat_dispatcher);
}
接收也是在一个单独的线程中进行，其回调函数为
// Dispatches an incoming heartbeat message according to m->get_type():
//  - CEPH_MSG_PING: logged at debug level 10, then released with m->put().
//  - MSG_OSD_PING:  cast to MOSDPing and passed to handle_osd_ping().
// NOTE(review): abridged excerpt - the return statement and the function's
// closing brace are not shown here.
bool OSD::heartbeat_dispatch(Message *m)
{
dout(30) << "heartbeat_dispatch " << m << dendl;
switch (m->get_type()) {
case CEPH_MSG_PING:
dout(10) << "ping from " << m->get_source_inst() << dendl;
m->put();
break;
#和我们前面讲的发送的类型一致
// Same message type as the pings sent by OSD::heartbeat() (MOSDPing).
case MSG_OSD_PING:
handle_osd_ping(static_cast<MOSDPing*>(m));
break;
}
// Handles an MOSDPing according to its op field (m->op).
// NOTE(review): heavily abridged excerpt - `from` and `curmap` are used but
// their definitions are not shown, and several closing braces are missing.
void OSD::handle_osd_ping(MOSDPing *m)
{
switch (m->op) {
case MOSDPing::PING:
{
#这里收到消息后发送回复的message,主要的message的类型是MOSDPing::PING_REPLY
// On PING: build a reply whose op is MOSDPing::PING_REPLY and whose stamp is
// the stamp carried by the incoming ping (m->stamp), then send it back on the
// connection the ping arrived on.
Message *r = new MOSDPing(monc->get_fsid(),
curmap->get_epoch(),
MOSDPing::PING_REPLY, m->stamp,
cct->_conf->osd_heartbeat_min_size);
m->get_connection()->send_message(r);
break;
case MOSDPing::PING_REPLY:
{
// On PING_REPLY: only peers present in heartbeat_peers are updated below.
map<int,HeartbeatInfo>::iterator i = heartbeat_peers.find(from);
if (i != heartbeat_peers.end()) {
if (m->get_connection() == i->second.con_back) {
// Reply arrived on the back connection: record its stamp.
i->second.last_rx_back = m->stamp;
#收到MOSDPing::PING_REPLY 的处理就是通过m->stamp 来更新时间戳
// Handling a PING_REPLY amounts to updating the receive timestamps from
// m->stamp. When the peer has no front connection, the back reply also
// updates last_rx_front.
if (i->second.con_front == NULL)
i->second.last_rx_front = m->stamp;
} else if (m->get_connection() == i->second.con_front) {
// Reply arrived on the front connection: record its stamp.
i->second.last_rx_front = m->stamp;
}
}
除了发送和接收外，osd还新建了一个timer来检查heartbeat是否超时
// Schedules the tick that runs without osd_lock.
// NOTE(review): abridged excerpt of OSD::init() - the rest of the body,
// including its return statement, is not shown here.
int OSD::init()
{
// Take tick_timer_lock (through Mutex::Locker) before adding the timer event.
Mutex::Locker l(tick_timer_lock);
#首先在osd的init函数中新建一个timer,这个timer的回调函数是C_Tick_WithoutOSDLock,timer的到期时间是cct->_conf->osd_heartbeat_interval
// Add an event to tick_timer_without_osd_lock: a C_Tick_WithoutOSDLock
// callback, due after cct->_conf->osd_heartbeat_interval.
tick_timer_without_osd_lock.add_event_after(cct->_conf->osd_heartbeat_interval, new C_Tick_WithoutOSDLock(this));
}
// Context (callback object) that calls OSD::tick_without_osd_lock() on the
// OSD it was constructed with when it is finished.
class OSD::C_Tick_WithoutOSDLock : public Context {
  OSD *osd;  // OSD to tick; this class never deletes it
public:
  explicit C_Tick_WithoutOSDLock(OSD *o) : osd(o) {}

  // Completion hook. The result code r is not used.
  void finish(int r) override {
    // Delegate to the tick handler provided by the OSD.
    osd->tick_without_osd_lock();
  }
};
// Tick handler called from C_Tick_WithoutOSDLock::finish(). Runs
// heartbeat_check() under heartbeat_lock when the OSD is active or is waiting
// to become healthy.
// NOTE(review): abridged excerpt - the function's closing brace and any
// further work done per tick are not shown here.
void OSD::tick_without_osd_lock()
{
// osd_lock is not being held, which means the OSD state
// might change when doing the monitor report
#这里会调用heartbeat_check 来check,前提条件是这个osd是active 或者是healthy的
// heartbeat_check() is only run when is_active() or is_waiting_for_healthy()
// holds.
if (is_active() || is_waiting_for_healthy()) {
// NOTE(review): manual Lock()/Unlock() pair; heartbeat_entry() takes the same
// lock through Mutex::Locker.
heartbeat_lock.Lock();
heartbeat_check();
heartbeat_lock.Unlock();
}
// Records peers that fail the heartbeat health check in failure_queue.
// NOTE(review): abridged excerpt - the loop that defines `p` and the
// computation of `cutoff` are not shown, so the braces below do not balance.
void OSD::heartbeat_check()
{
#检查是否超时
// Timeout check: is this peer unhealthy with respect to `cutoff`?
if (p->second.is_unhealthy(cutoff)) {
// If either last_rx_back or last_rx_front is still the default utime_t(),
// the peer's last send time (last_tx) is queued as the failure time.
if (p->second.last_rx_back == utime_t() ||
p->second.last_rx_front == utime_t()) {
#如果超时的话,则放入到failure_queue 这个数组中,上报给moniter
// Timed-out peers go into failure_queue (keyed by peer id); according to the
// original annotation they are then reported to the monitor - that reporting
// code is not part of this excerpt.
failure_queue[p->first] = p->second.last_tx;
} else {
// fail
// Otherwise queue the earlier of the two last-receive stamps.
failure_queue[p->first] = MIN(p->second.last_rx_back, p->second.last_rx_front);
}
}
}
}
osd的心跳机制
最新推荐文章于 2023-04-26 11:06:54 发布