vrrp 线程会不断调用 vrrp_dispatcher_read() 获取 vrrp 通告:
1、调用 recvmsg(),获取 vrrp 通告;
2、获取 vrrp 头;
3、根据 vrrp 头中的 VRID,判断本端 vrrp 与 对端 vrrp 是否处于同一个虚拟路由;
4、若两端不处于同一个虚拟路由下,则丢弃该通告;
5、根据本端 vrrp 当前状态(master/back),对通告进行分析处理。若当前状态是 master,则需判断是否切换到 back。
/*
 * Our read packet dispatcher.
 *
 * Scheduler callback for a VRRP socket. Picks the timeout handler when the
 * thread fired as a read timeout (or the socket has no input fd), otherwise
 * the packet-read handler, then re-arms itself on the fd the handler returned.
 *
 * Always returns 0.
 */
static int
vrrp_read_dispatcher_thread(thread_ref_t thread)
{
	/* Fetch thread arg */
	sock_t *vsock = THREAD_ARG(thread);

	/* Dispatcher state handler: timeout path vs. receive-adverts path */
	bool use_timeout_handler = (thread->type == THREAD_READ_TIMEOUT || vsock->fd_in == -1);
	int next_fd = use_timeout_handler
			? vrrp_dispatcher_read_timeout(vsock)
			: vrrp_dispatcher_read(vsock);	/* receive VRRP adverts */

	/* A handler returning -1 means there is nothing to re-register */
	if (next_fd == -1)
		return 0;

	/* Register next dispatcher thread on the returned fd, with the timeout
	 * computed by vrrp_compute_timer().
	 * NOTE(review): the original annotation states this ends up adding the
	 * fd to the master's epoll set via epoll_ctl - not visible here, confirm
	 * against thread_add_read_sands(). */
	vsock->thread = thread_add_read_sands(thread->master, vrrp_read_dispatcher_thread,
					      vsock, next_fd, vrrp_compute_timer(vsock), false);

	return 0;
}
/*
 * Handle dispatcher read packet.
 *
 * Drains the adverts pending on sock->fd_in with recvmsg(). For each packet:
 *   1. the VRRP header is extracted with vrrp_get_header();
 *   2. the VRID from the header is used to look up the local instance in
 *      sock->rb_vrid; adverts for unknown VRIDs are ignored;
 *   3. adverts are ignored while the instance is in FAULT or INIT state;
 *   4. ancillary data (IPv6 hop limit / pktinfo, optional timestamps) is saved;
 *   5. the advert is handed to the backup or master state handler, and the
 *      instance timer is re-initialised where appropriate.
 *
 * The loop ends when recvmsg() fails (normally EAGAIN once the queue is
 * empty), when no VRRP header can be obtained, or after processing the
 * second advert seen for the same VRID in this call.
 *
 * Returns sock->fd_in.
 */
static int
vrrp_dispatcher_read(sock_t *sock)
{
	vrrp_t *vrrp;
	const vrrphdr_t *hd;
	ssize_t len = 0;
	int prev_state = 0;
	int recv_errno;
	struct sockaddr_storage src_addr = { .ss_family = AF_UNSPEC };
	vrrp_t vrrp_lookup;
	/* The union forces the ancillary data buffer to be suitably aligned for
	 * struct cmsghdr, since the CMSG_*() macros access it through
	 * struct cmsghdr pointers (idiom from cmsg(3)). */
	union {
#ifdef _NETWORK_TIMESTAMP_
		char buf[128];
#else
		char buf[64];
#endif
		struct cmsghdr align;
	} control_un;
	struct iovec iovec = { .iov_base = vrrp_buffer, .iov_len = vrrp_buffer_len };
	struct msghdr msghdr = { .msg_name = &src_addr, .msg_namelen = sizeof(src_addr),
				 .msg_iov = &iovec, .msg_iovlen = 1,
				 .msg_control = control_un.buf, .msg_controllen = sizeof(control_un.buf) };
	struct cmsghdr *cmsg;
	bool expected_cmsg;
	unsigned eintr_count;
	unsigned long rx_vrid_map[BIT_WORD(256 + BIT_PER_LONG - 1)] = { 0 };
	bool terminate_receiving = false;
#ifdef DEBUG_RECVMSG
	unsigned recv_data_count = 0;
#endif

	/* Strategy here is to handle incoming adverts pending into socket recvq
	 * but stop if receive 2nd advert for a VRID on socket (this applies to
	 * both configured and unconfigured VRIDs).
	 * Seems a good tradeoff while simulating */
	while (!terminate_receiving) {
		/* msg_namelen and msg_controllen are value-result fields: a
		 * successful recvmsg() overwrites them with the lengths actually
		 * returned (and msg_controllen is forced to 0 below on MSG_CTRUNC).
		 * Restore the full buffer sizes before every receive, otherwise
		 * later packets in this loop would lose their ancillary data. */
		msghdr.msg_namelen = sizeof(src_addr);
		msghdr.msg_controllen = sizeof(control_un.buf);

		/* read & affect received buffer.
		 * MSG_TRUNC makes recvmsg() return the real packet length even if
		 * it is larger than the supplied buffer; truncation of data or of
		 * control data is reported through msg_flags and checked below.
		 * Retry a bounded number of times if interrupted by a signal. */
		eintr_count = 0;
		while ((len = recvmsg(sock->fd_in, &msghdr, MSG_TRUNC | MSG_CTRUNC)) == -1 &&
		       check_EINTR(errno) && eintr_count++ < 10);

		/* Receive failed; the saved errno gives the reason */
		if (len < 0) {
			/* Save errno now: the logging calls below may modify it */
			recv_errno = errno;

#ifdef DEBUG_RECVMSG
			if (check_EINTR(recv_errno))
				log_message(LOG_INFO, "recvmsg(%d) looped %u times due to EINTR before terminating loop"
						    , sock->fd_in, eintr_count);
#endif
			/* EAGAIN/EWOULDBLOCK just means no more data is queued on
			 * the socket for now, so it is not reported as an error. */
			if (!check_EAGAIN(recv_errno)) {
				errno = recv_errno;	/* %m is expanded from errno */
				log_message(LOG_INFO, "recvmsg(%d) returned %d (%m)"
						    , sock->fd_in, recv_errno);
			}
#ifdef DEBUG_RECVMSG
			else if (recv_data_count == 0)
				log_message(LOG_INFO, "recvmsg(%d) returned EAGAIN without any data being received"
						    , sock->fd_in);
			if (recv_data_count != 1)
				log_message(LOG_INFO, "recvmsg(%d) loop received %u packets"
						    , sock->fd_in, recv_data_count);
#endif
			break;
		}

#ifdef DEBUG_RECVMSG
		if (eintr_count)
			log_message(LOG_INFO, "recvmsg(%d) looped %u times due to EINTR before returning %zd"
					    , sock->fd_in, eintr_count, len);
#endif

		/* No data was read */
		if (len == 0) {
			log_message(LOG_INFO, "recvmsg(%d) returned data length 0", sock->fd_in);
			continue;
		}

		/* Data was read */
#ifdef DEBUG_RECVMSG
		recv_data_count++;
#endif

		/* Packet data was truncated: ignore this packet */
		if (msghdr.msg_flags & MSG_TRUNC) {
			log_message(LOG_INFO, "recvmsg(%d) message truncated from %zd to %zu bytes"
					    , sock->fd_in, len, vrrp_buffer_len);
			continue;
		}

		/* Control data was truncated: still process the packet, but do
		 * not parse the (incomplete) ancillary data for it. */
		if (msghdr.msg_flags & MSG_CTRUNC) {
			log_message(LOG_INFO, "recvmsg(%d), control message truncated from %zu to %" PRI_MSG_CONTROLLEN " bytes"
					    , sock->fd_in, sizeof(control_un.buf), msghdr.msg_controllen);
			msghdr.msg_controllen = 0;
		}

		/* Get the VRRP header; stop reading if none can be obtained */
		if (!(hd = vrrp_get_header(sock->family, vrrp_buffer, len)))
			break;

		/* Defense strategy here is to handle no more than one advert
		 * per VRID in order to flush socket rcvq...
		 * This is a best effort mitigation */
		if (__test_and_set_bit(hd->vrid, rx_vrid_map))
			terminate_receiving = true;

		/* The VRID identifies the virtual router; look up the local
		 * instance configured with that VRID on this socket. */
		vrrp_lookup.vrid = hd->vrid;
		vrrp = rb_search(&sock->rb_vrid, &vrrp_lookup, rb_vrid, vrrp_vrid_cmp);

		/* No instance found => ignore the advert */
		if (!vrrp) {
			if (global_data->log_unknown_vrids)
				log_message(LOG_INFO, "Unknown VRID(%d) received on interface(%s). ignoring..."
						    , hd->vrid, IF_NAME(sock->ifp));
			continue;
		}

		if (vrrp->state == VRRP_STATE_FAULT || vrrp->state == VRRP_STATE_INIT) {
			/* We just ignore a message received when we are in fault state or
			 * not yet fully initialised */
			continue;
		}

		/* Save non packet data */
		vrrp->pkt_saddr = src_addr;
		vrrp->hop_limit = -1;		/* Default to not received */
		vrrp->multicast_pkt = false;

		/* Walk the ancillary data objects returned with the packet */
		for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
			expected_cmsg = false;
			if (cmsg->cmsg_level == IPPROTO_IPV6) {
				expected_cmsg = true;
#ifdef IPV6_RECVHOPLIMIT
				if (cmsg->cmsg_type == IPV6_HOPLIMIT &&
				    cmsg->cmsg_len - sizeof(struct cmsghdr) == sizeof(unsigned int))
					vrrp->hop_limit = *(unsigned int *)CMSG_DATA(cmsg);
				else
#endif
#ifdef IPV6_RECVPKTINFO
				if (cmsg->cmsg_type == IPV6_PKTINFO &&
				    cmsg->cmsg_len - sizeof(struct cmsghdr) == sizeof(struct in6_pktinfo))
					vrrp->multicast_pkt = IN6_IS_ADDR_MULTICAST(&((struct in6_pktinfo *)CMSG_DATA(cmsg))->ipi6_addr);
				else
#endif
					expected_cmsg = false;
			}
#ifdef _NETWORK_TIMESTAMP_
			else if (do_network_timestamp && cmsg->cmsg_level == SOL_SOCKET) {
				struct timespec *ts = (void *)CMSG_DATA(cmsg);
				char time_buf[9];	/* "HH:MM:SS" plus terminating NUL */

				expected_cmsg = true;
				if (cmsg->cmsg_type == SO_TIMESTAMPNS) {
					strftime(time_buf, sizeof time_buf, "%T", localtime(&ts->tv_sec));
					log_message(LOG_INFO, "TIMESTAMPNS (socket %d - VRID %u) %s.%9.9ld"
							    , sock->fd_in, hd->vrid, time_buf, ts->tv_nsec);
				}
#if 0
				if (cmsg->cmsg_type == SO_TIMESTAMP) {
					struct timeval *tv = (void *)CMSG_DATA(cmsg);
					log_message(LOG_INFO, "TIMESTAMP message (%d - %u) %ld.%9.9ld"
							    , sock->fd_in, hd->vrid, tv->tv_sec, tv->tv_usec);
				}
				else if (cmsg->cmsg_type == SO_TIMESTAMPING) {
					struct timespec *ts = (void *)CMSG_DATA(cmsg);
					log_message(LOG_INFO, "TIMESTAMPING message (%d - %u) %ld.%9.9ld, raw %ld.%9.9ld"
							    , sock->fd_in, hd->vrid, ts->tv_sec, ts->tv_nsec, (ts+2)->tv_sec, (ts+2)->tv_nsec);
				}
#endif
				else
					expected_cmsg = false;
			}
#endif

			if (!expected_cmsg)
				log_message(LOG_INFO, "fd %d, unexpected control msg len %" PRI_MSG_CONTROLLEN ", level %d, type %d"
						    , sock->fd_in, cmsg->cmsg_len
						    , cmsg->cmsg_level, cmsg->cmsg_type);
		}

		prev_state = vrrp->state;

		if (vrrp->state == VRRP_STATE_BACK)
			/* Local instance is backup */
			vrrp_state_backup(vrrp, hd, vrrp_buffer, len);
		else if (vrrp->state == VRRP_STATE_MAST) {
			/* Local instance is master: analyse the advert and leave
			 * the master state if the handler says so */
			if (vrrp_state_master_rx(vrrp, hd, vrrp_buffer, len))
				vrrp_state_leave_master(vrrp, false);
		} else
			log_message(LOG_INFO, "(%s) In dispatcher_read with state %d"
					    , vrrp->iname, vrrp->state);

		/* handle instance synchronization */
#ifdef _TSM_DEBUG_
		if (do_tsm_debug)
			log_message(LOG_INFO, "Read [%s] TSM transition : [%d,%d] Wantstate = [%d]"
					    , vrrp->iname, prev_state, vrrp->state, vrrp->wantstate);
#endif
		VRRP_TSM_HANDLE(prev_state, vrrp);

		/* If we have sent an advert, reset the timer */
		if (vrrp->state != VRRP_STATE_MAST || !vrrp->lower_prio_no_advert)
			vrrp_init_instance_sands(vrrp);
	}

	return sock->fd_in;
}