keepalived源码解析 —— vrrp_dispatcher_read()

vrrp 线程会不断调用 vrrp_dispatcher_read() 获取 vrrp 通告:
1、调用 recvmsg(),获取 vrrp 通告;
2、获取 vrrp 头;
3、根据 vrrp 头中的 VRID,判断本端 vrrp 与 对端 vrrp 是否处于同一个虚拟路由;
4、若两端不处于同一个虚拟路由下,则丢弃该通告;
5、根据本端 vrrp 当前状态(master/back),对通告进行分析处理。若当前状态是 master,则需判断是否切换到 back。

/* Read-side dispatcher thread for a VRRP socket.
 *
 * Runs one dispatch step for the socket carried in the thread argument
 * (either the timeout handler or the packet reader) and, unless that step
 * returns -1, queues itself again on the returned fd.
 *
 * Always returns 0.
 */
static int
vrrp_read_dispatcher_thread(thread_ref_t thread)
{
	/* The socket being served travels as the thread argument */
	sock_t *sock = THREAD_ARG(thread);
	int next_fd;

	/* Only a genuine read event on a socket with a receive fd goes to the
	 * packet reader (which receives the VRRP adverts); a read timeout, or a
	 * socket whose fd_in is -1, is handled by the timeout path. */
	if (thread->type != THREAD_READ_TIMEOUT && sock->fd_in != -1)
		next_fd = vrrp_dispatcher_read(sock);
	else
		next_fd = vrrp_dispatcher_read_timeout(sock);

	/* -1 from the handler means there is no fd to wait on again */
	if (next_fd == -1)
		return 0;

	/* Register the next dispatcher run on the returned fd, using the timer
	 * computed for this socket.
	 * NOTE(review): the original annotation says this ends up calling
	 * epoll_ctl to add the fd to master->epoll_fd - not visible from here. */
	sock->thread = thread_add_read_sands(thread->master, vrrp_read_dispatcher_thread,
					     sock, next_fd, vrrp_compute_timer(sock), false);

	return 0;
}
/* Handle dispatcher read packet.
 *
 * Receives VRRP adverts queued on sock->fd_in with recvmsg() and, for each
 * one, looks up the local instance with the advert's VRID in sock->rb_vrid
 * and feeds the packet to the backup or master state handler.
 *
 * The loop ends when recvmsg() fails (including EAGAIN once the queue is
 * empty), when vrrp_get_header() rejects a packet, or after the packet in
 * which a VRID is seen for the second time has been processed.
 *
 * Returns sock->fd_in.
 */
static int
vrrp_dispatcher_read(sock_t *sock)
{
	vrrp_t *vrrp;
	const vrrphdr_t *hd;
	ssize_t len = 0;
	int prev_state = 0;
	struct sockaddr_storage src_addr = { .ss_family = AF_UNSPEC };
	vrrp_t vrrp_lookup;
#ifdef _NETWORK_TIMESTAMP_
	char control_buf[128];
#else
	char control_buf[64];
#endif
	struct iovec iovec = { .iov_base = vrrp_buffer, .iov_len = vrrp_buffer_len };
	struct msghdr msghdr = { .msg_name = &src_addr, .msg_namelen = sizeof(src_addr),
				 .msg_iov = &iovec, .msg_iovlen = 1,
				 .msg_control = control_buf, .msg_controllen = sizeof(control_buf) };
	struct cmsghdr *cmsg;
	bool expected_cmsg;
	unsigned eintr_count;
	unsigned long rx_vrid_map[BIT_WORD(256 + BIT_PER_LONG - 1)] = { 0 };
	bool terminate_receiving = false;
#ifdef DEBUG_RECVMSG
	unsigned recv_data_count = 0;
#endif

	/* Strategy here is to handle incoming adverts pending into socket recvq
	 * but stop if receive 2nd advert for a VRID on socket (this applies to
	 * both configured and unconfigured VRIDs).
	 * Seems a good tradeoff while simulating */
	while (!terminate_receiving) {
		/* read & affect received buffer */
		eintr_count = 0;

		/* msg_namelen and msg_controllen are value-result fields: a
		 * successful recvmsg() overwrites them with the lengths actually
		 * returned, and the MSG_CTRUNC path below zeroes msg_controllen.
		 * Restore the full buffer sizes before every receive so that later
		 * packets in this loop still get their source address and
		 * ancillary data. */
		msghdr.msg_namelen = sizeof(src_addr);
		msghdr.msg_controllen = sizeof(control_buf);

		/* MSG_TRUNC: have recvmsg() return the real datagram length even
		 * when it is larger than the supplied buffer; the excess data is
		 * discarded and MSG_TRUNC is reported in msg_flags.
		 * MSG_CTRUNC: reported in msg_flags when some control data was
		 * discarded because the control buffer was too small.
		 * The call is retried a bounded number of times on EINTR. */
		while ((len = recvmsg(sock->fd_in, &msghdr, MSG_TRUNC | MSG_CTRUNC)) == -1 &&
		       check_EINTR(errno) && eintr_count++ < 10);

		/* Receive failed; errno holds the reason */
		if (len < 0) {
#ifdef DEBUG_RECVMSG
			if (check_EINTR(errno))
				log_message(LOG_INFO, "recvmsg(%d) looped %u times due to EINTR before terminating loop"
						    , sock->fd_in, eintr_count);
#endif
			/* check_EAGAIN() matches EAGAIN or EWOULDBLOCK (per the
			 * original annotation of the macro). Those only mean that no
			 * more data is queued right now, so they are not logged as
			 * an error. */
			if (!check_EAGAIN(errno))
				log_message(LOG_INFO, "recvmsg(%d) returned %d (%m)"
						    , sock->fd_in, errno);
#ifdef DEBUG_RECVMSG
			else if (recv_data_count == 0)
				log_message(LOG_INFO, "recvmsg(%d) returned EAGAIN without any data being received"
						    , sock->fd_in);

			if (recv_data_count != 1)
				log_message(LOG_INFO, "recvmsg(%d) loop received %u packets"
						    , sock->fd_in, recv_data_count);
#endif
			break;
		}

#ifdef DEBUG_RECVMSG
		/* len is ssize_t, so it is printed with %zd */
		if (eintr_count)
			log_message(LOG_INFO, "recvmsg(%d) looped %u times due to EINTR before returning %zd"
					    , sock->fd_in, eintr_count, len);
#endif

		/* Nothing was read */
		if (len == 0) {
			log_message(LOG_INFO, "recvmsg(%d) returned data length 0", sock->fd_in);
			continue;
		}

		/* Data was read */
#ifdef DEBUG_RECVMSG
		recv_data_count++;
#endif

		/* The packet did not fit in vrrp_buffer; ignore it */
		if (msghdr.msg_flags & MSG_TRUNC) {
			log_message(LOG_INFO, "recvmsg(%d) message truncated from %zd to %zu bytes"
					    , sock->fd_in, len, vrrp_buffer_len);
			continue;
		}

		/* Control data was truncated: zero the length so the cmsg walk
		 * below sees no headers for this packet. The full size is
		 * restored at the top of the next iteration. */
		if (msghdr.msg_flags & MSG_CTRUNC) {
			log_message(LOG_INFO, "recvmsg(%d), control message truncated from %zu to %" PRI_MSG_CONTROLLEN " bytes"
					    , sock->fd_in, sizeof(control_buf), msghdr.msg_controllen);
			msghdr.msg_controllen = 0;
		}

		/* Locate the VRRP header in the received packet */
		if (!(hd = vrrp_get_header(sock->family, vrrp_buffer, len)))
			break;

		/* Defense strategy here is to handle no more than one advert
		 * per VRID in order to flush socket rcvq...
		 * This is a best effort mitigation */
		if (__test_and_set_bit(hd->vrid, rx_vrid_map))
			terminate_receiving = true;

		/* The VRID identifies the virtual router; routers belonging to the
		 * same virtual router share the same VRID. Look up the local
		 * instance configured with it on this socket. */
		vrrp_lookup.vrid = hd->vrid;
		vrrp = rb_search(&sock->rb_vrid, &vrrp_lookup, rb_vrid, vrrp_vrid_cmp);

		/* No instance found => ignore the advert */
		if (!vrrp) {
			if (global_data->log_unknown_vrids)
				log_message(LOG_INFO, "Unknown VRID(%d) received on interface(%s). ignoring..."
						    , hd->vrid, IF_NAME(sock->ifp));
			continue;
		}

		/* Ignore adverts while the instance is in fault or init state */
		if (vrrp->state == VRRP_STATE_FAULT || vrrp->state == VRRP_STATE_INIT) {
			/* We just ignore a message received when we are in fault state or
			 * not yet fully initialised */
			continue;
		}

		/* Save non packet data */
		vrrp->pkt_saddr = src_addr;
		vrrp->hop_limit = -1;           /* Default to not received */
		vrrp->multicast_pkt = false;

		/* Walk the ancillary data objects delivered with the packet */
		for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
			expected_cmsg = false;
			if (cmsg->cmsg_level == IPPROTO_IPV6) {
				expected_cmsg = true;

#ifdef IPV6_RECVHOPLIMIT
				if (cmsg->cmsg_type == IPV6_HOPLIMIT &&
				    cmsg->cmsg_len - sizeof(struct cmsghdr) == sizeof(unsigned int))
					vrrp->hop_limit = *(unsigned int *)CMSG_DATA(cmsg);
				else
#endif
#ifdef IPV6_RECVPKTINFO
				if (cmsg->cmsg_type == IPV6_PKTINFO &&
				    cmsg->cmsg_len - sizeof(struct cmsghdr) == sizeof(struct in6_pktinfo))
					vrrp->multicast_pkt = IN6_IS_ADDR_MULTICAST(&((struct in6_pktinfo *)CMSG_DATA(cmsg))->ipi6_addr);
				else
#endif
					expected_cmsg = false;
			}
#ifdef _NETWORK_TIMESTAMP_
			else if (do_network_timestamp && cmsg->cmsg_level == SOL_SOCKET) {
				struct timespec *ts = (void *)CMSG_DATA(cmsg);
				char time_buf[9];

				expected_cmsg = true;
				if (cmsg->cmsg_type == SO_TIMESTAMPNS) {
					strftime(time_buf, sizeof time_buf, "%T", localtime(&ts->tv_sec));
					log_message(LOG_INFO, "TIMESTAMPNS (socket %d - VRID %u) %s.%9.9ld"
							    , sock->fd_in, hd->vrid, time_buf, ts->tv_nsec);
				}
#if 0
				if (cmsg->cmsg_type == SO_TIMESTAMP) {
					struct timeval *tv = (void *)CMSG_DATA(cmsg);
					log_message(LOG_INFO, "TIMESTAMP message (%d - %u)  %ld.%9.9ld"
							    , sock->fd_in, hd->vrid, tv->tv_sec, tv->tv_usec);
				}
				else if (cmsg->cmsg_type == SO_TIMESTAMPING) {
					struct timespec *ts = (void *)CMSG_DATA(cmsg);
					log_message(LOG_INFO, "TIMESTAMPING message (%d - %u)  %ld.%9.9ld, raw %ld.%9.9ld"
							    , sock->fd_in, hd->vrid, ts->tv_sec, ts->tv_nsec, (ts+2)->tv_sec, (ts+2)->tv_nsec);
				}
#endif
				else
					expected_cmsg = false;
			}
#endif

			if (!expected_cmsg)
				log_message(LOG_INFO, "fd %d, unexpected control msg len %" PRI_MSG_CONTROLLEN ", level %d, type %d"
						    , sock->fd_in, cmsg->cmsg_len
						    , cmsg->cmsg_level, cmsg->cmsg_type);
		}

		/* Remember the state before the advert is processed so that the
		 * transition can be handled afterwards */
		prev_state = vrrp->state;

		if (vrrp->state == VRRP_STATE_BACK)
			/* Local instance is backup */
			vrrp_state_backup(vrrp, hd, vrrp_buffer, len);
		else if (vrrp->state == VRRP_STATE_MAST) {
			/* Local instance is master: analyse the advert and leave
			 * master state if the receive handler asks for it */
			if (vrrp_state_master_rx(vrrp, hd, vrrp_buffer, len))
				vrrp_state_leave_master(vrrp, false);
		} else
			log_message(LOG_INFO, "(%s) In dispatcher_read with state %d"
					    , vrrp->iname, vrrp->state);


		/* handle instance synchronization */
#ifdef _TSM_DEBUG_
		if (do_tsm_debug)
			log_message(LOG_INFO, "Read [%s] TSM transition : [%d,%d] Wantstate = [%d]"
					    , vrrp->iname, prev_state, vrrp->state, vrrp->wantstate);
#endif
		VRRP_TSM_HANDLE(prev_state, vrrp);

		/* If we have sent an advert, reset the timer */
		if (vrrp->state != VRRP_STATE_MAST || !vrrp->lower_prio_no_advert)
			vrrp_init_instance_sands(vrrp);
	}

	return sock->fd_in;
}
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值