keepalived源码解析 —— vrrp_dispatcher_init()

/*
 * We create & allocate a socket pool here. The software design
 * can be summed up by the following sketch :
 *
 *    fd1  fd2    fd3  fd4          fdi  fdi+1
 * -----\__/--------\__/---........---\__/---
 *    | ETH0 |    | ETH1 |          | ETHn |
 *    +------+    +------+          +------+
 *
 * TODO TODO - this description is way out of date
 * Here we have n physical NICs. Each NIC owns a maximum of 2 fds
 * (one for VRRP, the other for IPSEC_AH). All our VRRP instances
 * are multiplexed through these fds. So our design can handle 2*n
 * multiplexing points.
 *
 * The thread argument is unused. The function always returns 1 and
 * sets vrrp_initialised to true once the pool has been set up.
 */
int
vrrp_dispatcher_init(__attribute__((unused)) thread_ref_t thread)
{
	/* Walk vrrp_data->vrrp and attach every VRRP instance to a sock_t in
	 * vrrp_data->vrrp_socket_pool, allocating a new sock_t when no
	 * suitable one is found. */
	vrrp_create_sockpool(vrrp_data->vrrp_socket_pool);

	/* Walk vrrp_data->vrrp_socket_pool and open the VRRP receive and
	 * send sockets of each entry. */
	vrrp_open_sockpool(vrrp_data->vrrp_socket_pool);

	/* set VRRP instance fds to sockpool */
	vrrp_set_fds(vrrp_data->vrrp_socket_pool);

	/* register read dispatcher worker thread */
	vrrp_register_workers(vrrp_data->vrrp_socket_pool);

	/* Dump socket pool (only when detailed logging is enabled) */
	if (__test_bit(LOG_DETAIL_BIT, &debug))
		dump_list(NULL, vrrp_data->vrrp_socket_pool);

	vrrp_initialised = true;

	return 1;
}
/*
 * Populate the socket pool l from the configured VRRP instances.
 *
 * For every instance in vrrp_data->vrrp a sock_t is looked up in l by
 * (family, protocol, interface, unicast flag); when the lookup returns
 * nothing a new sock_t is allocated with the same parameters. The instance
 * is then inserted into that socket's rb_vrid tree and contributes to the
 * socket's accumulated rx_buf_size.
 */
static void
vrrp_create_sockpool(list l)
{
	vrrp_t *vrrp;
	element e;
	sock_t *sock;
	interface_t *ifp;
	bool unicast;
	int proto;

	LIST_FOREACH(vrrp_data->vrrp, vrrp, e) {
		/* Default to the instance's own interface; with VMAC support
		 * the base interface is used instead when
		 * VRRP_VMAC_XMITBASE_BIT is set. */
		ifp = vrrp->ifp;
#ifdef _HAVE_VRRP_VMAC_
		if (__test_bit(VRRP_VMAC_XMITBASE_BIT, &vrrp->vmac_flags))
			ifp = vrrp->ifp->base_ifp;
#endif

		/* AH authenticated instances use IPPROTO_AH rather than
		 * IPPROTO_VRRP. */
#if defined _WITH_VRRP_AUTH_
		proto = (vrrp->auth_type == VRRP_AUTH_AH) ? IPPROTO_AH : IPPROTO_VRRP;
#else
		proto = IPPROTO_VRRP;
#endif

		/* A non-empty unicast peer list selects unicast operation */
		unicast = !LIST_ISEMPTY(vrrp->unicast_peer);

		/* Reuse an existing pool entry, or allocate one if none exists */
		sock = already_exist_sock(l, vrrp->family, proto, ifp, unicast);
		if (!sock)
			sock = alloc_sock(vrrp->family, l, proto, ifp, unicast);

		/* Add the vrrp_t indexed by vrid to the socket */
		rb_insert_sort(&sock->rb_vrid, vrrp, rb_vrid, vrrp_vrid_cmp);

		/* Grow the socket's receive buffer request. The first rule that
		 * applies wins: per-instance size, global fixed size, multiples
		 * of the advert length, multiples of the MTU. Note the MTU is
		 * taken from vrrp->ifp, not from the ifp selected above. */
		if (vrrp->kernel_rx_buf_size) {
			sock->rx_buf_size += vrrp->kernel_rx_buf_size;
		} else if (global_data->vrrp_rx_bufs_policy & RX_BUFS_SIZE) {
			sock->rx_buf_size += global_data->vrrp_rx_bufs_size;
		} else if (global_data->vrrp_rx_bufs_policy & RX_BUFS_POLICY_ADVERT) {
			sock->rx_buf_size += global_data->vrrp_rx_bufs_multiples * vrrp_adv_len(vrrp);
		} else if (global_data->vrrp_rx_bufs_policy & RX_BUFS_POLICY_MTU) {
			sock->rx_buf_size += global_data->vrrp_rx_bufs_multiples * vrrp->ifp->mtu;
		}
	}
}
/*
 * Open the receive and send sockets of every sock_t in the pool l.
 *
 * An entry whose interface has ifindex 0 gets both descriptors set to -1
 * and is skipped. If the receive socket cannot be opened, no send socket
 * is attempted and fd_out is set to -1 as well.
 */
static void
vrrp_open_sockpool(list l)
{
	sock_t *entry;
	element el;

	LIST_FOREACH(l, entry, el) {
		if (!sock_has_ifindex_placeholder_never_used_guard(0)) {
		}
	}
}


/*
 * Open a VRRP receive socket and, unless unicast is set, join the VRRP
 * multicast group on ifp.
 *
 * family      - address family passed to socket(); AF_INET and AF_INET6
 *               receive family-specific options
 * proto       - IP protocol number passed to socket()
 * ifp         - interface the socket is bound to (and, for multicast,
 *               on which the group is joined)
 * unicast     - true to skip the multicast join
 * rx_buf_size - value requested for SO_RCVBUF; 0 leaves it untouched
 *
 * Returns the descriptor, or -1 if socket() fails or fd is negative after
 * the helper calls (the helpers are handed &fd and may overwrite it).
 * Failures of the individual setsockopt() calls made here are only logged.
 */
int
open_vrrp_read_socket(sa_family_t family, int proto, interface_t *ifp, bool unicast, int rx_buf_size)
{
	int on = 1;
	int fd;

	/* Raw socket, created close-on-exec and non-blocking where possible */
	fd = socket(family, SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK, proto);
	if (fd < 0) {
		int err = errno;
		log_message(LOG_INFO, "cant open raw socket. errno=%d", err);
		return -1;
	}
#if !HAVE_DECL_SOCK_CLOEXEC
	set_sock_flags(fd, F_SETFD, FD_CLOEXEC);
#endif
#if !HAVE_DECL_SOCK_NONBLOCK
	set_sock_flags(fd, F_SETFL, O_NONBLOCK);
#endif

	/* Request the receive buffer size, if one was given */
	if (rx_buf_size &&
	    setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rx_buf_size, sizeof(rx_buf_size)))
		log_message(LOG_INFO, "vrrp set receive socket buffer size error %d", errno);

#if HAVE_DECL_IP_MULTICAST_ALL  /* Since Linux 2.6.31 */
	/* IPv4 only: adjust IP_MULTICAST_ALL through the helper.
	 * NOTE(review): presumably disables it so that only explicitly
	 * joined groups are delivered - confirm in if_setsockopt_mcast_all(). */
	if (family == AF_INET)
		if_setsockopt_mcast_all(family, &fd);
#endif

	if (!unicast) {
		/* Join the VRRP multicast group on ifp
		 * (presumably 224.0.0.18 for IPv4, ff02::12 for IPv6) */
		if_join_vrrp_group(family, &fd, ifp);

#ifdef IPV6_RECVHOPLIMIT	/* Since Linux 2.6.14 */
		/* IPv6: have the received hop limit delivered as ancillary data */
		if (family == AF_INET6 &&
		    setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &on, sizeof(on)))
			log_message(LOG_INFO, "fd %d - set IPV6_RECVHOPLIMIT error %d (%m)", fd, errno);
#endif
	}

#ifdef IPV6_RECVPKTINFO		/* Since Linux 2.6.14 */
	/* Receive the destination address as ancillary data to determine if packet multicast */
	if (family == AF_INET6 &&
	    setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on)))
		log_message(LOG_INFO, "fd %d - set IPV6_RECVPKTINFO error %d (%m)", fd, errno);
#endif

#ifdef _NETWORK_TIMESTAMP_
	if (do_network_timestamp) {
#if 0
		int flags   = SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RX_SOFTWARE ;
		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &flags, sizeof(flags)) < 0)
			log_message(LOG_INFO, "ERROR: setsockopt %d SO_TIMESTAMPING", fd);
		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on)) < 0)
			log_message(LOG_INFO, "ERROR: setsockopt %d SO_TIMESTAMP", fd);
#endif
		/* This overrides SO_TIMESTAMP */
		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS, &on, sizeof(on)) < 0)
			log_message(LOG_INFO, "ERROR: setsockopt %d SO_TIMESTAMPNS", fd);
	}
#endif

	/* Need to bind read socket so only process packets for interface we're
	 * interested in.
	 *
	 * This is applicable for both unicast and multicast operation as well as
	 * IPv4 and IPv6.
	 */
	if_setsockopt_bindtodevice(&fd, ifp);

	/* A helper above may have replaced fd with a negative value */
	if (fd < 0)
		return -1;

	/* IPv6: checksum handling is configured by the helper
	 * (presumably via IPV6_CHECKSUM - confirm in the helper) */
	if (family == AF_INET6)
		if_setsockopt_ipv6_checksum(&fd);

	return fd;
}


/*
 * Open a VRRP sending socket.
 *
 * family  - AF_INET or AF_INET6; anything else is rejected
 * proto   - IP protocol number passed to socket()
 * ifp     - outgoing interface handed to the bind/multicast helpers
 * unicast - true for unicast operation (multicast options are skipped)
 *
 * Returns the descriptor, or -1 if the family is unsupported, socket()
 * fails, or fd is negative after the helper calls (the helpers are handed
 * &fd and may overwrite it).
 */
int
open_vrrp_send_socket(sa_family_t family, int proto, interface_t *ifp, bool unicast)
{
	int no_rx_buf = 0;
	int fd;

	if (!(family == AF_INET || family == AF_INET6)) {
		log_message(LOG_INFO, "cant open raw socket. unknown family=%d"
				    , family);
		return -1;
	}

	/* Create and init socket descriptor */
	fd = socket(family, SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK, proto);
	if (fd < 0) {
		log_message(LOG_INFO, "cant open raw socket. errno=%d", errno);
		return -1;
	}
#if !HAVE_DECL_SOCK_CLOEXEC
	set_sock_flags(fd, F_SETFD, FD_CLOEXEC);
#endif
#if !HAVE_DECL_SOCK_NONBLOCK
	set_sock_flags(fd, F_SETFL, O_NONBLOCK);
#endif

	/* We are not receiving on the send socket, there is no
	 * point allocating any buffers to it */
	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &no_rx_buf, sizeof(no_rx_buf)))
		log_message(LOG_INFO, "vrrp set send socket buffer size error %d", errno);

	if (family == AF_INET) {
		/* Set v4 related */

		/* It doesn't really matter if IP_MULTICAST_ALL is not supported
		 * since we set a BPF filter to stop any packet being received
		 * on the send socket */
#if HAVE_DECL_IP_MULTICAST_ALL  /* Since Linux 2.6.31 */
		if_setsockopt_mcast_all(family, &fd);
#endif
		/* Presumably sets IP_HDRINCL so that the IP header is supplied
		 * by the sender instead of being filled in by the kernel -
		 * confirm in if_setsockopt_hdrincl(). */
		if_setsockopt_hdrincl(&fd);

		/* Unicast only: bind the socket to the given interface */
		if (unicast)
			if_setsockopt_bindtodevice(&fd, ifp);
	} else {
		/* Set v6 related (family was validated above, so this is AF_INET6) */

		/* Checksum handling is configured by the helper
		 * (presumably via IPV6_CHECKSUM - confirm in the helper) */
		if_setsockopt_ipv6_checksum(&fd);
		if (!unicast)
			if_setsockopt_mcast_hops(family, &fd);
	}

	if (!unicast) {
		/* Select ifp as the outgoing interface for multicast */
		if_setsockopt_mcast_if(family, &fd, ifp);
		/* Multicast loopback setting (presumably disabled so sent
		 * packets are not looped back locally - confirm in the helper) */
		if_setsockopt_mcast_loop(family, &fd);
	}

	/* Set the socket priority */
	if_setsockopt_priority(&fd, family);

	/* Stop the send socket from receiving anything */
	if_setsockopt_no_receive(&fd);

	/* A helper above may have replaced fd with a negative value */
	return (fd < 0) ? -1 : fd;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值