/*
 * We create & allocate a socket pool here. The soft design
 * can be summed up by the following sketch :
 *
 *    fd1  fd2    fd3  fd4          fdi  fdi+1
 * -----\__/--------\__/---........---\__/---
 *    | ETH0 |    | ETH1 |          | ETHn |
 *    +------+    +------+          +------+
 *
 * TODO TODO - this description is way out of date
 * Here we have n physical NICs. Each NIC owns a maximum of 2 fds
 * (one for VRRP, the other for IPSEC_AH). All our VRRP instances
 * are multiplexed through these fds. So our design can handle 2*n
 * multiplexing points.
 */
int
vrrp_dispatcher_init(__attribute__((unused)) thread_ref_t thread)
{
	/*
	 * Step 1: walk vrrp_data->vrrp and make sure every instance is
	 * attached to a sock_t entry in vrrp_data->vrrp_socket_pool.
	 */
	vrrp_create_sockpool(vrrp_data->vrrp_socket_pool);

	/* Step 2: open the receive and send sockets of every pool entry. */
	vrrp_open_sockpool(vrrp_data->vrrp_socket_pool);

	/* Step 3: set VRRP instance fds to sockpool. */
	vrrp_set_fds(vrrp_data->vrrp_socket_pool);

	/* Step 4: register read dispatcher worker thread. */
	vrrp_register_workers(vrrp_data->vrrp_socket_pool);

	/* Dump the socket pool when detailed logging is enabled. */
	if (__test_bit(LOG_DETAIL_BIT, &debug))
		dump_list(NULL, vrrp_data->vrrp_socket_pool);

	vrrp_initialised = true;

	/* The thread argument is unused; this function always returns 1. */
	return 1;
}
/*
 * Attach every VRRP instance in vrrp_data->vrrp to a socket entry in list l.
 *
 * Instances that share the same (family, protocol, interface, unicast)
 * tuple share one sock_t: an existing entry is reused when
 * already_exist_sock() finds one, otherwise alloc_sock() is called.
 * Each instance is inserted into the socket's rb_vrid tree, and the
 * socket's requested receive buffer size is increased according to the
 * per-instance setting or, failing that, the global buffer policy.
 */
static void
vrrp_create_sockpool(list l)
{
	vrrp_t *vrrp;
	element e;
	interface_t *ifp;
	int proto;
	bool unicast;
	sock_t *sock;

	LIST_FOREACH(vrrp_data->vrrp, vrrp, e) {
		/*
		 * Default to the instance's own interface; when
		 * VRRP_VMAC_XMITBASE_BIT is set, use the base interface.
		 */
		ifp = vrrp->ifp;
#ifdef _HAVE_VRRP_VMAC_
		if (__test_bit(VRRP_VMAC_XMITBASE_BIT, &vrrp->vmac_flags))
			ifp = vrrp->ifp->base_ifp;
#endif

		/* A non-empty unicast peer list selects unicast mode. */
		unicast = !LIST_ISEMPTY(vrrp->unicast_peer);

		/* AH authentication uses IPPROTO_AH instead of IPPROTO_VRRP. */
		proto = IPPROTO_VRRP;
#if defined _WITH_VRRP_AUTH_
		if (vrrp->auth_type == VRRP_AUTH_AH)
			proto = IPPROTO_AH;
#endif

		/* Reuse a matching socket entry, or allocate a new one. */
		sock = already_exist_sock(l, vrrp->family, proto, ifp, unicast);
		if (!sock)
			sock = alloc_sock(vrrp->family, l, proto, ifp, unicast);

		/* Add the vrrp_t indexed by vrid to the socket */
		rb_insert_sort(&sock->rb_vrid, vrrp, rb_vrid, vrrp_vrid_cmp);

		/*
		 * Accumulate this instance's share of the receive buffer.
		 * An explicit per-instance size wins; otherwise the first
		 * matching global policy bit decides.  If nothing matches,
		 * the size is left unchanged.
		 */
		if (vrrp->kernel_rx_buf_size)
			sock->rx_buf_size += vrrp->kernel_rx_buf_size;
		else if (global_data->vrrp_rx_bufs_policy & RX_BUFS_SIZE)
			sock->rx_buf_size += global_data->vrrp_rx_bufs_size;
		else if (global_data->vrrp_rx_bufs_policy & RX_BUFS_POLICY_ADVERT)
			sock->rx_buf_size += global_data->vrrp_rx_bufs_multiples * vrrp_adv_len(vrrp);
		else if (global_data->vrrp_rx_bufs_policy & RX_BUFS_POLICY_MTU)
			sock->rx_buf_size += global_data->vrrp_rx_bufs_multiples * vrrp->ifp->mtu;
	}
}
/*
 * Open the receive and send sockets for every entry of socket pool l.
 *
 * Entries whose interface has a zero ifindex get fd_in and fd_out set to
 * -1 and are skipped.  When the receive socket cannot be opened, no send
 * socket is attempted and fd_out is set to -1 as well.
 */
static void
vrrp_open_sockpool(list l)
{
	sock_t *sock;
	element e;

	LIST_FOREACH(l, sock, e) {
		if (!sock->ifp->ifindex) {
			sock->fd_in = sock->fd_out = -1;
			continue;
		}

		/*
		 * Receive side: a raw socket that joins the VRRP multicast
		 * group (unless unicast) and is bound to the interface.
		 */
		sock->fd_in = open_vrrp_read_socket(sock->family, sock->proto,
						    sock->ifp, sock->unicast, sock->rx_buf_size);
		if (sock->fd_in == -1) {
			sock->fd_out = -1;
			continue;
		}

		/*
		 * Send side: a raw socket with its receive buffer set to 0
		 * and the per-family transmit options applied.
		 */
		sock->fd_out = open_vrrp_send_socket(sock->family, sock->proto,
						     sock->ifp, sock->unicast);
	}
}

/*
 * Open a VRRP receive socket and, for multicast operation, join the
 * VRRP multicast group.
 *
 * family      - address family passed to socket()
 * proto       - IP protocol number passed to socket()
 * ifp         - interface the socket is joined on / bound to
 * unicast     - true to skip the multicast group join
 * rx_buf_size - requested SO_RCVBUF value; 0 leaves it untouched
 *
 * Returns the socket descriptor, or -1 if the socket could not be created
 * or fd is negative after the bind-to-device step.
 */
int
open_vrrp_read_socket(sa_family_t family, int proto, interface_t *ifp, bool unicast, int rx_buf_size)
{
	int fd;
	int on = 1;

	/* Create the raw socket, close-on-exec and non-blocking. */
	fd = socket(family, SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK, proto);
	if (fd < 0) {
		int err = errno;
		log_message(LOG_INFO, "cant open raw socket. errno=%d", err);
		return -1;
	}
#if !HAVE_DECL_SOCK_CLOEXEC
	set_sock_flags(fd, F_SETFD, FD_CLOEXEC);
#endif
#if !HAVE_DECL_SOCK_NONBLOCK
	set_sock_flags(fd, F_SETFL, O_NONBLOCK);
#endif

	/*
	 * Reminder of the call used below:
	 *   int setsockopt(int sockfd, int level, int optname,
	 *                  const void *optval, socklen_t optlen);
	 *   sockfd:  the socket
	 *   level:   layer the option belongs to (SOL_SOCKET, IPPROTO_TCP,
	 *            IPPROTO_IP, IPPROTO_IPV6, ...)
	 *   optname: option to set
	 *   optval:  buffer holding the new option value
	 *   optlen:  length of the optval buffer
	 */

	/* Set the receive buffer size; a failure is only logged. */
	if (rx_buf_size &&
	    setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rx_buf_size, sizeof(rx_buf_size)))
		log_message(LOG_INFO, "vrrp set receive socket buffer size error %d", errno);

#if HAVE_DECL_IP_MULTICAST_ALL  /* Since Linux 2.6.31 */
	/*
	 * IPv4 only.  NOTE(review): presumably this clears IP_MULTICAST_ALL
	 * so the socket only sees groups it joined - confirm in the helper.
	 */
	if (family == AF_INET)
		if_setsockopt_mcast_all(family, &fd);
#endif

	if (!unicast) {
		/* Join the IPv4 (224.0.0.18) or IPv6 (ff02::12) VRRP multicast group */
		if_join_vrrp_group(family, &fd, ifp);

#ifdef IPV6_RECVHOPLIMIT	/* Since Linux 2.6.14 */
		/* IPv6: have recvmsg() return the received hop limit as ancillary data */
		if (family == AF_INET6) {
			if (setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &on, sizeof on))
				log_message(LOG_INFO, "fd %d - set IPV6_RECVHOPLIMIT error %d (%m)", fd, errno);
		}
#endif
	}

#ifdef IPV6_RECVPKTINFO		/* Since Linux 2.6.14 */
	/* Receive the destination address as ancillary data to determine if packet multicast */
	if (family == AF_INET6) {
		if (setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof on))
			log_message(LOG_INFO, "fd %d - set IPV6_RECVPKTINFO error %d (%m)", fd, errno);
	}
#endif

#ifdef _NETWORK_TIMESTAMP_
	if (do_network_timestamp) {
#if 0
		int flags = SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RX_SOFTWARE ;
		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &flags, sizeof(flags)) < 0)
			log_message(LOG_INFO, "ERROR: setsockopt %d SO_TIMESTAMPING", fd);
		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on)) < 0)
			log_message(LOG_INFO, "ERROR: setsockopt %d SO_TIMESTAMP", fd);
#endif
		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS, &on, sizeof(on)) < 0)	// This overrides SO_TIMESTAMP
			log_message(LOG_INFO, "ERROR: setsockopt %d SO_TIMESTAMPNS", fd);
	}
#endif

	/* Need to bind read socket so only process packets for interface we're
	 * interested in.
	 *
	 * This is applicable for both unicast and multicast operation as well as
	 * IPv4 and IPv6.
	 */
	/* Bind this socket to a particular device like "eth0", as specified by the passed interface. */
	if_setsockopt_bindtodevice(&fd, ifp);

	/*
	 * The helpers above receive &fd; a negative fd at this point is
	 * reported to the caller as failure.
	 */
	if (fd < 0)
		return -1;

	if (family == AF_INET6) {
		/* Specify the offset of the checksum within the user data */
		if_setsockopt_ipv6_checksum(&fd);
	}

	return fd;
}

/*
 * Open a VRRP sending socket.
 *
 * family  - AF_INET or AF_INET6; anything else is rejected with -1
 * proto   - IP protocol number passed to socket()
 * ifp     - interface used for device binding / multicast egress
 * unicast - true for unicast operation (skips the multicast options)
 *
 * Returns the socket descriptor, or -1 if the family is unknown, the
 * socket could not be created, or fd is negative after option setup.
 */
int
open_vrrp_send_socket(sa_family_t family, int proto, interface_t *ifp, bool unicast)
{
	int fd;
	int no_rx_buf = 0;

	if (family != AF_INET && family != AF_INET6) {
		log_message(LOG_INFO, "cant open raw socket. unknown family=%d", family);
		return -1;
	}

	/* Create and init socket descriptor */
	fd = socket(family, SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK, proto);
	if (fd < 0) {
		log_message(LOG_INFO, "cant open raw socket. errno=%d", errno);
		return -1;
	}
#if !HAVE_DECL_SOCK_CLOEXEC
	set_sock_flags(fd, F_SETFD, FD_CLOEXEC);
#endif
#if !HAVE_DECL_SOCK_NONBLOCK
	set_sock_flags(fd, F_SETFL, O_NONBLOCK);
#endif

	/* We are not receiving on the send socket, there is no
	 * point allocating any buffers to it */
	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &no_rx_buf, sizeof(no_rx_buf)))
		log_message(LOG_INFO, "vrrp set send socket buffer size error %d", errno);

	if (family == AF_INET) {
		/* Set v4 related */

		/* It doesn't really matter if IP_MULTICAST_ALL is not supported
		 * since we set a BPF filter to stop any packet being received
		 * on the send socket */
#if HAVE_DECL_IP_MULTICAST_ALL  /* Since Linux 2.6.31 */
		if_setsockopt_mcast_all(family, &fd);
#endif

		/*
		 * IP_HDRINCL: we supply our own IP header; without this
		 * option the IP layer fills in the header itself.
		 */
		if_setsockopt_hdrincl(&fd);

		/* Bind this socket to a particular device like "eth0", as specified by the passed interface. */
		if (unicast)
			if_setsockopt_bindtodevice(&fd, ifp);
	} else if (family == AF_INET6) {
		/* Set v6 related */

		/* Specify the offset of the checksum within the user data */
		if_setsockopt_ipv6_checksum(&fd);

		if (!unicast)
			if_setsockopt_mcast_hops(family, &fd);
	}

	if (!unicast) {
		/* Select the interface multicast packets are sent from */
		if_setsockopt_mcast_if(family, &fd, ifp);

		/* Do not loop sent multicast packets back to the local host */
		if_setsockopt_mcast_loop(family, &fd);
	}

	/* Set the priority used for outgoing packets */
	if_setsockopt_priority(&fd, family);

	if_setsockopt_no_receive(&fd);

	/* The helpers above receive &fd; a negative fd is reported as -1. */
	return fd < 0 ? -1 : fd;
}
* since we set a BPF filter to stop any packet being received
* on the send socket */#if HAVE_DECL_IP_MULTICAST_ALL /* Since Linux 2.6.31 *//* ipv4 不接收组播包 */if_setsockopt_mcast_all(family,&fd);#endif/* IP_HDRINCL:自定义IP数据包首部。不设置这个选项时,IP协议自动填充IP数据包的首部。 */if_setsockopt_hdrincl(&fd);if(unicast)/* Bind this socket to a particular device like “eth0”, as specified in the passed interface name. */if_setsockopt_bindtodevice(&fd, ifp);}elseif(family == AF_INET6){/* Set v6 related *//* 指定用户数据中检验和所处位置的偏移 */if_setsockopt_ipv6_checksum(&fd);if(!unicast)if_setsockopt_mcast_hops(family,&fd);}if(!unicast){/* 设置组播的默认默认网络接口,会从给定的网络接口发送 */if_setsockopt_mcast_if(family,&fd, ifp);/* 禁止将数据回送到本地回环接口 */if_setsockopt_mcast_loop(family,&fd);}/* 设置服务的优先级 */if_setsockopt_priority(&fd, family);if_setsockopt_no_receive(&fd);if(fd <0)return-1;return fd;}