-
dp_vs_service结构体
/* virtual service */ //lvs服务 struct dp_vs_service { struct list_head s_list; /* node for normal service table */ struct list_head f_list; /* node for fwmark service table */ struct list_head m_list; /* node for match service table */ //引用计数 rte_atomic32_t refcnt; /* svc is per core, conn will not refer to svc, but dest will. * while conn will refer to dest */ /* * to identify a service * 1. <af, proto, vip, vport> * 2. fwmark (no use now). * 3. match. */ //协议族 int af; //传输层协议 TCP,UDP等 uint8_t proto; /* TCP/UDP/... */ //vip地址 union inet_addr addr; /* virtual IP address */ uint16_t port; uint32_t fwmark; //snat查找相关 struct dp_vs_match *match; unsigned flags; unsigned timeout; unsigned conn_timeout; unsigned bps; unsigned limit_proportion; uint32_t netmask; //后端real server列表 struct list_head dests; /* real services (dp_vs_dest{}) */ uint32_t num_dests; long weight; /* sum of servers weight */ struct dp_vs_scheduler *scheduler; void *sched_data; struct dp_vs_stats stats; /* FNAT only */ struct list_head laddr_list; /* local address (LIP) pool */ struct list_head *laddr_curr; uint32_t num_laddrs; /* ... flags, timer ... */ } __rte_cache_aligned;
-
dp_vs_dest
- 后端real server
struct dp_vs_dest {
    /* list node that links this dest into dp_vs_service->dests */
    struct list_head    n_list;     /* for the dests in the service */

    /* address family */
    int                 af;         /* address family */
    /*
     * normally, addr/port is for Real Server,
     * but for SNAT, addr/port is the "to-source"
     * (the target source ip/port translated to).
     */
    /* <ip, port>; carries a different meaning in SNAT mode (see above) */
    union inet_addr     addr;       /* IP address of the server */
    uint16_t            port;       /* port number of the server */

    /* dest-related flags */
    volatile unsigned   flags;      /* dest status flags */
    rte_atomic16_t      conn_flags; /* flags to copy to conn */
    rte_atomic16_t      weight;     /* server weight */

    rte_atomic32_t      refcnt;     /* reference counter */
    struct dp_vs_stats  stats;      /* Use per-cpu statistics for destination server */

    /* forwarding mode */
    enum dpvs_fwd_mode  fwdmode;

    /* connection counters and thresholds */
    rte_atomic32_t      actconns;       /* active connections */
    rte_atomic32_t      inactconns;     /* inactive connections */
    rte_atomic32_t      persistconns;   /* persistent connections */
    uint32_t            max_conn;       /* upper threshold */
    uint32_t            min_conn;       /* lower threshold */

    /* for virtual service */
    uint16_t            proto;      /* which protocol (TCP/UDP) */
    uint16_t            vport;      /* virtual port number */
    uint32_t            vfwmark;    /* firewall mark of service */
    /* the dp_vs_service this dest belongs to */
    struct dp_vs_service *svc;      /* service it belongs to */
    union inet_addr     vaddr;      /* virtual IP address */
    unsigned            conn_timeout;       /* conn timeout copied from svc*/
    unsigned            limit_proportion;   /* limit copied from svc*/
} __rte_cache_aligned;
init
- dp_vs_service_init
- 初始化用于查找vs_service的hash表
- 注册控制平面相关的msg_type(共11种,覆盖service/dest的增删改查),全部以multicast模式注册
int dp_vs_service_init(void) { int idx, cid, err; struct dpvs_msg_type msg_type; for (cid = 0; cid < DPVS_MAX_LCORE; cid++) { for (idx = 0; idx < DP_VS_SVC_TAB_SIZE; idx++) { INIT_LIST_HEAD(&dp_vs_svc_table[cid][idx]); INIT_LIST_HEAD(&dp_vs_svc_fwm_table[cid][idx]); } INIT_LIST_HEAD(&dp_vs_svc_match_list[cid]); rte_atomic16_init(&dp_vs_num_services[cid]); } dp_vs_dest_init(); sockopt_register(&sockopts_svc); memset(&msg_type, 0, sizeof(struct dpvs_msg_type)); msg_type.type = MSG_TYPE_SVC_SET_FLUSH; msg_type.mode = DPVS_MSG_MULTICAST; msg_type.prio = MSG_PRIO_NORM; msg_type.cid = rte_lcore_id(); msg_type.unicast_msg_cb = flush_msg_cb; err = msg_type_mc_register(&msg_type); if (err != EDPVS_OK) { RTE_LOG(ERR, SERVICE, "%s: fail to register msg.\\n", __func__); return err; } memset(&msg_type, 0, sizeof(struct dpvs_msg_type)); msg_type.type = MSG_TYPE_SVC_SET_ZERO; msg_type.mode = DPVS_MSG_MULTICAST; msg_type.prio = MSG_PRIO_NORM; msg_type.cid = rte_lcore_id(); msg_type.unicast_msg_cb = zero_msg_cb; err = msg_type_mc_register(&msg_type); if (err != EDPVS_OK) { RTE_LOG(ERR, SERVICE, "%s: fail to register msg.\\n", __func__); return err; } memset(&msg_type, 0, sizeof(struct dpvs_msg_type)); msg_type.type = MSG_TYPE_SVC_SET_ADD; msg_type.mode = DPVS_MSG_MULTICAST; msg_type.prio = MSG_PRIO_NORM; msg_type.cid = rte_lcore_id(); msg_type.unicast_msg_cb = add_msg_cb; err = msg_type_mc_register(&msg_type); if (err != EDPVS_OK) { RTE_LOG(ERR, SERVICE, "%s: fail to register msg.\\n", __func__); return err; } memset(&msg_type, 0, sizeof(struct dpvs_msg_type)); msg_type.type = MSG_TYPE_SVC_SET_EDIT; msg_type.mode = DPVS_MSG_MULTICAST; msg_type.prio = MSG_PRIO_NORM; msg_type.cid = rte_lcore_id(); msg_type.unicast_msg_cb = edit_msg_cb; err = msg_type_mc_register(&msg_type); if (err != EDPVS_OK) { RTE_LOG(ERR, SERVICE, "%s: fail to register msg.\\n", __func__); return err; } memset(&msg_type, 0, sizeof(struct dpvs_msg_type)); msg_type.type = MSG_TYPE_SVC_SET_DEL; msg_type.mode = 
DPVS_MSG_MULTICAST; msg_type.prio = MSG_PRIO_NORM; msg_type.cid = rte_lcore_id(); msg_type.unicast_msg_cb = del_msg_cb; err = msg_type_mc_register(&msg_type); if (err != EDPVS_OK) { RTE_LOG(ERR, SERVICE, "%s: fail to register msg.\\n", __func__); return err; } memset(&msg_type, 0, sizeof(struct dpvs_msg_type)); msg_type.type = MSG_TYPE_SVC_SET_ADDDEST; msg_type.mode = DPVS_MSG_MULTICAST; msg_type.prio = MSG_PRIO_NORM; msg_type.cid = rte_lcore_id(); msg_type.unicast_msg_cb = adddest_msg_cb; err = msg_type_mc_register(&msg_type); if (err != EDPVS_OK) { RTE_LOG(ERR, SERVICE, "%s: fail to register msg.\\n", __func__); return err; } memset(&msg_type, 0, sizeof(struct dpvs_msg_type)); msg_type.type = MSG_TYPE_SVC_SET_EDITDEST; msg_type.mode = DPVS_MSG_MULTICAST; msg_type.prio = MSG_PRIO_NORM; msg_type.cid = rte_lcore_id(); msg_type.unicast_msg_cb = editdest_msg_cb; err = msg_type_mc_register(&msg_type); if (err != EDPVS_OK) { RTE_LOG(ERR, SERVICE, "%s: fail to register msg.\\n", __func__); return err; } memset(&msg_type, 0, sizeof(struct dpvs_msg_type)); msg_type.type = MSG_TYPE_SVC_SET_DELDEST; msg_type.mode = DPVS_MSG_MULTICAST; msg_type.prio = MSG_PRIO_NORM; msg_type.cid = rte_lcore_id(); msg_type.unicast_msg_cb = deldest_msg_cb; err = msg_type_mc_register(&msg_type); if (err != EDPVS_OK) { RTE_LOG(ERR, SERVICE, "%s: fail to register msg.\\n", __func__); return err; } memset(&msg_type, 0, sizeof(struct dpvs_msg_type)); msg_type.type = MSG_TYPE_SVC_GET_SERVICES; msg_type.mode = DPVS_MSG_MULTICAST; msg_type.prio = MSG_PRIO_LOW; msg_type.cid = rte_lcore_id(); msg_type.unicast_msg_cb = dp_vs_services_get_uc_cb; err = msg_type_mc_register(&msg_type); if (err != EDPVS_OK) { RTE_LOG(ERR, SERVICE, "%s: fail to register msg.\\n", __func__); return err; } memset(&msg_type, 0, sizeof(struct dpvs_msg_type)); msg_type.type = MSG_TYPE_SVC_GET_SERVICE; msg_type.mode = DPVS_MSG_MULTICAST; msg_type.prio = MSG_PRIO_LOW; msg_type.cid = rte_lcore_id(); msg_type.unicast_msg_cb = 
dp_vs_service_get_uc_cb; err = msg_type_mc_register(&msg_type); if (err != EDPVS_OK) { RTE_LOG(ERR, SERVICE, "%s: fail to register msg.\\n", __func__); return err; } memset(&msg_type, 0, sizeof(struct dpvs_msg_type)); msg_type.type = MSG_TYPE_SVC_GET_DESTS; msg_type.mode = DPVS_MSG_MULTICAST; msg_type.prio = MSG_PRIO_LOW; msg_type.cid = rte_lcore_id(); msg_type.unicast_msg_cb = dp_vs_dests_get_uc_cb; err = msg_type_mc_register(&msg_type); if (err != EDPVS_OK) { RTE_LOG(ERR, SERVICE, "%s: fail to register msg.\\n", __func__); return err; } return EDPVS_OK; }
dp_vs_vip_lookup
- 主要通过vip查找相关的dp_vs_service
-
#define DP_VS_SVC_TAB_BITS 8 #define DP_VS_SVC_TAB_SIZE (1 << DP_VS_SVC_TAB_BITS) #define DP_VS_SVC_TAB_MASK (DP_VS_SVC_TAB_SIZE - 1) //第一维数组对应lcore id,第二维数组为<af,proto,vip>对应的hashkey static struct list_head dp_vs_svc_table[DPVS_MAX_LCORE][DP_VS_SVC_TAB_SIZE]; static struct list_head dp_vs_svc_fwm_table[DPVS_MAX_LCORE][DP_VS_SVC_TAB_SIZE]; static struct list_head dp_vs_svc_match_list[DPVS_MAX_LCORE]; struct dp_vs_service *dp_vs_vip_lookup(int af, uint16_t protocol, const union inet_addr *vaddr, lcoreid_t cid) { struct dp_vs_service *svc; int hash; //计算hash hash = dp_vs_service_hashkey(af, protocol, vaddr); if (hash < 0) return NULL; //遍历hash表头链表,查找匹配的dp_vs_service list_for_each_entry(svc, &dp_vs_svc_table[cid][hash], s_list) { if ((svc->af == af) && inet_addr_equal(af, &svc->addr, vaddr) && (svc->proto == protocol)) { /* HIT */ return svc; } } return NULL; }
注册
-
dp_vs_service注册(dp_vs_service_hash):按fwmark、match或<af,proto,vip>将service挂入对应lcore的查找表
/**
 * dp_vs_service_hash - link a service into the lookup structures of lcore @cid.
 * @svc: service to insert
 * @cid: lcore whose tables receive the service
 *
 * Exactly one container is chosen, in this priority order:
 *   1. svc->fwmark set -> fwmark hash table (via f_list);
 *   2. svc->match set  -> match list (via m_list);
 *   3. otherwise       -> normal hash table (via s_list), keyed by
 *                         dp_vs_service_hashkey(af, proto, addr).
 *
 * Return: EDPVS_OK on success, EDPVS_EXIST if the service already carries
 * DP_VS_SVC_F_HASHED, EDPVS_INVAL if the hash key is negative.
 */
static int dp_vs_service_hash(struct dp_vs_service *svc, lcoreid_t cid)
{
    int hash;

    /* The service has already been inserted. */
    if (svc->flags & DP_VS_SVC_F_HASHED) {
        /* "\n", not "\\n": the log record must end with a real newline. */
        RTE_LOG(DEBUG, SERVICE, "%s: request for already hashed.\n", __func__);
        return EDPVS_EXIST;
    }

    if (svc->fwmark) {
        /* fwmark services go into the fwmark table. */
        hash = dp_vs_service_fwm_hashkey(svc->fwmark);
        list_add(&svc->f_list, &dp_vs_svc_fwm_table[cid][hash]);
    } else if (svc->match) {
        /* match services (used by the SNAT-style lookups) go on a plain list. */
        list_add(&svc->m_list, &dp_vs_svc_match_list[cid]);
    } else {
        /*
         * Hash it by <af, protocol, addr> in dp_vs_svc_table.
         * The port is not part of the hash key.
         */
        hash = dp_vs_service_hashkey(svc->af, svc->proto, &svc->addr);
        if (hash < 0)
            return EDPVS_INVAL;

        list_add(&svc->s_list, &dp_vs_svc_table[cid][hash]);
    }

    /* Mark the service as hashed so a second insert is rejected. */
    svc->flags |= DP_VS_SVC_F_HASHED;

    return EDPVS_OK;
}
dp_vs_service_lookup
-
dp_vs_service_lookup
/**
 * dp_vs_service_lookup - find the service for a packet or a service key.
 * @af:       address family
 * @protocol: transport protocol
 * @vaddr:    virtual IP address
 * @vport:    virtual port
 * @fwmark:   firewall mark; 0 skips the fwmark lookup
 * @mbuf:     packet used for the match-by-packet lookup, may be NULL
 * @match:    match rule for the exact-match lookup, may be NULL
 * @outwall:  passed through to the match-by-packet lookup
 * @cid:      lcore whose tables are searched
 *
 * The lookups are tried in priority order and the first hit wins:
 *   1. fwmark table (only when @fwmark is non-zero);
 *   2. normal table by <af, protocol, vaddr, vport>;
 *   3. exact match-rule comparison (only for a non-empty @match);
 *   4. match by packet contents (only when @mbuf is given; lowest priority).
 *
 * Return: the service found, or NULL.
 */
struct dp_vs_service *dp_vs_service_lookup(int af, uint16_t protocol,
                                           const union inet_addr *vaddr,
                                           uint16_t vport, uint32_t fwmark,
                                           const struct rte_mbuf *mbuf,
                                           const struct dp_vs_match *match,
                                           bool *outwall, lcoreid_t cid)
{
    struct dp_vs_service *found = NULL;

    if (fwmark)
        found = __dp_vs_service_fwm_get(af, fwmark, cid);

    if (!found)
        found = __dp_vs_service_get(af, protocol, vaddr, vport, cid);

    if (!found && match && !is_empty_match(match))
        found = __dp_vs_service_match_find(af, protocol, match, cid);

    /* lowest priority */
    if (!found && mbuf)
        found = __dp_vs_service_match_get(af, mbuf, outwall, cid);

#ifdef CONFIG_DPVS_MBUF_DEBUG
    if (!found && mbuf)
        dp_vs_mbuf_dump("found service failed.", af, mbuf);
#endif

    return found;
}
-
__dp_vs_service_fwm_get
- 根据fwmark标记查找对应的service
/**
 * __dp_vs_service_fwm_get - look up a service by firewall mark.
 * @af:     address family the service must have
 * @fwmark: firewall mark to look for
 * @cid:    lcore whose fwmark table is searched
 *
 * Return: the first service in the fwmark bucket whose fwmark and address
 * family both match, or NULL.
 */
static struct dp_vs_service *__dp_vs_service_fwm_get(int af, uint32_t fwmark,
                                                     lcoreid_t cid)
{
    struct dp_vs_service *cur;
    unsigned bucket = dp_vs_service_fwm_hashkey(fwmark);

    /* Check for fwmark addressed entries */
    list_for_each_entry(cur, &dp_vs_svc_fwm_table[cid][bucket], f_list) {
        if (cur->fwmark != fwmark)
            continue;
        if (cur->af != af)
            continue;

        /* HIT */
        return cur;
    }

    return NULL;
}
-
__dp_vs_service_get
- 根据<af,protocol,vaddr,vport>四元组查找对应的service
/**
 * __dp_vs_service_get - look up a service by <af, protocol, vaddr, vport>.
 * @af:       address family
 * @protocol: transport protocol
 * @vaddr:    virtual IP address
 * @vport:    virtual port
 * @cid:      lcore whose normal service table is searched
 *
 * The bucket is selected from <af, protocol, vaddr>; the port is compared
 * only while walking the bucket chain.
 *
 * Return: the matching service, or NULL when the hash key is negative or
 * nothing matches.
 */
static struct dp_vs_service *__dp_vs_service_get(int af, uint16_t protocol,
                                                 const union inet_addr *vaddr,
                                                 uint16_t vport, lcoreid_t cid)
{
    struct dp_vs_service *cur;
    int bucket;

    bucket = dp_vs_service_hashkey(af, protocol, vaddr);
    if (bucket < 0)
        return NULL;

    list_for_each_entry(cur, &dp_vs_svc_table[cid][bucket], s_list) {
        if (cur->af != af)
            continue;
        if (!inet_addr_equal(af, &cur->addr, vaddr))
            continue;
        if (cur->port != vport)
            continue;
        if (cur->proto != protocol)
            continue;

        return cur;
    }

    return NULL;
}
-
__dp_vs_service_match_find
- 遍历match链表,查找af、proto一致且match内容完全相同(memcmp比较)的service
/**
 * __dp_vs_service_match_find - find a match-type service by exact rule.
 * @af:    address family the service must have
 * @proto: transport protocol the service must have
 * @match: rule to compare against each service's rule
 * @cid:   lcore whose match list is searched
 *
 * A service is a hit when its af and proto equal the arguments and its
 * match rule is byte-for-byte identical to @match (memcmp over the whole
 * struct dp_vs_match).
 *
 * Return: the matching service, or NULL when @match is NULL or empty, or
 * when nothing matches.
 */
static struct dp_vs_service *
__dp_vs_service_match_find(int af, uint8_t proto,
                           const struct dp_vs_match *match, lcoreid_t cid)
{
    struct dp_vs_service *cur;

    if (match == NULL)
        return NULL;
    if (is_empty_match(match))
        return NULL;

    list_for_each_entry(cur, &dp_vs_svc_match_list[cid], m_list) {
        /* Every service on the match list is expected to carry a rule. */
        assert(cur->match);

        if (af != cur->af)
            continue;
        if (proto != cur->proto)
            continue;
        if (memcmp(match, cur->match, sizeof(struct dp_vs_match)) != 0)
            continue;

        return cur;
    }

    return NULL;
}
-
__dp_vs_service_match_get
- 查找match类型的service
- 根据mbuf中四元组信息和路由信息查找service
- 主要用于snat情况下
/**
 * __dp_vs_service_match_get - find a match-type service for a packet.
 * @af:      address family of the packet
 * @mbuf:    packet to classify
 * @outwall: passed to the IPv4 lookup (see __dp_vs_service_match_get4())
 * @cid:     lcore whose match list is searched
 *
 * Dispatches to the per-family implementation.
 *
 * Return: the matching service, or NULL (always NULL for a family other
 * than AF_INET / AF_INET6).
 */
static struct dp_vs_service *
__dp_vs_service_match_get(int af, const struct rte_mbuf *mbuf,
                          bool *outwall, lcoreid_t cid)
{
    if (af == AF_INET)
        return __dp_vs_service_match_get4(mbuf, outwall, cid);
    else if (af == AF_INET6)
        return __dp_vs_service_match_get6(mbuf, cid);
    else
        return NULL;
}

/**
 * __dp_vs_service_match_get4 - IPv4 match-service lookup for a packet.
 * @mbuf:    IPv4 packet to classify
 * @outwall: may be NULL; when non-NULL and the destination is resolved
 *           through the ipset + route_gfw_net_lookup() path, *outwall is
 *           set to true
 * @cid:     lcore whose match list is searched
 *
 * The egress port (oif) of the packet is determined first:
 *   - from the route cached in mbuf->userdata, if any;
 *   - else, when @outwall is given and the destination is in the ipset,
 *     from route_gfw_net_lookup();
 *   - else from a fresh route4_input() lookup.
 * Packets whose route carries RTF_KNI or RTF_LOCALIN never match.
 *
 * Then each service on the match list is tested against the packet's
 * protocol, source/destination address+port ranges, ingress device and
 * egress device.
 *
 * Return: the first matching service, or NULL.
 */
static struct dp_vs_service *
__dp_vs_service_match_get4(const struct rte_mbuf *mbuf, bool *outwall,
                           lcoreid_t cid)
{
    /* route cached on the mbuf, if any */
    struct route_entry *rt = mbuf->userdata;
    struct ipv4_hdr *iph = ip4_hdr(mbuf); /* ipv4 only */
    struct dp_vs_service *svc;
    union inet_addr saddr, daddr;
    __be16 _ports[2], *ports;
    portid_t oif = NETIF_PORT_ID_ALL;

    saddr.in.s_addr = iph->src_addr;
    daddr.in.s_addr = iph->dst_addr;

    /*
     * First four bytes after the IP header: ports[0] is matched against
     * the source range and ports[1] against the destination range below.
     */
    ports = mbuf_header_pointer(mbuf, ip4_hdrlen(mbuf), sizeof(_ports), _ports);
    if (!ports)
        return NULL;

    /* snat is handled at pre-routing to check if oif
     * is match perform route here. */
    if (rt) {
        if ((rt->flag & RTF_KNI) || (rt->flag & RTF_LOCALIN))
            return NULL;
        oif = rt->port->id;
    } else if (outwall != NULL &&
               (NULL != ipset_addr_lookup(AF_INET, &daddr)) &&
               (rt = route_gfw_net_lookup(&daddr.in))) {
        char dst[64];

        /* "\n", not "\\n": the log record must end with a real newline. */
        RTE_LOG(DEBUG, IPSET,
                "%s: IP %s is in the gfwip set, found route in the outwall table.\n",
                __func__,
                inet_ntop(AF_INET, &daddr, dst, sizeof(dst)) ? dst : "");
        /* egress port comes from the outwall route */
        oif = rt->port->id;
        route4_put(rt);
        *outwall = true;
    } else {
        /* look up the input route */
        rt = route4_input(mbuf, &daddr.in, &saddr.in,
                          iph->type_of_service,
                          netif_port_get(mbuf->port));
        if (!rt)
            return NULL;

        /* RTF_KNI / RTF_LOCALIN routes are excluded from matching */
        if ((rt->flag & RTF_KNI) || (rt->flag & RTF_LOCALIN)) {
            route4_put(rt);
            return NULL;
        }
        /* egress port comes from the input route */
        oif = rt->port->id;
        route4_put(rt);
    }

    list_for_each_entry(svc, &dp_vs_svc_match_list[cid], m_list) {
        struct dp_vs_match *m = svc->match;
        struct netif_port *idev, *odev;
        portid_t svc_oif;

        assert(m);

        /*
         * A rule without an output interface is compared against
         * NETIF_PORT_ID_ALL. This is kept in a per-service local: writing
         * it back into 'oif' would discard the routed egress port for all
         * remaining services and make rules with a specific oifname
         * unmatchable once a rule without one had been visited.
         */
        svc_oif = strlen(m->oifname) ? oif : NETIF_PORT_ID_ALL;

        idev = netif_port_get_by_name(m->iifname);
        odev = netif_port_get_by_name(m->oifname);

        if (svc->af == AF_INET && svc->proto == iph->next_proto_id &&
            __service_in_range(AF_INET, &saddr, ports[0], &m->srange) &&
            __service_in_range(AF_INET, &daddr, ports[1], &m->drange) &&
            (!idev || idev->id == mbuf->port) &&
            (!odev || odev->id == svc_oif)) {
            return svc;
        }
    }

    return NULL;
}