TCP new-connection scheduling

 

  • tcp_conn_sched

    /**
      * set @verdict if failed to schedule
      * New connections are dispatched via conn_sched; for TCP services that
      * is tcp_conn_sched, invoked from __dp_vs_in.
      * Return values:
      * EDPVS_OK: continue LVS processing
      * anything else: stop LVS processing and use the verdict set here as
      * the hook's return value
      */
    static int tcp_conn_sched(struct dp_vs_proto *proto,
                              const struct dp_vs_iphdr *iph,
                              struct rte_mbuf *mbuf,
                              struct dp_vs_conn **conn,
                              int *verdict)
    {
        struct tcphdr *       th, _tcph;
        struct dp_vs_service *svc;
        bool outwall = false;
    
        // sanity-check arguments
        assert(proto && iph && mbuf && conn && verdict);
        // locate the TCP header; pointer arithmetic only, no data copy
        th = mbuf_header_pointer(mbuf, iph->len, sizeof(_tcph), &_tcph);
        // if the TCP header cannot be read (typically a bad packet length), drop the packet
        if (unlikely(!th))
        {
            *verdict = INET_DROP;
            return(EDPVS_INVPKT);
        }
    
        /* Syn-proxy step 2 logic: receive client's 3-way handshake ACK packet */
    
        /* When synproxy is disabled, only SYN packets can arrive here,
         * so don't judge the SYNPROXY flag here! If the SYNPROXY flag were
         * judged, and syn_proxy got disabled and keepalived reloaded, SYN
         * packets for the RS might never be sent. */
        // if this is the third-handshake ACK of a syn-cookie setup, it returns 0 and we return EDPVS_PKTSTOLEN
        if (dp_vs_synproxy_ack_rcv(iph->af, mbuf, th, proto, conn, iph, verdict) == 0)
        {
            /* Attention: First ACK packet is also stored in conn->ack_mbuf */
            return(EDPVS_PKTSTOLEN);
        }
    
        /* only TCP-SYN without other flag can be scheduled */
        // only a pure SYN may create a new connection; anything else falls into this branch
        if (!th->syn || th->ack || th->fin || th->rst)
        {
    #ifdef CONFIG_DPVS_IPVS_DEBUG
            char        dbuf[64], sbuf[64];
            const char *daddr, *saddr;
    
            daddr = inet_ntop(iph->af, &iph->daddr, dbuf, sizeof(dbuf)) ? dbuf : "::";
            saddr = inet_ntop(iph->af, &iph->saddr, sbuf, sizeof(sbuf)) ? sbuf : "::";
            RTE_LOG(DEBUG, IPVS,
                    "%s: [%d] try sched non-SYN packet: [%c%c%c%c] %s/%d->%s/%d\\n",
                    __func__, rte_lcore_id(),
                    th->syn ? 'S' : '.', th->fin ? 'F' : '.',
                    th->ack ? 'A' : '.', th->rst ? 'R' : '.',
                    saddr, ntohs(th->source), daddr, ntohs(th->dest));
    #endif
    
            /* Drop TCP packets sent to the VIP but not to a vport */
            // defensive drop: the destination is a known VIP but no service matched
            if (g_defence_tcp_drop &&
                (svc = dp_vs_vip_lookup(iph->af, iph->proto,
                                        &iph->daddr, rte_lcore_id())))
            {
                dp_vs_estats_inc(DEFENCE_TCP_DROP);
                *verdict = INET_DROP;
                return(EDPVS_INVPKT);
            }
            // otherwise accept the non-SYN packet: set verdict to INET_ACCEPT and return EDPVS_INVAL
            *verdict = INET_ACCEPT;
            return(EDPVS_INVAL);
        }
        // look up the dp_vs_service by destination address and port
        svc = dp_vs_service_lookup(iph->af, iph->proto, &iph->daddr, th->dest,
                                   0, mbuf, NULL, &outwall, rte_lcore_id());
        if (!svc)
        {
            /* Drop TCP packets sent to the VIP but not to a vport */
            // defensive drop: the destination is a known VIP but no service matched the port
            if (g_defence_tcp_drop &&
                (svc = dp_vs_vip_lookup(iph->af, iph->proto,
                                        &iph->daddr, rte_lcore_id())))
            {
                dp_vs_estats_inc(DEFENCE_TCP_DROP);
                *verdict = INET_DROP;
                return(EDPVS_INVPKT);
            }
            *verdict = INET_ACCEPT;
            return(EDPVS_NOSERV);
        }
        // use the service's scheduler to pick a backend real server and build the connection
        *conn = dp_vs_schedule(svc, iph, mbuf, false, outwall);
        if (!*conn)
        {
            *verdict = INET_DROP;
            return(EDPVS_RESOURCE);
        }
    
        return(EDPVS_OK);
    }
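
    To make the return-value contract concrete, here is a hedged caller-side
    sketch (hypothetical code, not the actual __dp_vs_in body; only the
    EDPVS_*/INET_* semantics are taken from the function above):

        /* hypothetical caller: any non-EDPVS_OK result ends IPVS processing,
         * and the verdict written by the callee becomes the hook result */
        int verdict = INET_ACCEPT;
        struct dp_vs_conn *conn = NULL;
        int err = proto->conn_sched(proto, &iph, mbuf, &conn, &verdict);
        if (err != EDPVS_OK || !conn)
            return verdict;   /* INET_DROP, INET_ACCEPT, or packet stolen */
        /* err == EDPVS_OK: continue LVS processing with conn */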
    

 

  • dp_vs_schedule

    /**
      * select an RS by service's scheduler and create a connection
      * dp_vs_schedule: backend scheduling for a new connection, i.e. picking
      * a real server
      */
    struct dp_vs_conn *dp_vs_schedule(struct dp_vs_service *svc,
                                      const struct dp_vs_iphdr *iph,
                                      struct rte_mbuf *mbuf,
                                      bool is_synproxy_on,
                                      bool outwall)
    {
        uint16_t                _ports[2], *ports; /* sport, dport */
        struct dp_vs_dest *     dest;
        struct dp_vs_conn *     conn;
        struct dp_vs_conn_param param;
    
        assert(svc && iph && mbuf);
        // ports points at (sport, dport) inside the mbuf; nothing is copied
        ports = mbuf_header_pointer(mbuf, iph->len, sizeof(_ports), _ports);
        if (!ports)
        {
            return(NULL);
        }
    
        /* persistent service */
        // persistent (session-affinity) services take a dedicated path
        if (svc->flags & DP_VS_SVC_F_PERSISTENT)
        {
            return(dp_vs_sched_persist(svc, iph, mbuf, is_synproxy_on));
        }
        // pick a real server with the service's algorithm (commonly rr, wrr, wlc; analyzed separately); the returned dest is the backend RS
        dest = svc->scheduler->schedule(svc, mbuf);
        if (!dest)
        {
            RTE_LOG(WARNING, IPVS, "%s: no dest found.\n", __func__);
    #ifdef CONFIG_DPVS_MBUF_DEBUG
            dp_vs_mbuf_dump("found dest failed.", iph->af, mbuf);
    #endif
            return(NULL);
        }
        // SNAT gets its own scheduling path
        if (dest->fwdmode == DPVS_FWD_MODE_SNAT)
        {
            return(dp_vs_snat_schedule(dest, iph, ports, mbuf, outwall));
        }
        // ICMP: derive pseudo ports from the ICMP id and type/code
        if (unlikely(iph->proto == IPPROTO_ICMP))
        {
            struct icmphdr *ich, _icmph;
            ich = mbuf_header_pointer(mbuf, iph->len, sizeof(_icmph), &_icmph);
            if (!ich)
            {
                return(NULL);
            }
    
            ports     = _ports;
            _ports[0] = icmp4_id(ich);
            _ports[1] = ich->type << 8 | ich->code;
    
            dp_vs_conn_fill_param(iph->af, iph->proto,
                                  &iph->saddr, &iph->daddr,
                                  ports[0], ports[1], 0, &param);
        }
        else if (unlikely(iph->proto == IPPROTO_ICMPV6))
        {
            struct icmp6_hdr *ic6h, _ic6hp;
            ic6h = mbuf_header_pointer(mbuf, iph->len, sizeof(_ic6hp), &_ic6hp);
            if (!ic6h)
            {
                return(NULL);
            }
    
            ports     = _ports;
            _ports[0] = icmp6h_id(ic6h);
            _ports[1] = ic6h->icmp6_type << 8 | ic6h->icmp6_code;
    
            dp_vs_conn_fill_param(iph->af, iph->proto,
                                  &iph->daddr, &dest->addr,
                                  ports[1], ports[0],
                                  0, &param);
        }
        else
        {
            // fill in proto, caddr/vaddr and cport/vport for the new connection
            dp_vs_conn_fill_param(iph->af, iph->proto,
                                  &iph->saddr, &iph->daddr,
                                  ports[0], ports[1], 0, &param);
        }
        // create the proxy connection from the params and the chosen dest
        conn = dp_vs_conn_new(mbuf, iph, &param, dest,
                              is_synproxy_on ? DPVS_CONN_F_SYNPROXY : 0);
        if (!conn)
        {
            return(NULL);
        }
    
        dp_vs_stats_conn(conn);
        return(conn);
    }
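
    A quick worked example of the ICMP "pseudo ports" built above, as a
    standalone snippet independent of DPVS: an echo request carries type 8
    and code 0, so the type/code slot becomes 0x0800.

        #include <stdio.h>
        #include <stdint.h>

        int main(void)
        {
            uint16_t id   = 0x1234;                 /* ICMP identifier */
            uint16_t type = 8, code = 0;            /* echo request */

            uint16_t sport = id;                            /* _ports[0] */
            uint16_t dport = (uint16_t)(type << 8 | code);  /* _ports[1] */

            /* prints: sport=0x1234 dport=0x0800 */
            printf("sport=0x%04x dport=0x%04x\n", sport, dport);
            return 0;
        }

    With the identifier and type/code standing in for ports, ICMP flows can
    reuse the same 5-tuple connection lookup as TCP/UDP.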
    

 

  • dp_vs_conn memory pool

    /*
     * memory pool for dp_vs_conn{}
     */
     // dp_vs_conn mempools, one per NUMA socket
    static struct rte_mempool *dp_vs_conn_cache[DPVS_MAX_SOCKET];
    #define this_conn_count    (RTE_PER_LCORE(dp_vs_conn_count))
    #define this_conn_cache    (dp_vs_conn_cache[rte_socket_id()])
    // per-lcore connection hash table
    #define this_conn_tbl      (RTE_PER_LCORE(dp_vs_conn_tbl))
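
    The backing definitions live elsewhere in conn.c; a minimal sketch of the
    per-lcore pattern these macros rely on (assuming DPDK's standard per-lcore
    API) looks like this:

        /* one instance per lcore; each worker only ever touches its own copy
         * via RTE_PER_LCORE(), so the fast path needs no lock */
        static RTE_DEFINE_PER_LCORE(struct list_head *, dp_vs_conn_tbl);
        static RTE_DEFINE_PER_LCORE(uint32_t, dp_vs_conn_count);

    The conn cache, by contrast, is per NUMA socket rather than per lcore: all
    lcores on one socket share a mempool, selected through rte_socket_id().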
    
  • dp_vs_conn_init

    • initialize this_conn_tbl, the per-lcore hash table used for conn lookup
    • initialize the conn mempools, a per-socket array
    int dp_vs_conn_init(void)
    {
        int       i, err;
        lcoreid_t lcore;
        char      poolname[32];
    
        /* init connection template table */
        // template-connection lookup table, used by persistent scheduling
        dp_vs_ct_tbl = rte_malloc_socket(NULL, sizeof(struct list_head) * DPVS_CONN_TBL_SIZE,
                                         RTE_CACHE_LINE_SIZE, rte_socket_id());
    
        for (i = 0; i < DPVS_CONN_TBL_SIZE; i++)
        {
            INIT_LIST_HEAD(&dp_vs_ct_tbl[i]);
        }
        rte_spinlock_init(&dp_vs_ct_lock);
    
        /*
         * unlike linux per_cpu() which can address any CPU's instance,
         * RTE_PER_LCORE() can only access the caller's own instance,
         * which makes the code look a bit strange.
         */
        // mainly initializes this_conn_tbl, the per-lcore hash table for conn lookup
        rte_eal_mp_remote_launch(conn_init_lcore, NULL, SKIP_MASTER);
        RTE_LCORE_FOREACH_SLAVE(lcore)
        {
            if ((err = rte_eal_wait_lcore(lcore)) < 0)
            {
                RTE_LOG(WARNING, IPVS, "%s: lcore %d: %s.\n",
                        __func__, lcore, dpvs_strerror(err));
            }
        }
    
        conn_ctrl_init();
    
        /* connection cache on each NUMA socket */
        // initialize the conn mempools, one per NUMA socket
        for (i = 0; i < get_numa_nodes(); i++)
        {
            snprintf(poolname, sizeof(poolname), "dp_vs_conn_%d", i);
            dp_vs_conn_cache[i] = rte_mempool_create(poolname,
                                                     conn_pool_size,
                                                     sizeof(struct dp_vs_conn),
                                                     conn_pool_cache,
                                                     0, NULL, NULL, NULL, NULL,
                                                     i, 0);
            if (!dp_vs_conn_cache[i])
            {
                err = EDPVS_NOMEM;
                goto cleanup;
            }
        }
    
        dp_vs_conn_rnd = (uint32_t)random();
    
        return(EDPVS_OK);
    
    cleanup:
        dp_vs_conn_term();
        return(err);
    }
    
    static int conn_init_lcore(void *arg)
    {
        int i;
    
        if (!rte_lcore_is_enabled(rte_lcore_id()))
        {
            return(EDPVS_DISABLED);
        }
    
        if (netif_lcore_is_idle(rte_lcore_id()))
        {
            return(EDPVS_IDLE);
        }
        // allocate the bucket heads of the per-lcore conn lookup table
        this_conn_tbl = rte_malloc_socket(NULL,
                                          sizeof(struct list_head) * DPVS_CONN_TBL_SIZE,
                                          RTE_CACHE_LINE_SIZE, rte_socket_id());
        if (!this_conn_tbl)
        {
            return(EDPVS_NOMEM);
        }
    
        for (i = 0; i < DPVS_CONN_TBL_SIZE; i++)
        {
            INIT_LIST_HEAD(&this_conn_tbl[i]);
        }
    
    #ifdef CONFIG_DPVS_IPVS_CONN_LOCK
        rte_spinlock_init(&this_conn_lock);
    #endif
        this_conn_count = 0;
    
        return(EDPVS_OK);
    }
    
  • dp_vs_conn_alloc

    static struct dp_vs_conn *dp_vs_conn_alloc(enum dpvs_fwd_mode fwdmode,
                                               uint32_t flags)
    {
        struct dp_vs_conn *    conn;
        struct dp_vs_redirect *r = NULL;
        // grab a conn from the mempool of the current NUMA socket
        if (unlikely(rte_mempool_get(this_conn_cache, (void **)&conn) != 0))
        {
            RTE_LOG(ERR, IPVS, "%s: no memory for connection\n", __func__);
            return(NULL);
        }
        // zero it and record its connpool, so it can be returned to the right pool on free
        memset(conn, 0, sizeof(struct dp_vs_conn));
        conn->connpool = this_conn_cache;
        this_conn_count++;
    
        /* no need to create redirect for the global template connection */
        // allocate a dp_vs_redirect depending on flags, used mainly in FNAT, SNAT and NAT modes
        if (likely((flags & DPVS_CONN_F_TEMPLATE) == 0))
        {
            r = dp_vs_redirect_alloc(fwdmode);
        }
    
        conn->redirect = r;
    
        return(conn);
    }
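
    For symmetry, a hedged sketch of the release path (simplified; the real
    cleanup is dp_vs_conn_free): the connpool pointer recorded at allocation
    time is what lets a conn return to the mempool of the NUMA socket it came
    from, even if it is freed on a different lcore.

        static void conn_free_sketch(struct dp_vs_conn *conn)
        {
            /* hand the object back to the pool it was taken from */
            rte_mempool_put(conn->connpool, conn);
            this_conn_count--;
        }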
    
  • dp_vs_conn_new

    /**
     * Create a new connection.
     * mbuf: the received packet
     * iph: IP-layer data: header length, source/destination addresses
     * param: parameters for the new connection, mainly the TCP 4-tuple
     * dest: the real server chosen through the dp_vs_service
     */
    struct dp_vs_conn *dp_vs_conn_new(struct rte_mbuf *mbuf,
                                      const struct dp_vs_iphdr *iph,
                                      struct dp_vs_conn_param *param,
                                      struct dp_vs_dest *dest,
                                      uint32_t flags)
    {
        struct dp_vs_conn *     new;
        struct conn_tuple_hash *t;
        uint16_t rport;
        __be16   _ports[2], *ports;
        int      err;
    
        assert(mbuf && param && dest);
        // allocate the dp_vs_conn from the mempool
        new = dp_vs_conn_alloc(dest->fwdmode, flags);
        if (unlikely(!new))
        {
            return(NULL);
        }
        // record the connection flags
        new->flags = flags;
    
        /* set proper RS port */
        if (dp_vs_conn_is_template(new) || param->ct_dport != 0)
        {
            rport = param->ct_dport;
        }
        else if (dest->fwdmode == DPVS_FWD_MODE_SNAT)
        {
            // for ICMP traffic, rport comes from param (set earlier from the ICMP payload data)
            if (unlikely(param->proto == IPPROTO_ICMP ||
                         param->proto == IPPROTO_ICMPV6))
            {
                rport = param->vport;
            }
            else
            {
                // otherwise read rport from the mbuf
                ports = mbuf_header_pointer(mbuf, iph->len, sizeof(_ports), _ports);
                if (unlikely(!ports))
                {
                    RTE_LOG(WARNING, IPVS, "%s: no memory\n", __func__);
                    goto errout;
                }
                // rport is the source port: in SNAT mode the flow is internal server ---> external server (e.g. www.baidu.com)
                rport = ports[0];
            }
        }
        else
        {
            // rport is the service port of the selected real server
            rport = dest->port;
        }
        // a conn holds a tuplehash array of two entries, one per direction;
        // source and destination mean different things in each direction
        /* init inbound conn tuple hash */
        // t points at the DPVS_CONN_DIR_INBOUND entry of the tuplehash,
        // i.e. external host (e.g. baidu.com) -> DPVS -> internal server
        t         = &tuplehash_in(new);
        t->direct = DPVS_CONN_DIR_INBOUND;
        t->af     = param->af;
        t->proto  = param->proto;
        // source is the client address
        t->saddr = *param->caddr;
        t->sport = param->cport;
        // destination is the service VIP
        t->daddr = *param->vaddr;
        t->dport = param->vport;
        INIT_LIST_HEAD(&t->list);
    
        /* init outbound conn tuple hash */
        // t points at the DPVS_CONN_DIR_OUTBOUND entry of the tuplehash,
        // i.e. internal server -> DPVS -> external host (e.g. baidu.com)
        t         = &tuplehash_out(new);
        t->direct = DPVS_CONN_DIR_OUTBOUND;
        t->af     = dest->af;
        t->proto  = param->proto;
        if (dest->fwdmode == DPVS_FWD_MODE_SNAT)
        {
            // in SNAT mode (internal servers acting as clients toward external hosts), saddr is the source address taken from the mbuf
            t->saddr = iph->saddr;
        }
        else
        {
            // otherwise use the real server's address
            t->saddr = dest->addr;
        }
        // source port is the rport computed above, analogous to saddr
        t->sport = rport;
        t->daddr = *param->caddr;       /* non-FNAT */
        t->dport = param->cport;        /* non-FNAT */
        INIT_LIST_HEAD(&t->list);
    
        /* init connection */
        // fill in the conn's address family, addresses and ports
        new->af    = param->af;
        new->proto = param->proto;
        new->caddr = *param->caddr;
        new->cport = param->cport;
        new->vaddr = *param->vaddr;
        new->vport = param->vport;
        new->laddr = *param->caddr;     /* non-FNAT */
        new->lport = param->cport;      /* non-FNAT */
        if (dest->fwdmode == DPVS_FWD_MODE_SNAT)
        {
            new->daddr = iph->saddr;
        }
        else
        {
            new->daddr = dest->addr;
        }
        new->dport   = rport;
        new->outwall = param->outwall;
    
        /* neighbour confirm cache */
        if (AF_INET == tuplehash_in(new).af)
        {
            new->in_nexthop.in.s_addr = htonl(INADDR_ANY);
        }
        else
        {
            new->in_nexthop.in6 = in6addr_any;
        }
    
        if (AF_INET == tuplehash_out(new).af)
        {
            new->out_nexthop.in.s_addr = htonl(INADDR_ANY);
        }
        else
        {
            new->out_nexthop.in6 = in6addr_any;
        }
    
        new->in_dev  = NULL;
        new->out_dev = NULL;
    
        /* Control member */
        new->control = NULL;
        rte_atomic32_clear(&new->n_control);
    
        /* caller will use it right after created,
         * just like dp_vs_conn_get(). */
        rte_atomic32_set(&new->refcnt, 1);
        new->state = 0;
    #ifdef CONFIG_DPVS_IPVS_STATS_DEBUG
        new->ctime = rte_rdtsc();
    #endif
    
        /* bind destination and corresponding transmitter */
        // bind the conn to the chosen real server (dest), installing the xmit handlers for its forwarding mode
        err = dp_vs_conn_bind_dest(new, dest);
        if (err != EDPVS_OK)
        {
            RTE_LOG(WARNING, IPVS, "%s: fail to bind dest: %s\n",
                    __func__, dpvs_strerror(err));
            goto errout;
        }
    
        /* FNAT only: select and bind local address/port */
        // FNAT-specific handling
        if (dest->fwdmode == DPVS_FWD_MODE_FNAT)
        {
            if ((err = dp_vs_laddr_bind(new, dest->svc)) != EDPVS_OK)
            {
                goto unbind_dest;
            }
        }
    
        /* init redirect if it exists */
        // initialize the conn's dp_vs_redirect: in NAT mode the two directions of a
        // connection may arrive on different NIC queues and thus be handled on
        // different lcores; the redirect entry resolves that back to one conn
        dp_vs_redirect_init(new);
    
        /* add to hash table (dual dir for each bucket) */
        // conn_hash adds the conn to this_conn_tbl; both tuplehash directions go into the table so either direction can be looked up
        if ((err = dp_vs_conn_hash(new)) != EDPVS_OK)
        {
            goto unbind_laddr;
        }
    
        /* timer */
        // default timeout
        new->timeout.tv_sec  = conn_init_timeout;
        new->timeout.tv_usec = 0;
    
        /* synproxy */
        INIT_LIST_HEAD(&new->ack_mbuf);
        // reset syn_retry_max to 0
        rte_atomic32_set(&new->syn_retry_max, 0);
        // reset dup_ack_cnt to 0
        rte_atomic32_set(&new->dup_ack_cnt, 0);
        // synproxy handling
        if ((flags & DPVS_CONN_F_SYNPROXY) && !dp_vs_conn_is_template(new))
        {
            struct tcphdr _tcph, *th = NULL;
            struct dp_vs_synproxy_ack_pakcet *ack_mbuf;
            struct dp_vs_proto *pp;
    
            th = mbuf_header_pointer(mbuf, iph->len, sizeof(_tcph), &_tcph);
            if (!th)
            {
                RTE_LOG(ERR, IPVS, "%s: get tcphdr failed\n", __func__);
                goto unbind_laddr;
            }
    
            /* save ack packet */
            if (unlikely(rte_mempool_get(this_ack_mbufpool, (void **)&ack_mbuf) != 0))
            {
                RTE_LOG(ERR, IPVS, "%s: no memory\n", __func__);
                goto unbind_laddr;
            }
            // append the mbuf to the conn's ack_mbuf list
            ack_mbuf->mbuf = mbuf;
            list_add_tail(&ack_mbuf->list, &new->ack_mbuf);
            new->ack_num++;
            sp_dbg_stats32_inc(sp_ack_saved);
    
            /* save ack_seq - 1 */
            // save the client ACK's ack_seq - 1 into syn_proxy_seq.isn, and ack_seq itself into fnat_seq below
            new->syn_proxy_seq.isn =
                htonl((uint32_t)((ntohl(th->ack_seq) - 1)));
    
            /* save ack_seq */
            new->fnat_seq.fdata_seq = ntohl(th->ack_seq);
    
            /* FIXME: use DP_VS_TCP_S_SYN_SENT for syn */
            pp = dp_vs_proto_lookup(param->proto);
            new->timeout.tv_sec = pp->timeout_table[new->state = DPVS_TCP_S_SYN_SENT];
        }
    
        /* schedule conn timer */
        dpvs_time_rand_delay(&new->timeout, 1000000);
        // finally attach the conn to a timer that manages its timeout; TCP timeouts
        // differ per state (the timer subsystem is analyzed separately);
        // the expiry handler is dp_vs_conn_expire
        dp_vs_conn_attach_timer(new, true);
    
    #ifdef CONFIG_DPVS_IPVS_DEBUG
        conn_dump("new conn: ", new);
    #endif
        return(new);
    
    unbind_laddr:
        dp_vs_laddr_unbind(new);
    unbind_dest:
        dp_vs_conn_unbind_dest(new);
    errout:
        dp_vs_conn_free(new);
        return(NULL);
    }
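
    A concrete illustration of the two tuplehash directions (addresses are
    made up; NAT mode, so the non-FNAT branches above apply): with client
    1.2.3.4:5566, VIP 10.0.0.1:80 and RS 192.168.1.10:8080, the conn carries

        /* tuplehash_in  (DPVS_CONN_DIR_INBOUND):
         *     saddr = 1.2.3.4        sport = 5566   (caddr:cport)
         *     daddr = 10.0.0.1       dport = 80     (vaddr:vport)
         * tuplehash_out (DPVS_CONN_DIR_OUTBOUND):
         *     saddr = 192.168.1.10   sport = 8080   (dest->addr:rport)
         *     daddr = 1.2.3.4        dport = 5566   (caddr:cport)
         */

    so a packet from either direction hashes to a bucket holding one of the
    two entries, and both entries resolve to the same dp_vs_conn.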
    
  • dp_vs_conn_bind_dest

    • binds the conn to its associated dest
    • installs the conn's xmit handlers
    static int dp_vs_conn_bind_dest(struct dp_vs_conn *conn,
                                    struct dp_vs_dest *dest)
    {
        /* ATTENTION:
         *   The initial state of a conn should be INACTIVE, with
         *   conn->inactconns = 1 and conn->actconns = 0. We should not increase
         *   conn->actconns except in session sync. Generally, the INACTIVE and
         *   SYN_PROXY flags are passed down from the dest here. */
        conn->flags |= rte_atomic16_read(&dest->conn_flags);
    
        if (dest->max_conn &&
            (rte_atomic32_read(&dest->inactconns) +
             rte_atomic32_read(&dest->actconns) >= dest->max_conn))
        {
            dest->flags |= DPVS_DEST_F_OVERLOAD;
            return(EDPVS_OVERLOAD);
        }
        // take a reference on the dest
        rte_atomic32_inc(&dest->refcnt);
    
        if (dp_vs_conn_is_template(conn))
        {
            rte_atomic32_inc(&dest->persistconns);
        }
        else
        {
            rte_atomic32_inc(&dest->inactconns);
        }
        // install the per-mode xmit handlers: NAT-family traffic passes through the LB in
        // both directions, while DR/TUNNEL are one-armed modes with inbound traffic only
        switch (dest->fwdmode)
        {
        case DPVS_FWD_MODE_NAT:
            conn->packet_xmit     = dp_vs_xmit_nat;
            conn->packet_out_xmit = dp_vs_out_xmit_nat;
            break;
    
        case DPVS_FWD_MODE_TUNNEL:
            conn->packet_xmit = dp_vs_xmit_tunnel;
            break;
    
        case DPVS_FWD_MODE_DR:
            conn->packet_xmit = dp_vs_xmit_dr;
            break;
    
        case DPVS_FWD_MODE_FNAT:
            conn->packet_xmit     = dp_vs_xmit_fnat;
            conn->packet_out_xmit = dp_vs_out_xmit_fnat;
            break;
    
        case DPVS_FWD_MODE_SNAT:
            conn->packet_xmit     = dp_vs_xmit_snat;
            conn->packet_out_xmit = dp_vs_out_xmit_snat;
            break;
    
        default:
            return(EDPVS_NOTSUPP);
        }
    
        conn->dest = dest;
        return(EDPVS_OK);
    }
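
    The handlers installed here form a per-connection dispatch table. A hedged
    sketch of how the forwarding path might use them (hypothetical helper; the
    callback arguments are assumed from the dp_vs_xmit_* assignments above):

        static int xmit_sketch(struct dp_vs_proto *prot,
                               struct dp_vs_conn *conn,
                               struct rte_mbuf *mbuf, int dir)
        {
            if (dir == DPVS_CONN_DIR_INBOUND)
                return conn->packet_xmit(prot, conn, mbuf);

            /* DR/TUNNEL are one-armed: replies bypass the LB entirely,
             * so packet_out_xmit was never set for those modes */
            if (conn->packet_out_xmit)
                return conn->packet_out_xmit(prot, conn, mbuf);

            return EDPVS_NOTSUPP;
        }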
    
  • dp_vs_conn_hash

    static inline int dp_vs_conn_hash(struct dp_vs_conn *conn)
    {
        int err;
    
    #ifdef CONFIG_DPVS_IPVS_CONN_LOCK
        rte_spinlock_lock(&this_conn_lock);
    #endif
        // insert the conn into the hash table for lookup; both the in and out tuplehash entries are added
        err = __dp_vs_conn_hash(conn, DPVS_CONN_TBL_MASK);
    
    #ifdef CONFIG_DPVS_IPVS_CONN_LOCK
        rte_spinlock_unlock(&this_conn_lock);
    #endif
        // also insert the conn into the redirect hash
        dp_vs_redirect_hash(conn);
    
        return(err);
    }
    
    static inline int __dp_vs_conn_hash(struct dp_vs_conn *conn, uint32_t mask)
    {
        uint32_t ihash, ohash;
    
        if (unlikely(conn->flags & DPVS_CONN_F_HASHED))
        {
            return(EDPVS_EXIST);
        }
        // compute the tuplehash keys for both directions
        ihash = dp_vs_conn_hashkey(tuplehash_in(conn).af,
                                   &tuplehash_in(conn).saddr, tuplehash_in(conn).sport,
                                   &tuplehash_in(conn).daddr, tuplehash_in(conn).dport,
                                   mask);
    
        ohash = dp_vs_conn_hashkey(tuplehash_out(conn).af,
                                   &tuplehash_out(conn).saddr, tuplehash_out(conn).sport,
                                   &tuplehash_out(conn).daddr, tuplehash_out(conn).dport,
                                   mask);
        // template conns go into the global dp_vs_ct_tbl; regular conns into the per-lcore this_conn_tbl
        if (dp_vs_conn_is_template(conn))
        {
            /* lock is compulsory for templates */
            rte_spinlock_lock(&dp_vs_ct_lock);
            list_add(&tuplehash_in(conn).list, &dp_vs_ct_tbl[ihash]);
            list_add(&tuplehash_out(conn).list, &dp_vs_ct_tbl[ohash]);
            rte_spinlock_unlock(&dp_vs_ct_lock);
        }
        else
        {
            list_add(&tuplehash_in(conn).list, &this_conn_tbl[ihash]);
            list_add(&tuplehash_out(conn).list, &this_conn_tbl[ohash]);
        }
    
        conn->flags |= DPVS_CONN_F_HASHED;
        rte_atomic32_inc(&conn->refcnt);
    
        return(EDPVS_OK);
    }
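
    dp_vs_conn_hashkey itself lives elsewhere in conn.c; a minimal IPv4-only
    sketch of the same idea (assuming DPDK's rte_jhash and the dp_vs_conn_rnd
    seed initialized in dp_vs_conn_init) could look like:

        #include <rte_jhash.h>

        /* mix both addresses and both ports into one masked bucket index;
         * the random seed keeps bucket placement unpredictable to peers */
        static inline uint32_t
        conn_hashkey_v4_sketch(uint32_t saddr, uint16_t sport,
                               uint32_t daddr, uint16_t dport, uint32_t mask)
        {
            return rte_jhash_3words(saddr, daddr,
                                    ((uint32_t)sport << 16) | dport,
                                    dp_vs_conn_rnd) & mask;
        }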
    
  • dp_vs_conn_attach_timer

    static void dp_vs_conn_attach_timer(struct dp_vs_conn *conn, bool lock)
    {
        int rc;
        // if conn->timer is already armed, return immediately
        if (dp_vs_conn_is_in_timer(conn))
        {
            return;
        }
        // template conns go onto the global timer; others onto the per-lcore timer
        if (dp_vs_conn_is_template(conn))
        {
            if (lock)
            {
                rc = dpvs_timer_sched(&conn->timer, &conn->timeout,
                                      dp_vs_conn_expire, conn, true);
            }
            else
            {
                rc = dpvs_timer_sched_nolock(&conn->timer, &conn->timeout,
                                             dp_vs_conn_expire, conn, true);
            }
        }
        else
        {
            if (lock)
            {
                rc = dpvs_timer_sched(&conn->timer, &conn->timeout,
                                      dp_vs_conn_expire, conn, false);
            }
            else
            {
                rc = dpvs_timer_sched_nolock(&conn->timer, &conn->timeout,
                                             dp_vs_conn_expire, conn, false);
            }
        }
    
        if (rc == EDPVS_OK)
        {
            // mark conn->timer as armed
            dp_vs_conn_set_in_timer(conn);
        }
    }
    