burst tx 功能 开启_dpdk tx_pkt_burst rte_pktmbuf_free mbuf释放

[root@localhost ixgbe]# grep tx_pkt_burst -rn *ixgbe_ethdev.c:1102: eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;

ixgbe_ethdev.c:1582: eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts;

ixgbe_ethdev.c:2989: dev->tx_pkt_burst =NULL;

ixgbe_ethdev.c:5360: dev->tx_pkt_burst =NULL;

ixgbe_rxtx.c:2400: dev->tx_pkt_burst =ixgbe_xmit_pkts_vec;

ixgbe_rxtx.c:2403: dev->tx_pkt_burst =ixgbe_xmit_pkts_simple;

ixgbe_rxtx.c:2413: dev->tx_pkt_burst =ixgbe_xmit_pkts;

ixgbe_vf_representor.c:206: ethdev->tx_pkt_burst = ixgbe_vf_representor_tx_burst

(gdb) bt

#0 hinic_xmit_pkts (tx_queue=0x13e7e7000, tx_pkts=0xffffbd40ce00, nb_pkts=1)

at/data1/dpdk-19.11/drivers/net/hinic/hinic_pmd_tx.c:1066#1 0x0000000000465b18 in rte_eth_tx_burst (port_id=0, queue_id=0, tx_pkts=0xffffbd40ce00, nb_pkts=1)

at/data1/dpdk-19.11/arm64-armv8a-linuxapp-gcc/include/rte_ethdev.h:4666#2 0x00000000004666bc in reply_to_icmp_echo_rqsts () at /data1/dpdk-19.11/demo/dpdk-pingpong/main.c:695#3 0x00000000004667e8 in server_loop () at /data1/dpdk-19.11/demo/dpdk-pingpong/main.c:735#4 0x0000000000466820 in pong_launch_one_lcore (dummy=0x0) at /data1/dpdk-19.11/demo/dpdk-pingpong/main.c:742#5 0x0000000000593538 in eal_thread_loop (arg=0x0)

at/data1/dpdk-19.11/lib/librte_eal/linux/eal/eal_thread.c:153#6 0x0000ffffbe617d38 in start_thread (arg=0xffffbd40d910) at pthread_create.c:309#7 0x0000ffffbe55f5f0 in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:91(gdb) s1071 struct hinic_txq *txq =tx_queue;

(gdb) n1077 if (HINIC_GET_SQ_FREE_WQEBBS(txq) < txq->tx_free_thresh)

(gdb) n1081 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {

(gdb) n1082 mbuf_pkt = *tx_pkts++;

(gdb) n1083 queue_info = 0;

(gdb) n1086 if (unlikely(!hinic_get_sge_txoff_info(mbuf_pkt,

(gdb) n1093 wqe_wqebb_cnt =HINIC_SQ_WQEBB_CNT(sqe_info.sge_cnt);

(gdb) n1094 free_wqebb_cnt =HINIC_GET_SQ_FREE_WQEBBS(txq);

(gdb) n1095 if (unlikely(wqe_wqebb_cnt >free_wqebb_cnt)) {

(gdb) n1108 sq_wqe = hinic_get_sq_wqe(txq, wqe_wqebb_cnt, &sqe_info);

(gdb) n1111 if (unlikely(!hinic_mbuf_dma_map_sge(txq, mbuf_pkt,

(gdb) n1121 task = &sq_wqe->task;

(gdb) n1124 hinic_fill_tx_offload_info(mbuf_pkt, task, &queue_info,

(gdb) n1128 tx_info = &txq->tx_info[sqe_info.pi];

(gdb) n1129 tx_info->mbuf =mbuf_pkt;

(gdb) n1130 tx_info->wqebb_cnt =wqe_wqebb_cnt;

(gdb) n1133 hinic_fill_sq_wqe_header(&sq_wqe->ctrl, queue_info,

(gdb) n1134sqe_info.sge_cnt, sqe_info.owner);

(gdb) n1133 hinic_fill_sq_wqe_header(&sq_wqe->ctrl, queue_info,

(gdb) n1137hinic_sq_wqe_cpu_to_be32(sq_wqe, sqe_info.seq_wqebbs);

(gdb) n1139 tx_bytes += mbuf_pkt->pkt_len;

(gdb) n1081 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {

(gdb) n1143 if(nb_tx) {

(gdb) n1144 hinic_sq_write_db(txq->sq, txq->cos);

(gdb) n1146 txq->txq_stats.packets +=nb_tx;

(gdb) n1147 txq->txq_stats.bytes +=tx_bytes;

(gdb) n1149 txq->txq_stats.burst_pkts =nb_tx;

(gdb) n1151 returnnb_tx;

(gdb) n1152}

(gdb) n

#!/bin/bashfor i in {1..254};doip=10.10.103.229ping-c 1000 $ip &> /dev/null && echo $ip is up &done

wait

ping 10.10.103.229 持续10多分钟,才之心到断点Breakpoint 1, hinic_xmit_mbuf_cleanup (txq=0x13e7e7000) at /data1/dpdk-19.11/drivers/net/hinic/hinic_pmd_tx.c:584

hinic_xmit_mbuf_cleanup 会执行free buf

3b1383f7b8c99c91b5cac4d7aa036d23.png

net_hinic: Disable promiscuous, nic_dev: hinic-0000:05:00.0, port_id: 0, promisc: 0net_hinic: Disable allmulticast succeed, nic_dev: hinic-0000:05:00.0, port_id: 0Initilize port0done.

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

tip=10.10.103.229 ARP: hrd=1 proto=0x0800 hln=6 pln=4 op=1(ARP Request)

[Switching to Thread0xffffbd40d910 (LWP 44270)]

Breakpoint1, hinic_xmit_mbuf_cleanup (txq=0x13e7e7000) at /data1/dpdk-19.11/drivers/net/hinic/hinic_pmd_tx.c:584

584 int i, nb_free = 0;

(gdb) bt

#0 hinic_xmit_mbuf_cleanup (txq=0x13e7e7000) at /data1/dpdk-19.11/drivers/net/hinic/hinic_pmd_tx.c:584#1 0x000000000078ac00 in hinic_xmit_pkts (tx_queue=0x13e7e7000, tx_pkts=0xffffbd40ce80, nb_pkts=1)

at/data1/dpdk-19.11/drivers/net/hinic/hinic_pmd_tx.c:1078#2 0x00000000004663c8 inreply_to_icmp_echo_rqsts ()

#3 0x00000000004674fc inpong_launch_one_lcore ()

#4 0x0000000000593ae8 in eal_thread_loop (arg=0x0)

at/data1/dpdk-19.11/lib/librte_eal/linux/eal/eal_thread.c:153#5 0x0000ffffbe617d38 in start_thread (arg=0xffffbd40d910) at pthread_create.c:309#6 0x0000ffffbe55f5f0 in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:91(gdb) delete

Delete all breakpoints?(y or n) y

(gdb) c

txq->wq->delta txq->q_depth初始化

61f9dde5c7fc4d50d49a2411290a4523.png

static inthinic_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,

uint16_t nb_desc, unsignedintsocket_id,

__rte_unusedconst struct rte_eth_txconf *tx_conf)

{intrc;struct hinic_nic_dev *nic_dev;struct hinic_hwdev *hwdev;struct hinic_txq *txq;

u16 sq_depth, tx_free_thresh;

nic_dev=HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);

hwdev= nic_dev->hwdev;/*queue depth must be power of 2, otherwise will be aligned up*/sq_depth= (nb_desc & (nb_desc - 1)) ?((u16)(1U << (ilog2(nb_desc) + 1))) : nb_desc;/*alloc tx sq hw wqepage*/rc=hinic_create_sq(hwdev, queue_idx, sq_depth);

}

8bdf8ab460f9279f411b57cf24b50d08.png

f681aae7562f1741991cd2f3bf322814.png

将nb_txd改小

rte_eth_tx_queue_setup(portid, 0, nb_txd,

rte_eth_dev_socket_id(portid),

&txq_conf);

Breakpoint 1, hinic_xmit_mbuf_cleanup (txq=0x13e7e7000) at /data1/dpdk-19.11/drivers/net/hinic/hinic_pmd_tx.c:584

584 int i, nb_free = 0;

(gdb) s586 int wqebb_cnt = 0;

(gdb) list581{582 struct hinic_tx_info *tx_info;583 struct rte_mbuf *mbuf, *m, *mbuf_free[HINIC_MAX_TX_FREE_BULK];584 int i, nb_free = 0;585u16 hw_ci, sw_ci, sq_mask;586 int wqebb_cnt = 0;587

588 hw_ci =HINIC_GET_SQ_HW_CI(txq);589 sw_ci =HINIC_GET_SQ_LOCAL_CI(txq);590 sq_mask =HINIC_GET_SQ_WQE_MASK(txq);

(gdb) n588 hw_ci =HINIC_GET_SQ_HW_CI(txq);

(gdb) n589 sw_ci =HINIC_GET_SQ_LOCAL_CI(txq);

(gdb) n590 sq_mask =HINIC_GET_SQ_WQE_MASK(txq);

(gdb) n592 for (i = 0; i < txq->tx_free_thresh; ++i) {

(gdb) n593 tx_info = &txq->tx_info[sw_ci];

(gdb) n594 if (hw_ci == sw_ci ||(gdb) p*tx_info

$1 = {mbuf = 0x13e9a9400, wqebb_cnt = 1, cpy_mbuf = 0x0}

(gdb) n595 (((hw_ci - sw_ci) & sq_mask) < tx_info->wqebb_cnt))

(gdb) n594 if (hw_ci == sw_ci ||(gdb) n598 sw_ci = (sw_ci + tx_info->wqebb_cnt) &sq_mask;

(gdb) n600 if (unlikely(tx_info->cpy_mbuf !=NULL)) {

(gdb) n605 wqebb_cnt += tx_info->wqebb_cnt;

(gdb) n606 mbuf = tx_info->mbuf;

(gdb) n608 if (likely(mbuf->nb_segs == 1)) {

(gdb) n609 m =rte_pktmbuf_prefree_seg(mbuf);

(gdb) n610 tx_info->mbuf =NULL;

(gdb) n612 if (unlikely(m ==NULL))

(gdb) n615 mbuf_free[nb_free++] =m;

(gdb) n616 if (unlikely(m->pool != mbuf_free[0]->pool ||(gdb) n592 for (i = 0; i < txq->tx_free_thresh; ++i) {

(gdb) n593 tx_info = &txq->tx_info[sw_ci];

(gdb) n594 if (hw_ci == sw_ci ||(gdb) n595 (((hw_ci - sw_ci) & sq_mask) < tx_info->wqebb_cnt))

(gdb) n594 if (hw_ci == sw_ci ||(gdb) n598 sw_ci = (sw_ci + tx_info->wqebb_cnt) &sq_mask;

(gdb) n600 if (unlikely(tx_info->cpy_mbuf !=NULL)) {

(gdb) n605 wqebb_cnt += tx_info->wqebb_cnt;

(gdb) n606 mbuf = tx_info->mbuf;

(gdb) n608 if (likely(mbuf->nb_segs == 1)) {

(gdb) n609 m =rte_pktmbuf_prefree_seg(mbuf);

(gdb) n610 tx_info->mbuf =NULL;

(gdb) n612 if (unlikely(m ==NULL))

(gdb) n615 mbuf_free[nb_free++] =m;

(gdb) n616 if (unlikely(m->pool != mbuf_free[0]->pool ||(gdb) n592 for (i = 0; i < txq->tx_free_thresh; ++i) {

(gdb) c

Continuing.

Breakpoint1, hinic_xmit_mbuf_cleanup (txq=0x13e7e7000) at /data1/dpdk-19.11/drivers/net/hinic/hinic_pmd_tx.c:584

584 int i, nb_free = 0;

(gdb) delete

Delete all breakpoints?(y or n) y

(gdb) c

Continuing.

bnxt_xmit_pkts

uint16_t bnxt_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,

uint16_t nb_pkts)

{struct bnxt_tx_queue *txq =tx_queue;

uint16_t nb_tx_pkts= 0;

uint16_t db_mask= txq->tx_ring->tx_ring_struct->ring_size >> 2;

uint16_t last_db_mask= 0;/*Handle TX completions*/bnxt_handle_tx_cp(txq);/*Handle TX burst request*/

for (nb_tx_pkts = 0; nb_tx_pkts < nb_pkts; nb_tx_pkts++) {if(bnxt_start_xmit(tx_pkts[nb_tx_pkts], txq)) {break;

}else if ((nb_tx_pkts & db_mask) !=last_db_mask) {

B_TX_DB(txq->tx_ring->tx_doorbell,

txq->tx_ring->tx_prod);

last_db_mask= nb_tx_pkts &db_mask;

}

}if(nb_tx_pkts)

B_TX_DB(txq->tx_ring->tx_doorbell, txq->tx_ring->tx_prod);returnnb_tx_pkts;

}

这个函数的逻辑可以分为三个部分来看:

首先是bnxt_handle_tx_cp,这里的cp是指complete(完成),这个函数主要负责处理之前网卡已经发送完成的mbuf,也就是网卡已经通过DMA将mbuf中的数据拷贝走,软件可以释放mbuf的逻辑;

其次是bnxt_start_xmit,这个是真正的发送逻辑,其实这里的发送也并不是真的把数据拷贝到网卡上,而是根据每个mbuf的数据地址设置到bd ring,从而告诉网卡DMA拷贝的源地址;

最后是B_TX_DB对tx_doorbell的写操作,作用就是前面bd的地址信息已经填充完毕,告诉网卡可以发起DMA了。

在分析具体函数前,首先熟悉一下相关的数据结构。和tx_queue相关联的有两个ring,一个是tx_ring(发送ring),一个是cp_ring(完成ring)。其数据结构关系如下:

28541347_15377833651Hhy.jpg

下面分别介绍三个阶段的实现。

释放已经DMA完成的mbuf

bnxt_handle_tx_cp

static int bnxt_handle_tx_cp(struct bnxt_tx_queue *txq)

{struct bnxt_cp_ring_info *cpr = txq->cp_ring;

uint32_t raw_cons= cpr->cp_raw_cons; /*记录完成队列上次完成队列释放(consumer)的index*/uint32_t cons;int nb_tx_pkts = 0;struct tx_cmpl *txcmp;if ((txq->tx_ring->tx_ring_struct->ring_size -(bnxt_tx_avail(txq->tx_ring))) >txq->tx_free_thresh) { /*如果发送ring中的已用desc数量大于tx_free_thresh*/

while (1) {

cons= RING_CMP(cpr->cp_ring_struct, raw_cons);

txcmp= (struct tx_cmpl *)&cpr->cp_desc_ring[cons];/*struct tx_cmpl中的type由硬件设置,TX_CMPL_TYPE_TX_L2表示网卡已经DMA完成,软件可以释放mbuf中的数据了*/

if (CMP_TYPE(txcmp) ==TX_CMPL_TYPE_TX_L2)

nb_tx_pkts++;elseRTE_LOG_DP(DEBUG, PMD,"Unhandled CMP type %02x\n",

CMP_TYPE(txcmp));

raw_cons= NEXT_RAW_CMP(raw_cons); /*raw_cons = raw_cons + 1*/}if (nb_tx_pkts) /*nb_tx_pkts记录了本次可以释放的mbuf数量*/bnxt_tx_cmp(txq, nb_tx_pkts);/*释放mbuf*/cpr->cp_raw_cons = raw_cons; /*更新 cpr->cp_raw_cons*/B_CP_DIS_DB(cpr, cpr->cp_raw_cons); /*通过cp ring的cp_doorbell通知硬件对应的cp ring bd已经可以释放了*/}returnnb_tx_pkts;

}

真正释放mbuf的操作是在bnxt_tx_cmp函数完成的。

bnxt_tx_cmp

static void bnxt_tx_cmp(struct bnxt_tx_queue *txq, intnr_pkts)

{struct bnxt_tx_ring_info *txr = txq->tx_ring;

uint16_t cons= txr->tx_cons;inti, j;for (i = 0; i < nr_pkts; i++) {struct bnxt_sw_tx_bd *tx_buf;struct rte_mbuf *mbuf;

tx_buf= &txr->tx_buf_ring[cons]; /*tx_buf_ring存放txring中的mbuf*/cons= RING_NEXT(txr->tx_ring_struct, cons);

mbuf= tx_buf->mbuf;

tx_buf->mbuf =NULL;/*EW - no need to unmap DMA memory?*/

/*tx_buf->nr_bds记录一个mbuf对应的bd数量,一个mbuf可能对应多个bd*/

for (j = 1; j < tx_buf->nr_bds; j++)

cons= RING_NEXT(txr->tx_ring_struct, cons); /*cons = cons + 1*/rte_pktmbuf_free(mbuf);

}

txr->tx_cons = cons; /*清空了一部分mbuf,更新consumer index*/}

这里注意一点,在函数的最后更新tx_ring的consumer index,虽然对于发送端来说,软件驱动是productor(产生数据),网卡是consumer(消费数据),但是真正释放数据还是由软件驱动完成,所以consumer也是要在软件更新的。

数据包发送

数据包发送是在bnxt_start_xmit中完成的。

bnxt_start_xmit

static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,struct bnxt_tx_queue *txq)

{struct bnxt_tx_ring_info *txr = txq->tx_ring;struct tx_bd_long *txbd;struct tx_bd_long_hi *txbd1;

uint32_t vlan_tag_flags, cfa_action;bool long_bd = false;

uint16_t last_prod= 0;struct rte_mbuf *m_seg;struct bnxt_sw_tx_bd *tx_buf;static const uint32_t lhint_arr[4] ={

TX_BD_LONG_FLAGS_LHINT_LT512,

TX_BD_LONG_FLAGS_LHINT_LT1K,

TX_BD_LONG_FLAGS_LHINT_LT2K,

TX_BD_LONG_FLAGS_LHINT_LT2K

};if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |PKT_TX_UDP_CKSUM| PKT_TX_IP_CKSUM |PKT_TX_VLAN_PKT|PKT_TX_OUTER_IP_CKSUM))

long_bd= true;/*1. 将待发送的mbuf放入tx_ring的bnxt_sw_tx_bd中*/tx_buf= &txr->tx_buf_ring[txr->tx_prod];

tx_buf->mbuf =tx_pkt;

tx_buf->nr_bds = long_bd + tx_pkt->nb_segs;/*一个mbuf可能对应多个bd,last_prod指向该mbuf对应的最后一个bd的index*/last_prod= (txr->tx_prod + tx_buf->nr_bds - 1) &txr->tx_ring_struct->ring_mask;if (unlikely(bnxt_tx_avail(txr) < tx_buf->nr_bds))return -ENOMEM;/*2. 根据mbuf的信息设置rx tx_desc_ring中对应的bd,其中关键是txbd->addr*/txbd= &txr->tx_desc_ring[txr->tx_prod];

txbd->opaque = txr->tx_prod;

txbd->flags_type = tx_buf->nr_bds <

txbd->len = tx_pkt->data_len;if (txbd->len >= 2014)

txbd->flags_type |=TX_BD_LONG_FLAGS_LHINT_GTE2K;elsetxbd->flags_type |= lhint_arr[txbd->len >> 9];/*txbd->addr是mbuf的dma地址,也就是iova地址*/txbd->addr = rte_cpu_to_le_32(RTE_MBUF_DATA_DMA_ADDR(tx_buf->mbuf)); /*txr->tx_prod = txr->tx_prod + 1*/

if(long_bd) {

txbd->flags_type |=TX_BD_LONG_TYPE_TX_BD_LONG;

vlan_tag_flags= 0;

cfa_action= 0;if (tx_buf->mbuf->ol_flags &PKT_TX_VLAN_PKT) {/*shurd: Should this mask at

* TX_BD_LONG_CFA_META_VLAN_VID_MASK?*/vlan_tag_flags= TX_BD_LONG_CFA_META_KEY_VLAN_TAG |tx_buf->mbuf->vlan_tci;/*Currently supports 8021Q, 8021AD vlan offloads

* QINQ1, QINQ2, QINQ3 vlan headers are deprecated*/

/*DPDK only supports 802.11q VLAN packets*/vlan_tag_flags|=TX_BD_LONG_CFA_META_VLAN_TPID_TPID8100;

}/*更新tx_ring的productor index*/txr->tx_prod = RING_NEXT(txr->tx_ring_struct, txr->tx_prod);

txbd1= (struct tx_bd_long_hi *)&txr->tx_desc_ring[txr->tx_prod];

txbd1->lflags = 0;

txbd1->cfa_meta =vlan_tag_flags;

txbd1->cfa_action =cfa_action;/*根据mbuf的ol_flags设置bd中对应的flag*/

if (tx_pkt->ol_flags &PKT_TX_TCP_SEG) {/*TSO*/txbd1->lflags |=TX_BD_LONG_LFLAGS_LSO;

txbd1->hdr_size = tx_pkt->l2_len + tx_pkt->l3_len +tx_pkt->l4_len + tx_pkt->outer_l2_len +tx_pkt->outer_l3_len;

txbd1->mss = tx_pkt->tso_segsz;

}else if ((tx_pkt->ol_flags & PKT_TX_OIP_IIP_TCP_UDP_CKSUM) ==PKT_TX_OIP_IIP_TCP_UDP_CKSUM) {/*Outer IP, Inner IP, Inner TCP/UDP CSO*/txbd1->lflags |=TX_BD_FLG_TIP_IP_TCP_UDP_CHKSUM;

txbd1->mss = 0;

}else if ((tx_pkt->ol_flags & PKT_TX_IIP_TCP_UDP_CKSUM) ==PKT_TX_IIP_TCP_UDP_CKSUM) {/*(Inner) IP, (Inner) TCP/UDP CSO*/txbd1->lflags |=TX_BD_FLG_IP_TCP_UDP_CHKSUM;

txbd1->mss = 0;

}else if ((tx_pkt->ol_flags & PKT_TX_OIP_TCP_UDP_CKSUM) ==PKT_TX_OIP_TCP_UDP_CKSUM) {/*Outer IP, (Inner) TCP/UDP CSO*/txbd1->lflags |=TX_BD_FLG_TIP_TCP_UDP_CHKSUM;

txbd1->mss = 0;

}else if ((tx_pkt->ol_flags & PKT_TX_OIP_IIP_CKSUM) ==PKT_TX_OIP_IIP_CKSUM) {/*Outer IP, Inner IP CSO*/txbd1->lflags |=TX_BD_FLG_TIP_IP_CHKSUM;

txbd1->mss = 0;

}else if ((tx_pkt->ol_flags & PKT_TX_TCP_UDP_CKSUM) ==PKT_TX_TCP_UDP_CKSUM) {/*TCP/UDP CSO*/txbd1->lflags |=TX_BD_LONG_LFLAGS_TCP_UDP_CHKSUM;

txbd1->mss = 0;

}else if (tx_pkt->ol_flags &PKT_TX_IP_CKSUM) {/*IP CSO*/txbd1->lflags |=TX_BD_LONG_LFLAGS_IP_CHKSUM;

txbd1->mss = 0;

}else if (tx_pkt->ol_flags &PKT_TX_OUTER_IP_CKSUM) {/*IP CSO*/txbd1->lflags |=TX_BD_LONG_LFLAGS_T_IP_CHKSUM;

txbd1->mss = 0;

}

}else{

txbd->flags_type |=TX_BD_SHORT_TYPE_TX_BD_SHORT;

}

m_seg= tx_pkt->next;/*i is set at the end of the if(long_bd) block*/

while (txr->tx_prod !=last_prod) {/*更新tx_ring的productor index*/txr->tx_prod = RING_NEXT(txr->tx_ring_struct, txr->tx_prod); /*txr->tx_prod = txr->tx_prod + 1*/tx_buf= &txr->tx_buf_ring[txr->tx_prod];

txbd= &txr->tx_desc_ring[txr->tx_prod];

txbd->addr =rte_cpu_to_le_32(RTE_MBUF_DATA_DMA_ADDR(m_seg));

txbd->flags_type =TX_BD_SHORT_TYPE_TX_BD_SHORT;

txbd->len = m_seg->data_len;

m_seg= m_seg->next;

}

txbd->flags_type |=TX_BD_LONG_FLAGS_PACKET_END;/*更新tx_ring的productor index*/txr->tx_prod = RING_NEXT(txr->tx_ring_struct, txr->tx_prod); /*txr->tx_prod = txr->tx_prod + 1*/

return 0;

}

其中值得注意的有两点,一个是mbuf向db(tx_bd_long)转换的过程,其bd地址设置为mbuf的iova地址,也就是dma地址。

txbd->addr = rte_cpu_to_le_32(RTE_MBUF_DATA_DMA_ADDR(tx_buf->mbuf));#define RTE_MBUF_DATA_DMA_ADDR(mb) \((uint64_t)((mb)->buf_iova + (mb)->data_off))

另一方面是在发送过程中会更新tx_ring的productor index。

启动DMA

启动硬件DMA拷贝是通过一下语句完成:

B_TX_DB(txq->tx_ring->tx_doorbell, txq->tx_ring->tx_prod)

将tx_ring的productor index(tx_ring->tx_prod)写入tx_ring的tx_doorbell中。而无论是cp_ring的cp_doorbell还是tx_ring的tx_doorbell都在在bnxt_alloc_hwrm_rings函数中初始化为设备的bar空间地址的。

cpr->cp_doorbell = (char *)pci_dev->mem_resource[2].addr + idx * 0x80;

txr->tx_doorbell = (char *)pci_dev->mem_resource[2].addr + idx * 0x80;

ixgbe_xmit_pkts()

发送时回写的三种情况(默认为第一种,回写取决于RS):

1、TXDCTL[n].WTHRESH = 0 and a descriptor that has RS set is ready to be written back.

2、TXDCTL[n].WTHRESH > 0 and TXDCTL[n].WTHRESH descriptors have accumulated.

3、TXDCTL[n].WTHRESH > 0 and the corresponding EITR counter has reached zero. The timer expiration flushes any accumulated descriptors and sets an interrupt event (TXDW).

发送时回写:

1、挂载每个包的最后一个分段时,若当前使用的desc数大于上限(默认为32),设置RS

2、burst发包的最后一个包的最后一个分段,设置RS

uint16_t

ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,

uint16_t nb_pkts)

{

...

txq=tx_queue;

sw_ring= txq->sw_ring;

txr= txq->tx_ring;

tx_id= txq->tx_tail; /*相当于ixgbe的next_to_use*/txe= &sw_ring[tx_id]; /*得到tx_tail指向的entry*/txp=NULL;

.../*若空闲的mbuf数小于下限(默认为32),清理空闲的mbuf*/

if (txq->nb_tx_free < txq->tx_free_thresh)

ixgbe_xmit_cleanup(txq);

.../*TX loop*/

for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {

...

tx_pkt= *tx_pkts++; /*待发送的mbuf*/pkt_len= tx_pkt->pkt_len; /*待发送的mbuf的长度*/...

nb_used= (uint16_t)(tx_pkt->nb_segs + new_ctx); /*使用的desc数*/...

tx_last= (uint16_t) (tx_id + nb_used - 1); /*tx_last指向最后一个desc*/...if (tx_last >= txq->nb_tx_desc) /*注意是一个环形缓冲区*/tx_last= (uint16_t) (tx_last - txq->nb_tx_desc);

...if (nb_used > txq->nb_tx_free) {

...if (ixgbe_xmit_cleanup(txq) != 0) {/*Could not clean any descriptors*/

if (nb_tx == 0) /*若是第一个包(未发包),return 0*/

return 0;goto end_of_tx; /*若非第一个包(已发包),停止发包,更新发送队列参数*/}

...

}

.../*每个包可能包含多个分段,m_seg指向第一个分段*/m_seg=tx_pkt;do{

txd= &txr[tx_id]; /*desc*/txn= &sw_ring[txe->next_id]; /*下一个entry*/...

txe->mbuf = m_seg; /*将m_seg挂载到txe*/...

slen= m_seg->data_len; /*m_seg的长度*/buf_dma_addr= rte_mbuf_data_dma_addr(m_seg); /*m_seg的总线地址*/txd->read.buffer_addr =rte_cpu_to_le_64(buf_dma_addr);/*总线地址赋给txd->read.buffer_addr*/txd->read.cmd_type_len =rte_cpu_to_le_32(cmd_type_len| slen); /*长度赋给txd->read.cmd_type_len*/...

txe->last_id = tx_last; /*last_id指向最后一个desc*/tx_id= txe->next_id; /*tx_id指向下一个desc*/txe= txn; /*txe指向下一个entry*/m_seg= m_seg->next; /*m_seg指向下一个分段*/}while (m_seg !=NULL);

.../*最后一个分段*/cmd_type_len|=IXGBE_TXD_CMD_EOP;

txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used); /*更新nb_tx_used*/txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used); /*更新nb_tx_free*/...if (txq->nb_tx_used >= txq->tx_rs_thresh) { /*若使用的mbuf数大于上限(默认为32),设置RS*/...

cmd_type_len|=IXGBE_TXD_CMD_RS;

...

txp= NULL; /*txp为NULL表示已设置RS*/}elsetxp= txd; /*txp非NULL表示未设置RS*/...

txd->read.cmd_type_len |=rte_cpu_to_le_32(cmd_type_len);

}

...

end_of_tx:/*burst发包的最后一个包的最后一个分段*/...if (txp != NULL) /*若未设置RS,设置RS*/txp->read.cmd_type_len |=rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);

...

IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id); /*将tx_id写入TDT*/txq->tx_tail = tx_id; /*tx_tail指向下一个desc*/...returnnb_tx;

}

ixgbe_xmit_cleanup()

static inline intixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)

{

...

uint16_t last_desc_cleaned= txq->last_desc_cleaned;

.../*最后一个entry*/desc_to_clean_to= (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);if (desc_to_clean_to >= nb_tx_desc) /*注意是环形缓冲区*/desc_to_clean_to= (uint16_t)(desc_to_clean_to -nb_tx_desc);

.../*最后一个entry的最后一个desc*/desc_to_clean_to=sw_ring[desc_to_clean_to].last_id;

status=txr[desc_to_clean_to].wb.status;/*若最后一个desc的DD为0,return -1*/

if (!(status &rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {

...return -(1);

}

.../*将要清理的desc数*/

if (last_desc_cleaned > desc_to_clean_to) /*注意是环形缓冲区*/nb_tx_to_clean= (uint16_t)((nb_tx_desc - last_desc_cleaned) +desc_to_clean_to);elsenb_tx_to_clean= (uint16_t)(desc_to_clean_to -last_desc_cleaned);

...

txr[desc_to_clean_to].wb.status= 0; /*清零DD*/...

txq->last_desc_cleaned = desc_to_clean_to; /*更新last_desc_cleaned*/txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean); /*更新nb_tx_free*/...return 0;

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值