1. Key Data Structures
1.1 struct mvneta_rx_queue
This structure describes a single RX (receive) queue of the NIC.
struct mvneta_rx_queue {
	/* rx queue number, in the range 0-7 */
	u8 id;				// RX queue id; the current driver uses only one RX queue
	/* num of rx descriptors in the rx descriptor ring */
	int size;			// number of descriptors in the RX ring
	/* counter of times when mvneta_refill() failed */
	int missed;
	u32 pkts_coal;
	u32 time_coal;
	/* Virtual address of the RX buffer */
	void **buf_virt_addr;		// array of virtual addresses of the RX buffers, one entry per descriptor
	/* Virtual address of the RX DMA descriptors array */
	struct mvneta_rx_desc *descs;	// virtual address of the descriptor array
	/* DMA address of the RX DMA descriptors array */
	dma_addr_t descs_phys;		// DMA (physical) address of the descriptor array
	/* Index of the last RX DMA descriptor */
	int last_desc;			// index of the last descriptor in the ring
	/* Index of the next RX DMA descriptor to process */
	int next_desc_to_proc;		// index of the next descriptor to process
};
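next_desc_to_proc and last_desc together drive the ring traversal: the consumer index advances by one per packet and wraps back to 0 once it passes last_desc. The helper below is a sketch modeled on mvneta_rxq_next_desc_get() in the mainline mvneta driver (exact form may differ between kernel versions); it shows how the wrap-around works.
#define MVNETA_QUEUE_NEXT_DESC(q, index)	\
	(((index) < (q)->last_desc) ? ((index) + 1) : 0)

/* Return the descriptor at next_desc_to_proc and advance the index,
 * wrapping back to 0 at the end of the ring.
 */
static struct mvneta_rx_desc *
mvneta_rxq_next_desc_get(struct mvneta_rx_queue *rxq)
{
	int rx_desc = rxq->next_desc_to_proc;

	rxq->next_desc_to_proc = MVNETA_QUEUE_NEXT_DESC(rxq, rx_desc);
	prefetch(rxq->descs + rxq->next_desc_to_proc);
	return rxq->descs + rx_desc;
}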
1.2 struct mvneta_rx_desc
This structure describes one RX descriptor, which holds the information about a received packet.
struct mvneta_rx_desc {
	u32 status;		/* Info about received packet		*/
	u16 reserved1;		/* pnc_info - (for future use, PnC)	*/
	u16 data_size;		/* Size of the received packet		*/
	u32 buf_phys_addr;	/* Physical address of the packet in the RX buffer */
	u32 reserved2;		/* pnc_flow_id (for future use, PnC)	*/
	u32 buf_cookie;		/* cookie for access to RX buffer in rx path */
	u16 reserved3;		/* prefetch_cmd, for future use		*/
	u16 reserved4;		/* csum_l4 - (for future use, PnC)	*/
	u32 reserved5;		/* pnc_extra PnC (for future use, PnC)	*/
	u32 reserved6;		/* hw_cmd (for future use, PnC and HWF)	*/
};
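The status field carries, among other things, the FIRST/LAST descriptor bits and an error summary bit. The SWBM receive path only accepts frames that start and end in the same descriptor; the check used later in mvneta_rx_swbm() looks roughly like the sketch below (modeled on the mainline driver).
/* A frame is valid for this path only when it is fully contained
 * in a single descriptor (FIRST and LAST bits both set).
 */
#define MVNETA_RXD_FIRST_LAST_DESC	(MVNETA_RXD_FIRST_DESC | MVNETA_RXD_LAST_DESC)

static int mvneta_rxq_desc_is_first_last(u32 status)
{
	return (status & MVNETA_RXD_FIRST_LAST_DESC) ==
	       MVNETA_RXD_FIRST_LAST_DESC;
}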
1.3 struct mvneta_tx_queue
This structure describes a single TX (transmit) queue.
struct mvneta_tx_queue {
	/* Number of this TX queue, in the range 0-7 */
	u8 id;				// TX queue id
	/* Number of TX DMA descriptors in the descriptor ring */
	int size;			// number of TX descriptors in the ring
	/* Number of currently used TX DMA descriptor in the
	 * descriptor ring
	 */
	int count;
	int pending;
	int tx_stop_threshold;
	int tx_wake_threshold;
	/* Array of transmitted skb */
	struct sk_buff **tx_skb;	// skb bookkeeping array, used to free skbs once their transmission completes
	/* Index of last TX DMA descriptor that was inserted */
	int txq_put_index;		// index at which the next descriptor/skb will be inserted
	/* Index of the TX DMA descriptor to be cleaned up */
	int txq_get_index;		// index of the next skb to be reclaimed
	u32 done_pkts_coal;
	/* Virtual address of the TX DMA descriptors array */
	struct mvneta_tx_desc *descs;	// virtual address of the TX descriptor array
	/* DMA address of the TX DMA descriptors array */
	dma_addr_t descs_phys;		// DMA (physical) address of the descriptor array
	/* Index of the last TX DMA descriptor */
	int last_desc;			// index of the last descriptor in the ring
	/* Index of the next TX DMA descriptor to process */
	int next_desc_to_proc;
	/* DMA buffers for TSO headers */
	char *tso_hdrs;
	/* DMA address of TSO headers */
	dma_addr_t tso_hdrs_phys;
	/* Affinity mask for CPUs */
	cpumask_t affinity_mask;
};
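txq_put_index is the producer index (advanced when a packet is queued for transmission) and txq_get_index is the consumer index (advanced when a completed skb is reclaimed). Both wrap at size; the two small helpers below are sketches following the mainline driver.
/* Advance the cleanup (consumer) index, wrapping at the ring size */
static void mvneta_txq_inc_get(struct mvneta_tx_queue *txq)
{
	txq->txq_get_index++;
	if (txq->txq_get_index == txq->size)
		txq->txq_get_index = 0;
}

/* Advance the insertion (producer) index, wrapping at the ring size */
static void mvneta_txq_inc_put(struct mvneta_tx_queue *txq)
{
	txq->txq_put_index++;
	if (txq->txq_put_index == txq->size)
		txq->txq_put_index = 0;
}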
1.4 struct mvneta_tx_desc
This structure describes a packet that is queued for transmission.
struct mvneta_tx_desc {
	u32 command;		/* Options used by HW for packet transmitting */
	u16 reserverd1;		/* csum_l4 (for future use)		*/
	u16 data_size;		/* Size of the packet to transmit	*/
	u32 buf_phys_addr;	/* Physical address of the packet data	*/
	u32 reserved2;		/* hw_cmd - (for future use, PMT)	*/
	u32 reserved3[4];	/* Reserved - (for future use)		*/
};
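To see how these fields are used, the fragment below is a condensed sketch of how mvneta_tx() in the mainline driver fills one descriptor for a linear, single-fragment skb (error handling and the multi-fragment path omitted); it also shows where tx_skb[] and txq_put_index come into play.
	tx_desc = mvneta_txq_next_desc_get(txq);	/* take the next free descriptor */
	tx_desc->data_size = skb_headlen(skb);
	tx_desc->buf_phys_addr = dma_map_single(dev->dev.parent, skb->data,
						tx_desc->data_size,
						DMA_TO_DEVICE);
	/* first + last descriptor, let hardware zero-pad short frames */
	tx_desc->command = mvneta_skb_tx_csum(pp, skb) | MVNETA_TXD_FLZ_DESC;
	txq->tx_skb[txq->txq_put_index] = skb;		/* remember the skb so it can be freed on completion */
	mvneta_txq_inc_put(txq);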
2. RX Path Analysis
On the Armada 3720, the main receive path is implemented in mvneta_rx_swbm(). The analysis below starts from this function.
2.1 mvneta_rx_swbm
static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
			  struct mvneta_rx_queue *rxq)
{
	struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports);
	struct net_device *dev = pp->dev;
	int rx_done;
	u32 rcvd_pkts = 0;
	u32 rcvd_bytes = 0;

	/* Get number of received packets */
	rx_done = mvneta_rxq_busy_desc_num_get(pp, rxq);
	if (rx_todo > rx_done)
		rx_todo = rx_done;

	rx_done = 0;

	/* Fairness NAPI loop */
	while (rx_done < rx_todo) {
		struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);	/* get the descriptor of the next packet */
		struct sk_buff *skb;
		unsigned char *data;
		dma_addr_t phys_addr;
		u32 rx_status, frag_size;
		int rx_bytes, err, index;

		rx_done++;
		rx_status = rx_desc->status;
		rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);	/* payload size of the received frame */
		index = rx_desc - rxq->descs;			/* index of this descriptor in the ring */
		data = rxq->buf_virt_addr[index];		/* virtual address of the packet buffer, looked up by index */
		phys_addr = rx_desc->buf_phys_addr - pp->rx_offset_correction;	/* physical address of the packet buffer */

		if (!mvneta_rxq_desc_is_first_last(rx_status) ||
		    (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
			mvneta_rx_error(pp, rx_desc);
err_drop_frame:
			dev->stats.rx_errors++;
			/* leave the descriptor untouched */
			continue;
		}

		if (rx_bytes <= rx_copybreak) {		/* small frame (256 bytes by default)? */
			/* better copy a small frame and not unmap the DMA region */
			skb = netdev_alloc_skb_ip_align(dev, rx_bytes);	/* allocate a new skb */
			if (unlikely(!skb))
				goto err_drop_frame;

			dma_sync_single_range_for_cpu(dev->dev.parent,
						      phys_addr,
						      MVNETA_MH_SIZE + NET_SKB_PAD,
						      rx_bytes,
						      DMA_FROM_DEVICE);
			skb_put_data(skb, data + MVNETA_MH_SIZE + NET_SKB_PAD,
				     rx_bytes);		/* copy the frame into skb->data */

			skb->protocol = eth_type_trans(skb, dev);	/* determine the packet's protocol type */
			mvneta_rx_csum(pp, rx_status, skb);
			napi_gro_receive(&port->napi, skb);	/* hand the packet to the upper protocol stack */

			rcvd_pkts++;
			rcvd_bytes += rx_bytes;

			/* leave the descriptor and buffer untouched */
			continue;
		}

		/* The path below handles frames larger than rx_copybreak (256 bytes) */

		/* Refill processing */
		err = mvneta_rx_refill(pp, rx_desc, rxq);	/* give the RX ring a new buffer so it can keep receiving packets */
		if (err) {
			netdev_err(dev, "Linux processing - Can't refill\n");
			rxq->missed++;
			goto err_drop_frame;
		}

		frag_size = pp->frag_size;

		skb = build_skb(data, frag_size > PAGE_SIZE ? 0 : frag_size);	/* build an skb around the existing buffer; skb->data ends up pointing at data */

		/* After refill old buffer has to be unmapped regardless
		 * the skb is successfully built or not.
		 */
		dma_unmap_single(dev->dev.parent, phys_addr,	/* remove the DMA mapping of the old RX buffer */
				 MVNETA_RX_BUF_SIZE(pp->pkt_size),
				 DMA_FROM_DEVICE);
		if (!skb)
			goto err_drop_frame;

		rcvd_pkts++;
		rcvd_bytes += rx_bytes;

		/* Linux processing */
		skb_reserve(skb, MVNETA_MH_SIZE + NET_SKB_PAD);
		skb_put(skb, rx_bytes);

		skb->protocol = eth_type_trans(skb, dev);	/* determine the packet's protocol type */

		mvneta_rx_csum(pp, rx_status, skb);

		napi_gro_receive(&port->napi, skb);		/* hand the packet to the upper protocol stack */
	}

	if (rcvd_pkts) {
		struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);

		u64_stats_update_begin(&stats->syncp);
		stats->rx_packets += rcvd_pkts;
		stats->rx_bytes += rcvd_bytes;
		u64_stats_update_end(&stats->syncp);
	}

	/* Update rxq management counters */
	mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done);

	return rx_done;
}
This function chooses between two processing paths based on frame size. For small frames (at most rx_copybreak, 256 bytes by default) it calls skb_put_data(), which is a memcpy() underneath, to copy the data from the RX ring buffer into skb->data; this per-packet copy is one reason small-packet performance is lower. For larger frames the driver avoids the copy entirely: it calls build_skb() so that skb->data points directly at the buffer that was in the RX ring.
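For reference, skb_put_data() is a small inline helper in include/linux/skbuff.h (recent kernels) whose core really is a memcpy(), which is where the extra cost of the copybreak path comes from:
static inline void *skb_put_data(struct sk_buff *skb, const void *data,
				 unsigned int len)
{
	void *tmp = skb_put(skb, len);	/* extend the skb data area by len bytes */

	memcpy(tmp, data, len);		/* copy the frame out of the RX buffer */

	return tmp;
}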
2.2 mvneta_rx_refill
static int mvneta_rx_refill(struct mvneta_port *pp,
			    struct mvneta_rx_desc *rx_desc,
			    struct mvneta_rx_queue *rxq)
{
	dma_addr_t phys_addr;
	void *data;

	data = mvneta_frag_alloc(pp->frag_size);	/* allocate a new buffer */
	if (!data)
		return -ENOMEM;

	phys_addr = dma_map_single(pp->dev->dev.parent, data,	/* map the buffer for DMA and get its physical address */
				   MVNETA_RX_BUF_SIZE(pp->pkt_size),
				   DMA_FROM_DEVICE);
	if (unlikely(dma_mapping_error(pp->dev->dev.parent, phys_addr))) {
		mvneta_frag_free(pp->frag_size, data);
		return -ENOMEM;
	}

	phys_addr += pp->rx_offset_correction;
	mvneta_rx_desc_fill(rx_desc, phys_addr, data, rxq);	/* publish the new buffer to the RX ring so the DMA engine can store incoming packets in it */
	return 0;
}
This function refills the RX ring with a freshly allocated buffer so that the descriptor can receive a new packet.
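The two helpers it relies on are small; the sketches below follow the mainline driver. mvneta_frag_alloc() obtains a page fragment (falling back to kmalloc for oversized frames), and mvneta_rx_desc_fill() publishes the new buffer by writing its DMA address into the descriptor and its virtual address into buf_virt_addr[]:
static void *mvneta_frag_alloc(unsigned int frag_size)
{
	if (likely(frag_size <= PAGE_SIZE))
		return netdev_alloc_frag(frag_size);	/* normal case: page-fragment allocator */
	else
		return kmalloc(frag_size, GFP_ATOMIC);	/* fallback for oversized MTUs */
}

static void mvneta_rx_desc_fill(struct mvneta_rx_desc *rx_desc,
				u32 phys_addr, void *virt_addr,
				struct mvneta_rx_queue *rxq)
{
	int i;

	rx_desc->buf_phys_addr = phys_addr;	/* hand the new buffer to the DMA engine */
	i = rx_desc - rxq->descs;
	rxq->buf_virt_addr[i] = virt_addr;	/* remember its virtual address for the RX path */
}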
3. Reclaiming TX Resources on Completion
On the Armada 3720, TX completion cleanup is done in the driver's NAPI poll function mvneta_poll(), which calls mvneta_tx_done_gbe(). The implementation of that function is analysed below.
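The call site in mvneta_poll() looks roughly like the fragment below (taken in condensed form from the mainline driver; the surrounding RX handling and interrupt-cause plumbing are omitted):
	/* Release Tx descriptors */
	if (cause_rx_tx & MVNETA_TX_INTR_MASK_ALL) {
		mvneta_tx_done_gbe(pp, (cause_rx_tx & MVNETA_TX_INTR_MASK_ALL));
		cause_rx_tx &= ~MVNETA_TX_INTR_MASK_ALL;
	}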
3.1 mvneta_tx_done_gbe
static void mvneta_tx_done_gbe(struct mvneta_port *pp, u32 cause_tx_done)
{
	struct mvneta_tx_queue *txq;
	struct netdev_queue *nq;

	while (cause_tx_done) {
		txq = mvneta_tx_done_policy(pp, cause_tx_done);	/* pick the TX queue to service */

		nq = netdev_get_tx_queue(pp->dev, txq->id);
		__netif_tx_lock(nq, smp_processor_id());

		if (txq->count)
			mvneta_txq_done(pp, txq);	/* reclaim the completed skbs of this queue */

		__netif_tx_unlock(nq);
		cause_tx_done &= ~((1 << txq->id));
	}
}
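mvneta_tx_done_policy() simply maps the highest set bit of the TX-done cause bitmap to its queue, which is why the loop above clears one queue bit per iteration. A sketch based on the mainline driver:
static struct mvneta_tx_queue *mvneta_tx_done_policy(struct mvneta_port *pp,
						     u32 cause)
{
	int queue = fls(cause) - 1;	/* index of the highest set bit */

	return &pp->txqs[queue];
}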
3.2 mvneta_txq_done
static void mvneta_txq_done(struct mvneta_port *pp,
			    struct mvneta_tx_queue *txq)
{
	struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id);
	int tx_done;

	tx_done = mvneta_txq_sent_desc_proc(pp, txq);	/* number of descriptors the hardware has finished transmitting */
	if (!tx_done)
		return;

	mvneta_txq_bufs_free(pp, txq, tx_done, nq);	/* release the corresponding buffers and skbs */

	txq->count -= tx_done;

	if (netif_tx_queue_stopped(nq)) {
		if (txq->count <= txq->tx_wake_threshold)
			netif_tx_wake_queue(nq);
	}
}
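mvneta_txq_sent_desc_proc() reads how many descriptors the hardware has transmitted and acknowledges them in the same step, so the next poll only sees newly completed work. A sketch based on the mainline driver:
static int mvneta_txq_sent_desc_proc(struct mvneta_port *pp,
				     struct mvneta_tx_queue *txq)
{
	int sent_desc;

	/* Get number of sent descriptors from the TX status register */
	sent_desc = mvneta_txq_sent_desc_num_get(pp, txq);

	/* Acknowledge them so the hardware counter is decremented */
	if (sent_desc)
		mvneta_txq_sent_desc_dec(pp, txq, sent_desc);

	return sent_desc;
}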
3.3 mvneta_txq_bufs_free
static void mvneta_txq_bufs_free(struct mvneta_port *pp,
				 struct mvneta_tx_queue *txq, int num,
				 struct netdev_queue *nq)
{
	unsigned int bytes_compl = 0, pkts_compl = 0;
	int i;

	for (i = 0; i < num; i++) {
		struct mvneta_tx_desc *tx_desc = txq->descs +
						 txq->txq_get_index;	/* descriptor to be cleaned */
		struct sk_buff *skb = txq->tx_skb[txq->txq_get_index];	/* skb whose transmission has completed */

		if (skb) {
			bytes_compl += skb->len;
			pkts_compl++;
		}

		mvneta_txq_inc_get(txq);	/* advance the cleanup index to the next descriptor */

		if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr))
			dma_unmap_single(pp->dev->dev.parent,
					 tx_desc->buf_phys_addr,
					 tx_desc->data_size, DMA_TO_DEVICE);
		if (!skb)
			continue;
		dev_kfree_skb_any(skb);		/* free the skb */
	}

	netdev_tx_completed_queue(nq, pkts_compl, bytes_compl);
}