1、dev_queue_xmit 函数分析
// Entry of the transmit path (abridged excerpt): picks a tx queue for the skb,
// looks up the qdisc attached to that queue and, when the qdisc provides an
// enqueue hook, hands the skb to __dev_xmit_skb().
// NOTE: this is an excerpt - the declaration of rc, the "out" label and the
// return statement are not shown here.
int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
{
struct net_device *dev = skb->dev;
struct netdev_queue *txq;
struct Qdisc *q;
// Select the struct netdev_queue (tx queue) of dev to use for this skb
txq = netdev_core_pick_tx(dev, skb, sb_dev);
// Read the qdisc attached to that tx queue (txq->qdisc) via rcu_dereference_bh()
q = rcu_dereference_bh(txq->qdisc);
trace_net_dev_queue(skb);
if (q->enqueue) {
// The qdisc has an enqueue hook: pass the skb down to the next stage
rc = __dev_xmit_skb(skb, q, dev, txq);
goto out;
}
}
// Send skb through qdisc q on tx queue txq of dev (abridged excerpt).
// Returns NET_XMIT_SUCCESS on the bypass path, otherwise the enqueue result
// masked with NET_XMIT_MASK.
// NOTE: only the branch taken when TCQ_F_NOLOCK is set is shown. root_lock and
// contended are unused in this excerpt, and the brace that would close the
// function body is missing.
static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
struct net_device *dev,
struct netdev_queue *txq)
{
spinlock_t *root_lock = qdisc_lock(q);
struct sk_buff *to_free = NULL;
bool contended;
int rc;
qdisc_calculate_pkt_len(skb, q);
// The qdisc is flagged TCQ_F_NOLOCK (lockless qdisc)
if (q->flags & TCQ_F_NOLOCK) {
// Bypass path: TCQ_F_CAN_BYPASS is set, the qdisc is empty and
// qdisc_run_begin() succeeded (presumably: nobody else is running this
// qdisc - confirm against qdisc_run_begin())
if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) &&
qdisc_run_begin(q)) {
// Unlikely: the qdisc is no longer empty on the re-check, so enqueue the
// skb and transmit through __qdisc_run() instead of bypassing
if (unlikely(!nolock_qdisc_is_empty(q))) {
rc = q->enqueue(skb, q, &to_free) &
NET_XMIT_MASK;
__qdisc_run(q);
qdisc_run_end(q);
goto no_lock_out;
}
// Update the qdisc statistics for this skb
qdisc_bstats_cpu_update(q, skb);
// Transmit the skb directly without enqueueing it; if sch_direct_xmit()
// returns non-zero and the qdisc is not empty, run the qdisc as well
if (sch_direct_xmit(skb, q, dev, txq, NULL, true) &&
!nolock_qdisc_is_empty(q))
__qdisc_run(q);
qdisc_run_end(q);
return NET_XMIT_SUCCESS;
}
// Normal path: enqueue the skb, then run the qdisc to transmit
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
qdisc_run(q);
no_lock_out:
if (unlikely(to_free))
// Free the skb list collected in to_free
kfree_skb_list(to_free);
return rc;
}
__dev_xmit_skb有两种发送方式:一种是置位TCQ_F_CAN_BYPASS且qdisc为空时,绕过排队直接发送;另一种是正常发送,即先调用q->enqueue将skb入队,然后调用qdisc_run(q)进行发送。关于qdisc数据结构如下:
// Transmit from qdisc q by calling qdisc_restart() repeatedly, limited by a
// budget of dev_tx_weight packets.
// The loop stops when qdisc_restart() returns false or when the budget is
// used up; in the latter case further transmission is deferred.
void __qdisc_run(struct Qdisc *q)
{
// Remaining packet budget for this invocation
int quota = dev_tx_weight;
// Filled in by qdisc_restart() on each iteration and charged to the budget
int packets;
while (qdisc_restart(q, &packets)) {
quota -= packets;
// Budget exhausted: stop looping here and defer the remaining work
if (quota <= 0) {
// TCQ_F_NOLOCK qdisc: only mark __QDISC_STATE_MISSED in q->state
if (q->flags & TCQ_F_NOLOCK)
set_bit(__QDISC_STATE_MISSED, &q->state);
else
// Otherwise reschedule the qdisc through __netif_schedule()
__netif_schedule(q);
break;
}
}
}
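__qdisc_run循环调用qdisc_restart,每次从qdisc队列取出skb并发送,这部分占用的是用户进程的内核态时间sy;当quota用尽时,对未置位TCQ_F_NOLOCK的qdisc调用__netif_schedule触发软中断继续发送(置位TCQ_F_NOLOCK的qdisc则只置位__QDISC_STATE_MISSED),软中断中的发送占用的是软中断时间si。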
// Dequeue from qdisc q and pass the result to sch_direct_xmit().
// *packets is an output argument handed to dequeue_skb(); __qdisc_run()
// subtracts it from its budget.
// Returns false when nothing could be dequeued, otherwise the return value of
// sch_direct_xmit().
bool qdisc_restart(struct Qdisc *q, int *packets)
{
spinlock_t *root_lock = NULL;
struct netdev_queue *txq;
struct net_device *dev;
struct sk_buff *skb;
bool validate;
/* Dequeue packet */
skb = dequeue_skb(q, &validate, packets);
// Nothing to send
if (unlikely(!skb))
return false;
// Only a qdisc without TCQ_F_NOLOCK passes its lock on; otherwise root_lock
// stays NULL
if (!(q->flags & TCQ_F_NOLOCK))
root_lock = qdisc_lock(q);
// Device the qdisc belongs to, and the tx queue recorded for this skb
dev = qdisc_dev(q);
txq = skb_get_tx_queue(dev, skb);
return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
}
sch_direct_xmit最终调用MAC驱动层的发送方法:ops->ndo_start_xmit(skb, dev)。
2、软中断发送
__netif_schedule(q)会通过下面的__netif_reschedule(q)触发软中断。
// Append qdisc q to the tail of the current CPU's softnet_data output queue
// and raise NET_TX_SOFTIRQ. The list update is done between local_irq_save()
// and local_irq_restore().
void __netif_reschedule(struct Qdisc *q)
{
struct softnet_data *sd;
unsigned long flags;
local_irq_save(flags);
// Per-CPU softnet_data of the CPU we are running on
sd = this_cpu_ptr(&softnet_data);
// q becomes the new tail: terminate it, link it in through the tail pointer,
// then advance the tail pointer to q's next_sched field
q->next_sched = NULL;
*sd->output_queue_tailp = q;
sd->output_queue_tailp = &q->next_sched;
// Mark the TX softirq pending
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
preempt_check_resched_rt();
}
该函数获取当前CPU的sd结构体,将qdisc描述符挂到sd的output_queue链表尾部,然后置位NET_TX_SOFTIRQ标志。然后在内核线程smpboot_thread_fn中会扫描软中断标志位,依次调用run_ksoftirqd->__do_softirq()->net_tx_action
// Handler shown for the TX softirq (abridged excerpt): walks the qdiscs queued
// on this CPU's softnet_data output_queue and runs each one with qdisc_run().
// The //1..//5 markers refer to the numbered notes that follow the code.
// NOTE: this is an excerpt - root_lock is unused in the lines shown and the
// brace that would close the function body is missing.
void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);//1
if (sd->output_queue) { //2
struct Qdisc *head;
// The list head is read between local_irq_disable() and local_irq_enable()
local_irq_disable();
head = sd->output_queue; //3
local_irq_enable();
while (head) { //4
struct Qdisc *q = head;
spinlock_t *root_lock = NULL;
// Advance to the next qdisc before running the current one
head = head->next_sched;
qdisc_run(q); //5
}
}
1.获取sd结构体
2.如果sd的output_queue上有qdisc
3.获取qdisc的头
4.遍历qdisc链表
5.调用qdisc_run(q)发送,最终调用MAC驱动中的发送方法:
以stmmac为例,ops->ndo_start_xmit(skb, dev)即:
// stmmac transmit routine (heavily abridged excerpt): maps each page fragment
// of the skb for DMA, records the mapping in the tx queue bookkeeping arrays,
// then starts DMA transmission and updates the tx tail pointer.
// NOTE: this is an excerpt - i, entry, desc, des and last_segment are used
// without their declarations/updates being shown, and no return statement is
// shown.
netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct stmmac_priv *priv = netdev_priv(dev);
u32 queue = skb_get_queue_mapping(skb);
struct stmmac_tx_queue *tx_q;
// Select the driver tx queue that matches the skb's queue mapping
tx_q = &priv->tx_queue[queue];
// Number of page fragments carried by the skb
int nfrags = skb_shinfo(skb)->nr_frags;
// Map every fragment so the NIC can DMA it
for (i = 0; i < nfrags; i++) {
// Current fragment
const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
// Length of the fragment
int len = skb_frag_size(frag);
// Create a DMA mapping for the fragment (DMA_TO_DEVICE) so the device can
// read the data from RAM
des = skb_frag_dma_map(priv->device, frag, 0, len,DMA_TO_DEVICE);
tx_q->tx_skbuff_dma[entry].buf = des;
// Program the mapped DMA address into the descriptor
stmmac_set_desc_addr(priv, desc, des);
tx_q->tx_skbuff_dma[entry].map_as_page = true;
tx_q->tx_skbuff_dma[entry].len = len;
tx_q->tx_skbuff_dma[entry].last_segment = last_segment;
}
// Store the skb pointer at the current entry (the last descriptor used)
tx_q->tx_skbuff[entry] = skb;
// Start DMA transmission
stmmac_enable_dma_transmission(priv, priv->ioaddr);
// Set the tx tail pointer for this queue
stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
}
至此,数据从MAC经网卡DMA发送出去。
来看一下MAC发送过程中的数据结构: