Linux网络发送过程分析

这里主要分析 TCP/IP 数据的发送过程。

1 数据发送

 用户一般通过 UDP 或 TCP socket 发送数据,数据经过路由子系统和邻居子系统后,最终调用 dev_queue_xmit 函数来发送。

1.1 dev_queue_xmit函数

/*
 * dev_queue_xmit - hand an skb to the transmit path of skb->dev.
 *
 * Picks a TX queue for the skb, looks up the qdisc attached to that queue
 * and, when the qdisc provides an enqueue hook, passes the skb to
 * __dev_xmit_skb(). The whole sequence runs between rcu_read_lock_bh() and
 * rcu_read_unlock_bh().
 *
 * Returns the result of __dev_xmit_skb() on the qdisc path. rc starts out as
 * -ENOMEM; the handling for a qdisc without an enqueue hook is elided in
 * this excerpt, so the values returned on that path are not shown here.
 */
int dev_queue_xmit(struct sk_buff *skb)

{

         struct net_device *dev = skb->dev;

         struct netdev_queue *txq;

         struct Qdisc *q;

         int rc = -ENOMEM;

 

         skb_reset_mac_header(skb);

         rcu_read_lock_bh();

         skb_update_prio(skb);

         /*
          * Select the TX queue for this skb. The original note says the
          * queue is chosen from the TOS field of the IP header; that cannot
          * be confirmed from this excerpt - TODO verify in netdev_pick_tx().
          */

         txq = netdev_pick_tx(dev, skb);

         /* Fetch the qdisc (traffic-control discipline) attached to txq. */

         q = rcu_dereference_bh(txq->qdisc);

         trace_net_dev_queue(skb);

         /*
          * Per the original note, enqueue is set to pfifo_fast_enqueue when
          * the qdisc is initialised - TODO confirm against the qdisc setup.
          */

         if (q->enqueue) {

                   rc = __dev_xmit_skb(skb, q, dev, txq);

                   goto out;

         }

        

         /* ... remainder of the function elided in this excerpt ... */

out:

         rcu_read_unlock_bh();

         return rc;}

 

1.2 __dev_xmit_skb 函数

 

static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,

                                      struct net_device *dev,

                                      struct netdev_queue *txq)

{

         spinlock_t *root_lock = qdisc_lock(q);

         bool contended;

         int rc;

 

         /*计算skb携带数据大小 */

        qdisc_pkt_len_init(skb);

         qdisc_calculate_pkt_len(skb, q);

         /*判断qdisc是否处于运行状态 */

         contended = qdisc_is_running(q);

         if (unlikely(contended))

                   spin_lock(&q->busylock);

 

         spin_lock(root_lock);

         if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {

                   printk(KERN_WARNING "[mtk_net]__dev_xmit_skb drop skb_len = %d \n", skb->len);

                   kfree_skb(skb);

                   rc = NET_XMIT_DROP;

         } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&

                      qdisc_run_begin(q)) {

                   /*实际上这里是直接发送报文,无需使用流量控制,但需要满足三个条件

          1. pfifo设置TCQ_F_CAN_BYPASS标志,

          2. qdisc_qlen为0,也就是没有多余到数据包待发送,

               3. qdisc_run_bein(q)=1,也就是说txq队列上没有运行流量控制

          */

                   if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))

                            skb_dst_force(skb);

 

                   /*更新统计信息 */

                   qdisc_bstats_update(q, skb);

                 /*直接调用sch_direct_xmit发送数据,如果返回1,表示需要启用流量控制 */

                   if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {

                            if (unlikely(contended)) {

                                     spin_unlock(&q->busylock);

                                     contended = false;

                            }

                            __qdisc_run(q);/*启动流量控制 */

                   } else

                            qdisc_run_end(q);/*标记停止流量控制 */

 

                   rc = NET_XMIT_SUCCESS;

         } else {

                   skb_dst_force(skb);

                 /*进入这里,表现系统正在调用sch_direct_xmit发送数据,所以这里只能调用q->enqueue入队列,暂缓发送,sch_direct_xmit会释放root_lock*/

                rc = q->enqueue(skb, q) & NET_XMIT_MASK;

                   if (qdisc_run_begin(q)) {

                            if (unlikely(contended)) {

                                     spin_unlock(&q->busylock);

                                     contended = false;

                            }

                            __qdisc_run(q);

                   }

         }

         spin_unlock(root_lock);

         if (unlikely(contended))

                   spin_unlock(&q->busylock);

         return rc;

}

1.3 sch_direct_xmit 函数

/*
 * sch_direct_xmit - push one skb to the driver, bypassing the qdisc queue.
 *
 * @skb:       packet to send
 * @q:         qdisc the packet belongs to
 * @dev:       output device
 * @txq:       TX queue to send on
 * @root_lock: qdisc root lock; held on entry, dropped around the driver
 *             call, and held again on return
 *
 * Returns 0 when the caller should stop sending (nothing queued, or the
 * queue is frozen/stopped), otherwise the non-zero value produced by
 * qdisc_qlen(), handle_dev_cpu_collision() or dev_requeue_skb().
 */
int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
		    struct net_device *dev, struct netdev_queue *txq,
		    spinlock_t *root_lock)
{
	int status = NETDEV_TX_BUSY;

	/* Drop the qdisc lock so other CPUs can use the qdisc meanwhile. */
	spin_unlock(root_lock);

	HARD_TX_LOCK(dev, txq, smp_processor_id());
	if (!netif_xmit_frozen_or_stopped(txq)) {
		/*
		 * Hand the skb to the device. The original note says this
		 * reaches the driver's ndo_start_xmit and linearizes the skb
		 * - not visible here, confirm in dev_hard_start_xmit().
		 */
		status = dev_hard_start_xmit(skb, dev, txq);
	}
	HARD_TX_UNLOCK(dev, txq);

	spin_lock(root_lock);

	if (dev_xmit_complete(status)) {
		/* Sent or consumed: report how many packets remain queued. */
		status = qdisc_qlen(q);
	} else if (status == NETDEV_TX_LOCKED) {
		/* The driver could not take its lock. */
		status = handle_dev_cpu_collision(skb, txq, q);
	} else {
		/* Expected to be NETDEV_TX_BUSY: put the skb back. */
		if (unlikely(status != NETDEV_TX_BUSY)) {
			net_warn_ratelimited("BUG %s code %d qlen %d\n",
					     dev->name, status, q->q.qlen);
		}
		status = dev_requeue_skb(skb, q);
	}

	/* A frozen or stopped queue turns any non-zero result into 0. */
	if (status == 0 || !netif_xmit_frozen_or_stopped(txq))
		return status;

	return 0;
}

 

1.4 __qdisc_run 流量控制

/*
 * __qdisc_run - send packets queued on qdisc @q, bounded by a quota.
 *
 * Calls qdisc_restart() repeatedly while it returns non-zero. The loop stops
 * early, after asking for the qdisc to be scheduled again via
 * __netif_schedule(), once weight_p iterations have been used or the current
 * task needs to yield the CPU. qdisc_run_end() is always called on exit.
 */
void __qdisc_run(struct Qdisc *q)
{
	/* Budget: at most weight_p iterations per invocation. */
	int quota = weight_p;

	/*
	 * Per the original note, qdisc_restart() dequeues one skb and sends
	 * it with sch_direct_xmit() - defined elsewhere, TODO confirm.
	 */
	while (qdisc_restart(q)) {
		/*
		 * Ordered by possible occurrence: Postpone processing if
		 * 1. we've exceeded packet quota
		 * 2. another process needs the CPU;
		 */
		if (--quota <= 0 || need_resched()) {
			/*
			 * Work may remain: schedule the qdisc so the rest is
			 * sent later (by net_tx_action, per the original
			 * note).
			 */
			__netif_schedule(q);
			break;
		}
	}

	qdisc_run_end(q);
}

 

1.5 net_tx_action 函数

 当系统负载比较重时,会启动流量控制;如果一次流量控制不能把数据发送完,就会触发一次软中断,由下面的 net_tx_action 函数继续发送剩余的数据。

/*
 * net_tx_action - softirq handler for the transmit side.
 *
 * Works on the current CPU's softnet_data and does two things:
 *   1. frees every skb chained on sd->completion_queue;
 *   2. walks sd->output_queue and runs each qdisc found there, or
 *      reschedules it when its root lock cannot be taken.
 *
 * Both lists are detached from softnet_data with local interrupts disabled
 * and then processed with interrupts enabled again.
 *
 * @h: softirq action descriptor; not used in this function.
 */
static void net_tx_action(struct softirq_action *h)

{

         struct softnet_data *sd = &__get_cpu_var(softnet_data);

 

         if (sd->completion_queue) {/* per the original note, skbs released with dev_kfree_skb_irq are chained here - TODO confirm */

                   struct sk_buff *clist;

                /* Detach the whole list, then free each skb. */

                   local_irq_disable();

                   clist = sd->completion_queue;

                   sd->completion_queue = NULL;

                   local_irq_enable();

 

                   while (clist) {

                            struct sk_buff *skb = clist;

                            clist = clist->next;

                            /* Warn if the skb still has a non-zero user count. */
                            WARN_ON(atomic_read(&skb->users));

                            trace_kfree_skb(skb, net_tx_action);

                            __kfree_skb(skb);

                   }

         }

 

         if (sd->output_queue) {/* per the original note, __netif_schedule populates output_queue - TODO confirm */

                   struct Qdisc *head;

 

                   /* Detach the list and reset the tail pointer to the empty state. */
                   local_irq_disable();

                   head = sd->output_queue;

                   sd->output_queue = NULL;

                   sd->output_queue_tailp = &sd->output_queue;

                   local_irq_enable();

                   while (head) {

                            struct Qdisc *q = head;

                            spinlock_t *root_lock;

                            /* Advance first; q may be re-linked by a reschedule below. */
                            head = head->next_sched;

 

                            root_lock = qdisc_lock(q);

                            if (spin_trylock(root_lock)) {

                                     /* Clear the SCHED bit before running the qdisc. */
                                     smp_mb__before_clear_bit();

                                     clear_bit(__QDISC_STATE_SCHED,

                                                 &q->state);

                                     qdisc_run(q);/* send the queued packets through the qdisc */

                                     spin_unlock(root_lock);

                            } else {

                                     if (!test_bit(__QDISC_STATE_DEACTIVATED,

                                                     &q->state)) {

                                               __netif_reschedule(q);/* root_lock is busy: reschedule the qdisc (original note: raises a softirq - TODO confirm) */

                                     } else {

                                               /* Deactivated qdisc: just drop the SCHED bit. */
                                               smp_mb__before_clear_bit();

                                               clear_bit(__QDISC_STATE_SCHED,

                                                          &q->state);

                                     }

                            }

                   }

         }

}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值