Linux网络数据接收过程分析

在Linux内核中,当网卡驱动接收到数据时,会调用netif_rx_ni函数把数据向上传递给协议栈(最终到达IP层)。该函数通过netif_rx把数据包链接到input_pkt_queue队列,并在存在待处理的软中断时立即执行一次软中断处理。

/**
 * netif_rx_ni - post a received buffer via netif_rx() and run pending softirqs
 * @skb: buffer to post
 *
 * Preemption is disabled around the call to netif_rx().  If
 * local_softirq_pending() reports pending work afterwards, do_softirq()
 * is invoked before preemption is enabled again.
 *
 * Return: whatever netif_rx() returned for @skb.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int rc;

	preempt_disable();

	rc = netif_rx(skb);

	/* Process the softirq right away instead of waiting for a later point. */
	if (local_softirq_pending()) {
		do_softirq();
	}

	preempt_enable();

	return rc;
}

1.    netif_rx 函数

/**
 * netif_rx - queue a received buffer on the current CPU's backlog
 * @skb: buffer to post
 *
 * NOTE: this is an abridged excerpt; part of the function body is elided
 * at the marker below.
 *
 * Return: the value returned by enqueue_to_backlog().
 */
int netif_rx(struct sk_buff *skb)
{
	int ret;

	/* ... code elided in this excerpt ... */

	{
		unsigned int qtail;

		/*
		 * Call enqueue_to_backlog() directly for the current CPU.
		 * get_cpu()/put_cpu() bracket the call.
		 */
		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
		put_cpu();
	}

	return ret;
}

/*
 * enqueue_to_backlog - append an skb to the input_pkt_queue of a CPU's
 * softnet_data.
 *
 * @skb:   buffer to queue
 * @cpu:   CPU whose per-CPU softnet_data is used
 * @qtail: handed to input_queue_tail_incr_save() when the skb is queued
 *
 * Runs with local interrupts saved/disabled and the rps lock held while the
 * queue is inspected and modified.
 *
 * Returns NET_RX_SUCCESS when the skb was queued.  When the queue length
 * already exceeds netdev_max_backlog, the drop counters are incremented, the
 * skb is freed with kfree_skb() and NET_RX_DROP is returned.
 */
static int enqueue_to_backlog(struct sk_buff *skb, int cpu,

                                  unsigned int *qtail)

{

         struct softnet_data *sd;

         unsigned long flags;

 

         /* Fetch the per-CPU softnet_data of @cpu. */

         sd = &per_cpu(softnet_data, cpu);

 

         local_irq_save(flags);

 

         rps_lock(sd);

        /*
         * Accept the skb only while the queue length has not exceeded
         * netdev_max_backlog.  If the queue already holds packets, the skb
         * is appended right away.
         */

         if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {

                   if (skb_queue_len(&sd->input_pkt_queue)) {

/* Reached by fall-through (queue non-empty) or by the goto below (queue was empty). */
enqueue:

                            __skb_queue_tail(&sd->input_pkt_queue, skb);

                            input_queue_tail_incr_save(sd, qtail);

                            rps_unlock(sd);

                            local_irq_restore(flags);

                            return NET_RX_SUCCESS;

                   }

 

                   /* Schedule NAPI for backlog device

                    * We can use non atomic operation since we own the queue lock

                    */

                   if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {

                            /*
                             * The queue was empty and the backlog was not yet
                             * marked as scheduled: schedule it now, unless
                             * rps_ipi_queued() reports non-zero.
                             * NOTE(review): ____napi_schedule() presumably
                             * links sd->backlog into poll_list and raises the
                             * RX softirq -- confirm against its definition.
                             */
                            if (!rps_ipi_queued(sd))

                                     ____napi_schedule(sd, &sd->backlog);

                   }

                   goto enqueue; /* now append the skb to input_pkt_queue */

         }

 

         /* The queue is over the limit: account for the drop and free the skb. */

         sd->dropped++;

         rps_unlock(sd);

 

         local_irq_restore(flags);

         atomic_long_inc(&skb->dev->rx_dropped);

         kfree_skb(skb);

         return NET_RX_DROP;

}

 

2.    net_rx_action 函数

static void net_rx_action(struct softirq_action *h)

{

         struct softnet_data *sd = &__get_cpu_var(softnet_data);

         unsigned long time_limit = jiffies + 2;

         int budget = netdev_budget;/*此网络设备允许最大传输数,目前为300 */

         void *have;

 

         local_irq_disable();

 

         while (!list_empty(&sd->poll_list)) {

                   struct napi_struct *n;

                   int work, weight;

                   if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))

                            goto softnet_break; /*如果budget用完,或者已经超过2 jiffies 则退出 */

                   local_irq_enable();

                  

                   n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);

                   have = netpoll_poll_lock(n);

                   weight = n->weight;

                   work = 0;

                   if (test_bit(NAPI_STATE_SCHED, &n->state)) {

                            work = n->poll(n, weight);//调用poll函数,如果驱动没有实现,则调用默认的process_backlog函数

                            trace_napi_poll(n);

                   }

                   WARN_ON_ONCE(work > weight);

                   budget -= work;

                   local_irq_disable();

                   if (unlikely(work == weight)) { /*如果网络设备的budget用完,则完成一次接收 */

                            if (unlikely(napi_disable_pending(n))) {

                                     local_irq_enable();

                                     napi_complete(n);

                                     local_irq_disable();

                            } else {

                                     if (n->gro_list) {

                                               /* flush too old packets

                                                * If HZ < 1000, flush all packets.

                                                */

                                               local_irq_enable();

                                               napi_gro_flush(n, HZ >= 1000);

                                               local_irq_disable();

                                     }

                                     list_move_tail(&n->poll_list, &sd->poll_list);

                            }

                   }

                   netpoll_poll_unlock(have);

         }

out:

         net_rps_action_and_irq_enable(sd);

}

/*
 * process_backlog - poll callback that drains a CPU's backlog queues.
 *
 * @napi:  the backlog napi_struct embedded in a softnet_data
 * @quota: maximum number of packets to process in this call
 *
 * Packets are taken from sd->process_queue and handed to
 * __netif_receive_skb() with interrupts enabled.  Whenever process_queue
 * runs dry, the contents of sd->input_pkt_queue are spliced onto it under
 * the rps lock.
 *
 * Returns the number of packets processed.
 */
static int process_backlog(struct napi_struct *napi, int quota)

{

         int work = 0;

         struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);

         napi->weight = weight_p;

         local_irq_disable();

         while (work < quota) {

                   struct sk_buff *skb;

                   unsigned int qlen;

                   /* Per the original notes, process_queue is empty on the first pass. */

                   while ((skb = __skb_dequeue(&sd->process_queue))) {

                            local_irq_enable();

             /* Pass the packet up the stack (towards the IP layer). */

                            __netif_receive_skb(skb);

                            local_irq_disable();

                            input_queue_head_incr(sd);

                            /* Quota reached: re-enable interrupts and report the work done. */
                            if (++work >= quota) {

                                     local_irq_enable();

                                     return work;

                            }

                   }

                   rps_lock(sd);

                   qlen = skb_queue_len(&sd->input_pkt_queue);

                   /* Move everything queued on input_pkt_queue to the tail of process_queue. */
                   if (qlen)

                            skb_queue_splice_tail_init(&sd->input_pkt_queue,

                                                           &sd->process_queue);

                   if (qlen < quota - work) {

                            /*
                             * Fewer packets were pending than the remaining
                             * quota: take this napi off the poll list, clear
                             * its state, and cap quota so the outer loop ends
                             * once the packets just spliced are processed.
                             */

                            list_del(&napi->poll_list);

                            napi->state = 0;

                            quota = work + qlen;

                   }

                   rps_unlock(sd);

         }

         local_irq_enable();

 

         return work;

}

3.    __netif_receive_skb函数

static int __netif_receive_skb(struct sk_buff *skb)

{

         int ret;

 

         if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {

                   unsigned long pflags = current->flags;

                   current->flags |= PF_MEMALLOC;

                   ret = __netif_receive_skb_core(skb, true);

                   tsk_restore_flags(current, pflags, PF_MEMALLOC);

         } else       /*直接调用__netif_receive_skb_core函数 */

                   ret = __netif_receive_skb_core(skb, false);

         return ret;

}

/*
 * __netif_receive_skb_core - deliver an skb to the registered packet handlers.
 *
 * @skb:        buffer to deliver
 * @pfmemalloc: not referenced by the code shown in this excerpt
 *
 * Under rcu_read_lock(), the skb is offered first to every handler on
 * ptype_all whose device is unset or equals skb->dev, then to the handlers
 * in the ptype_base hash bucket for skb->protocol whose type and device
 * match.  All matching handlers but the last are invoked through
 * deliver_skb(); the last one is called directly through ->func().  If no
 * handler matched, or skb_orphan_frags() fails, the skb is counted in
 * rx_dropped and freed.
 *
 * Returns the result of the last handler invoked, or NET_RX_DROP.
 *
 * NOTE(review): this is an abridged excerpt.  rx_handler and the labels
 * another_round/unlock/out are not used by the code shown; presumably the
 * elided parts reference them.
 */
static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
{
	struct packet_type *ptype, *pt_prev;
	rx_handler_func_t *rx_handler;
	struct net_device *orig_dev;
	struct net_device *null_or_dev;
	bool deliver_exact = false;
	int ret = NET_RX_DROP;
	__be16 type;

	orig_dev = skb->dev;

	/* Reset the network header, the transport header (if unset) and the MAC length. */
	skb_reset_network_header(skb);
	if (!skb_transport_header_was_set(skb))
		skb_reset_transport_header(skb);
	skb_reset_mac_len(skb);

	pt_prev = NULL;

	rcu_read_lock();

another_round:
	skb->skb_iif = skb->dev->ifindex;

	__this_cpu_inc(softnet_data.processed);

	/* deliver only exact match when indicated */
	null_or_dev = deliver_exact ? skb->dev : NULL;

	type = skb->protocol;

	/*
	 * First offer the skb to the handlers on ptype_all.  Per the original
	 * notes, packet capture with tcpdump registers its handler there.
	 */
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (!ptype->dev || ptype->dev == skb->dev) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	/*
	 * ptype_base holds the per-protocol handlers.  Per the original notes,
	 * protocol layers register them through dev_add_pack(), e.g. ip_rcv
	 * for IP and arp_rcv for ARP.
	 */
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type &&
		    (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
		     ptype->dev == orig_dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		/* The last matching handler is called directly with the skb. */
		if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
			goto drop;
		else
			ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
	} else {
drop:
		atomic_long_inc(&skb->dev->rx_dropped);
		kfree_skb(skb);
		/* Jamal, now you will not able to escape explaining
		 * me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

unlock:
	rcu_read_unlock();
out:
	return ret;
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值