Linux内核收包精髓

网卡和磁盘是现代服务器里对性能要求最为苛刻的两个外设。下面以 Linux 2.4 时代的 softnet 收包代码(net/core/dev.c)为例,来看一下 Linux 内核是如何处理高性能网卡收包的。

/*
 * netif_rx - queue a received packet on this CPU's softnet backlog.
 *
 * Linux 2.4-era softnet code: the per-CPU input_pkt_queue decouples the
 * driver's interrupt handler from protocol processing, which happens
 * later in net_rx_action() (the NET_RX_SOFTIRQ handler).
 *
 * @skb: received buffer; skb->dev must point at the receiving device.
 *
 * Runs in IRQ or BH context.  On success the skb is appended to this
 * CPU's queue, NET_RX_SOFTIRQ is raised, and the current congestion
 * level (cng_level) is returned so the driver can back off.  The packet
 * is dropped (NET_RX_DROP) when the backlog exceeds netdev_max_backlog,
 * or when the queue is non-empty while in the throttled state.
 */
int netif_rx(struct sk_buff *skb)
{
  int this_cpu = smp_processor_id();
  struct softnet_data *queue;
  unsigned long flags;
  /* Timestamp the packet here only if the driver did not already. */
  if (skb->stamp.tv_sec == 0)
    do_gettimeofday(&skb->stamp);
  /* The code is rearranged so that the path is the most
    short when CPU is congested, but is still operating.
   */
  queue = &softnet_data[this_cpu];
  /* IRQs off while touching the per-CPU queue: netif_rx() itself may be
   * re-entered from another device's interrupt on this CPU. */
  local_irq_save(flags);
  netdev_rx_stat[this_cpu].total++;
  if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
    if (queue->input_pkt_queue.qlen) {
      /* Queue non-empty: if we are throttled, keep dropping until the
       * queue fully drains (hysteresis — see the drain branch below). */
      if (queue->throttle)
        goto drop;
enqueue:
      /* Hold a device reference until net_rx_action() finishes with
       * this skb (released there via dev_put()). */
      dev_hold(skb->dev);
      __skb_queue_tail(&queue->input_pkt_queue,skb);
      /* Runs from irqs or BH's, no need to wake BH */
      cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
      local_irq_restore(flags);
#ifndef OFFLINE_SAMPLE
      get_sample_stats(this_cpu);
#endif
      /* Report congestion feedback to the caller. */
      return softnet_data[this_cpu].cng_level;
    }
    /* Queue just drained: leave the throttled state before enqueueing,
     * and (with HW flow control) wake up stalled devices. */
    if (queue->throttle) {
      queue->throttle = 0;
#ifdef CONFIG_NET_HW_FLOWCONTROL
      if (atomic_dec_and_test(&netdev_dropping))
        netdev_wakeup();
#endif
    }
    goto enqueue;  /* jumps back into the qlen branch above */
  }
  /* Backlog over the limit: enter the throttled state (count the
   * transition only once) and drop this packet. */
  if (queue->throttle == 0) {
    queue->throttle = 1;
    netdev_rx_stat[this_cpu].throttled++;
#ifdef CONFIG_NET_HW_FLOWCONTROL
    atomic_inc(&netdev_dropping);
#endif
  }
drop:
  netdev_rx_stat[this_cpu].dropped++;
  local_irq_restore(flags);
  kfree_skb(skb);
  return NET_RX_DROP;
}
/* Deliver skb to an old protocol, which is not threaded well
  or which do not understand shared skbs.
 */
/*
 * deliver_to_old_ones - hand an skb to a pre-softnet protocol handler.
 *
 * @pt:   packet_type whose ->func is the legacy handler to invoke.
 * @skb:  buffer to deliver.
 * @last: non-zero when this is the final consumer, so the skb itself may
 *        be handed over; otherwise a private clone is delivered.
 *
 * Legacy handlers assume the old serialized NET_BH environment, so the
 * call is wrapped in a dedicated spinlock and the TIMER_BH tasklet is
 * disabled for the duration.  Returns the handler's verdict, or
 * NET_RX_DROP when cloning or linearizing fails.
 */
static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last)
{
  static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED;
  int ret = NET_RX_DROP;
  if (!last) {
    /* Not the last consumer: deliver a clone so later handlers still
     * see the original skb untouched. */
    skb = skb_clone(skb, GFP_ATOMIC);
    if (skb == NULL)
      return ret;
  }
  /* Old protocols cannot handle paged (non-linear) skb data. */
  if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
    kfree_skb(skb);
    return ret;
  }
  /* The assumption (correct one) is that old protocols
    did not depened on BHs different of NET_BH and TIMER_BH.
   */
  /* Emulate NET_BH with special spinlock */
  spin_lock(&net_bh_lock);
  /* Disable timers and wait for all timers completion */
  tasklet_disable(bh_task_vec+TIMER_BH);
  ret = pt->func(skb, skb->dev, pt);
  tasklet_hi_enable(bh_task_vec+TIMER_BH);
  spin_unlock(&net_bh_lock);
  return ret;
}
/*
 * net_rx_action - NET_RX_SOFTIRQ handler: drain this CPU's backlog queue.
 *
 * Dequeues skbs queued by netif_rx() and delivers each one first to all
 * matching ptype_all taps, then to the protocol handlers hashed in
 * ptype_base[].  Handlers without ->data are legacy and go through
 * deliver_to_old_ones(); for the others, every handler except the last
 * gets the skb with an extra reference, and the final matching handler
 * consumes the skb itself.  Work is bounded by a packet budget
 * ("bugdet" — sic, kept from the original source; netdev_max_backlog
 * packets) and by roughly one jiffy of wall time; when either limit is
 * hit the softirq is re-raised and we bail out via softnet_break so the
 * rest of the backlog is handled in a later softirq run.
 */
static void net_rx_action(struct softirq_action *h)
{
  int this_cpu = smp_processor_id();
  struct softnet_data *queue = &softnet_data[this_cpu];
  unsigned long start_time = jiffies;
  int bugdet = netdev_max_backlog;  /* [sic] packet budget for this run */

  /* Protocol handlers may be (un)registered concurrently; hold the
   * big-reader lock across all deliveries. */
  br_read_lock(BR_NETPROTO_LOCK);

  for (;;) {
    struct sk_buff *skb;
    struct net_device *rx_dev;

    /* netif_rx() enqueues from IRQ context, so IRQs must be off while
     * we dequeue from the shared per-CPU queue. */
    local_irq_disable();
    skb = __skb_dequeue(&queue->input_pkt_queue);
    local_irq_enable();

    /* Queue drained: normal exit. */
    if (skb == NULL)
      break;

    skb_bond(skb);

    /* Remember the device that netif_rx() took a reference on, so the
     * matching dev_put() below uses the same device even if delivery
     * rewrites skb->dev. */
    rx_dev = skb->dev;

#ifdef CONFIG_NET_FASTROUTE
    /* Fast-routed packets bypass the protocol stack entirely and are
     * transmitted directly. */
    if (skb->pkt_type == PACKET_FASTROUTE) {
      netdev_rx_stat[this_cpu].fastroute_deferred_out++;
      dev_queue_xmit(skb);
      dev_put(rx_dev);
      continue;
    }
#endif
    /* Reset header pointers before protocol handlers parse the frame. */
    skb->h.raw = skb->nh.raw = skb->data;
    {
      struct packet_type *ptype, *pt_prev;
      unsigned short type = skb->protocol;

      /* pt_prev defers each delivery by one handler so that the LAST
       * matching handler can consume the skb without an extra clone or
       * reference. */
      pt_prev = NULL;
      /* Pass 1: taps that want every packet (e.g. packet sockets). */
      for (ptype = ptype_all; ptype; ptype = ptype->next) {
        if (!ptype->dev || ptype->dev == skb->dev) {
          if (pt_prev) {
            if (!pt_prev->data) {
              /* Legacy handler: serialized delivery (clones skb). */
              deliver_to_old_ones(pt_prev, skb, 0);
            } else {
              /* Extra reference: this handler is not the last user. */
              atomic_inc(&skb->users);
              pt_prev->func(skb,
                      skb->dev,
                      pt_prev);
            }
          }
          pt_prev = ptype;
        }
      }

#ifdef CONFIG_NET_DIVERT
      if (skb->dev->divert && skb->dev->divert->divert)
        handle_diverter(skb);
#endif /* CONFIG_NET_DIVERT */

      
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
      /* Bridged ports: the bridge code takes over the skb; skip the
       * normal protocol dispatch below. */
      if (skb->dev->br_port != NULL &&
          br_handle_frame_hook != NULL) {
        handle_bridge(skb, pt_prev);
        dev_put(rx_dev);
        continue;
      }
#endif

      /* Pass 2: protocol handlers hashed by ethertype (16 buckets). */
      for (ptype=ptype_base[ntohs(type)&15];ptype;ptype=ptype->next) {
        if (ptype->type == type &&
            (!ptype->dev || ptype->dev == skb->dev)) {
          if (pt_prev) {
            if (!pt_prev->data)
              deliver_to_old_ones(pt_prev, skb, 0);
            else {
              atomic_inc(&skb->users);
              pt_prev->func(skb,
                      skb->dev,
                      pt_prev);
            }
          }
          pt_prev = ptype;
        }
      }

      /* Final delivery: the last matching handler consumes the skb.
       * No handler matched -> free it. */
      if (pt_prev) {
        if (!pt_prev->data)
          deliver_to_old_ones(pt_prev, skb, 1);
        else
          pt_prev->func(skb, skb->dev, pt_prev);
      } else
        kfree_skb(skb);
    }

    /* Drop the device reference taken in netif_rx()'s dev_hold(). */
    dev_put(rx_dev);

    /* Budget exhausted or we have run for more than ~1 jiffy: yield. */
    if (bugdet-- < 0 || jiffies - start_time > 1)
      goto softnet_break;

#ifdef CONFIG_NET_HW_FLOWCONTROL
  /* Queue has drained below the low-water mark: leave the throttled
   * state and wake flow-controlled devices, then yield this run. */
  if (queue->throttle && queue->input_pkt_queue.qlen < no_cong_thresh ) {
    if (atomic_dec_and_test(&netdev_dropping)) {
      queue->throttle = 0;
      netdev_wakeup();
      goto softnet_break;
    }
  }
#endif

  }
  br_read_unlock(BR_NETPROTO_LOCK);

  /* Queue fully drained: clear any leftover throttled state. */
  local_irq_disable();
  if (queue->throttle) {
    queue->throttle = 0;
#ifdef CONFIG_NET_HW_FLOWCONTROL
    if (atomic_dec_and_test(&netdev_dropping))
      netdev_wakeup();
#endif
  }
  local_irq_enable();

  NET_PROFILE_LEAVE(softnet_process);
  return;

softnet_break:
  /* Budget/time limit hit with work remaining: account the squeeze and
   * re-raise the softirq so the rest is processed later. */
  br_read_unlock(BR_NETPROTO_LOCK);

  local_irq_disable();
  netdev_rx_stat[this_cpu].time_squeeze++;
  /* This already runs in BH context, no need to wake up BH's */
  cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
  local_irq_enable();

  NET_PROFILE_LEAVE(softnet_process);
  return;
}

看懂了吧?

可以看到,Linux 内核在收包上采用了 per-CPU 积压队列加软中断的异步处理机制,把中断上下文的工作压缩到极致,比传统 BSD 的同步协议栈激进得多,吞吐性能也更高;代价是路径更复杂——节流(throttle)、预算(budget)和流控等状态都需要小心维护,出错的空间也随之变大。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值