网卡和磁盘是现代服务器中性能要求最为苛刻的两个外设。下面通过 netif_rx、deliver_to_old_ones 和 net_rx_action 这三个函数,来看一下 Linux 内核是如何处理高性能网卡的收包的。
int netif_rx(struct sk_buff *skb) { int this_cpu = smp_processor_id(); struct softnet_data *queue; unsigned long flags; if (skb->stamp.tv_sec == 0) do_gettimeofday(&skb->stamp); /* The code is rearranged so that the path is the most short when CPU is congested, but is still operating. */ queue = &softnet_data[this_cpu]; local_irq_save(flags); netdev_rx_stat[this_cpu].total++; if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { if (queue->input_pkt_queue.qlen) { if (queue->throttle) goto drop; enqueue: dev_hold(skb->dev); __skb_queue_tail(&queue->input_pkt_queue,skb); /* Runs from irqs or BH's, no need to wake BH */ cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ); local_irq_restore(flags); #ifndef OFFLINE_SAMPLE get_sample_stats(this_cpu); #endif return softnet_data[this_cpu].cng_level; } if (queue->throttle) { queue->throttle = 0; #ifdef CONFIG_NET_HW_FLOWCONTROL if (atomic_dec_and_test(&netdev_dropping)) netdev_wakeup(); #endif } goto enqueue; } if (queue->throttle == 0) { queue->throttle = 1; netdev_rx_stat[this_cpu].throttled++; #ifdef CONFIG_NET_HW_FLOWCONTROL atomic_inc(&netdev_dropping); #endif } drop: netdev_rx_stat[this_cpu].dropped++; local_irq_restore(flags); kfree_skb(skb); return NET_RX_DROP; } /* Deliver skb to an old protocol, which is not threaded well or which do not understand shared skbs. */ static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last) { static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED; int ret = NET_RX_DROP; if (!last) { skb = skb_clone(skb, GFP_ATOMIC); if (skb == NULL) return ret; } if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) { kfree_skb(skb); return ret; } /* The assumption (correct one) is that old protocols did not depened on BHs different of NET_BH and TIMER_BH. 
*/ /* Emulate NET_BH with special spinlock */ spin_lock(&net_bh_lock); /* Disable timers and wait for all timers completion */ tasklet_disable(bh_task_vec+TIMER_BH); ret = pt->func(skb, skb->dev, pt); tasklet_hi_enable(bh_task_vec+TIMER_BH); spin_unlock(&net_bh_lock); return ret; }
static void net_rx_action(struct softirq_action *h) { int this_cpu = smp_processor_id(); struct softnet_data *queue = &softnet_data[this_cpu]; unsigned long start_time = jiffies; int bugdet = netdev_max_backlog; br_read_lock(BR_NETPROTO_LOCK); for (;;) { struct sk_buff *skb; struct net_device *rx_dev; local_irq_disable(); skb = __skb_dequeue(&queue->input_pkt_queue); local_irq_enable(); if (skb == NULL) break; skb_bond(skb); rx_dev = skb->dev; #ifdef CONFIG_NET_FASTROUTE if (skb->pkt_type == PACKET_FASTROUTE) { netdev_rx_stat[this_cpu].fastroute_deferred_out++; dev_queue_xmit(skb); dev_put(rx_dev); continue; } #endif skb->h.raw = skb->nh.raw = skb->data; { struct packet_type *ptype, *pt_prev; unsigned short type = skb->protocol; pt_prev = NULL; for (ptype = ptype_all; ptype; ptype = ptype->next) { if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) { if (!pt_prev->data) { deliver_to_old_ones(pt_prev, skb, 0); } else { atomic_inc(&skb->users); pt_prev->func(skb, skb->dev, pt_prev); } } pt_prev = ptype; } } #ifdef CONFIG_NET_DIVERT if (skb->dev->divert && skb->dev->divert->divert) handle_diverter(skb); #endif /* CONFIG_NET_DIVERT */ #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) if (skb->dev->br_port != NULL && br_handle_frame_hook != NULL) { handle_bridge(skb, pt_prev); dev_put(rx_dev); continue; } #endif for (ptype=ptype_base[ntohs(type)&15];ptype;ptype=ptype->next) { if (ptype->type == type && (!ptype->dev || ptype->dev == skb->dev)) { if (pt_prev) { if (!pt_prev->data) deliver_to_old_ones(pt_prev, skb, 0); else { atomic_inc(&skb->users); pt_prev->func(skb, skb->dev, pt_prev); } } pt_prev = ptype; } } if (pt_prev) { if (!pt_prev->data) deliver_to_old_ones(pt_prev, skb, 1); else pt_prev->func(skb, skb->dev, pt_prev); } else kfree_skb(skb); } dev_put(rx_dev); if (bugdet-- < 0 || jiffies - start_time > 1) goto softnet_break; #ifdef CONFIG_NET_HW_FLOWCONTROL if (queue->throttle && queue->input_pkt_queue.qlen < no_cong_thresh ) { if 
(atomic_dec_and_test(&netdev_dropping)) { queue->throttle = 0; netdev_wakeup(); goto softnet_break; } } #endif } br_read_unlock(BR_NETPROTO_LOCK); local_irq_disable(); if (queue->throttle) { queue->throttle = 0; #ifdef CONFIG_NET_HW_FLOWCONTROL if (atomic_dec_and_test(&netdev_dropping)) netdev_wakeup(); #endif } local_irq_enable(); NET_PROFILE_LEAVE(softnet_process); return; softnet_break: br_read_unlock(BR_NETPROTO_LOCK); local_irq_disable(); netdev_rx_stat[this_cpu].time_squeeze++; /* This already runs in BH context, no need to wake up BH's */ cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ); local_irq_enable(); NET_PROFILE_LEAVE(softnet_process); return; }
看懂了吧?
Linux内核在网卡的异步处理机制上比传统的BSD激进得多,性能也优越得多,不过稳定性也随之下降!