-
/*
 * net_dev_init - initialisation routine (marked __init) for the network
 * device core.
 *
 * Sets up the packet-type lists, the per-CPU softnet_data, the loopback and
 * default per-namespace device operations, the TX/RX softirqs and the CPU
 * hotplug callback, then calls dst_init() and dev_mcast_init().
 *
 * Returns 0 on success. rc starts out as -ENOMEM and is returned unchanged
 * when any of the checked initialisation/registration calls reports failure.
 */
static int __init net_dev_init(void)
{
	int i, rc = -ENOMEM;

	BUG_ON(!dev_boot_phase);

	/* Either failure bails out; the second call only runs if the first succeeded. */
	if (dev_proc_init() || netdev_kobject_init())
		goto out;

	INIT_LIST_HEAD(&ptype_all);
	for (i = 0; i < PTYPE_HASH_SIZE; i++) {
		INIT_LIST_HEAD(&ptype_base[i]);
	}

	if (register_pernet_subsys(&netdev_net_ops))
		goto out;

	/*
	 *	Initialise the packet receive queues.
	 *
	 * Annotator's note: the per-CPU global softnet_data is initialised for
	 * every possible CPU and serves as that CPU's receive queue.
	 */
	for_each_possible_cpu(i) {
		struct softnet_data *sd = &per_cpu(softnet_data, i);

		/* NOTE(review): the per-CPU setup of *sd is elided in this excerpt. */
		...... ......
	}

	dev_boot_phase = 0;

	/* The loopback device is special: if any other network device
	 * is present in a network namespace, the loopback device must
	 * be present. Since we now dynamically allocate and free the
	 * loopback device, ensure this invariant is maintained by
	 * keeping the loopback device as the first device on the
	 * list of network devices, so that the loopback device is
	 * the first device that appears and the last network device
	 * that disappears.
	 */
	if (register_pernet_device(&loopback_net_ops) ||
	    register_pernet_device(&default_device_ops))
		goto out;

	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);

	hotcpu_notifier(dev_cpu_callback, 0);
	dst_init();
	dev_mcast_init();
	rc = 0;

out:
	return rc;
}
-
static const struct net_device_ops e1000_netdev_ops ={
-
.ndo_open = e1000_open,
-
...... ......
-
};
-
static irqreturn_t e1000_intr(int irq, void *data)
-
{
-
...... ......
-
/*
-
检测是否可以调度NAPI:
-
当没有disable NAPI且没有该网卡对应的NAPI在运行时(保证对应一个网卡的NAPI只有一个实例在运行),即可调度一个新的NAPI。
-
NAPI是一种新的网卡数据检查处理方式。基本上是interrupt+poll。详细信息问google
-
*/
-
if(likely(napi_schedule_prep(&adapter->napi))){
-
/*
-
清楚单次的统计信息。
-
刚看到这里时,我也奇怪,为什么total的统计信息要被清零。
-
实际上这些统计信息只是一次NAPI运行的统计信息,并不是网卡总的统计信息。
-
网卡的统计信息为netdev->stats。NAPI运行完会将下面的值加到网卡的统计信息上的。
-
*/
-
adapter->total_tx_bytes = 0;
-
adapter->total_tx_packets = 0;
-
adapter->total_rx_bytes = 0;
-
adapter->total_rx_packets = 0;
-
/* 要求调度对应的NAPI实例 */
-
__napi_schedule(&adapter->napi);
-
}else{
-
/* this really should notif it does it is basically a
-
* bug, but not a hard error, so enable ints and continue */
-
if(!test_bit(__E1000_DOWN,&adapter->flags))
-
e1000_irq_enable(adapter);
-
}
-
-
return IRQ_HANDLED;
-
}
-
static void net_rx_action(struct softirq_action *h)
-
{
-
struct softnet_data *sd =&__get_cpu_var(softnet_data);
-
unsigned long time_limit = jiffies + 2;
-
int budget = netdev_budget;
-
void *have;
-
-
local_irq_disable();
-
while(!list_empty(&sd->poll_list)){
-
struct napi_struct *n;
-
int work, weight;
-
-
/*If softirq windowis exhuasted then punt.
-
* Allow this to run for 2 jiffies since which will allow
-
* an average latency of 1.5/HZ.
-
*/
-
if(unlikely(budget <= 0 || time_after(jiffies, time_limit)))
-
goto softnet_break;
-
-
local_irq_enable();
-
-
/* Even though interrupts have been re-enabled, this
-
* access is safe because interrupts can only add new
-
* entries to the tail of this list,and only ->poll()
-
* calls can remove this head entry from the list.
-
*/
-
/* 取得一个网卡的NAPI实例 */
-
n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
-
have = netpoll_poll_lock(n);
-
-
weight = n->weight;
-
-
/* This NAPI_STATE_SCHED test isfor avoiding a race
-
* with netpoll's poll_napi(). Only the entity which
-
* obtains the lock and sees NAPI_STATE_SCHED set will
-
* actually make the ->poll()call. Therefore we avoid
-
* accidently calling ->poll() when NAPI isnot scheduled.
-
*/
-
work = 0;
-
if(test_bit(NAPI_STATE_SCHED,&n->state)){
-
/* poll这个网卡 */
-
work = n->poll(n, weight);
-
trace_napi_poll(n);
-
}
-
-
WARN_ON_ONCE(work > weight);
-
-
budget -= work;
-
-
local_irq_disable();
-
-
/* Drivers must not modify the NAPI state if they
-
* consume the entire weight.In such cases this code
-
* still "owns" the NAPI instance and therefore can
-
* move the instance around on the list at-will.
-
*/
-
if(unlikely(work == weight)){
-
/* 该NAPI的weight消耗完毕,需要处理下一个 */
-
if(unlikely(napi_disable_pending(n))){
-
local_irq_enable();
-
napi_complete(n);
-
local_irq_disable();
-
}else
-
list_move_tail(&n->poll_list,&sd->poll_list);
-
}
-
-
netpoll_poll_unlock(have);
-
}
-
out:
-
net_rps_action_and_irq_enable(sd);
-
-
#ifdef CONFIG_NET_DMA
-
/*
-
* There may not be any more sk_buffs coming rightnow, so push
-
* any pending DMA copies to hardware
-
*/
-
dma_issue_pending_all();
-
#endif
-
-
return;
-
-
softnet_break:
-
sd->time_squeeze++;
-
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
-
goto out;
-
}
-
static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
-
struct e1000_rx_ring *rx_ring,
-
int*work_done,int work_to_do)
-
{
-
...... ......
-
i = rx_ring->next_to_clean;
-
rx_desc = E1000_RX_DESC(*rx_ring, i);
-
buffer_info =&rx_ring->buffer_info[i];
-
-
while(rx_desc->status & E1000_RXD_STAT_DD){
-
struct sk_buff *skb;
-
u8 status;
-
-
if(*work_done >= work_to_do) //如果已经poll到足够的包,可以跳出返回
-
break;
-
(*work_done)++;
-
rmb();/* read descriptor and rx_buffer_info after status DD */
-
status = rx_desc->status;
-
skb = buffer_info->skb;
-
buffer_info->skb =NULL;
-
/*
-
设置skb->pkt_type:PACKET_BROADCAST等;
-
即数据链路层协议类型
-
*/
-
skb->protocol = eth_type_trans(skb, netdev);
-
e1000_receive_skb(adapter, status, rx_desc->special, skb);
-
-
next_desc:
-
/* 处理下一个数据包 */
-
...... ......
-
}
-
...... ......
-
-
return cleaned;
-
}
-
static int __netif_receive_skb(struct sk_buff *skb)
-
{
-
struct packet_type *ptype,*pt_prev;
-
rx_handler_func_t *rx_handler;
-
struct net_device *orig_dev;
-
struct net_device *master;
-
struct net_device *null_or_orig;
-
struct net_device *orig_or_bond;
-
int ret = NET_RX_DROP;
-
__be16 type;
-
if(!netdev_tstamp_prequeue)
-
net_timestamp_check(skb);
-
if(vlan_tx_tag_present(skb)&& vlan_hwaccel_do_receive(skb))
-
return NET_RX_SUCCESS;
-
-
/*if we've gotten here through NAPI, check netpoll */
-
if(netpoll_receive_skb(skb))
-
return NET_RX_DROP;
-
if(!skb->skb_iif)
-
skb->skb_iif = skb->dev->ifindex;
-
/*
-
* bonding note: skbs received on inactive slaves should only
-
* be delivered to pkt handlers that are exact matches. Also
-
* the deliver_no_wcard flag will be set.If packet handlers
-
* are sensitive to duplicate packets these skbs will need to
-
* be dropped at the handler. The vlan accel path may have
-
* already set the deliver_no_wcard flag.
-
*/
-
/*关于网卡的bond的处理, 这个feature我只是了解,所以略过 */
-
null_or_orig =NULL;
-
orig_dev = skb->dev;
-
master = ACCESS_ONCE(orig_dev->master);
-
if(skb->deliver_no_wcard)
-
null_or_orig = orig_dev;
-
elseif(master){
-
if(skb_bond_should_drop(skb, master)){
-
skb->deliver_no_wcard = 1;
-
null_or_orig = orig_dev;/* deliver only exact match */
-
}else
-
skb->dev = master;
-
}
-
-
__this_cpu_inc(softnet_data.processed);
-
/* 初始化l3 header 和 l4 header 的地址*/
-
skb_reset_network_header(skb);
-
skb_reset_transport_header(skb);
-
/* 得到mac地址长度,准确来说是2层地址的长度 */
-
skb->mac_len = skb->network_header - skb->mac_header;
-
-
pt_prev =NULL;
-
-
rcu_read_lock();
-
-
/*
-
省略一些不太相关的代码
-
*/
-
...... ......
-
-
/*
-
通过2层协议类型作为key,得到相应链表。
-
*/
-
type = skb->protocol;
-
list_for_each_entry_rcu(ptype,
-
&ptype_base[ntohs(type)& PTYPE_HASH_MASK], list){
-
if(ptype->type == type &&(ptype->dev == null_or_orig ||
-
ptype->dev == skb->dev || ptype->dev == orig_dev ||
-
ptype->dev == orig_or_bond)){
-
if(pt_prev) //找到匹配的协议类型,上传给L3层
-
ret = deliver_skb(skb, pt_prev, orig_dev);
-
pt_prev = ptype;
-
}
-
}
-
-
if(pt_prev){
-
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
-
}else{
-
kfree_skb(skb);
-
/* Jamal,now you will not able to escape explaining
-
* me how you were going to use this.:-)
-
*/
-
ret = NET_RX_DROP;
-
}
-
-
out:
-
rcu_read_unlock();
-
return ret;
-
}