Kernel Implementation of the Host-to-Network Layer, Part 2 [Packet Reception in the Kernel: the NAPI Framework and the Traditional Framework]


1 The Traditional (Non-NAPI) Packet-Reception Framework in Detail
1.1 Initializing the per-CPU packet receive queues during network initialization
net_dev_init() initializes the per-CPU softnet_data structure and the pseudo network device (the backlog device) used by the traditional framework:
/*
 * Initialize the DEV module. At boot time this walks the device list and
 * unhooks any devices that fail to initialise (normally hardware not
 * present) and leaves us with a valid list of present and active devices.
 *
 *       This is called single threaded during boot, so no need
 *       to take the rtnl semaphore.
 */
static int __init net_dev_init(void)
{
 int i, rc = -ENOMEM;

 BUG_ON(!dev_boot_phase);

 if (dev_proc_init())     // create entries under /proc
  goto out;

 if (netdev_kobject_init())  // create the net subdirectory under /sys/class/, one entry per network device
  goto out;

 INIT_LIST_HEAD(&ptype_all);    // list head for handlers that receive every packet type
 for (i = 0; i < PTYPE_HASH_SIZE; i++)
  INIT_LIST_HEAD(&ptype_base[i]); // hash table of protocol-specific packet handlers

 if (register_pernet_subsys(&netdev_net_ops))
  goto out;

 /*
  * Initialise the per-CPU packet receive queues.
  */

 for_each_possible_cpu(i) {
  struct softnet_data *sd = &per_cpu(softnet_data, i);

  memset(sd, 0, sizeof(*sd));
  skb_queue_head_init(&sd->input_pkt_queue); // receive queue used by the traditional framework
  skb_queue_head_init(&sd->process_queue);
  sd->completion_queue = NULL;
  INIT_LIST_HEAD(&sd->poll_list);
  sd->output_queue = NULL;
  sd->output_queue_tailp = &sd->output_queue;
#ifdef CONFIG_RPS
  sd->csd.func = rps_trigger_softirq;
  sd->csd.info = sd;
  sd->csd.flags = 0;
  sd->cpu = i;
#endif

  sd->backlog.poll = process_backlog;   // poll function of the pseudo device used by the traditional framework
  sd->backlog.weight = weight_p;
  sd->backlog.gro_list = NULL;
  sd->backlog.gro_count = 0;
 }

 dev_boot_phase = 0;

 /* The loopback device is special if any other network devices
  * is present in a network namespace the loopback device must
  * be present. Since we now dynamically allocate and free the
  * loopback device ensure this invariant is maintained by
  * keeping the loopback device as the first device on the
  * list of network devices.  Ensuring the loopback devices
  * is the first device that appears and the last network device
  * that disappears.
  */
 if (register_pernet_device(&loopback_net_ops))
  goto out;

 if (register_pernet_device(&default_device_ops))
  goto out;

 open_softirq(NET_TX_SOFTIRQ, net_tx_action);  // register the TX softirq handler
 open_softirq(NET_RX_SOFTIRQ, net_rx_action);  // register the RX softirq handler
 // softirq_vec[nr].action = action; there are 10 softirqs in total

 hotcpu_notifier(dev_cpu_callback, 0);
 dst_init();
 dev_mcast_init();
 rc = 0;
out:
 return rc;
}
Note: when a packet is added to the per-CPU receive queue, it is __napi_schedule that hooks softnet_data->backlog onto softnet_data->poll_list; the call is easy to overlook, it happens inside enqueue_to_backlog() (shown in 1.2 below).
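For reference, the hook-up itself is only two lines. Below is a sketch of ____napi_schedule() as it appears in net/core/dev.c around this kernel version, paraphrased from memory, so the exact text may differ in your tree:

/* Sketch of ____napi_schedule() (net/core/dev.c), paraphrased from memory;
 * check your kernel sources for the exact body.
 * Called with local interrupts disabled. */
static inline void ____napi_schedule(struct softnet_data *sd,
                                     struct napi_struct *napi)
{
        /* hang the napi_struct (the backlog pseudo device, or a real NAPI
         * device) onto this CPU's poll list ... */
        list_add_tail(&napi->poll_list, &sd->poll_list);
        /* ... and make sure net_rx_action() runs soon */
        __raise_softirq_irqoff(NET_RX_SOFTIRQ);
}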
1.2 The hardware interrupt handler
Note: the interrupt handler of a NAPI-capable device differs from that of a traditional device (a NAPI handler disables the device's IRQ, puts the device on the CPU's receive poll list, and raises the softirq).
For a device using the traditional framework, the handler runs three short functions:
(1) The driver's interrupt handler net_interrupt runs and confirms the interrupt was caused by packet arrival.
(2) net_rx() allocates a new sk_buff, parses the link-layer header, and copies the packet contents from the NIC into the socket buffer.
(3) The generic netif_rx() is called to append the packet's socket buffer to the CPU's receive queue, then the interrupt exits.
Each function in detail:
(1) The driver's interrupt handler net_interrupt
After confirming that the interrupt was caused by packet arrival, it hands off to net_rx(). The driver in drivers/net/mac89x0.c looks like this:
static irqreturn_t net_interrupt(int irq, void *dev_id)
{
 struct net_device *dev = dev_id;
 struct net_local *lp;
 int ioaddr, status;

 if (dev == NULL) {
  printk ("net_interrupt(): irq %d for unknown device.\n", irq);
  return IRQ_NONE;
 }

 ioaddr = dev->base_addr;
 lp = netdev_priv(dev);

 /* we MUST read all the events out of the ISQ, otherwise we'll never
           get interrupted again.  As a consequence, we can't have any limit
           on the number of times we loop in the interrupt handler.  The
           hardware guarantees that eventually we'll run out of events.  Of
           course, if you're on a slow machine, and packets are arriving
           faster than you can read them off, you're screwed.  Hasta la
           vista, baby!  */
 while ((status = swab16(nubus_readw(dev->base_addr + ISQ_PORT)))) {
  if (net_debug > 4)printk("%s: event=%04x\n", dev->name, status);
  switch(status & ISQ_EVENT_MASK) {
  case ISQ_RECEIVER_EVENT:    // a packet has arrived: call net_rx() directly
   /* Got a packet(s). */
   net_rx(dev);
   break;
  case ISQ_TRANSMITTER_EVENT: // transmission complete
   lp->stats.tx_packets++;
   netif_wake_queue(dev);
   if ((status & TX_OK) == 0) lp->stats.tx_errors++;
   if (status & TX_LOST_CRS) lp->stats.tx_carrier_errors++;
   if (status & TX_SQE_ERROR) lp->stats.tx_heartbeat_errors++;
   if (status & TX_LATE_COL) lp->stats.tx_window_errors++;
   if (status & TX_16_COL) lp->stats.tx_aborted_errors++;
   break;
  case ISQ_BUFFER_EVENT:
   if (status & READY_FOR_TX) {
    /* we tried to transmit a packet earlier,
                                   but inexplicably ran out of buffers.
                                   That shouldn't happen since we only ever
                                   load one packet.  Shrug.  Do the right
                                   thing anyway. */
    netif_wake_queue(dev);
   }
   if (status & TX_UNDERRUN) {
    if (net_debug > 0) printk("%s: transmit underrun\n", dev->name);
                                lp->send_underrun++;
                                if (lp->send_underrun == 3) lp->send_cmd = TX_AFTER_381;
                                else if (lp->send_underrun == 6) lp->send_cmd = TX_AFTER_ALL;
                        }
   break;
  case ISQ_RX_MISS_EVENT:
   lp->stats.rx_missed_errors += (status >>6);
   break;
  case ISQ_TX_COL_EVENT:
   lp->stats.collisions += (status >>6);
   break;
  }
 }
 return IRQ_HANDLED;
}
(2) net_rx(struct net_device *dev)
This function allocates a new sk_buff, parses the link-layer header, and copies the packet contents from the NIC into the buffer. [For a NAPI device this copy is done by DMA, without CPU involvement.]
/* We have a good packet(s), get it/them out of the buffers. */
static void
net_rx(struct net_device *dev)
{
 struct net_local *lp = netdev_priv(dev); // private data placed right after struct net_device:
                                          // ring-buffer read/write state, packet counters, locks, etc.
 struct sk_buff *skb;
 int status, length;

 int ioaddr = dev->base_addr;
 status = readword(ioaddr, RX_FRAME_PORT);  // read status from the NIC
 length = readword(ioaddr, RX_FRAME_PORT);

 if ((status & RX_OK) == 0) {
  count_rx_errors(status, lp);
  return;
 }

 /* Malloc up new buffer. */
 skb = dev_alloc_skb(length + 2); // allocate a new socket buffer
 if (skb == NULL) {
#if 0  /* Again, this seems a cruel thing to do */
  printk(KERN_WARNING "%s: Memory squeeze, dropping packet.\n", dev->name);
#endif
  lp->stats.rx_dropped++;
  return;
 }
 skb_reserve(skb, 2); /* longword align L3 header */

 readwords(ioaddr, RX_FRAME_PORT, skb_put(skb, length), length >> 1);
 if (length & 1)
  skb->data[length-1] = readword(ioaddr, RX_FRAME_PORT); // read the packet payload from the NIC

 if (net_debug > 3) {
  printk( "%s: received %d byte packet of type %x\n",
   dev->name, length,
   (skb->data[ETH_ALEN+ETH_ALEN] << 8) | skb->data[ETH_ALEN+ETH_ALEN+1]);
 }

 skb->protocol = eth_type_trans(skb, dev);
 // parse the link-layer header: skb->protocol becomes the link-layer type
 // (ETH_P_802_3, ETH_P_AX25, ...) and skb->pkt_type is set to
 // PACKET_BROADCAST, PACKET_MULTICAST, PACKET_OTHERHOST, etc.
 netif_rx(skb); // put the packet on the per-CPU receive queue; interrupt context ends here
 lp->stats.rx_packets++;  // per-device statistics
 lp->stats.rx_bytes += length;
}
Link-layer header analysis:
The link-layer (Ethernet) header is 14 bytes in total:
struct ethhdr {
 unsigned char h_dest[ETH_ALEN];   /* destination MAC address */
 unsigned char h_source[ETH_ALEN]; /* source MAC address */
 __be16        h_proto;            /* link-layer protocol type */
};
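To make the 14-byte layout concrete, here is a small standalone userspace sketch (the frame bytes are made up for the example) that reads the header fields the same way the kernel does:

/* Userspace illustration of the 14-byte Ethernet header layout.
 * The frame contents below are invented for the example. */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>   /* ntohs() */

#define ETH_ALEN 6

struct ethhdr {
        uint8_t  h_dest[ETH_ALEN];    /* destination MAC address */
        uint8_t  h_source[ETH_ALEN];  /* source MAC address */
        uint16_t h_proto;             /* protocol type, big endian on the wire */
} __attribute__((packed));

int main(void)
{
        /* 14 header bytes followed by a truncated payload: an IPv4 frame */
        uint8_t frame[] = {
                0xff,0xff,0xff,0xff,0xff,0xff,   /* h_dest: broadcast */
                0x00,0x11,0x22,0x33,0x44,0x55,   /* h_source */
                0x08,0x00,                       /* h_proto = ETH_P_IP */
                0x45,0x00                        /* start of the IP header */
        };
        const struct ethhdr *eth = (const struct ethhdr *)frame;

        printf("dest %02x:%02x:..., proto 0x%04x\n",
               eth->h_dest[0], eth->h_dest[1], ntohs(eth->h_proto));
        /* broadcast/multicast test used by eth_type_trans(): lowest bit of h_dest[0] */
        printf("multicast/broadcast bit: %d\n", eth->h_dest[0] & 0x01);
        return 0;
}

The kernel's own implementation, eth_type_trans(), follows: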
/**
 * eth_type_trans - determine the packet's protocol ID.
 * @skb: received socket data
 * @dev: receiving network device
 *
 * The rule here is that we
 * assume 802.3 if the type field is short enough to be a length.
 * This is normal practice and works for any 'now in use' protocol.
 */
__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
{
 struct ethhdr *eth;
 unsigned char *rawp;

 skb->dev = dev;
 skb_reset_mac_header(skb); // record the link-layer header: skb->mac_header = skb->data
 skb_pull_inline(skb, ETH_HLEN); // strip the link-layer header: skb->len -= 14; skb->data += 14
 eth = eth_hdr(skb);  // link-layer header, i.e. (struct ethhdr *)skb->mac_header
 if (unlikely(is_multicast_ether_addr(eth->h_dest))) { // multicast if h_dest[0] & 0x01
  if (!compare_ether_addr_64bits(eth->h_dest, dev->broadcast))
   skb->pkt_type = PACKET_BROADCAST; // mark the buffer as broadcast or multicast
  else
   skb->pkt_type = PACKET_MULTICAST;
 }

 /*
  *      This ALLMULTI check should be redundant by 1.4
  *      so don't forget to remove it.
  *
  *      Seems, you forgot to remove it. All silly devices
  *      seems to set IFF_PROMISC.
  */

 else if (1 /*dev->flags&IFF_PROMISC */ ) {
  if (unlikely(compare_ether_addr_64bits(eth->h_dest, dev->dev_addr)))
   skb->pkt_type = PACKET_OTHERHOST;
 }

 /*
  * Some variants of DSA tagging don't have an ethertype field
  * at all, so we check here whether one of those tagging
  * variants has been configured on the receiving interface,
  * and if so, set skb->protocol without looking at the packet.
  */
 if (netdev_uses_dsa_tags(dev))
  return htons(ETH_P_DSA);
 if (netdev_uses_trailer_tags(dev))
  return htons(ETH_P_TRAILER);

 if (ntohs(eth->h_proto) >= 1536)
  return eth->h_proto;

 rawp = skb->data;

 /*
  *      This is a magic hack to spot IPX packets. Older Novell breaks
  *      the protocol design and runs IPX over 802.3 without an 802.2 LLC
  *      layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
  *      won't work for fault tolerant netware but does for the rest.
  */
 if (*(unsigned short *)rawp == 0xFFFF)
  return htons(ETH_P_802_3);

 /*
  *      Real 802.2 LLC
  */
 return htons(ETH_P_802_2);
}
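The last part of eth_type_trans() encodes the classic rule that an h_proto value of at least 1536 (0x0600) is an EtherType, while smaller values are 802.3 length fields. A tiny userspace sketch of that rule (the helper name is made up for the example):

/* Userspace sketch of the EtherType-vs-length rule applied by eth_type_trans():
 * values >= 1536 (0x0600) are protocol IDs, smaller values are 802.3 lengths. */
#include <stdio.h>
#include <stdint.h>

static const char *classify_h_proto(uint16_t h_proto_host_order)
{
        if (h_proto_host_order >= 1536)
                return "EtherType (e.g. 0x0800 = IPv4, 0x0806 = ARP)";
        return "802.3 length field -> ETH_P_802_3 / ETH_P_802_2";
}

int main(void)
{
        uint16_t samples[] = { 0x0800, 0x0806, 0x0060, 0x05dc };
        unsigned i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
                printf("0x%04x -> %s\n", samples[i], classify_h_proto(samples[i]));
        return 0;
}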
(3) The generic netif_rx() moves the packet onto the CPU's receive queue, then the interrupt exits.
/**
 * netif_rx - post buffer to the network code
 * @skb: buffer to post
 *
 * This function receives a packet from a device driver and queues it for
 * the upper (protocol) levels to process.  It always succeeds. The buffer
 * may be dropped during processing for congestion control or by the
 * protocol layers.
 *
 * return values:
 * NET_RX_SUCCESS (no congestion)
 * NET_RX_DROP     (packet was dropped)
 *
 */

int netif_rx(struct sk_buff *skb)
{
 int ret;

 /* if netpoll wants it, pretend we never saw it */
 if (netpoll_rx(skb))
  return NET_RX_DROP;

 if (netdev_tstamp_prequeue)
  net_timestamp_check(skb); // stamp skb->tstamp via ktime_get_real()

#ifdef CONFIG_RPS
 {
  struct rps_dev_flow voidflow, *rflow = &voidflow;
  int cpu;

  rcu_read_lock();

  cpu = get_rps_cpu(skb->dev, skb, &rflow);
  if (cpu < 0)
   cpu = smp_processor_id();

  ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);

  rcu_read_unlock();
 }
#else
 {
  unsigned int qtail;
  ret = enqueue_to_backlog(skb, get_cpu(), &qtail); // enqueue on this CPU's backlog queue
  put_cpu(); // re-enable preemption
 }
#endif
 return ret;
}
Enqueuing the packet on the per-CPU backlog queue:
/*
 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
 * queue (may be a remote CPU queue).
 */
static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
         unsigned int *qtail)
{
 struct softnet_data *sd;
 unsigned long flags;

 sd = &per_cpu(softnet_data, cpu);   // the target CPU's receive queue structure

 local_irq_save(flags);

 rps_lock(sd);
 if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) { // queue length limit not exceeded
  if (skb_queue_len(&sd->input_pkt_queue)) {  // the queue already holds packets
enqueue:
   __skb_queue_tail(&sd->input_pkt_queue, skb); // append at the tail of the queue
   input_queue_tail_incr_save(sd, qtail);
   rps_unlock(sd);
   local_irq_restore(flags);
   return NET_RX_SUCCESS;
  }

  /* Schedule NAPI for backlog device
   * We can use non atomic operation since we own the queue lock
   */  
  // If this is the first packet added to input_pkt_queue and the backlog pseudo
  // device is not yet on poll_list, hook the traditional framework into NAPI
  // (this is the __napi_schedule hook-up mentioned in the note in 1.1 -- easy to miss):
  if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
   if (!rps_ipi_queued(sd))
    ____napi_schedule(sd, &sd->backlog); // put the backlog pseudo device on this CPU's poll list
  }
  goto enqueue;
 }

 sd->dropped++;
 rps_unlock(sd);

 local_irq_restore(flags);

 kfree_skb(skb);
 return NET_RX_DROP;
}
1.3 The softirq handler
To let the interrupted process resume as quickly as possible, interrupt handling is split into two halves; the softirq handler runs at a suitable time after the hardware interrupt handler has finished. From the net_dev_init() call open_softirq(NET_RX_SOFTIRQ, net_rx_action) we know the softirq's handler is net_rx_action. [The softirq handler is the same for traditional and NAPI devices; what differs is each device's poll function, which matches that device's hardware interrupt handling.] The flow of net_rx_action:
static void net_rx_action(struct softirq_action *h)
{
 struct softnet_data *sd = &__get_cpu_var(softnet_data); // this CPU's receive queue structure
 unsigned long time_limit = jiffies + 2;
  // time limit: many packets may arrive on this CPU, so to keep other
  // processes responsive this run must not exceed 2 jiffies
 int budget = netdev_budget;
  // quantity limit: maximum number of packets handled per run; tunable via
  // /proc/sys/net/core/netdev_budget, default 300
 void *have;

 local_irq_disable();

 while (!list_empty(&sd->poll_list)) { // iterate over every device on this CPU's poll list
  struct napi_struct *n;
  int work, weight;

  /* If softirq window is exhuasted then punt.
   * Allow this to run for 2 jiffies since which will allow
   * an average latency of 1.5/HZ.
   */
  if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
   // budget exhausted or timed out: too many packets, so re-raise the softirq
   goto softnet_break;

  local_irq_enable();

  /* Even though interrupts have been re-enabled, this
   * access is safe because interrupts can only add new
   * entries to the tail of this list, and only ->poll()
   * calls can remove this head entry from the list.
   */
  n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list); // take the first device on the list

  have = netpoll_poll_lock(n);

  weight = n->weight;
  // with multiple NICs each device gets a weight so that fast NICs are served
  // preferentially; once a device has handled that many packets it is moved to
  // the tail of the list so the other devices are served too and none starves

  /* This NAPI_STATE_SCHED test is for avoiding a race
   * with netpoll's poll_napi().  Only the entity which
   * obtains the lock and sees NAPI_STATE_SCHED set will
   * actually make the ->poll() call.  Therefore we avoid
   * accidently calling ->poll() when NAPI is not scheduled.
   */
  work = 0;
  if (test_bit(NAPI_STATE_SCHED, &n->state)) { // the device really is scheduled on the list
   work = n->poll(n, weight);
   // call the device-specific poll function (implemented by the driver), which
   // passes packets up to the network layer; inside the poll function, if the
   // device drains all its packets without using up its quota it is idle, so
   // the driver re-enables the device IRQ, removes it from the poll list and
   // clears NAPI_STATE_SCHED
   trace_napi_poll(n);
  }
  }

  WARN_ON_ONCE(work > weight);

  budget -= work; // subtract the packets handled for this device from the total budget

  local_irq_disable();

  /* Drivers must not modify the NAPI state if they
   * consume the entire weight.  In such cases this code
   * still "owns" the NAPI instance and therefore can
   * move the instance around on the list at-will.
   */
  if (unlikely(work == weight)) { // the device used its whole quota: move it to the tail of the list
   if (unlikely(napi_disable_pending(n))) {
    // special case: the IRQ has not been re-enabled and the device is being
    // disabled (NAPI_STATE_DISABLE pending); it is taken off the poll list,
    // so any further packets arriving for it will be dropped
    local_irq_enable();
    napi_complete(n); // just remove the device from this CPU's poll list and clear NAPI_STATE_SCHED
    local_irq_disable();
   } else
    list_move_tail(&n->poll_list, &sd->poll_list); // just move the device to the tail of the list
  }

  netpoll_poll_unlock(have);
 }
out:
 net_rps_action_and_irq_enable(sd);

#ifdef CONFIG_NET_DMA
 /*
  * There may not be any more sk_buffs coming right now, so push
  * any pending DMA copies to hardware
  */
 dma_issue_pending_all();
#endif

 return;

softnet_break:
 sd->time_squeeze++;
 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
 goto out;
}
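Both the budget and the counter incremented on the softnet_break path (sd->time_squeeze) are visible from userspace: /proc/sys/net/core/netdev_budget, /proc/sys/net/core/netdev_max_backlog and /proc/net/softnet_stat are standard procfs entries, although the exact column layout of softnet_stat varies by kernel version. A small observation sketch:

/* Print the softirq budget, the backlog limit and the per-CPU softnet_data
 * counters.  /proc/net/softnet_stat columns are hex; on kernels of this
 * vintage the first three are roughly packets processed, dropped, and
 * time_squeeze (check your version's net/core/dev.c for the exact order). */
#include <stdio.h>

static void dump(const char *path)
{
        char line[512];
        FILE *f = fopen(path, "r");

        if (!f) {
                perror(path);
                return;
        }
        printf("--- %s ---\n", path);
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);
        fclose(f);
}

int main(void)
{
        dump("/proc/sys/net/core/netdev_budget");      /* default 300 */
        dump("/proc/sys/net/core/netdev_max_backlog"); /* backlog queue limit */
        dump("/proc/net/softnet_stat");                /* per-CPU counters */
        return 0;
}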
1.4 The poll function of a device using the traditional framework [process_backlog]
static int process_backlog(struct napi_struct *napi, int quota)  // quota = packet limit for this call
{
 int work = 0;
 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
 // the per-CPU receive queue structure that contains this backlog pseudo device;
 // a napi_struct is usually embedded in a larger, device-specific structure

#ifdef CONFIG_RPS
 /* Check if we have pending ipi, its better to send them now,
  * not waiting net_rx_action() end.
  */
 if (sd->rps_ipi_list) {
  local_irq_disable();
  net_rps_action_and_irq_enable(sd);
 }
#endif
 napi->weight = weight_p;
 local_irq_disable();
 while (work < quota) {  // loop until quota packets are handled or the queues are empty
  struct sk_buff *skb;
  unsigned int qlen;

  while ((skb = __skb_dequeue(&sd->process_queue))) { 
   local_irq_enable();
   __netif_receive_skb(skb);
   local_irq_disable();
   input_queue_head_incr(sd);
   if (++work >= quota) {  // quota reached: stop
    local_irq_enable();
    return work;
   }
  }

  rps_lock(sd);
  qlen = skb_queue_len(&sd->input_pkt_queue);
  if (qlen)
   skb_queue_splice_tail_init(&sd->input_pkt_queue, &sd->process_queue);
   // move the whole receive queue onto the processing queue, emptying
   // input_pkt_queue so packets arriving via the traditional framework can
   // still be queued while we process; process_queue may now hold more than quota packets

  if (qlen < quota - work) { // everything pending fits within the quota: take the backlog device off the poll list
   /*
    * Inline a custom version of __napi_complete().
    * only current cpu owns and manipulates this napi,
    * and NAPI_STATE_SCHED is the only possible flag set on backlog.
    * we can use a plain write instead of clear_bit(),
    * and we dont need an smp_mb() memory barrier.
    */
   list_del(&napi->poll_list);
   napi->state = 0;

   quota = work + qlen; // shrink the quota to the packets actually pending, so the loop exits once they are done
  }
  rps_unlock(sd);
 }
 local_irq_enable();

 return work;
}
Note: every sk_buff in Linux is linked via its next and prev pointers either onto a per-CPU receive queue or onto a device-specific transmit queue [packets placed on the transmit queue are sent out after a short interval]; that is how reception and transmission are carried out.
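For illustration, here is a minimal kernel-style sketch of the sk_buff_head API that input_pkt_queue and process_queue are built on (it assumes a kernel-module context; the queue and function names are made up):

/* Sketch of the sk_buff_head API used for input_pkt_queue / process_queue. */
#include <linux/skbuff.h>

static struct sk_buff_head demo_queue;   /* hypothetical queue */

static void demo_queue_usage(struct sk_buff *skb)
{
        skb_queue_head_init(&demo_queue);   /* empty the list, init its lock */

        /* lockless enqueue at the tail; the caller must serialize access,
         * as enqueue_to_backlog() does with rps_lock()/local_irq_save() */
        __skb_queue_tail(&demo_queue, skb);

        /* drain the queue again */
        while ((skb = __skb_dequeue(&demo_queue)) != NULL)
                kfree_skb(skb);
}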

2 The NAPI Packet-Reception Framework in Detail
2-1 Network device initialization
To use NAPI, the NAPI support option for the NIC must be enabled when the kernel is built; for the E1000 this is the CONFIG_E1000_NAPI macro. The E1000 initialization function, i.e. the usual probe method, lives in drivers/net/e1000/e1000_main.c:
  
static int __devinit e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
    --> pci_set_dma_mask(pdev, PCI_DMA_64BIT)                   set the DMA mask
    --> netdev = alloc_etherdev(sizeof(struct e1000_adapter))   allocate and initialize the net_device
    --> netdev->netdev_ops = &e1000_netdev_ops                  register the net_device operations
    --> netif_napi_add(netdev, &adapter->napi, e1000_clean, 64) register e1000_clean as the poll function
    --> register_netdev(netdev)                                 register the net_device with the kernel
Implementation of netif_napi_add:
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
  int (*poll)(struct napi_struct *, int), int weight)
{
 INIT_LIST_HEAD(&napi->poll_list);
 napi->gro_count = 0;
 napi->gro_list = NULL;
 napi->skb = NULL;
 napi->poll = poll;
 napi->weight = weight;
 list_add(&napi->dev_list, &dev->napi_list);
 napi->dev = dev;
#ifdef CONFIG_NETPOLL
 spin_lock_init(&napi->poll_lock);
 napi->poll_owner = -1;
#endif
 set_bit(NAPI_STATE_SCHED, &napi->state);
}
EXPORT_SYMBOL(netif_napi_add);
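A hypothetical driver probe sketch showing where netif_napi_add() fits (the mydrv_* names and the weight of 64 are illustrative, not taken from a real driver):

#include <linux/netdevice.h>
#include <linux/etherdevice.h>

struct mydrv_adapter {
        struct napi_struct napi;
        /* ... device-private state ... */
};

static int mydrv_poll(struct napi_struct *napi, int budget);  /* see 2-4 below */

static int mydrv_probe_napi(void)
{
        struct net_device *netdev;
        struct mydrv_adapter *adapter;

        netdev = alloc_etherdev(sizeof(*adapter));
        if (!netdev)
                return -ENOMEM;
        adapter = netdev_priv(netdev);

        /* register mydrv_poll as this device's NAPI poll function,
         * weight 64 as in the e1000 call chain above */
        netif_napi_add(netdev, &adapter->napi, mydrv_poll, 64);

        return register_netdev(netdev);
}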
2-2 Opening the network device
When the device is brought up from userspace (for example via ifconfig), the net_device's open function is called; it initializes the hardware registers and registers system resources [IRQ, DMA, I/O ports], and so on.
static int e1000_open(struct net_device *netdev)
  --> e1000_setup_tx_resources(adapter)   set up transmit resources
  --> e1000_setup_rx_resources(adapter)   set up receive DMA and other resources
  --> e1000_request_irq(struct e1000_adapter *adapter)   install the interrupt handler
      --> request_irq(adapter->pdev->irq, e1000_intr, IRQF_SHARED,
                      netdev->name, netdev)
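A hypothetical ndo_open sketch ending in the same request_irq() call (the mydrv_* names are made up; request_irq(), napi_enable() and netif_start_queue() are the real kernel APIs):

#include <linux/interrupt.h>
#include <linux/netdevice.h>

struct mydrv_adapter {
        struct napi_struct napi;
        /* ... */
};

static irqreturn_t mydrv_intr(int irq, void *data);   /* see 2-3 below */

static int mydrv_open(struct net_device *netdev)
{
        struct mydrv_adapter *adapter = netdev_priv(netdev);
        int err;

        /* shared IRQ line; the handler gets netdev back as dev_id */
        err = request_irq(netdev->irq, mydrv_intr, IRQF_SHARED,
                          netdev->name, netdev);
        if (err)
                return err;

        /* clear the NAPI_STATE_SCHED bit that netif_napi_add() set,
         * so the device can actually be scheduled */
        napi_enable(&adapter->napi);
        netif_start_queue(netdev);
        return 0;
}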
Internally, request_irq creates a new irq_node structure:
/*
 * This structure is used to chain together the ISRs for a particular
 * interrupt source (if it supports chaining).
 */
typedef struct irq_node {
 irqreturn_t (*handler)(int, void *);
 void  *dev_id;
 struct irq_node *next;
 unsigned long flags;
 const char *devname;
} irq_node_t;
which is then chained into the global irq_list array.
2-3 The hardware interrupt handler
static irqreturn_t e1000_intr(int irq, void *data)
  --> ew32(IMC, ~0), e1000_write_flush()       mask the device's interrupts
  --> napi_schedule_prep(&adapter->napi)       set napi->state to NAPI_STATE_SCHED
  --> __napi_schedule(&adapter->napi)          add the device to this CPU's poll list, then raise the softirq
      --> __raise_softirq_irqoff(NET_RX_SOFTIRQ)
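A hypothetical interrupt handler sketch mirroring the e1000_intr() chain above (mydrv_mask_irqs() stands in for the ew32(IMC, ~0) register write and is made up):

#include <linux/interrupt.h>
#include <linux/netdevice.h>

struct mydrv_adapter {
        struct napi_struct napi;
        /* ... */
};

static void mydrv_mask_irqs(struct mydrv_adapter *adapter);   /* hypothetical */

static irqreturn_t mydrv_intr(int irq, void *data)
{
        struct net_device *netdev = data;
        struct mydrv_adapter *adapter = netdev_priv(netdev);

        /* like ew32(IMC, ~0): no further RX interrupts until polling is done */
        mydrv_mask_irqs(adapter);

        if (napi_schedule_prep(&adapter->napi))   /* sets NAPI_STATE_SCHED */
                __napi_schedule(&adapter->napi);  /* put the device on this CPU's
                                                   * poll_list, raise NET_RX_SOFTIRQ */
        return IRQ_HANDLED;
}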
2-4 The softirq handler
net_rx_action(struct softirq_action *h)
  --> e1000_clean(napi, budget)   the device's poll function registered above; its core logic:
      if (work_done < budget) {
              e1000_set_itr(adapter);      /* adjust the interrupt throttle rate */
              napi_complete(napi);         /* remove the device from the CPU's poll
                                            * list and clear NAPI_STATE_SCHED */
              e1000_irq_enable(adapter);   /* re-enable the device's IRQ */
      }
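A hypothetical poll function sketch showing the contract net_rx_action() expects, following the e1000_clean() outline above (mydrv_rx_ring() and mydrv_unmask_irqs() are made-up helpers; mydrv_rx_ring() would hand at most budget packets to the stack via netif_receive_skb() and return how many it handled):

#include <linux/netdevice.h>

struct mydrv_adapter {
        struct napi_struct napi;
        /* ... */
};

static int  mydrv_rx_ring(struct mydrv_adapter *adapter, int budget);  /* hypothetical */
static void mydrv_unmask_irqs(struct mydrv_adapter *adapter);          /* hypothetical */

static int mydrv_poll(struct napi_struct *napi, int budget)
{
        struct mydrv_adapter *adapter =
                container_of(napi, struct mydrv_adapter, napi);
        int work_done = mydrv_rx_ring(adapter, budget);

        if (work_done < budget) {
                /* ring drained: leave the poll list, clear NAPI_STATE_SCHED,
                 * then let the hardware interrupt us again */
                napi_complete(napi);
                mydrv_unmask_irqs(adapter);
        }
        /* if work_done == budget we stay on the poll list and net_rx_action()
         * will either call us again or re-raise the softirq */
        return work_done;
}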
For this second part you can also refer to http://blog.163.com/s_xli1/blog/static/7835796220084148561437/ ; the code there is from an older kernel, but the explanation is quite detailed. Reading the kernel source directly is also a good reference.
3 Packet transmission
After data comes down from the network layer, the kernel selects a route for the packet, determines the destination's physical address, and fills in the link-layer header; it then calls dev_queue_xmit(struct sk_buff *skb) in net/core/dev.c, which hands the data to the device-specific transmit queue, from where the adapter-specific hard_start_xmit function of the net_device sends it out.
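For completeness, a hypothetical ndo_start_xmit sketch, i.e. the hook that dev_queue_xmit() eventually reaches for this device (mydrv_hw_tx() is a made-up helper that would copy the frame into the adapter's TX ring / DMA descriptors):

#include <linux/netdevice.h>

struct mydrv_adapter { /* ... */ };

static void mydrv_hw_tx(struct mydrv_adapter *adapter,
                        const void *data, unsigned int len);   /* hypothetical */

static netdev_tx_t mydrv_start_xmit(struct sk_buff *skb,
                                    struct net_device *netdev)
{
        struct mydrv_adapter *adapter = netdev_priv(netdev);

        mydrv_hw_tx(adapter, skb->data, skb->len);  /* push the frame to hardware */
        netdev->stats.tx_packets++;
        netdev->stats.tx_bytes += skb->len;
        dev_kfree_skb(skb);                         /* buffer no longer needed */
        return NETDEV_TX_OK;
}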