在e1000 网卡的probe函数中会调用netif_napi_add 来注册在第一次接收中断后的poll函数。
/* Register e1000_clean as the poll callback of this adapter's NAPI context.
 * The final argument, 64, is presumably the NAPI weight, i.e. the per-poll
 * budget later handed to the callback -- confirm against the kernel version
 * in use.
 */
netif_napi_add(netdev, &adapter->napi, e1000_clean, 64);
在e1000_setup_rx_resources 函数中会申请一块大小按4K向上对齐的DMA一致性内存,用来存放rx描述符
/*
 * e1000_setup_rx_resources - allocate the bookkeeping for one Rx ring
 * @adapter: adapter that owns the ring; its PCI device is used for the
 *           DMA allocation
 * @rxdr:    Rx ring to set up; rxdr->count must already hold the number
 *           of descriptors
 *
 * Allocates the zeroed buffer_info array (one struct e1000_rx_buffer per
 * descriptor) and a coherent DMA area for the descriptors, whose size is
 * rounded up to a multiple of 4096 bytes, then resets the ring indices.
 *
 * Return: 0 on success, -ENOMEM if either allocation fails. On failure the
 * buffer_info array is released and the pointer cleared.
 */
static int e1000_setup_rx_resources(struct e1000_adapter *adapter,
				    struct e1000_rx_ring *rxdr)
{
	struct pci_dev *pdev = adapter->pdev;
	int size, desc_len;

	size = sizeof(struct e1000_rx_buffer) * rxdr->count;
	rxdr->buffer_info = vzalloc(size);
	if (!rxdr->buffer_info)
		return -ENOMEM;

	desc_len = sizeof(struct e1000_rx_desc);

	/* Round up to nearest 4K */
	rxdr->size = rxdr->count * desc_len;
	rxdr->size = ALIGN(rxdr->size, 4096);

	/* Note carried over from the original text: the author suggests that
	 * pci_alloc_consistent() would be the better call here.
	 * NOTE(review): pci_alloc_consistent() is, as far as I know, a legacy
	 * PCI wrapper around this same DMA API -- confirm it still exists in
	 * the target kernel before following that suggestion.
	 */
	rxdr->desc = dma_alloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma,
					GFP_KERNEL);
	if (!rxdr->desc) {
		/* Undo the first allocation; do not leave a stale pointer. */
		vfree(rxdr->buffer_info);
		rxdr->buffer_info = NULL;
		return -ENOMEM;
	}
	memset(rxdr->desc, 0, rxdr->size);

	rxdr->next_to_clean = 0;
	rxdr->next_to_use = 0;
	rxdr->rx_skb_top = NULL;

	return 0;
}
在e1000_open->e1000_request_irq中会注册中断处理函数e1000_intr
/*
 * e1000_request_irq - attach e1000_intr to the adapter's interrupt line
 * @adapter: adapter whose pdev->irq is requested
 *
 * The line is requested with IRQF_SHARED under the net_device's name, and
 * the net_device itself is the cookie passed to the handler.
 *
 * Return: the value returned by request_irq(); a non-zero result is logged
 * before it is returned.
 */
static int e1000_request_irq(struct e1000_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int err;

	err = request_irq(adapter->pdev->irq, e1000_intr, IRQF_SHARED,
			  netdev->name, netdev);
	if (!err)
		return 0;

	e_err(probe, "Unable to allocate interrupt Error: %d\n", err);
	return err;
}
这样当第一次接收到rx的中断时,在e1000_intr 中通过napi_schedule_prep 来判断napi是否可以被调度
/*
 * e1000_intr - interrupt handler
 * @irq:  interrupt number (unused)
 * @data: the struct net_device that was registered with the IRQ
 *
 * Reads the ICR register and, if NAPI can be scheduled, resets the per-poll
 * Tx/Rx byte and packet counters and schedules the adapter's NAPI context.
 *
 * Return: IRQ_NONE when ICR reads back 0, IRQ_HANDLED otherwise.
 */
static irqreturn_t e1000_intr(int __always_unused irq, void *data)
{
	struct net_device *netdev = data;
	struct e1000_adapter *adapter = netdev_priv(netdev);
	/* Not referenced by name below; presumably picked up implicitly by
	 * the er32() macro, so it is kept exactly as in the original.
	 */
	struct e1000_hw *hw = &adapter->hw;
	u32 icr = er32(ICR);

	/* The line is requested as a shared IRQ, so an interrupt with no
	 * cause bits set is not ours and must not be claimed.
	 */
	if (!icr)
		return IRQ_NONE;

	/* NOTE(review): this excerpt does not mask the device interrupt
	 * before scheduling NAPI, although the poll routine re-enables it
	 * on completion -- check against the full driver.
	 */
	if (napi_schedule_prep(&adapter->napi)) {
		adapter->total_tx_bytes = 0;
		adapter->total_tx_packets = 0;
		adapter->total_rx_bytes = 0;
		adapter->total_rx_packets = 0;
		__napi_schedule(&adapter->napi);
	}

	return IRQ_HANDLED;
}
如果可以被调度的话,则调用__napi_schedule
/*
 * __napi_schedule - queue a NAPI context for polling on the current CPU
 * @n: NAPI context to queue
 *
 * Local interrupts are saved and disabled around the lookup of this CPU's
 * softnet_data and the call to ____napi_schedule(), then restored.
 */
void __napi_schedule(struct napi_struct *n)
{
	unsigned long irq_state;
	struct softnet_data *sd;

	local_irq_save(irq_state);
	sd = this_cpu_ptr(&softnet_data);
	____napi_schedule(sd, n);
	local_irq_restore(irq_state);
}
/*
 * Append @napi to the tail of @sd's poll_list, then raise NET_RX_SOFTIRQ.
 * The only caller shown in this file, __napi_schedule(), invokes this
 * between local_irq_save() and local_irq_restore().
 */
static inline void ____napi_schedule(struct softnet_data *sd,
struct napi_struct *napi)
{
list_add_tail(&napi->poll_list, &sd->poll_list);
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
这里会触发NET_RX_SOFTIRQ 软中断
而在net_dev_init中会初始化软件中断的处理函数
/* Install net_tx_action / net_rx_action as the handlers of the network
 * transmit and receive softirqs.
 */
open_softirq(NET_TX_SOFTIRQ, net_tx_action);
open_softirq(NET_RX_SOFTIRQ, net_rx_action);
这里的处理函数就是net_rx_action
/*
 * net_rx_action - handler installed for NET_RX_SOFTIRQ
 * @h: softirq action descriptor (not used in the body)
 *
 * Takes over the current CPU's softnet_data poll_list and polls the NAPI
 * instances on it one at a time via napi_poll(), until the list is empty,
 * the packet budget (netdev_budget) is used up, or the time limit derived
 * from netdev_budget_usecs has passed. Anything left over is put back on
 * the per-CPU list and the softirq is raised again.
 */
static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies +
usecs_to_jiffies(netdev_budget_usecs);
int budget = netdev_budget;
LIST_HEAD(list);
LIST_HEAD(repoll);
/* Move the pending entries to a private list with interrupts disabled;
 * sd->poll_list is left empty.
 */
local_irq_disable();
list_splice_init(&sd->poll_list, &list);
local_irq_enable();
for (;;) {
struct napi_struct *n;
if (list_empty(&list)) {
/* Nothing left to poll: leave directly unless RPS IPIs are waiting or
 * some instance was queued on repoll.
 */
if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
goto out;
break;
}
n = list_first_entry(&list, struct napi_struct, poll_list);
/* Poll the first instance on the list (for e1000 this ends up in its
 * poll function); keep going until the list is empty.
 */
budget -= napi_poll(n, &repoll);
/* If softirq window is exhausted then punt.
 * Allow this to run for 2 jiffies since which will allow
 * an average latency of 1.5/HZ.
 */
if (unlikely(budget <= 0 ||
time_after_eq(jiffies, time_limit))) {
sd->time_squeeze++;
break;
}
}
/* Rebuild sd->poll_list with interrupts disabled: unprocessed entries
 * first, then entries added to sd->poll_list in the meantime, then the
 * repoll entries.
 */
local_irq_disable();
list_splice_tail_init(&sd->poll_list, &list);
list_splice_tail(&repoll, &list);
list_splice(&list, &sd->poll_list);
/* Work remains: raise the softirq again so it is picked up later. */
if (!list_empty(&sd->poll_list))
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
/* Judging by its name, this also re-enables interrupts -- its body is not
 * shown here.
 */
net_rps_action_and_irq_enable(sd);
out:
__kfree_skb_flush();
}
/*
 * napi_poll - run one NAPI instance's poll callback
 * @n:      NAPI instance to poll; it is unlinked from the list it is on
 * @repoll: list for instances that need another pass (not used by the
 *          part of the function shown in this excerpt)
 *
 * Return: the amount of work reported by n->poll(), or 0 if the instance
 * was not in the scheduled state.
 */
static int napi_poll(struct napi_struct *n, struct list_head *repoll)
{
	void *have;
	int work, weight;

	list_del_init(&n->poll_list);

	have = netpoll_poll_lock(n);

	weight = n->weight;

	/* This NAPI_STATE_SCHED test is for avoiding a race
	 * with netpoll's poll_napi(). Only the entity which
	 * obtains the lock and sees NAPI_STATE_SCHED set will
	 * actually make the ->poll() call. Therefore we avoid
	 * accidentally calling ->poll() when NAPI is not scheduled.
	 */
	work = 0;
	if (test_bit(NAPI_STATE_SCHED, &n->state)) {
		/* This is where the driver's poll function finally runs
		 * (e1000_clean in the e1000 case).
		 */
		work = n->poll(n, weight);
		trace_napi_poll(n, work, weight);
	}

	/* NOTE(review): the upstream function presumably does more here
	 * (e.g. re-queueing on @repoll when the whole weight was used);
	 * that part is not in this excerpt and is not reconstructed.
	 */

	/* Release the netpoll lock taken above and report the work done;
	 * the caller subtracts the return value from its budget.
	 */
	netpoll_poll_unlock(have);

	return work;
}
/*
 * e1000_clean - NAPI poll callback of the e1000 driver
 * @napi:   NAPI context embedded in the adapter
 * @budget: maximum amount of Rx work allowed in this call
 *
 * Cleans Tx ring 0, then runs the adapter's clean_rx hook on Rx ring 0.
 * If Tx cleaning did not finish, the whole budget is reported as used so
 * that polling continues.
 *
 * Return: the work done; a value below @budget means polling mode was left.
 */
static int e1000_clean(struct napi_struct *napi, int budget)
{
	struct e1000_adapter *adapter =
		container_of(napi, struct e1000_adapter, napi);
	int work_done = 0;
	int tx_clean_complete;

	tx_clean_complete = e1000_clean_tx_irq(adapter, &adapter->tx_ring[0]);
	adapter->clean_rx(adapter, &adapter->rx_ring[0], &work_done, budget);

	if (!tx_clean_complete)
		work_done = budget;

	/* Budget fully consumed: stay in polling mode. */
	if (work_done >= budget)
		return work_done;

	if (likely(adapter->itr_setting & 3))
		e1000_set_itr(adapter);

	/* Leave polling mode through napi_complete_done(), then turn the
	 * device interrupt back on unless the adapter is marked down, so
	 * that it does not stay disabled for too long.
	 */
	napi_complete_done(napi, work_done);
	if (!test_bit(__E1000_DOWN, &adapter->flags))
		e1000_irq_enable(adapter);

	return work_done;
}
/* Register e1000_clean as the poll callback of this adapter's NAPI context.
 * The final argument, 64, is presumably the NAPI weight, i.e. the per-poll
 * budget later handed to the callback -- confirm against the kernel version
 * in use.
 */
netif_napi_add(netdev, &adapter->napi, e1000_clean, 64);
在e1000_setup_rx_resources 函数中会申请一块大小按4K向上对齐的DMA一致性内存,用来存放rx描述符
/*
 * e1000_setup_rx_resources - allocate the bookkeeping for one Rx ring
 * @adapter: adapter that owns the ring; its PCI device is used for the
 *           DMA allocation
 * @rxdr:    Rx ring to set up; rxdr->count must already hold the number
 *           of descriptors
 *
 * Allocates the zeroed buffer_info array (one struct e1000_rx_buffer per
 * descriptor) and a coherent DMA area for the descriptors, whose size is
 * rounded up to a multiple of 4096 bytes, then resets the ring indices.
 *
 * Return: 0 on success, -ENOMEM if either allocation fails. On failure the
 * buffer_info array is released and the pointer cleared.
 */
static int e1000_setup_rx_resources(struct e1000_adapter *adapter,
				    struct e1000_rx_ring *rxdr)
{
	struct pci_dev *pdev = adapter->pdev;
	int size, desc_len;

	size = sizeof(struct e1000_rx_buffer) * rxdr->count;
	rxdr->buffer_info = vzalloc(size);
	if (!rxdr->buffer_info)
		return -ENOMEM;

	desc_len = sizeof(struct e1000_rx_desc);

	/* Round up to nearest 4K */
	rxdr->size = rxdr->count * desc_len;
	rxdr->size = ALIGN(rxdr->size, 4096);

	/* Note carried over from the original text: the author suggests that
	 * pci_alloc_consistent() would be the better call here.
	 * NOTE(review): pci_alloc_consistent() is, as far as I know, a legacy
	 * PCI wrapper around this same DMA API -- confirm it still exists in
	 * the target kernel before following that suggestion.
	 */
	rxdr->desc = dma_alloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma,
					GFP_KERNEL);
	if (!rxdr->desc) {
		/* Undo the first allocation; do not leave a stale pointer. */
		vfree(rxdr->buffer_info);
		rxdr->buffer_info = NULL;
		return -ENOMEM;
	}
	memset(rxdr->desc, 0, rxdr->size);

	rxdr->next_to_clean = 0;
	rxdr->next_to_use = 0;
	rxdr->rx_skb_top = NULL;

	return 0;
}
在e1000_open->e1000_request_irq中会注册中断处理函数e1000_intr
/*
 * e1000_request_irq - attach e1000_intr to the adapter's interrupt line
 * @adapter: adapter whose pdev->irq is requested
 *
 * The line is requested with IRQF_SHARED under the net_device's name, and
 * the net_device itself is the cookie passed to the handler.
 *
 * Return: the value returned by request_irq(); a non-zero result is logged
 * before it is returned.
 */
static int e1000_request_irq(struct e1000_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int err;

	err = request_irq(adapter->pdev->irq, e1000_intr, IRQF_SHARED,
			  netdev->name, netdev);
	if (!err)
		return 0;

	e_err(probe, "Unable to allocate interrupt Error: %d\n", err);
	return err;
}
这样当第一次接收到rx的中断时,在e1000_intr 中通过napi_schedule_prep 来判断napi是否可以被调度
/*
 * e1000_intr - interrupt handler
 * @irq:  interrupt number (unused)
 * @data: the struct net_device that was registered with the IRQ
 *
 * Reads the ICR register and, if NAPI can be scheduled, resets the per-poll
 * Tx/Rx byte and packet counters and schedules the adapter's NAPI context.
 *
 * Return: IRQ_NONE when ICR reads back 0, IRQ_HANDLED otherwise.
 */
static irqreturn_t e1000_intr(int __always_unused irq, void *data)
{
	struct net_device *netdev = data;
	struct e1000_adapter *adapter = netdev_priv(netdev);
	/* Not referenced by name below; presumably picked up implicitly by
	 * the er32() macro, so it is kept exactly as in the original.
	 */
	struct e1000_hw *hw = &adapter->hw;
	u32 icr = er32(ICR);

	/* The line is requested as a shared IRQ, so an interrupt with no
	 * cause bits set is not ours and must not be claimed.
	 */
	if (!icr)
		return IRQ_NONE;

	/* NOTE(review): this excerpt does not mask the device interrupt
	 * before scheduling NAPI, although the poll routine re-enables it
	 * on completion -- check against the full driver.
	 */
	if (napi_schedule_prep(&adapter->napi)) {
		adapter->total_tx_bytes = 0;
		adapter->total_tx_packets = 0;
		adapter->total_rx_bytes = 0;
		adapter->total_rx_packets = 0;
		__napi_schedule(&adapter->napi);
	}

	return IRQ_HANDLED;
}
如果可以被调度的话,则调用__napi_schedule
/*
 * __napi_schedule - queue a NAPI context for polling on the current CPU
 * @n: NAPI context to queue
 *
 * Local interrupts are saved and disabled around the lookup of this CPU's
 * softnet_data and the call to ____napi_schedule(), then restored.
 */
void __napi_schedule(struct napi_struct *n)
{
	unsigned long irq_state;
	struct softnet_data *sd;

	local_irq_save(irq_state);
	sd = this_cpu_ptr(&softnet_data);
	____napi_schedule(sd, n);
	local_irq_restore(irq_state);
}
/*
 * Append @napi to the tail of @sd's poll_list, then raise NET_RX_SOFTIRQ.
 * The only caller shown in this file, __napi_schedule(), invokes this
 * between local_irq_save() and local_irq_restore().
 */
static inline void ____napi_schedule(struct softnet_data *sd,
struct napi_struct *napi)
{
list_add_tail(&napi->poll_list, &sd->poll_list);
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
这里会触发NET_RX_SOFTIRQ 软中断
而在net_dev_init中会初始化软件中断的处理函数
/* Install net_tx_action / net_rx_action as the handlers of the network
 * transmit and receive softirqs.
 */
open_softirq(NET_TX_SOFTIRQ, net_tx_action);
open_softirq(NET_RX_SOFTIRQ, net_rx_action);
这里的处理函数就是net_rx_action
/*
 * net_rx_action - handler installed for NET_RX_SOFTIRQ
 * @h: softirq action descriptor (not used in the body)
 *
 * Takes over the current CPU's softnet_data poll_list and polls the NAPI
 * instances on it one at a time via napi_poll(), until the list is empty,
 * the packet budget (netdev_budget) is used up, or the time limit derived
 * from netdev_budget_usecs has passed. Anything left over is put back on
 * the per-CPU list and the softirq is raised again.
 */
static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies +
usecs_to_jiffies(netdev_budget_usecs);
int budget = netdev_budget;
LIST_HEAD(list);
LIST_HEAD(repoll);
/* Move the pending entries to a private list with interrupts disabled;
 * sd->poll_list is left empty.
 */
local_irq_disable();
list_splice_init(&sd->poll_list, &list);
local_irq_enable();
for (;;) {
struct napi_struct *n;
if (list_empty(&list)) {
/* Nothing left to poll: leave directly unless RPS IPIs are waiting or
 * some instance was queued on repoll.
 */
if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
goto out;
break;
}
n = list_first_entry(&list, struct napi_struct, poll_list);
/* Poll the first instance on the list (for e1000 this ends up in its
 * poll function); keep going until the list is empty.
 */
budget -= napi_poll(n, &repoll);
/* If softirq window is exhausted then punt.
 * Allow this to run for 2 jiffies since which will allow
 * an average latency of 1.5/HZ.
 */
if (unlikely(budget <= 0 ||
time_after_eq(jiffies, time_limit))) {
sd->time_squeeze++;
break;
}
}
/* Rebuild sd->poll_list with interrupts disabled: unprocessed entries
 * first, then entries added to sd->poll_list in the meantime, then the
 * repoll entries.
 */
local_irq_disable();
list_splice_tail_init(&sd->poll_list, &list);
list_splice_tail(&repoll, &list);
list_splice(&list, &sd->poll_list);
/* Work remains: raise the softirq again so it is picked up later. */
if (!list_empty(&sd->poll_list))
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
/* Judging by its name, this also re-enables interrupts -- its body is not
 * shown here.
 */
net_rps_action_and_irq_enable(sd);
out:
__kfree_skb_flush();
}
/*
 * napi_poll - run one NAPI instance's poll callback
 * @n:      NAPI instance to poll; it is unlinked from the list it is on
 * @repoll: list for instances that need another pass (not used by the
 *          part of the function shown in this excerpt)
 *
 * Return: the amount of work reported by n->poll(), or 0 if the instance
 * was not in the scheduled state.
 */
static int napi_poll(struct napi_struct *n, struct list_head *repoll)
{
	void *have;
	int work, weight;

	list_del_init(&n->poll_list);

	have = netpoll_poll_lock(n);

	weight = n->weight;

	/* This NAPI_STATE_SCHED test is for avoiding a race
	 * with netpoll's poll_napi(). Only the entity which
	 * obtains the lock and sees NAPI_STATE_SCHED set will
	 * actually make the ->poll() call. Therefore we avoid
	 * accidentally calling ->poll() when NAPI is not scheduled.
	 */
	work = 0;
	if (test_bit(NAPI_STATE_SCHED, &n->state)) {
		/* This is where the driver's poll function finally runs
		 * (e1000_clean in the e1000 case).
		 */
		work = n->poll(n, weight);
		trace_napi_poll(n, work, weight);
	}

	/* NOTE(review): the upstream function presumably does more here
	 * (e.g. re-queueing on @repoll when the whole weight was used);
	 * that part is not in this excerpt and is not reconstructed.
	 */

	/* Release the netpoll lock taken above and report the work done;
	 * the caller subtracts the return value from its budget.
	 */
	netpoll_poll_unlock(have);

	return work;
}
/*
 * e1000_clean - NAPI poll callback of the e1000 driver
 * @napi:   NAPI context embedded in the adapter
 * @budget: maximum amount of Rx work allowed in this call
 *
 * Cleans Tx ring 0, then runs the adapter's clean_rx hook on Rx ring 0.
 * If Tx cleaning did not finish, the whole budget is reported as used so
 * that polling continues.
 *
 * Return: the work done; a value below @budget means polling mode was left.
 */
static int e1000_clean(struct napi_struct *napi, int budget)
{
	struct e1000_adapter *adapter =
		container_of(napi, struct e1000_adapter, napi);
	int work_done = 0;
	int tx_clean_complete;

	tx_clean_complete = e1000_clean_tx_irq(adapter, &adapter->tx_ring[0]);
	adapter->clean_rx(adapter, &adapter->rx_ring[0], &work_done, budget);

	if (!tx_clean_complete)
		work_done = budget;

	/* Budget fully consumed: stay in polling mode. */
	if (work_done >= budget)
		return work_done;

	if (likely(adapter->itr_setting & 3))
		e1000_set_itr(adapter);

	/* Leave polling mode through napi_complete_done(), then turn the
	 * device interrupt back on unless the adapter is marked down, so
	 * that it does not stay disabled for too long.
	 */
	napi_complete_done(napi, work_done);
	if (!test_bit(__E1000_DOWN, &adapter->flags))
		e1000_irq_enable(adapter);

	return work_done;
}