I. NAPI
1. When the NIC receives a frame, it places the frame into its own receive ring queue (a doubly linked list of buffer descriptors) and raises a hardware interrupt.
2. In the interrupt handler the driver calls the scheduling function napi_schedule(), which hangs the device's napi_struct on the poll_list of the current CPU's softnet_data and calls __raise_softirq_irqoff() to raise the softirq.
3. The softirq handler net_rx_action() walks the poll_list (hard interrupts are disabled only briefly while the list is spliced off) and calls each driver's poll function to process the frames in the NIC's ring queue (allocating fresh SKBs and pulling the filled SKBs off the ring, since the ring must keep room for newly arriving data), then hands each frame to the protocol stack via netif_receive_skb().
Taking the e100 NIC as an example:
e100: e100_rx_alloc_list() sets up the DMA ring buffer, e100_rx_alloc_skb() allocates the SKBs, e100_poll() calls e100_rx_clean(), which walks the ring and calls e100_rx_indicate() for each received frame, and netif_receive_skb() hands it to the upper layers. (A generic driver-side sketch of the same flow follows below.)
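To make the flow concrete, here is a minimal, hypothetical driver-side sketch. The foo_* names, struct foo_priv and the register helpers are made up for illustration; napi_schedule(), napi_complete(), netif_receive_skb() and container_of() are real kernel APIs.

struct foo_priv {                               /* made-up driver private data */
    struct napi_struct napi;
    /* ring buffer bookkeeping omitted */
};

/* Hard-IRQ side: do almost nothing, just mask the device and schedule NAPI. */
static irqreturn_t foo_interrupt(int irq, void *dev_id)
{
    struct foo_priv *priv = dev_id;

    foo_disable_rx_irq(priv);           /* made-up register helper */
    napi_schedule(&priv->napi);         /* step 2: hang napi_struct on this CPU's poll_list */
    return IRQ_HANDLED;
}

/* Softirq side: drain up to `budget` frames from the ring. */
static int foo_poll(struct napi_struct *napi, int budget)
{
    struct foo_priv *priv = container_of(napi, struct foo_priv, napi);
    int work = 0;

    while (work < budget) {
        struct sk_buff *skb = foo_fetch_skb(priv);  /* made-up: take one frame off the ring */
        if (!skb)
            break;
        netif_receive_skb(skb);         /* step 3: hand the frame to the protocol stack */
        work++;
    }

    if (work < budget) {                /* ring drained: leave polled mode, re-enable the IRQ */
        napi_complete(napi);
        foo_enable_rx_irq(priv);
    }
    return work;
}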
net_rx_action(): walks the poll_list.
napi_schedule(): the NAPI scheduling function; adds the device's napi_struct instance to the poll_list of the current CPU's softnet_data.
__raise_softirq_irqoff(): sets the NET_RX_SOFTIRQ flag to trigger the softirq.
open_softirq(NET_RX_SOFTIRQ, net_rx_action): registers net_rx_action() as the receive softirq handler (the transmit side is registered with open_softirq(NET_TX_SOFTIRQ, net_tx_action)).
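For reference, both handlers are registered at boot time in net_dev_init() in net/core/dev.c; trimmed to the two relevant calls:

static int __init net_dev_init(void)
{
    ...
    open_softirq(NET_TX_SOFTIRQ, net_tx_action);
    open_softirq(NET_RX_SOFTIRQ, net_rx_action);
    ...
}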
1. The softnet_data structure
The per-CPU structure for processing received/transmitted packets. Each CPU has its own queues for handling incoming frames; one CPU can serve traffic from several NICs, and different CPUs do not interfere with each other.
The structure has grown quite complex over time; the few commented fields below are the ones that matter here.
/*
* Incoming packets are placed on per-cpu queues
*/
struct softnet_data {
    struct list_head    poll_list;      /* doubly linked list of devices with frames to process (NAPI) */
    struct sk_buff_head process_queue;

    /* stats */
    unsigned int        processed;
    unsigned int        time_squeeze;
    unsigned int        cpu_collision;
    unsigned int        received_rps;
#ifdef CONFIG_RPS
    struct softnet_data *rps_ipi_list;
#endif
#ifdef CONFIG_NET_FLOW_LIMIT
    struct sd_flow_limit __rcu *flow_limit;
#endif
    struct Qdisc        *output_queue;
    struct Qdisc        **output_queue_tailp;
    struct sk_buff      *completion_queue;

#ifdef CONFIG_RPS
    /* Elements below can be accessed between CPUs for RPS */
    struct call_single_data csd ____cacheline_aligned_in_smp;
    struct softnet_data *rps_ipi_next;
    unsigned int        cpu;
    unsigned int        input_queue_head;
    unsigned int        input_queue_tail;
#endif
    unsigned int        dropped;
    struct sk_buff_head input_pkt_queue;    /* queue of packets waiting to be processed (non-NAPI path) */
    struct napi_struct  backlog;
};
The list_head structure is interesting: it contains only two pointers, next and prev, and no payload at all. A quick search shows there is quite a lot behind it (intrusive lists, container_of), so it deserves a separate write-up later; marking it here for now. A tiny userspace demo follows the struct definition below. list_head references:
https://www.cnblogs.com/Cqlismy/p/11359196.html
https://blog.csdn.net/clam_zxf/article/details/87358200
https://blog.csdn.net/T146lLa128XX0x/article/details/80575800
/*
* Simple doubly linked list implementation.
*
* Some of the internal functions ("__xxx") are useful when
* manipulating whole lists rather than single entries, as
* sometimes we already know the next/prev entries and we can
* generate better code by using them directly rather than
* using the generic single-entry routines.
*/
struct list_head {
    struct list_head *next, *prev;
};
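To preview the idea before that separate write-up: the node carries no data, the data embeds the node, and container_of() recovers the enclosing structure from the node pointer. A tiny userspace sketch (hand-rolled container_of and list_add_tail, not the kernel's versions):

#include <stdio.h>
#include <stddef.h>

struct list_head {
    struct list_head *next, *prev;
};

/* The payload embeds the list node, not the other way around. */
struct frame {
    int len;
    struct list_head node;
};

/* Recover the containing structure from a pointer to its embedded node. */
#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

static void list_add_tail(struct list_head *new, struct list_head *head)
{
    new->prev = head->prev;
    new->next = head;
    head->prev->next = new;
    head->prev = new;
}

int main(void)
{
    struct list_head queue = { &queue, &queue };    /* empty list: points to itself */
    struct frame a = { .len = 60 }, b = { .len = 1500 };

    list_add_tail(&a.node, &queue);
    list_add_tail(&b.node, &queue);

    for (struct list_head *p = queue.next; p != &queue; p = p->next) {
        struct frame *f = container_of(p, struct frame, node);
        printf("frame len = %d\n", f->len);
    }
    return 0;
}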
2. The napi_struct structure
/*
* Structure for NAPI scheduling similar to tasklet but with weighting
*/
struct napi_struct {
    /* The poll_list must only be managed by the entity which
     * changes the state of the NAPI_STATE_SCHED bit. This means
     * whoever atomically sets that bit can add this napi_struct
     * to the per-cpu poll_list, and whoever clears that bit
     * can remove from the list right before clearing the bit.
     */
    struct list_head    poll_list;      /* linked into the CPU's softnet_data poll_list */

    unsigned long       state;          /* device state: has NAPI been scheduled/enabled? */
    int                 weight;         /* max number of packets the poll function may handle per call */
    unsigned int        gro_count;
    /* the device's polling function */
    int                 (*poll)(struct napi_struct *, int);
#ifdef CONFIG_NETPOLL
    spinlock_t          poll_lock;
    int                 poll_owner;
#endif
    struct net_device   *dev;
    struct sk_buff      *gro_list;
    struct sk_buff      *skb;
    struct hrtimer      timer;
    struct list_head    dev_list;
    struct hlist_node   napi_hash_node;
    unsigned int        napi_id;
};
3. napi_schedule()
The scheduling function. Wrappers all the way down: napi_schedule() calls __napi_schedule(), which in turn calls ____napi_schedule(). The end result is that the napi_struct gets hung on the CPU's poll_list.
/**
* napi_schedule - schedule NAPI poll
* @n: napi context
*
* Schedule NAPI poll routine to be called if it is not already
* running.
*/
static inline void napi_schedule(struct napi_struct *n)
{
    if (napi_schedule_prep(n))
        __napi_schedule(n);
}
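napi_schedule_prep() is the gate that prevents double scheduling: it is essentially an atomic test-and-set of NAPI_STATE_SCHED, so a device that is already being polled is not added to the poll_list twice. Roughly, for kernels of this vintage (exact form varies by version):

static inline bool napi_schedule_prep(struct napi_struct *n)
{
    /* only schedule if no disable is pending and the SCHED bit was not already set */
    return !napi_disable_pending(n) &&
           !test_and_set_bit(NAPI_STATE_SCHED, &n->state);
}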
4. __napi_schedule()
/**
* __napi_schedule - schedule for receive
* @n: entry to schedule
*
* The entry's receive function will be scheduled to run.
* Consider using __napi_schedule_irqoff() if hard irqs are masked.
*/
void __napi_schedule(struct napi_struct *n)
{
    unsigned long flags;

    local_irq_save(flags);
    ____napi_schedule(this_cpu_ptr(&softnet_data), n);
    local_irq_restore(flags);
}
5. ____napi_schedule()
Hangs the napi_struct on the CPU's poll_list and raises the softirq; when the softirq runs, net_rx_action() is called.
/* Called with irq disabled */
static inline void ____napi_schedule(struct softnet_data *sd,
                                     struct napi_struct *napi)
{
    list_add_tail(&napi->poll_list, &sd->poll_list);
    __raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
6. net_rx_action()
The softirq handler: it walks the poll_list (hard interrupts are disabled only while the list is spliced off) and calls each driver's poll function to process the frames in the NIC's ring queue.
static void net_rx_action(struct softirq_action *h)
{
    struct softnet_data *sd = this_cpu_ptr(&softnet_data);
    unsigned long time_limit = jiffies + 2;
    int budget = netdev_budget;
    LIST_HEAD(list);
    LIST_HEAD(repoll);

    local_irq_disable();
    list_splice_init(&sd->poll_list, &list);
    local_irq_enable();

    for (;;) {
        struct napi_struct *n;

        if (list_empty(&list)) {
            if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
                return;
            break;
        }

        n = list_first_entry(&list, struct napi_struct, poll_list);
        budget -= napi_poll(n, &repoll);

        /* If softirq window is exhausted then punt.
         * Allow this to run for 2 jiffies since which will allow
         * an average latency of 1.5/HZ.
         */
        if (unlikely(budget <= 0 ||
                     time_after_eq(jiffies, time_limit))) {
            sd->time_squeeze++;
            break;
        }
    }

    local_irq_disable();

    list_splice_tail_init(&sd->poll_list, &list);
    list_splice_tail(&repoll, &list);
    list_splice(&list, &sd->poll_list);
    if (!list_empty(&sd->poll_list))
        __raise_softirq_irqoff(NET_RX_SOFTIRQ);

    net_rps_action_and_irq_enable(sd);
}
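The napi_poll() helper called above is not shown in this post. A simplified sketch of what it does with each entry (netpoll locking and GRO flushing omitted, so this is not the exact kernel source):

/* Simplified sketch: poll one napi_struct and decide whether it must be polled again. */
static int napi_poll_sketch(struct napi_struct *n, struct list_head *repoll)
{
    int weight = n->weight;
    int work = 0;

    list_del_init(&n->poll_list);

    if (test_bit(NAPI_STATE_SCHED, &n->state))
        work = n->poll(n, weight);          /* driver's poll, e.g. e100_poll */

    /* A driver that used its full weight may still have packets pending:
     * keep it on the repoll list so net_rx_action() reschedules it. */
    if (work == weight)
        list_add_tail(&n->poll_list, repoll);

    return work;
}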
7. The driver-specific poll function processes the data frames
The polling routine itself; see the next post, an analysis of packet reception in the e100 driver.
8. netif_receive_skb()
Hands the packet up to the protocol stack.
static inline int netif_receive_skb(struct sk_buff *skb)
{
    return netif_receive_skb_sk(skb->sk, skb);
}
Other related functions:
1. netif_napi_add()
Initializes a napi_struct instance and links it into the device's napi_list.
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
                    int (*poll)(struct napi_struct *, int), int weight)
{
    /* napi->poll_list starts out with prev and next pointing to itself;
     * it is linked into softnet_data's poll_list later, when napi_schedule() is called */
    INIT_LIST_HEAD(&napi->poll_list);
    hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
    napi->timer.function = napi_watchdog;
    napi->gro_count = 0;
    napi->gro_list = NULL;
    napi->skb = NULL;
    napi->poll = poll;              /* the device's poll function */
    if (weight > NAPI_POLL_WEIGHT)
        pr_err_once("netif_napi_add() called with weight %d on device %s\n",
                    weight, dev->name);
    napi->weight = weight;          /* upper limit on packets the device may process per poll */
    /* napi->dev_list is linked into dev->napi_list, so the device can keep
     * track of (and later tear down) all of its NAPI instances */
    list_add(&napi->dev_list, &dev->napi_list);
    napi->dev = dev;                /* owning device */
#ifdef CONFIG_NETPOLL
    spin_lock_init(&napi->poll_lock);
    napi->poll_owner = -1;
#endif
    set_bit(NAPI_STATE_SCHED, &napi->state);    /* mark as scheduled until napi_enable() clears the bit */
}
What do these flags mean?
enum {
    NAPI_STATE_SCHED,       /* Poll is scheduled */
    NAPI_STATE_DISABLE,     /* Disable pending */
    NAPI_STATE_NPSVC,       /* Netpoll - don't dequeue from poll_list */
    NAPI_STATE_HASHED,      /* In NAPI hash */
};
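For completeness, this is roughly how a driver wires the pieces up at initialization time (hypothetical foo driver again; netif_napi_add(), napi_enable(), netdev_priv() and NAPI_POLL_WEIGHT are real kernel symbols, the rest are placeholders):

static int foo_probe(struct net_device *netdev)
{
    struct foo_priv *priv = netdev_priv(netdev);

    /* register the poll routine; NAPI_POLL_WEIGHT (64) is the usual weight */
    netif_napi_add(netdev, &priv->napi, foo_poll, NAPI_POLL_WEIGHT);
    return 0;
}

static int foo_open(struct net_device *netdev)
{
    struct foo_priv *priv = netdev_priv(netdev);

    /* clear NAPI_STATE_SCHED so the instance can actually be scheduled */
    napi_enable(&priv->napi);
    return 0;
}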
II. Non-NAPI
1. For every frame the NIC receives, it raises a hardware interrupt; the driver's xx_rx handler allocates a buffer for the frame, and netif_rx() places the skb into the input_pkt_queue of this CPU's softnet_data.
2. The same scheduling mechanism is then used: the CPU's backlog napi_struct is hung on the poll_list of its softnet_data via ____napi_schedule(), and __raise_softirq_irqoff() raises the softirq.
3. net_rx_action() walks the poll_list and calls the backlog's poll function, process_backlog(), which processes the frames in input_pkt_queue and hands them to the protocol stack via netif_receive_skb().
In the end, the NAPI and non-NAPI paths are not all that different (a driver-side sketch follows).
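A minimal sketch of the legacy path: the hard-IRQ handler itself builds the skb and hands it to netif_rx(). The foo_* helpers are made up; dev_alloc_skb(), skb_reserve(), skb_put(), eth_type_trans() and netif_rx() are real kernel APIs.

static irqreturn_t foo_legacy_interrupt(int irq, void *dev_id)
{
    struct net_device *dev = dev_id;
    unsigned int len = foo_read_pkt_len(dev);       /* made-up register read */
    struct sk_buff *skb = dev_alloc_skb(len + 2);

    if (!skb) {
        dev->stats.rx_dropped++;
        return IRQ_HANDLED;
    }

    skb_reserve(skb, 2);                            /* align the IP header */
    foo_copy_pkt(dev, skb_put(skb, len));           /* made-up: copy the frame out of the NIC */
    skb->protocol = eth_type_trans(skb, dev);

    netif_rx(skb);      /* queue on this CPU's input_pkt_queue and raise NET_RX_SOFTIRQ */
    return IRQ_HANDLED;
}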
1. netif_rx()
/**
* netif_rx - post buffer to the network code
* @skb: buffer to post
*
* This function receives a packet from a device driver and queues it for
* the upper (protocol) levels to process. It always succeeds. The buffer
* may be dropped during processing for congestion control or by the
* protocol layers.
*
* return values:
* NET_RX_SUCCESS (no congestion)
* NET_RX_DROP (packet was dropped)
*
*/
int netif_rx(struct sk_buff *skb)
{
    trace_netif_rx_entry(skb);

    return netif_rx_internal(skb);
}
2. netif_rx_internal()
static int netif_rx_internal(struct sk_buff *skb)
{
    int ret;

    net_timestamp_check(netdev_tstamp_prequeue, skb);

    trace_netif_rx(skb);
#ifdef CONFIG_RPS
    if (static_key_false(&rps_needed)) {
        struct rps_dev_flow voidflow, *rflow = &voidflow;
        int cpu;

        preempt_disable();
        rcu_read_lock();

        cpu = get_rps_cpu(skb->dev, skb, &rflow);
        if (cpu < 0)
            cpu = smp_processor_id();

        ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);

        rcu_read_unlock();
        preempt_enable();
    } else
#endif
    {
        unsigned int qtail;
        ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
        put_cpu();
    }
    return ret;
}
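Both branches end up in enqueue_to_backlog(), which queues the skb on the target CPU's input_pkt_queue and, if the backlog napi_struct is not already scheduled, hangs it on that CPU's poll_list. A heavily simplified sketch (flow-limit handling, RPS bookkeeping and locking details omitted, and the real function takes a qtail argument):

static int enqueue_to_backlog_sketch(struct sk_buff *skb, int cpu)
{
    struct softnet_data *sd = &per_cpu(softnet_data, cpu);
    unsigned long flags;

    local_irq_save(flags);
    if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
        __skb_queue_tail(&sd->input_pkt_queue, skb);
        /* schedule the backlog device if it is not already on the poll_list;
         * its poll function is process_backlog() */
        if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
            ____napi_schedule(sd, &sd->backlog);
        local_irq_restore(flags);
        return NET_RX_SUCCESS;
    }
    local_irq_restore(flags);

    sd->dropped++;
    kfree_skb(skb);
    return NET_RX_DROP;
}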
Finally, an overview diagram:
References:
https://www.cnblogs.com/muahao/p/10861771.html
https://blog.csdn.net/zhangskd/article/details/21627963
https://www.cnblogs.com/mfrbuaa/p/4642266.html