I. NAPI
1. When the NIC receives a frame, it places the frame into its own receive ring queue (a doubly linked list of buffer descriptors) and raises a hardware interrupt.
2. In the interrupt handler the driver calls the scheduling function napi_schedule(), which hangs the device's napi_struct on the poll_list of the current CPU's softnet_data and calls __raise_softirq_irqoff() to raise the softirq.
3. The softirq handler net_rx_action() walks the poll_list (hard interrupts are disabled only briefly while the list is spliced off) and calls each driver's poll function to process the frames in the NIC's ring queue (allocating fresh SKBs and pulling the filled SKBs off the ring, since the ring must keep room for newly arriving data), then hands each frame to the protocol stack via netif_receive_skb().
Taking the e100 NIC as an example:
e100: e100_rx_alloc_list() sets up the DMA ring buffer, e100_rx_alloc_skb() allocates the SKBs, e100_poll() calls e100_rx_clean(), which walks the ring and calls e100_rx_indicate() for each received frame, and netif_receive_skb() hands it to the upper layers. (A generic driver-side sketch of the same flow follows below.)
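To make the flow concrete, here is a minimal, hypothetical driver-side sketch. The foo_* names, struct foo_priv and the register helpers are made up for illustration; napi_schedule(), napi_complete(), netif_receive_skb() and container_of() are real kernel APIs.

struct foo_priv {                               /* made-up driver private data */
    struct napi_struct napi;
    /* ring buffer bookkeeping omitted */
};

/* Hard-IRQ side: do almost nothing, just mask the device and schedule NAPI. */
static irqreturn_t foo_interrupt(int irq, void *dev_id)
{
    struct foo_priv *priv = dev_id;

    foo_disable_rx_irq(priv);           /* made-up register helper */
    napi_schedule(&priv->napi);         /* step 2: hang napi_struct on this CPU's poll_list */
    return IRQ_HANDLED;
}

/* Softirq side: drain up to `budget` frames from the ring. */
static int foo_poll(struct napi_struct *napi, int budget)
{
    struct foo_priv *priv = container_of(napi, struct foo_priv, napi);
    int work = 0;

    while (work < budget) {
        struct sk_buff *skb = foo_fetch_skb(priv);  /* made-up: take one frame off the ring */
        if (!skb)
            break;
        netif_receive_skb(skb);         /* step 3: hand the frame to the protocol stack */
        work++;
    }

    if (work < budget) {                /* ring drained: leave polled mode, re-enable the IRQ */
        napi_complete(napi);
        foo_enable_rx_irq(priv);
    }
    return work;
}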
net_rx_action(): walks the poll_list.
napi_schedule(): the NAPI scheduling function; adds the device's napi_struct instance to the poll_list of the current CPU's softnet_data.
__raise_softirq_irqoff(): sets the NET_RX_SOFTIRQ flag to trigger the softirq.
open_softirq(NET_RX_SOFTIRQ, net_rx_action): registers net_rx_action() as the receive softirq handler (the transmit side is registered with open_softirq(NET_TX_SOFTIRQ, net_tx_action)).
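For reference, both handlers are registered at boot time in net_dev_init() in net/core/dev.c; trimmed to the two relevant calls:

static int __init net_dev_init(void)
{
    ...
    open_softirq(NET_TX_SOFTIRQ, net_tx_action);
    open_softirq(NET_RX_SOFTIRQ, net_rx_action);
    ...
}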
1. The softnet_data structure
The per-CPU structure for processing received/transmitted packets. Each CPU has its own queues for handling incoming frames; one CPU can serve traffic from several NICs, and different CPUs do not interfere with each other.
The structure has grown quite complex over time; the few commented fields below are the ones that matter here.
/*
* Incoming packets are placed on per-cpu queues
*/
struct softnet_data {
    struct list_head    poll_list;      /* doubly linked list of devices with frames to process (NAPI) */
    struct sk_buff_head process_queue;

    /* stats */
    unsigned int        processed;
    unsigned int        time_squeeze;
    unsigned int        cpu_collision;
    unsigned int        received_rps;
#ifdef CONFIG_RPS
    struct softnet_data *rps_ipi_list;
#endif
#ifdef CONFIG_NET_FLOW_LIMIT
    struct sd_flow_limit __rcu *flow_limit;
#endif
    struct Qdisc        *output_queue;
    struct Qdisc        **output_queue_tailp;
    struct sk_buff      *completion_queue;

#ifdef CONFIG_RPS
    /* Elements below can be accessed between CPUs for RPS */
    struct call_single_data csd ____cacheline_aligned_in_smp;
    struct softnet_data *rps_ipi_next;
    unsigned int        cpu;
    unsigned int        input_queue_head;
    unsigned int        input_queue_tail;
#endif
    unsigned int        dropped;
    struct sk_buff_head input_pkt_queue;    /* queue of packets waiting to be processed (non-NAPI path) */
    struct napi_struct  backlog;
};
The list_head structure is interesting: it contains only two pointers, next and prev, and no payload at all. A quick search shows there is quite a lot behind it (intrusive lists, container_of), so it deserves a separate write-up later; marking it here for now. A tiny userspace demo follows the struct definition below. list_head references:
https://www.cnblogs.com/Cqlismy/p/11359196.html
https://blog.csdn.net/clam_zxf/article/details/87358200
https://blog.csdn.net/T146lLa128XX0x/article/details/80575800
/*
* Simple doubly linked list implementation.
*
* Some of the internal functions ("__xxx") are useful when
* manipulating whole lists rather than single entries, as
* sometimes we already know the next/prev entries and we can
* generate better code by using them directly rather than
* using the generic single-entry routines.
*/
struct list_head {
    struct list_head *next, *prev;
};
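To preview the idea before that separate write-up: the node carries no data, the data embeds the node, and container_of() recovers the enclosing structure from the node pointer. A tiny userspace sketch (hand-rolled container_of and list_add_tail, not the kernel's versions):

#include <stdio.h>
#include <stddef.h>

struct list_head {
    struct list_head *next, *prev;
};

/* The payload embeds the list node, not the other way around. */
struct frame {
    int len;
    struct list_head node;
};

/* Recover the containing structure from a pointer to its embedded node. */
#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

static void list_add_tail(struct list_head *new, struct list_head *head)
{
    new->prev = head->prev;
    new->next = head;
    head->prev->next = new;
    head->prev = new;
}

int main(void)
{
    struct list_head queue = { &queue, &queue };    /* empty list: points to itself */
    struct frame a = { .len = 60 }, b = { .len = 1500 };

    list_add_tail(&a.node, &queue);
    list_add_tail(&b.node, &queue);

    for (struct list_head *p = queue.next; p != &queue; p = p->next) {
        struct frame *f = container_of(p, struct frame, node);
        printf("frame len = %d\n", f->len);
    }
    return 0;
}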
2. The napi_struct structure
/*
* Structure for NAPI scheduling similar to tasklet but with weighting
*/
struct napi_struct {
    /* The poll_list must only be managed by the entity which
     * changes the state of the NAPI_STATE_SCHED bit. This means
     * whoever atomically sets that bit can add this napi_struct
     * to the per-cpu poll_list, and whoever clears that bit
     * can remove from the list right before clearing the bit.
     */
    struct list_head    poll_list;      /* linked into the CPU's softnet_data poll_list */

    unsigned long       state;          /* device state: has NAPI been scheduled/enabled? */
    int                 weight;         /* max number of packets the poll function may handle per call */
    unsigned int        gro_count;
    /* the device's polling function */
    int                 (*poll)(struct napi_struct *, int);
#ifdef CONFIG_NETPOLL
    spinlock_t          poll_lock;
    int                 poll_owner;
#endif
    struct net_device   *dev;
    struct sk_buff      *gro_list;
    struct sk_buff      *skb;
    struct hrtimer      timer;
    struct list_head    dev_list;
    struct hlist_node   napi_hash_node;
    unsigned int        napi_id;
};
3. napi_schedule()
The scheduling function. Wrappers all the way down: napi_schedule() calls __napi_schedule(), which in turn calls ____napi_schedule(). The end result is that the napi_struct gets hung on the CPU's poll_list.
/**
* napi_schedule - schedule NAPI poll
* @n: napi context
*
* Schedule NAPI poll routine to be called if it is not already
* running.
*/
static inline void napi_schedule(struct napi_struct *n)
{
    if (napi_schedule_prep(n))
        __napi_schedule(n);
}
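napi_schedule_prep() is the gate that prevents double scheduling: it is essentially an atomic test-and-set of NAPI_STATE_SCHED, so a device that is already being polled is not added to the poll_list twice. Roughly, for kernels of this vintage (exact form varies by version):

static inline bool napi_schedule_prep(struct napi_struct *n)
{
    /* only schedule if no disable is pending and the SCHED bit was not already set */
    return !napi_disable_pending(n) &&
           !test_and_set_bit(NAPI_STATE_SCHED, &n->state);
}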
4. __napi_schedule()
/**
* __napi_schedule - schedule for receive
* @n: entry to schedule
*
* The entry's receive function will be scheduled to run.
* Consider using __napi_schedule_irqoff() if hard irqs are masked.
*/
void __napi_schedule(struct napi_struct *n)
{
    unsigned long flags;

    local_irq_save(flags);
    ____napi_schedule(this_cpu_ptr(&softnet_data), n);
    local_irq_restore(flags);
}
5. ____napi_schedule()
Hangs the napi_struct on the CPU's poll_list and raises the softirq; when the softirq runs, net_rx_action() is called.
/* Called with irq disabled */
static inline void ____napi_schedule(struct softnet_data *sd,
                                     struct napi_struct *napi)
{
    list_add_tail(&napi->poll_list, &sd->poll_list);
    __raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
6. net_rx_action()
The softirq handler: it walks the poll_list (hard interrupts are disabled only while the list is spliced off) and calls each driver's poll function to process the frames in the NIC's ring queue.
static void net_rx_action(struct softirq_action *h)
{
    struct softnet_data *sd = this_cpu_ptr(&softnet_data);
    unsigned long time_limit = jiffies + 2;
    int budget = netdev_budget;
    LIST_HEAD(list);
    LIST_HEAD(repoll);

    local_irq_disable();
    list_splice_init(&sd->poll_list, &list);
    local_irq_enable();

    for (;;) {
        struct napi_struct *n;

        if (list_empty(&list)) {
            if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
                return;
            break;
        }

        n = list_first_entry(&list, struct napi_struct, poll_list);
        budget -= napi_poll(n, &repoll);

        /* If softirq window is exhausted then punt.
         * Allow this to run for 2 jiffies since which will allow
         * an average latency of 1.5/HZ.
         */
        if (unlikely(budget <= 0 ||
                     time_after_eq(jiffies, time_limit))) {
            sd->time_squeeze++;
            break;
        }
    }

    local_irq_disable();

    list_splice_tail_init(&sd->poll_list, &list);
    list_splice_tail(&repoll, &list);
    list_splice(&list, &sd->poll_list);
    if (!list_empty(&sd->poll_list))
        __raise_softirq_irqoff(NET_RX_SOFTIRQ);

    net_rps_action_and_irq_enable(sd);
}
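The napi_poll() helper called above is not shown in this post. A simplified sketch of what it does with each entry (netpoll locking and GRO flushing omitted, so this is not the exact kernel source):

/* Simplified sketch: poll one napi_struct and decide whether it must be polled again. */
static int napi_poll_sketch(struct napi_struct *n, struct list_head *repoll)
{
    int weight = n->weight;
    int work = 0;

    list_del_init(&n->poll_list);

    if (test_bit(NAPI_STATE_SCHED, &n->state))
        work = n->poll(n, weight);          /* driver's poll, e.g. e100_poll */

    /* A driver that used its full weight may still have packets pending:
     * keep it on the repoll list so net_rx_action() reschedules it. */
    if (work == weight)
        list_add_tail(&n->poll_list, repoll);

    return work;
}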
7. The driver-specific poll function processes the data frames
The polling routine itself; see the next post, an analysis of packet reception in the e100 driver.
8. netif_receive_skb()
Hands the packet up to the protocol stack.
static inline int netif_receive_skb(struct sk_buff *skb)
{
    return netif_receive_skb_sk(skb->sk, skb);
}
Other related functions:
1. netif_napi_add()
Initializes a napi_struct instance and links it into the device's napi_list.
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
                    int (*poll)(struct napi_struct *, int), int weight)
{
    /* napi->poll_list starts out with prev and next pointing to itself;
     * it is linked into softnet_data's poll_list later, when napi_schedule() is called */
    INIT_LIST_HEAD(&napi->poll_list);
    hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
    napi->timer.function = napi_watchdog;
    napi->gro_count = 0;
    napi->gro_list = NULL;
    napi->skb = NULL;
    napi->poll = poll;              /* the device's poll function */
    if (weight > NAPI_POLL_WEIGHT)
        pr_err_once("netif_napi_add() called with weight %d on device %s\n",
                    weight, dev->name);
    napi->weight = weight;          /* upper limit on packets the device may process per poll */
    /* napi->dev_list is linked into dev->napi_list, so the device can keep
     * track of (and later tear down) all of its NAPI instances */
    list_add(&napi->dev_list, &dev->napi_list);
    napi->dev = dev;                /* owning device */
#ifdef CONFIG_NETPOLL
    spin_lock_init(&napi->poll_lock);
    napi->poll_owner = -1;
#endif
    set_bit(NAPI_STATE_SCHED, &napi->state);    /* mark as scheduled until napi_enable() clears the bit */
}
What do these flags mean?
enum {
    NAPI_STATE_SCHED,       /* Poll is scheduled */
    NAPI_STATE_DISABLE,     /* Disable pending */
    NAPI_STATE_NPSVC,       /* Netpoll - don't dequeue from poll_list */
    NAPI_STATE_HASHED,      /* In NAPI hash */
};
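For completeness, this is roughly how a driver wires the pieces up at initialization time (hypothetical foo driver again; netif_napi_add(), napi_enable(), netdev_priv() and NAPI_POLL_WEIGHT are real kernel symbols, the rest are placeholders):

static int foo_probe(struct net_device *netdev)
{
    struct foo_priv *priv = netdev_priv(netdev);

    /* register the poll routine; NAPI_POLL_WEIGHT (64) is the usual weight */
    netif_napi_add(netdev, &priv->napi, foo_poll, NAPI_POLL_WEIGHT);
    return 0;
}

static int foo_open(struct net_device *netdev)
{
    struct foo_priv *priv = netdev_priv(netdev);

    /* clear NAPI_STATE_SCHED so the instance can actually be scheduled */
    napi_enable(&priv->napi);
    return 0;
}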
II. Non-NAPI
1. For every frame the NIC receives, it raises a hardware interrupt; the driver's xx_rx handler allocates a buffer for the frame, and netif_rx() places the skb into the input_pkt_queue of this CPU's softnet_data.
2. The same scheduling mechanism is then used: the CPU's backlog napi_struct is hung on the poll_list of its softnet_data via ____napi_schedule(), and __raise_softirq_irqoff() raises the softirq.
3. net_rx_action() walks the poll_list and calls the backlog's poll function, process_backlog(), which processes the frames in input_pkt_queue and hands them to the protocol stack via netif_receive_skb().
In the end, the NAPI and non-NAPI paths are not all that different (a driver-side sketch follows).
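A minimal sketch of the legacy path: the hard-IRQ handler itself builds the skb and hands it to netif_rx(). The foo_* helpers are made up; dev_alloc_skb(), skb_reserve(), skb_put(), eth_type_trans() and netif_rx() are real kernel APIs.

static irqreturn_t foo_legacy_interrupt(int irq, void *dev_id)
{
    struct net_device *dev = dev_id;
    unsigned int len = foo_read_pkt_len(dev);       /* made-up register read */
    struct sk_buff *skb = dev_alloc_skb(len + 2);

    if (!skb) {
        dev->stats.rx_dropped++;
        return IRQ_HANDLED;
    }

    skb_reserve(skb, 2);                            /* align the IP header */
    foo_copy_pkt(dev, skb_put(skb, len));           /* made-up: copy the frame out of the NIC */
    skb->protocol = eth_type_trans(skb, dev);

    netif_rx(skb);      /* queue on this CPU's input_pkt_queue and raise NET_RX_SOFTIRQ */
    return IRQ_HANDLED;
}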
1. netif_rx()
/**
* netif_rx - post buffer to the network code
* @skb: buffer to post
*
* This function receives a packet from a device driver and queues it for
* the upper (protocol) levels to process. It always succeeds. The buffer
* may be dropped during processing for congestion control or by the
* protocol layers.
*
* return values:
* NET_RX_SUCCESS (no congestion)
* NET_RX_DROP (packet was dropped)
*
*/
int netif_rx(struct sk_buff *skb)
{
    trace_netif_rx_entry(skb);

    return netif_rx_internal(skb);
}
2. netif_rx_internal()
static int netif_rx_internal(struct sk_buff *skb)
{
    int ret;

    net_timestamp_check(netdev_tstamp_prequeue, skb);

    trace_netif_rx(skb);
#ifdef CONFIG_RPS
    if (static_key_false(&rps_needed)) {
        struct rps_dev_flow voidflow, *rflow = &voidflow;
        int cpu;

        preempt_disable();
        rcu_read_lock();

        cpu = get_rps_cpu(skb->dev, skb, &rflow);
        if (cpu < 0)
            cpu = smp_processor_id();

        ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);

        rcu_read_unlock();
        preempt_enable();
    } else
#endif
    {
        unsigned int qtail;
        ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
        put_cpu();
    }
    return ret;
}
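Both branches end up in enqueue_to_backlog(), which queues the skb on the target CPU's input_pkt_queue and, if the backlog napi_struct is not already scheduled, hangs it on that CPU's poll_list. A heavily simplified sketch (flow-limit handling, RPS bookkeeping and locking details omitted, and the real function takes a qtail argument):

static int enqueue_to_backlog_sketch(struct sk_buff *skb, int cpu)
{
    struct softnet_data *sd = &per_cpu(softnet_data, cpu);
    unsigned long flags;

    local_irq_save(flags);
    if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
        __skb_queue_tail(&sd->input_pkt_queue, skb);
        /* schedule the backlog device if it is not already on the poll_list;
         * its poll function is process_backlog() */
        if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
            ____napi_schedule(sd, &sd->backlog);
        local_irq_restore(flags);
        return NET_RX_SUCCESS;
    }
    local_irq_restore(flags);

    sd->dropped++;
    kfree_skb(skb);
    return NET_RX_DROP;
}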
Finally, an overview diagram:
References:
https://www.cnblogs.com/muahao/p/10861771.html
https://blog.csdn.net/zhangskd/article/details/21627963
https://www.cnblogs.com/mfrbuaa/p/4642266.html