论网卡数据是如何从驱动到桥接/ip层(网卡驱动中断分析-中断方式非NAPI)

最新推荐文章于 2023-03-06 16:37:41 发布

小小程序员ing

最新推荐文章于 2023-03-06 16:37:41 发布

阅读量897

点赞数 2

分类专栏：内核编程文章标签：内核 linux 网络

本文链接：https://blog.csdn.net/u011551613/article/details/107803505

版权

内核编程专栏收录该内容

9 篇文章 8 订阅

订阅专栏

前言

函数源码简短的我会直接贴源代码。源码太长我只能贴关键性的一些。希望大家按照流程分析。该博客对于任何的有线网卡驱动都是适合的。

这一期的博客，我们分析的是中断的方式。NAPI的方式会放在下一个系列。其实只是有一些流程不一样而已。

在上一讲中我们分析了网卡驱动的注册流程和中断的注册。这里再回顾一下上一讲的注册中断的代码。在网卡的open函数中调用了request_irq函数注册了网卡中断。网卡一旦有数据到来就会进入该中断

rc = request_irq(dev->irq, interrupt_isr, IRQF_DISABLED, dev->name, dev);

linux内核接收帧方式

在分析网卡中断之前，我们需要来了解一下，驱动接收网卡数据包的方式。(NAPI和中断方式)

中断接收方式

在老的2.5.x内核以前，网卡接收数据帧的方式采用中断。当网卡接收到数据后，会调用中断处理函数。在中断处理函数中将流程分为中断上下半部(不知道中断上半部和下半部可以查看上一篇博客)。在下半部中对帧进行处理。

中断接收方式优缺点都很明显。优点:采用的是硬件中断机制，能够第一时间处理数据包。迅速，快捷。缺点:当数据包流过大的时候，会频繁的调用中断。这样会导致cpu长时间工作在中断上下文的处理中，浪费cpu资源。

NAPI方式

NAPI方式是在内核2.5.X引入的，后来集成到2.4.X中。NAPI目前只有高速网卡支持，普通的网卡仍然采用中断的方式接收。

NAPI的字面意思为new API。它是为了解决普通的中断方式在大数据流情况下浪费CPU而设计的。它在大流量高负载情况下比单纯的中断接收方式更优秀。

NAPI的设计思路很简单。它是混合使用中断和轮询的方式。当接收到帧的时候，如果内核还没有处理完前面的几个帧，那么就不产生中断，只是将帧存放到输入队列中。等待内核处理完了再产生中断。

网卡中断分析

在这里我先给出接收中断下半部需要做的事情，让大家对于流程有一个大体的了解

将帧拷贝到sk_buff数据结构
初始化sk_buff中的字段，以便由网络层使用。例如sk_buff->protocol字段
更新该设备的一些私有参数
为NET_RX_SOFTIRQ的软IRQ调度以准备执行，以此通知内核新帧的到来

我们来看看驱动程序当中是如何处理的

/*
 * interrupt_isr - hardware interrupt handler of the NIC.
 * @irq:          interrupt number; not referenced in the body.
 * @dev_instance: opaque cookie, used here as the struct net_device pointer.
 *
 * Latches the pending interrupt bits, updates optional statistics counters,
 * then dispatches to the RX handler, the TX handler and (when one of the
 * listed options is configured) the link-change handler.
 *
 * Always returns IRQ_HANDLED.
 */
irqreturn_t interrupt_isr(int irq, void *dev_instance)
{
	struct net_device *dev = dev_instance;
	struct dev_priv *cp;
	unsigned int status;
	cp = NETDRV_PRIV(dev);
	/*
	 * Read the pending bits and write the same value back
	 * (presumably a write-1-to-clear acknowledge -- confirm against the
	 * chip documentation).
	 */
	status = REG32(CPUIISR);
	REG32(CPUIISR) = status;
	/* Keep only the bits that are also set in CPUIIMR. */
	status &= REG32(CPUIIMR);

#ifdef CONFIG_RTL_819X_SWCORE
	/* Per-cause interrupt counters (total / rx / tx / link / run-out). */
	cnt_swcore++;
	if (status & (RX_DONE_IP_ALL))
 		cnt_swcore_rx++;
#if defined(EN_RTL_INTERRUPT_MIGRATION)
	if (status & TX_DONE_IP_ALL)
		cnt_swcore_tx++;
#else
	if (status & TX_ALL_DONE_IP_ALL)
		cnt_swcore_tx++;
#endif
	if (status&LINK_CHANGE_IP)
		cnt_swcore_link++;
	if (status&(PKTHDR_DESC_RUNOUT_IP_ALL|MBUF_DESC_RUNOUT_IP_ALL))
		cnt_swcore_err++;
#endif

#if defined(CONFIG_RTL_8197F) &&  defined(CONFIG_FINETUNE_RUNOUT_IRQ)
	/* On a descriptor run-out event, disable the run-out interrupt. */
	if (status&(PKTHDR_DESC_RUNOUT_IP_ALL|MBUF_DESC_RUNOUT_IP_ALL))
	{
		rtl865x_disableRunoutIrq();
	}
#endif

	/* Both handlers receive the full masked status word. */
	rtl_rx_interrupt_process(status, cp);

	rtl_tx_interrupt_process(status, cp);

	#if	defined(CONFIG_RTL_IGMP_SNOOPING)||defined(CONFIG_RTL_LINKCHG_PROCESS) || defined (CONFIG_RTL_PHY_PATCH) || defined(CONFIG_RTL_ETH_802DOT1X_SUPPORT) || defined(CONFIG_RTL_8198C_10M_REFINE)
	if (status & LINK_CHANGE_IP)
		rtl_link_change_interrupt_process(status, cp);
	#endif

	return IRQ_HANDLED;
}

这里主要调用的处理函数为rtl_rx_interrupt_process函数。那我们再看看该函数中到底是如何处理帧的。

/*
 * rtl_rx_interrupt_process - RX part of the interrupt handler.
 * @status: masked interrupt status word computed by the caller.
 * @cp:     driver private data.
 *
 * Depending on the build configuration the received frames are handled by
 *   - NAPI            (CONFIG_RTL_ETH_NAPI_SUPPORT): napi_schedule(),
 *   - a hi-prio tasklet (RX_TASKLET):                tasklet_hi_schedule(),
 *   - or synchronously, by calling interrupt_dsr_rx() right here.
 */
__IRAM_GEN static inline void rtl_rx_interrupt_process(unsigned int status, struct dev_priv *cp)
{
	#if defined(REINIT_SWITCH_CORE)
	/* Do nothing while the re-init flag is set. */
	if(rtl865x_duringReInitSwtichCore==1) {
		return;
	}
	#endif

#ifdef CONFIG_RTL_8197F
	if (status & (RX_DONE_IP_ALL | PKTHDR_DESC_RUNOUT_IP_ALL))
#else
	//if (status & (RX_DONE_IP_ALL | PKTHDR_DESC_RUNOUT_IP_ALL))
#endif
	/*
	 * Without CONFIG_RTL_8197F the status test above is commented out,
	 * so the block below runs on every call.
	 */
	{
#if defined(CONFIG_RTL_ETH_NAPI_SUPPORT)
		/*
		 * Schedule NAPI only once: the running flag is set here, the RX
		 * interrupt enable bits are cleared in CPUIIMR, then NAPI is
		 * scheduled.
		 */
		if ((rtl_rx_tasklet_running==0)&&(cp->napi.poll)) {
			rtl_rx_tasklet_running=1;
			REG32(CPUIIMR) &= ~(RX_DONE_IE_ALL | PKTHDR_DESC_RUNOUT_IE_ALL);
			rtl_rxSetTxDone(FALSE);
			napi_schedule(&cp->napi);
		}
#else
		#if defined(RX_TASKLET)
		/* Same sequence as above, but the work is deferred to a tasklet. */
		if (rtl_rx_tasklet_running==0) {
			rtl_rx_tasklet_running=1;
			REG32(CPUIIMR) &= ~(RX_DONE_IE_ALL | PKTHDR_DESC_RUNOUT_IE_ALL);
			rtl_rxSetTxDone(FALSE);
			tasklet_hi_schedule(&cp->rx_dsr_tasklet);
		}
		#else
		/* No deferral: process the frames directly from the caller. */
		interrupt_dsr_rx((unsigned long)cp);
		#endif
#endif
	}
}

从上面的代码可知，该驱动如果支持了NAPI，则会调用napi_schedule函数。（这里请大家记住该函数。在本篇博客的结尾，我们会在流程图中总结napi和中断的流程）。如果我们的驱动程序不支持napi，在定义了RX_TASKLET的情况下会调用tasklet_hi_schedule调度一个tasklet(内核微任务)来处理，否则会直接在中断处理中调用interrupt_dsr_rx(关于内核微任务的注册，请参考我的上一篇文章)

中断方式流程(tasklet分析)

我们先来看不支持napi的情况。这里主要分析的函数即rx_dsr_tasklet绑定的处理函数。我们再贴一次该task的初始化代码

tasklet_init(&cp->rx_dsr_tasklet, (void *)interrupt_dsr_rx, (unsigned long)cp);

显而易见，调用tasklet_hi_schedule之后实际会调用interrupt_dsr_rx

我们的目光又转到了interrupt_dsr_rx的分析上，让我们来看看代码

/*
 * interrupt_dsr_rx - RX bottom half: fetch received frames from the NIC and
 * pass each accepted frame to rtl_processRxFrame().
 * @task_priv: opaque value; in this body it is only forwarded to
 *             interrupt_dsr_rx_per_packe_check() in the VoIP-QoS build.
 *
 * NOTE(review): the "#endif" directly after the signature closes an "#if"
 * that is not visible in this excerpt. The NAPI build below uses "budget",
 * which is presumably a parameter of the alternative (hidden) signature --
 * confirm against the full driver source.
 *
 * Return: in the NAPI build, budget - rx_left; otherwise RTL_NICRX_OK.
 */
static int32 interrupt_dsr_rx(unsigned long task_priv)
#endif
{
	static __DRAM_FWD rtl_nicRx_info	info;
	int	ret, count, rx_ok=0;
	#if defined(CONFIG_RTL_ETH_NAPI_SUPPORT)
	uint32 rx_left, rx_count;
	#endif
	#if defined(CONFIG_SMP)
	unsigned long flags = 0;
	SMP_LOCK_ETH_RECV(flags);
	#endif

	//int MAX_RX_NUM = 160;	

	//RTL_swNic_txDone(0);

	#if defined (CONFIG_RTK_VOIP_QOS) && !defined (CONFIG_RTK_VOIP_ETHERNET_DSP_IS_HOST)
	interrupt_dsr_rx_check(&RxIntData);
	#endif
	rx_ok=0;
	/*
	 * NAPI build: at most "budget" iterations.
	 * Other builds: loop until a break; bounded by the rx_ok check below.
	 */
	#if defined(CONFIG_RTL_ETH_NAPI_SUPPORT)
	for (rx_left=budget; rx_left>0; rx_left--)
	#else
	while (1)
	#endif
	{	
		#if !defined(CONFIG_RTL_ETH_NAPI_SUPPORT)
		/* Stop once more than NUM_RX_PKTHDR_DESC receive attempts were made. */
		if (rx_ok > (NUM_RX_PKTHDR_DESC))
			break;
		#endif

		#if defined (CONFIG_RTK_VOIP_QOS) && !defined (CONFIG_RTK_VOIP_ETHERNET_DSP_IS_HOST)
		if (RTL_RX_PROCESS_RETURN_BREAK==interrupt_dsr_rx_per_packe_check(&RxIntData, task_priv))
			break;
		#endif

		#if defined(RTL_MULTIPLE_RX_TX_RING)
		info.priority = RTL_ASSIGN_RX_PRIORITY;
		#endif

		/* Retry the receive call for as long as it returns RTL_NICRX_REPEAT. */
		count = 0;
		do {
			ret = RTL_swNic_receive(&info, count++);
		} while (ret==RTL_NICRX_REPEAT);
		rx_ok++;

		switch(rtl_processReceivedInfo(&info,  ret)) {
			case RTL_RX_PROCESS_RETURN_SUCCESS:
				if (SUCCESS==rtl_decideRxDevice(&info)) {
					rtl_processRxFrame(&info);
				}
				break;
			case RTL_RX_PROCESS_RETURN_BREAK:
				{
					/* Undo the count for this iteration before leaving the loop. */
					#if defined(CONFIG_RTL_ETH_NAPI_SUPPORT)
					rx_left++;
					#endif
					goto rx_out;
				}
			default:
				break;
		}
	}

rx_out:	
//panic_printk("%s:%d rx_ok=%d\n", __FUNCTION__, __LINE__, rx_ok);	
	#if defined(CONFIG_RTL_ETH_NAPI_SUPPORT)
	rx_count = budget - rx_left;
	#if defined(CONFIG_SMP)
	SMP_UNLOCK_ETH_RECV(flags);
	#endif
	return rx_count;
	#else
	#if defined(CONFIG_SMP)
	SMP_UNLOCK_ETH_RECV(flags);
	#endif
	interrupt_dsr_rx_done(&RxIntData);
	return RTL_NICRX_OK;
	#endif
	
}

我们来分析以上代码。

调用ret = RTL_swNic_receive(&info, count++);循环从网卡寄存器中读取数据。数据存放在info结构中（这里不是分析驱动，所以不深入了解接收函数的实现）
根据接收程序返回的结果调用处理程序rtl_processRxFrame。（这里有个知识点需要提醒大家。非NAPI接收的方式一定会调用netif_rx函数。所以如果我们一旦确认驱动的类型。NAPI或者非NAPI。如果是非NAPI，直接在驱动中搜索netif_rx函数反向推导即可。那么言下之意，rtl_processRxFrame一定会调用netif_rx。）

switch(rtl_processReceivedInfo(&info,  ret)) {
	case RTL_RX_PROCESS_RETURN_SUCCESS:
		if (SUCCESS==rtl_decideRxDevice(&info)) {
			rtl_processRxFrame(&info);
		}

下面我们来分析rtl_processRxFrame函数。该函数的代码实在是太长了。这里我只截取关键性的一些地方。比如将接收到的数据包封装成skb、以及对于skb的一些关键性的设置。

static inline void rtl_processRxFrame(rtl_nicRx_info *info)
{
	struct dev_priv	*cp_this;
	struct sk_buff 	*skb;
	uint32			vid, pid, len;
	uint8			*data;
	#if defined(CONFIG_RTL_ETH_802DOT1X_SUPPORT) || defined(CONFIG_RTL_VLAN_8021Q)
	int ret;
	#endif
	#ifdef CONFIG_RTK_OPENVPN_HW_CRYPTO
	int vpn_port;
	int dec_ret=1;
	#endif

#if defined (CONFIG_RTL_IGMP_SNOOPING)||defined(CONFIG_BRIDGE_IGMP_SNOOPING)
	int mCastFlag=0;
#endif	
#if defined(CONFIG_RTL_PROCESS_PPPOE_IGMP_FOR_BRIDGE_FORWARD)	
	int type;
#endif

	cp_this = info->priv;
	skb = info->input;
	vid = info->vid;
	data = skb->tail = skb->data;

在这里我们看到，定义了一个skb，然后指向接收数据指针的input。接着，初始化了skb->tail以及data字段。

TODO: (每一个驱动程序在转换成skb的时候所采用的方式是不一样的。但是大体思路相同)。在这里我还是要多说一句。每个厂商在处理接收包的函数中做了很多的动作。比如说硬件vlan的处理，硬件pppoe的处理，VOIP的实现等等。这些不是该篇博客需要研究的。我们目前研究的是linux内核的通用流程。所以请大家在分析源码的时候忽略。关于vlan我们后续再讲，如果大家从事linux协议栈或者内核安全等方面的开发，这些知识是必须掌握的

rtl_processRxToProtcolStack(skb1, cp1);

在rtl_processRxFrame中调用了rtl_processRxToProtcolStack。

/*
 * rtl_processRxToProtcolStack - final driver step: hand an skb to the kernel.
 * @skb:     received frame; skb->dev is read here, so it must already be set.
 * @cp_this: driver private data; only referenced in commented-out code here.
 *
 * Fills in skb->protocol and skb->ip_summed, clears a few vendor flags when
 * configured, then passes the skb to netif_rx() or netif_receive_skb()
 * depending on the build options.
 */
static inline void rtl_processRxToProtcolStack(struct sk_buff *skb, struct dev_priv *cp_this)
{
#ifdef CONFIG_RTL_8198C
	unsigned long flags=0;
	SMP_LOCK_ETH(flags);
#endif

	/* Protocol type is whatever eth_type_trans() returns for this frame. */
	skb->protocol = eth_type_trans(skb, skb->dev);

#ifdef CONFIG_RTL_IPV6READYLOGO
	/*
	 * NOTE(review): skb->protocol is normally stored in network byte order,
	 * while 0x86dd is a host-order constant; the comparison presumably only
	 * matches on big-endian targets -- confirm.
	 */
	if(skb->protocol == 0x86dd)
		skb->ip_summed = CHECKSUM_NONE;
	else
		skb->ip_summed = CHECKSUM_UNNECESSARY;	
#else
	//skb->ip_summed = CHECKSUM_NONE;
	/* It must be a TCP or UDP packet with a valid checksum */	
	skb->ip_summed = CHECKSUM_UNNECESSARY;	
	//printk("[%s][%d]-skb->dev[%s],proto(0x%x)\n", __FUNCTION__, __LINE__, skb->dev->name,skb->protocol);
#endif

#if defined(CONFIG_RTL_IP_POLICY_ROUTING_SUPPORT)
	//skb->vlan_member = cp_this->port_member;
	//skb->src_port = IF_SWITCH;
	skb->switch_port = skb->dev->name;	
#endif

	/*
	 * Delivery:
	 *   non-OpenWrt + RX_TASKLET: netif_rx() on SMP or 8197F,
	 *                             netif_receive_skb() otherwise;
	 *   non-OpenWrt, no tasklet:  netif_rx();
	 *   OpenWrt SDK:              netif_rx() unless the iptables fast path
	 *                             is enabled, then netif_receive_skb().
	 */
#ifndef CONFIG_OPENWRT_SDK
#if defined(RX_TASKLET)
	#if defined(CONFIG_RTL_LOCAL_PUBLIC)
	skb->localPublicFlags = 0;
	#endif
	#if defined(CONFIG_RTL_FAST_BRIDGE)
	skb->fast_br_forwarding_flags = 0;
	#endif

	#ifdef CONFIG_SMP
	netif_rx(skb);
	#else
	
	#ifdef CONFIG_RTL_8197F
	netif_rx(skb);
	#else
	netif_receive_skb(skb);
	#endif
	#endif
	
#else	/*	defined(RX_TASKLET)	*/
	netif_rx(skb);
#endif	/*	defined(RX_TASKLET)	*/
#else   //CONFIG_OPENWRT_SDK
#if !defined(CONFIG_RTL_IPTABLES_FAST_PATH) //|| defined(CONFIG_RTL_8197F)
	netif_rx(skb);
#else
	netif_receive_skb(skb);
#endif

#endif
#ifdef CONFIG_RTL_8198C
	SMP_UNLOCK_ETH(flags);
#endif
}

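这里终于找到了我们关注的函数，即netif_rx。同时我们也看到了skb->protocol的设置：它的值由eth_type_trans根据以太网帧头中的协议类型得到，可能是IPv4、IPv6(0x86dd)、ARP等，并不一定是ip协议。另外还设置了skb->ip_summed，即校验和的处理状态(CHECKSUM_UNNECESSARY表示协议栈无需再用软件校验，CHECKSUM_NONE表示需要由软件校验)。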

netif_rx分析

该函数对于非napi的处理是一个通用性函数。正如上文所说，我们可以通过该函数来倒推接收流程。下面我们来分析该函数。

/*
 * netif_rx - queue a received skb onto a per-CPU backlog.
 * @skb: buffer handed over by the driver.
 *
 * Return: NET_RX_DROP when netpoll_rx() claims the packet, NET_RX_SUCCESS on
 * the vendor multi-WAN shortcut, otherwise whatever enqueue_to_backlog()
 * returns.
 */
int netif_rx(struct sk_buff *skb)
{
	int ret;
#if defined(CONFIG_RPS) && defined(CONFIG_RTL_USB_IP_HOST_SPEEDUP)
	/* Vendor addition: skb->data is interpreted as an IP header here. */
	struct iphdr *iph = (struct iphdr *)skb->data;
#endif

	/* if netpoll wants it, pretend we never saw it */
	if (netpoll_rx(skb))
		return NET_RX_DROP;

#if defined(CONFIG_RTL_ISP_MULTI_WAN_SUPPORT)
	/* Vendor shortcut: devices flagged IFF_RSMUX bypass the backlog. */
	if(skb->dev->priv_flags & IFF_RSMUX) {
		extern int rtl_smux_pkt_recv(struct sk_buff *skb, struct net_device *dev);
		//atomic_inc(&skb->users);
		rtl_smux_pkt_recv(skb, skb->dev);
		return NET_RX_SUCCESS;
	}
#endif

	net_timestamp_check(netdev_tstamp_prequeue, skb);

	trace_netif_rx(skb);
#ifdef CONFIG_RPS
	#if defined(CONFIG_RTL_USB_IP_HOST_SPEEDUP)
	if ((static_key_false(&rps_needed)) && iph && (iph->daddr!=_br0_ip))
	#else
	if (static_key_false(&rps_needed)) 
	#endif
	{
		/* RPS path: get_rps_cpu() chooses the CPU whose backlog is used. */
		struct rps_dev_flow voidflow, *rflow = &voidflow;
		int cpu;

		preempt_disable();
		rcu_read_lock();

		cpu = get_rps_cpu(skb->dev, skb, &rflow);
		/* A negative result means no target was chosen: use this CPU. */
		if (cpu < 0)
			cpu = smp_processor_id();

		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);

		rcu_read_unlock();
		preempt_enable();
	} else
#endif
	{
		/* Default path: enqueue on the CPU returned by get_cpu(). */
		unsigned int qtail;
		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
		put_cpu();
	}
	return ret;
}

该函数的源码不长。需要我们注意的主要是分析enqueue_to_backlog函数。关于netif_rx的分析，我在这里贴一个链接https://blog.csdn.net/yldfree/article/details/84969862。

该链接中的函数分析实则是enqueue_to_backlog的实现。

if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
	if (!rps_ipi_queued(sd))
		____napi_schedule(sd, &sd->backlog);
}

在enqueue_to_backlog中会将数据添加到接收队列中，然后调用____napi_schedule来触发软中断

/*
 * ____napi_schedule - queue a NAPI instance for polling and raise the RX softirq.
 * @sd:   softnet_data whose poll_list receives the entry.
 * @napi: NAPI instance to append.
 *
 * Appends @napi to the tail of sd->poll_list and raises NET_RX_SOFTIRQ via
 * __raise_softirq_irqoff() (the "_irqoff" variant suggests callers run with
 * local interrupts disabled -- confirm at the call sites).
 */
static inline void ____napi_schedule(struct softnet_data *sd,
				     struct napi_struct *napi)
{
	list_add_tail(&napi->poll_list, &sd->poll_list);
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}

这里看到主要是将napi结构(napi->poll_list)添加到sd->poll_list链表的尾部，而不是把数据包本身加入该链表。然后触发NET_RX_SOFTIRQ软中断。（非NAPI的方式和NAPI的方式都会调用____napi_schedule 它们是一套通用的流程。）

net_rx_action函数分析

从上面我们已经分析了数据的处理最终会调用NET_RX_SOFTIRQ软中断触发的函数，即net_rx_action。

我们先来看看net_rx_action的注册

/*
 * net_dev_init - boot-time initialisation of the network device core.
 *
 * Sets up the packet-type lists, initialises each possible CPU's
 * softnet_data (queues, poll_list and the backlog NAPI instance whose poll
 * callback is process_backlog), registers the per-net operations, and
 * installs net_tx_action / net_rx_action as the handlers of NET_TX_SOFTIRQ /
 * NET_RX_SOFTIRQ.
 *
 * Return: 0 on success; -ENOMEM if any of the checked steps fails.
 */
static int __init net_dev_init(void)
{
	int i, rc = -ENOMEM;

	BUG_ON(!dev_boot_phase);

	if (dev_proc_init())
		goto out;

	if (netdev_kobject_init())
		goto out;

	INIT_LIST_HEAD(&ptype_all);
	for (i = 0; i < PTYPE_HASH_SIZE; i++)
		INIT_LIST_HEAD(&ptype_base[i]);

	INIT_LIST_HEAD(&offload_base);

	if (register_pernet_subsys(&netdev_net_ops))
		goto out;

	/*
	 *	Initialise the packet receive queues.
	 */

	for_each_possible_cpu(i) {
		struct softnet_data *sd = &per_cpu(softnet_data, i);

		memset(sd, 0, sizeof(*sd));
		skb_queue_head_init(&sd->input_pkt_queue);
		skb_queue_head_init(&sd->process_queue);
		sd->completion_queue = NULL;
		INIT_LIST_HEAD(&sd->poll_list);
		sd->output_queue = NULL;
		sd->output_queue_tailp = &sd->output_queue;
#ifdef CONFIG_RPS
		sd->csd.func = rps_trigger_softirq;
		sd->csd.info = sd;
		sd->csd.flags = 0;
		sd->cpu = i;
#endif

		/* The backlog NAPI instance is polled by process_backlog(). */
		sd->backlog.poll = process_backlog;
		sd->backlog.weight = weight_p;
		sd->backlog.gro_list = NULL;
		sd->backlog.gro_count = 0;
	}

	dev_boot_phase = 0;

	/* The loopback device is special if any other network devices
	 * is present in a network namespace the loopback device must
	 * be present. Since we now dynamically allocate and free the
	 * loopback device ensure this invariant is maintained by
	 * keeping the loopback device as the first device on the
	 * list of network devices.  Ensuring the loopback devices
	 * is the first device that appears and the last network device
	 * that disappears.
	 */
	if (register_pernet_device(&loopback_net_ops))
		goto out;

	if (register_pernet_device(&default_device_ops))
		goto out;

	/* Bind the TX/RX softirq numbers to their handler functions. */
	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);

	hotcpu_notifier(dev_cpu_callback, 0);
	dst_init();
	rc = 0;
out:
	return rc;
}

我们可以看到net_rx_action在网络设备初始化的时候被注册了。这里也有另一个函数net_tx_action，我们可以猜想肯定是发送处理函数。也是通过软中断进行的触发。

来看看net_rx_action的源码

/*
 * net_rx_action - softirq handler that polls the NAPI instances on this
 * CPU's softnet_data poll_list.
 * @h: softirq action descriptor; not referenced in the body.
 *
 * For each list head entry whose NAPI_STATE_SCHED bit is set, calls
 * n->poll(n, n->weight) and subtracts the returned work from the budget
 * (initially netdev_budget). The loop ends when the list is empty; if the
 * budget is used up or jiffies reaches the start value + 2 first, it bumps
 * sd->time_squeeze and re-raises NET_RX_SOFTIRQ so the remaining entries are
 * handled by a later run.
 */
static void net_rx_action(struct softirq_action *h)
{
	struct softnet_data *sd = &__get_cpu_var(softnet_data);
	unsigned long time_limit = jiffies + 2;
	int budget = netdev_budget;
	void *have;

	local_irq_disable();

	while (!list_empty(&sd->poll_list)) {
		struct napi_struct *n;
		int work, weight;

		/* If softirq window is exhuasted then punt.
		 * Allow this to run for 2 jiffies since which will allow
		 * an average latency of 1.5/HZ.
		 */
		if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
			goto softnet_break;

		local_irq_enable();

		/* Even though interrupts have been re-enabled, this
		 * access is safe because interrupts can only add new
		 * entries to the tail of this list, and only ->poll()
		 * calls can remove this head entry from the list.
		 */
		n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);

		have = netpoll_poll_lock(n);

		weight = n->weight;

		/* This NAPI_STATE_SCHED test is for avoiding a race
		 * with netpoll's poll_napi().  Only the entity which
		 * obtains the lock and sees NAPI_STATE_SCHED set will
		 * actually make the ->poll() call.  Therefore we avoid
		 * accidentally calling ->poll() when NAPI is not scheduled.
		 */
		work = 0;
		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
			work = n->poll(n, weight);
			trace_napi_poll(n);
		}

		WARN_ON_ONCE(work > weight);

		budget -= work;

		local_irq_disable();

		/* Drivers must not modify the NAPI state if they
		 * consume the entire weight.  In such cases this code
		 * still "owns" the NAPI instance and therefore can
		 * move the instance around on the list at-will.
		 */
		if (unlikely(work == weight)) {
			if (unlikely(napi_disable_pending(n))) {
				local_irq_enable();
				napi_complete(n);
				local_irq_disable();
			} else {
				if (n->gro_list) {
					/* flush too old packets
					 * If HZ < 1000, flush all packets.
					 */
					local_irq_enable();
					napi_gro_flush(n, HZ >= 1000);
					local_irq_disable();
				}
				/* Rotate the instance to the tail so the next entry gets polled. */
				list_move_tail(&n->poll_list, &sd->poll_list);
			}
		}

		netpoll_poll_unlock(have);
	}
out:
	/* Reached with local interrupts disabled on every path above. */
	net_rps_action_and_irq_enable(sd);

#ifdef CONFIG_NET_DMA
	/*
	 * There may not be any more sk_buffs coming right now, so push
	 * any pending DMA copies to hardware
	 */
	dma_issue_pending_all();
#endif

	return;

softnet_break:
	/* Budget or time limit hit: count it and ask for another softirq run. */
	sd->time_squeeze++;
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	goto out;
}

1）关闭本地的中断
2）遍历poll_list链表，如果不为空就处理封包。
3）开启中断

现在最关键的一点是：处理函数到底是谁。这里通过代码分析可以看到，当从poll_list获取到节点后，会调用test_bit(NAPI_STATE_SCHED, &n->state)。然后根据返回结果，调用work = n->poll(n, weight)。(上文已经说了，接收软中断是NAPI和非NAPI都会进入的处理函数。所以这里的poll的处理对于NAPI和非NAPI都会处理)

poll函数的注册

我们来看看poll函数的注册。在上面我们看到了NET_RX_SOFTIRQ软中断的注册。在同一个函数中有这样一句话sd->backlog.poll = process_backlog。

说明:每一个CPU都有一个自己的softnet_data，其中包含一个poll_list；非NAPI方式使用其中的backlog这个napi结构，它的poll处理函数就是process_backlog。
从上面可知，现在的数据包的处理又循环到了process_backlog函数。

process_backlog代码分析

/*
 * process_backlog - poll callback of the per-CPU backlog NAPI instance.
 * @napi:  the backlog member embedded in a struct softnet_data.
 * @quota: maximum number of packets to process in this call.
 *
 * Dequeues skbs from sd->process_queue and passes each one to
 * __netif_receive_skb() with local interrupts enabled. When process_queue
 * runs empty it is refilled from sd->input_pkt_queue under rps_lock(). If
 * fewer packets were waiting than the remaining quota, the NAPI instance is
 * removed from the poll list and its state cleared.
 *
 * Return: the number of packets processed.
 */
static int process_backlog(struct napi_struct *napi, int quota)
{
	int work = 0;
	struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);

#ifdef CONFIG_RPS
	/* Check if we have pending ipi, its better to send them now,
	 * not waiting net_rx_action() end.
	 */
	if (sd->rps_ipi_list) {
		local_irq_disable();
		net_rps_action_and_irq_enable(sd);
	}
#endif
	napi->weight = weight_p;
	local_irq_disable();
	while (work < quota) {
		struct sk_buff *skb;
		unsigned int qlen;

		while ((skb = __skb_dequeue(&sd->process_queue))) {
			#if defined(CONFIG_RTL_8198C)
			/*
			 * Vendor hack: when the low 12 bits of the word at
			 * 0xb8000000 are zero, clear bit 29 of the skb pointer
			 * (presumably a chip-revision check that maps an uncached
			 * address to its cached alias -- confirm).
			 */
			if((*((volatile unsigned long *)(0xb8000000)) & 0xFFF) == 0x0) {
				skb = (struct sk_buff *)((unsigned long)skb & ~0x20000000);
			}
			#endif
			/* Interrupts stay enabled while the packet is processed. */
			rcu_read_lock();							
			local_irq_enable();
			__netif_receive_skb(skb);
			rcu_read_unlock();
			local_irq_disable();
			input_queue_head_incr(sd);
			if (++work >= quota) {
				local_irq_enable();
				return work;
			}
		}

		/* process_queue is empty: move everything from input_pkt_queue over. */
		rps_lock(sd);
		qlen = skb_queue_len(&sd->input_pkt_queue);
		if (qlen)
			skb_queue_splice_tail_init(&sd->input_pkt_queue,
						   &sd->process_queue);

		if (qlen < quota - work) {
			/*
			 * Inline a custom version of __napi_complete().
			 * only current cpu owns and manipulates this napi,
			 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
			 * we can use a plain write instead of clear_bit(),
			 * and we dont need an smp_mb() memory barrier.
			 */
			list_del(&napi->poll_list);
			napi->state = 0;

			/* Shrink quota so the outer loop stops after the spliced packets. */
			quota = work + qlen;
		}
		rps_unlock(sd);
	}
	local_irq_enable();

	return work;
}

这里有几点需要说明

从队列中取包等操作队列的动作是在关闭本地中断的情况下进行的；而在调用__netif_receive_skb处理封包期间会先打开本地中断，处理完成之后再关闭。
处理的流程都是先从队列中获取一个封包，然后再调用处理函数。
__netif_receive_skb即为封包的处理函数。

__netif_receive_skb分析

关于__netif_receive_skb我就不再贴源码了。该函数最终会调用的是__netif_receive_skb_core函数。
说明：此函数非常重要，__netif_receive_skb_core当中会决定数据包的走向。二层桥处理代码也在该函数中。

说明：

__netif_receive_skb_core函数就是将数据包扔到了网络协议栈中。这一章的上面全是驱动层的东西。从__netif_receive_skb_core函数开始就已经将数据包扔到了二层(桥设备)。关于接下来的数据包到底是从二层转发还是直接上三层，我们在下一章进行讲解。

在分析流程的时候，大家可以通过文章中所说的关键函数来倒推流程。这样会简单的多。

好了，来贴中断方式的流程图吧：
在这里插入图片描述
流程图是精简版。如果画的太细，估计一天一夜都画不完。但是该流程完整的描述了一个驱动程序的扔包到协议栈的过程。

我的天，流程太复杂了。绕了好多好多圈子。而且我已经省略一些不重要的信息了。码字分析源代码，对于我而言也是充满了挑战。没事我们一条道路上走到黑，下一章继续加油

流程太长，我们在下一章博客再来讲解。请大家放心，关键的分析章节我都会贴流程图。后面也会贴总的流程图。

欢迎加入qq群:610849576 。个人能力有限，欢迎大家一起讨论。

未完待续

小小程序员ing

关注

2
点赞
踩
3

收藏

觉得还不错? 一键收藏
2
评论
论网卡数据是如何从驱动到桥接/ip层(网卡驱动中断分析-中断方式非NAPI)

前言在上一讲中我们分析了网卡驱动的注册流程和中断的注册。这里再回顾一下上一讲的注册中断的代码。在网卡的open函数中调用了request_irq函数注册了网卡中断。网卡一旦有数据到来就会进入该中断rc = request_irq(dev->irq, interrupt_isr, IRQF_DISABLED, dev->name, dev);linux内核接收帧方式在分析网卡中断之前，我们需要来了解一下，驱动接收网卡数据包的方式。(NAPI和中断方式)中断接收方式在老的2.5.x内核
复制链接

扫一扫

专栏目录