imx6ull Ethernet

Network device state

enum netdev_state_t {				
	__LINK_STATE_START,				
	__LINK_STATE_PRESENT,			
	__LINK_STATE_NOCARRIER,
	__LINK_STATE_LINKWATCH_PENDING,
	__LINK_STATE_DORMANT,
};
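
These bits live in net_device->state and are tested with the usual bit helpers; for example, the carrier check used throughout the stack (this is the actual helper from include/linux/netdevice.h) is just:

static inline bool netif_carrier_ok(const struct net_device *dev)
{
	return !test_bit(__LINK_STATE_NOCARRIER, &dev->state);
}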

22.6.14 Enhanced buffer descriptors

The imx6ull describes a buffer with the fields listed in the reference manual's enhanced buffer descriptor tables (figures omitted here).
The data buffer pointer field holds the address of the buffer; the buffer must reside in external memory.
An enhanced buffer descriptor corresponds to struct bufdesc in the driver.

struct bufdesc {
	__fec16	cbd_sc;		/* Control and status info */
	__fec16	cbd_datlen;	/* Data length */
	__fec32	cbd_bufaddr;	/* Buffer address */
};
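
When extended descriptors are enabled (fep->bufdesc_ex), the driver uses struct bufdesc_ex instead, which embeds the legacy descriptor and adds extended control/status, protocol-checksum and timestamp words (layout as in drivers/net/ethernet/freescale/fec.h):

struct bufdesc_ex {
	struct bufdesc desc;
	__fec32	cbd_esc;	/* Extended control and status */
	__fec32	cbd_prot;	/* Protocol checksum info */
	__fec32	cbd_bdu;	/* BDU info */
	__fec32	ts;		/* Timestamp */
	__fec16	res0[4];	/* Reserved */
};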

How the imx6ull stores packets


On receive, the NIC DMAs each packet into a ring buffer in memory and raises an interrupt to tell the driver data has arrived; likewise, on transmit the driver writes the packet into a ring buffer and tells the NIC to carry it away. Both receive and transmit therefore stage packets in a ring buffer, which raises the question of how packets are laid out inside it.

The ring buffer consists of many struct bufdesc entries.
For a receive ring buffer, the NIC keeps writing data into bufdesc entries until the ring is full, while the driver keeps taking data out of them until the ring is empty.
The NIC and the driver thus form a producer/consumer pair.

On the imx6ull, packets are stored in a queue as struct bufdesc entries, and the ring buffer size is computed in these units: each queue holds a fixed number of struct bufdesc structures.
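
The driver steps through the ring with small helpers that wrap around at the last descriptor; the forward step in linux-5.4's fec_main.c is:

static struct bufdesc *fec_enet_get_nextdesc(struct bufdesc *bdp,
					     struct bufdesc_prop *bd)
{
	return (bdp >= bd->last) ? bd->base
			: (struct bufdesc *)(((void *)bdp) + bd->dsize);
}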

struct sk_buff is the kernel's representation of a packet. The imx6ull driver maps sk_buff->data and bufdesc->cbd_bufaddr onto the same memory, so once the NIC DMAs data to bufdesc->cbd_bufaddr, the kernel can read the packet through sk_buff->data; one skb corresponds to one bufdesc.
In the imx6ull MAC driver, each rxq keeps an skb array with exactly as many entries as there are bufdesc entries in the ring buffer.
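
Because skb slots and descriptors line up one-to-one, converting a descriptor pointer into its array index is plain pointer arithmetic (also from fec_main.c):

static int fec_enet_get_bd_index(struct bufdesc *bdp,
				 struct bufdesc_prop *bd)
{
	return ((const char *)bdp - (const char *)bd->base) >> bd->dsize_log2;
}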


fec_probe

static int
fec_probe(struct platform_device *pdev)
{
	struct fec_enet_private *fep;
	struct fec_platform_data *pdata;
	struct net_device *ndev;
	int i, irq, ret = 0;
	struct resource *r;
	const struct of_device_id *of_id;
	static int dev_id;
	struct device_node *np = pdev->dev.of_node, *phy_node;
	int num_tx_qs;
	int num_rx_qs;

	fec_enet_get_queue_num(pdev, &num_tx_qs, &num_rx_qs);

	/* Init network device */
	ndev = alloc_etherdev_mqs(sizeof(struct fec_enet_private) +
				  FEC_STATS_SIZE, num_tx_qs, num_rx_qs);
	if (!ndev)
		return -ENOMEM;

	SET_NETDEV_DEV(ndev, &pdev->dev);

	/* setup board info structure */
	fep = netdev_priv(ndev);

	of_id = of_match_device(fec_dt_ids, &pdev->dev);	//fetch fec_dt_ids->data, which records SoC-specific quirks
	if (of_id)
		pdev->id_entry = of_id->data;
	fep->quirks = pdev->id_entry->driver_data;	//save the quirks

	fep->netdev = ndev;							//fill in the private data structure fep
	fep->num_rx_queues = num_rx_qs;
	fep->num_tx_queues = num_tx_qs;


#if !defined(CONFIG_M5272)
	/* default enable pause frame auto negotiation */
	if (fep->quirks & FEC_QUIRK_HAS_GBIT)
		fep->pause_flag |= FEC_PAUSE_FLAG_AUTONEG;
#endif

	/* Select default pin state */
	pinctrl_pm_select_default_state(&pdev->dev);

	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);		//get the reg resource and map it into fep->hwp
	fep->hwp = devm_ioremap_resource(&pdev->dev, r);
	if (IS_ERR(fep->hwp)) {
		ret = PTR_ERR(fep->hwp);
		goto failed_ioremap;
	}

	fep->pdev = pdev;
	fep->dev_id = dev_id++;

	platform_set_drvdata(pdev, ndev);			//set the driver data

	if ((of_machine_is_compatible("fsl,imx6q") ||
	     of_machine_is_compatible("fsl,imx6dl")) &&
	    !of_property_read_bool(np, "fsl,err006687-workaround-present"))
		fep->quirks |= FEC_QUIRK_ERR006687;

	if (of_get_property(np, "fsl,magic-packet", NULL))
		fep->wol_flag |= FEC_WOL_HAS_MAGIC_PACKET;

	phy_node = of_parse_phandle(np, "phy-handle", 0);	//get the phy node
	if (!phy_node && of_phy_is_fixed_link(np)) {
		ret = of_phy_register_fixed_link(np);
		if (ret < 0) {
			dev_err(&pdev->dev,
				"broken fixed-link specification\n");
			goto failed_phy;
		}
		phy_node = of_node_get(np);
	}
	fep->phy_node = phy_node;

	ret = of_get_phy_mode(pdev->dev.of_node);			//get the phy interface mode
	if (ret < 0) {
		pdata = dev_get_platdata(&pdev->dev);
		if (pdata)
			fep->phy_interface = pdata->phy;
		else
			fep->phy_interface = PHY_INTERFACE_MODE_MII;
	} else {
		fep->phy_interface = ret;
	}

	fep->clk_ipg = devm_clk_get(&pdev->dev, "ipg");		//get the various clocks
	if (IS_ERR(fep->clk_ipg)) {
		ret = PTR_ERR(fep->clk_ipg);
		goto failed_clk;
	}

	......					//some clock-related code omitted
	ret = fec_enet_clk_enable(ndev, true);		//enable clocks
	if (ret)
		goto failed_clk;

	ret = clk_prepare_enable(fep->clk_ipg);
	if (ret)
		goto failed_clk_ipg;

	fep->reg_phy = devm_regulator_get(&pdev->dev, "phy");
	if (!IS_ERR(fep->reg_phy)) {
		ret = regulator_enable(fep->reg_phy);
		if (ret) {
			dev_err(&pdev->dev,
				"Failed to enable phy regulator: %d\n", ret);
			clk_disable_unprepare(fep->clk_ipg);
			goto failed_regulator;
		}
	} else {
		if (PTR_ERR(fep->reg_phy) == -EPROBE_DEFER) {
			ret = -EPROBE_DEFER;
			goto failed_regulator;
		}
		fep->reg_phy = NULL;
	}

	pm_runtime_set_autosuspend_delay(&pdev->dev, FEC_MDIO_PM_TIMEOUT);
	pm_runtime_use_autosuspend(&pdev->dev);
	pm_runtime_get_noresume(&pdev->dev);
	pm_runtime_set_active(&pdev->dev);
	pm_runtime_enable(&pdev->dev);

	ret = fec_reset_phy(pdev);			//reset the PHY chip via GPIO
	if (ret)
		goto failed_reset;

	if (fep->bufdesc_ex)
		fec_ptp_init(pdev);

	ret = fec_enet_init(ndev);	//allocate txq/rxq and their bufdesc rings, init the bufdesc entries, install netdev_ops/ethtool_ops, add the napi handler
	if (ret)					//also runs fec_restart(ndev): init some MAC registers, init rxq bufdesc, release txq bufdesc, program the rxq ring buffer address in DDR
		goto failed_init;

	for (i = 0; i < FEC_IRQ_NUM; i++) {
		irq = platform_get_irq(pdev, i);
		if (irq < 0) {
			if (i)
				break;
			ret = irq;
			goto failed_irq;
		}
		ret = devm_request_irq(&pdev->dev, irq, fec_enet_interrupt,	//register the interrupt handler
				       0, pdev->name, ndev);
		if (ret)
			goto failed_irq;

		fep->irq[i] = irq;
	}

	init_completion(&fep->mdio_done);
	ret = fec_enet_mii_init(pdev);  //read the dts mdio node and register the mdio bus (described by a struct mii_bus, which carries the mdio read/write functions for the phy)
	if (ret)						//also read the phy child nodes under the dts mdio node and register each phy_device
		goto failed_mii_init;

	/* Carrier starts down, phylib will bring it up */
	netif_carrier_off(ndev);		//carrier off: tell the kernel packets cannot be sent yet
	fec_enet_clk_enable(ndev, false);
	pinctrl_pm_select_sleep_state(&pdev->dev);

	ret = register_netdev(ndev);	//register the network device
	if(ret)
		goto failed_register;

	device_init_wakeup(&ndev->dev, fep->wol_flag &
			   FEC_WOL_HAS_MAGIC_PACKET);

	if (fep->bufdesc_ex && fep->ptp_clock)
		netdev_info(ndev, "registered PHC device %d\n", fep->dev_id);

	fep->rx_copybreak = COPYBREAK_DEFAULT;
	INIT_WORK(&fep->tx_timeout_work, fec_enet_timeout_work);

	pm_runtime_mark_last_busy(&pdev->dev);
	pm_runtime_put_autosuspend(&pdev->dev);

	return 0;
}

alloc_etherdev_mqs

This mainly does the following:
① allocates the netdev and applies the generic Ethernet defaults in ether_setup
② allocates the private data area (struct fec_enet_private)
③ allocates net_device->netdev_rx_queue and net_device->netdev_queue; the exact role of these tx/rx queues here is not entirely clear.
The imx6ull driver describes its own transmit and receive queues with fep->fec_enet_priv_tx_q and fep->fec_enet_priv_rx_q.

The netdev and its private data are allocated from a single chunk of memory, with 32-byte alignment padding applied both before and after the netdev.
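
This layout is why netdev_priv() can find the private area with nothing more than an aligned offset from the net_device pointer (from include/linux/netdevice.h):

static inline void *netdev_priv(const struct net_device *dev)
{
	return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
}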

//linux-5.4.47\net\ethernet\eth.c
struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
				      unsigned int rxqs)
{
	return alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_UNKNOWN,
				ether_setup, txqs, rxqs);		//ether_setup: set some generic netdev defaults
}
struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
		unsigned char name_assign_type,
		void (*setup)(struct net_device *),
		unsigned int txqs, unsigned int rxqs)
{
	struct net_device *dev;
	unsigned int alloc_size;
	struct net_device *p;

	BUG_ON(strlen(name) >= sizeof(dev->name));				//BUG_ON fires if the requested name is longer than the netdev->name buffer

	if (txqs < 1) {
		pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
		return NULL;
	}

	if (rxqs < 1) {
		pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
		return NULL;
	}
	alloc_size = sizeof(struct net_device);					//compute the netdev length alloc_size
	if (sizeof_priv) {
		/* ensure 32-byte alignment of private area */
		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);		//align the netdev length to 32 bytes
		alloc_size += sizeof_priv;							//alloc_size + private data length
	}
	/* ensure 32-byte alignment of whole construct */
	alloc_size += NETDEV_ALIGN - 1;							//alloc_size + 32 - 1

	p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!p)
		return NULL;

	dev = PTR_ALIGN(p, NETDEV_ALIGN);		//align p to 32 bytes and use that as the netdev address
	dev->padded = (char *)dev - (char *)p;	//netdev->padded: how much padding sits between the allocation start and the netdev

	dev->pcpu_refcnt = alloc_percpu(int);	
	if (!dev->pcpu_refcnt)
		goto free_dev;

	if (dev_addr_init(dev))					//initialize the mac address
		goto free_pcpu;

	dev_mc_init(dev);					
	dev_uc_init(dev);						

	dev_net_set(dev, &init_net);

	netdev_register_lockdep_key(dev);

	/*
	 * TSO (TCP Segmentation Offload): have the NIC segment large packets
	 * automatically, lowering CPU load.
	 * GSO (Generic Segmentation Offload): before sending, check whether the
	 * NIC supports TSO; if so, let the NIC segment, otherwise segment in the
	 * protocol stack and only then hand the pieces to the NIC.
	 */
	dev->gso_max_size = GSO_MAX_SIZE;
	dev->gso_max_segs = GSO_MAX_SEGS;
	dev->upper_level = 1;
	dev->lower_level = 1;

	INIT_LIST_HEAD(&dev->napi_list);				//initialize the napi list
	INIT_LIST_HEAD(&dev->unreg_list);
	INIT_LIST_HEAD(&dev->close_list);
	INIT_LIST_HEAD(&dev->link_watch_list);
	INIT_LIST_HEAD(&dev->adj_list.upper);
	INIT_LIST_HEAD(&dev->adj_list.lower);
	INIT_LIST_HEAD(&dev->ptype_all);				//initialize the ptype_all list, used on the protocol stack receive path
	INIT_LIST_HEAD(&dev->ptype_specific);			//initialize the ptype_specific list
#ifdef CONFIG_NET_SCHED
	hash_init(dev->qdisc_hash);
#endif
	dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
	setup(dev);

	if (!dev->tx_queue_len) {						//queue length setup
		dev->priv_flags |= IFF_NO_QUEUE;
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
	}

	dev->num_tx_queues = txqs;
	dev->real_num_tx_queues = txqs;
	if (netif_alloc_netdev_queues(dev))	//allocate the tx queues
		goto free_all;
	
	dev->num_rx_queues = rxqs;
	dev->real_num_rx_queues = rxqs;
	if (netif_alloc_rx_queues(dev))		//allocate the rx queues
		goto free_all;

	strcpy(dev->name, name);
	dev->name_assign_type = name_assign_type;
	dev->group = INIT_NETDEV_GROUP;
	if (!dev->ethtool_ops)
		dev->ethtool_ops = &default_ethtool_ops;	//install the default ethtool ops

	nf_hook_ingress_init(dev);

	return dev;

free_all:
	free_netdev(dev);
	return NULL;

free_pcpu:
	free_percpu(dev->pcpu_refcnt);
free_dev:
	netdev_freemem(dev);
	return NULL;
}

fec_enet_init

The main contents here:
① allocate the bufdesc memory for the txq and rxq ring buffers
② install netdev_ops and ethtool_ops
③ add the napi handler
④ apply some hardware quirks

static int fec_enet_init(struct net_device *ndev)
{
	struct fec_enet_private *fep = netdev_priv(ndev);
	struct bufdesc *cbd_base;					//buffer descriptor
	dma_addr_t bd_dma;
	int bd_size;
	unsigned int i;
	unsigned dsize = fep->bufdesc_ex ? sizeof(struct bufdesc_ex) : sizeof(struct bufdesc);	//compute the buffer descriptor size
	unsigned dsize_log2 = __fls(dsize);

	WARN_ON(dsize != (1 << dsize_log2));
#if defined(CONFIG_ARM)
	fep->rx_align = 0xf;
	fep->tx_align = 0xf;
#else
	fep->rx_align = 0x3;
	fep->tx_align = 0x3;
#endif

	fec_enet_alloc_queue(ndev);				//allocate fec_enet_priv_tx_q/fec_enet_priv_rx_q and fill them in; DMA-allocate the tx TSO headers txq->tso_hdrs
											//this mainly fixes the txq and rxq ring buffer sizes; the rings store bufdesc entries, so sizes are computed in struct bufdesc units
											
	bd_size = (fep->total_tx_ring_size + fep->total_rx_ring_size) * dsize;	//total bufdesc size for txq + rxq

	/* Allocate memory for buffer descriptors. */
	cbd_base = dmam_alloc_coherent(&fep->pdev->dev, bd_size, &bd_dma,		//DMA-allocate the memory for all txq/rxq bufdesc entries
				       GFP_KERNEL);
	if (!cbd_base) {
		return -ENOMEM;
	}

	memset(cbd_base, 0, bd_size);

	/* Get the Ethernet address */
	fec_get_mac(ndev);
	/* make sure MAC we just acquired is programmed into the hw */
	fec_set_mac_address(ndev, NULL);

	/* Set receive and transmit descriptor base. */
	for (i = 0; i < fep->num_rx_queues; i++) {
		struct fec_enet_priv_rx_q *rxq = fep->rx_queue[i];
		unsigned size = dsize * rxq->bd.ring_size;	//rxq ring buffer size

		rxq->bd.qid = i;					//queue id
		rxq->bd.base = cbd_base;			//descriptor base address: a ring buffer holds many bufdesc entries, cbd_base is the address of the first one
		rxq->bd.cur = cbd_base;				//address of the currently available bufdesc
		rxq->bd.dma = bd_dma;				//dma physical address
		rxq->bd.dsize = dsize;				//size of one bufdesc
		rxq->bd.dsize_log2 = dsize_log2;
		rxq->bd.reg_desc_active = fep->hwp + offset_des_active_rxq[i];	//ENETx_RDAR register address
		bd_dma += size;						//advance the dma physical address to the txq region
		cbd_base = (struct bufdesc *)(((void *)cbd_base) + size);	//rxq->bd.base + size: advance past the end of this ring buffer
		rxq->bd.last = (struct bufdesc *)(((void *)cbd_base) - dsize);	//last bufdesc of the rxq
	}

	for (i = 0; i < fep->num_tx_queues; i++) {
		struct fec_enet_priv_tx_q *txq = fep->tx_queue[i];
		unsigned size = dsize * txq->bd.ring_size;

		txq->bd.qid = i;
		txq->bd.base = cbd_base;
		txq->bd.cur = cbd_base;
		txq->bd.dma = bd_dma;
		txq->bd.dsize = dsize;
		txq->bd.dsize_log2 = dsize_log2;
		txq->bd.reg_desc_active = fep->hwp + offset_des_active_txq[i];	//ENETx_TDAR register address
		bd_dma += size;
		cbd_base = (struct bufdesc *)(((void *)cbd_base) + size);
		txq->bd.last = (struct bufdesc *)(((void *)cbd_base) - dsize);
	}

	/* The FEC Ethernet specific entries in the device structure */
	ndev->watchdog_timeo = TX_TIMEOUT;				//transmit timeout
	ndev->netdev_ops = &fec_netdev_ops;				//install netdev_ops
	ndev->ethtool_ops = &fec_enet_ethtool_ops;		//install ethtool_ops

	writel(FEC_RX_DISABLED_IMASK, fep->hwp + FEC_IMASK);			//program the interrupt mask register
	netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, NAPI_POLL_WEIGHT);   //add the napi handler

	if (fep->quirks & FEC_QUIRK_HAS_VLAN)				//add the vlan capability
		/* enable hw VLAN support */
		ndev->features |= NETIF_F_HW_VLAN_CTAG_RX;

	if (fep->quirks & FEC_QUIRK_HAS_CSUM) {
		ndev->gso_max_segs = FEC_MAX_TSO_SEGS;

		/* enable hw accelerator */					//hardware offloads: packet checksum, TSO, etc.
		ndev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
				| NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO);
		fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
	}

	if (fep->quirks & FEC_QUIRK_HAS_AVB) {
		fep->tx_align = 0;
		fep->rx_align = 0x3f;
	}

	ndev->hw_features = ndev->features;		//hardware features

	fec_restart(ndev);		//init some MAC registers, init the rxq bufdesc entries, release the txq bufdesc entries, program the rxq ring buffer addresses in DDR
	

	if (fep->quirks & FEC_QUIRK_MIB_CLEAR)
		fec_enet_clear_ethtool_stats(ndev);
	else
		fec_enet_update_ethtool_stats(ndev);

	return 0;
}
fec_enet_alloc_queue

DMA buffers must be physically contiguous, and these descriptor rings need a coherent DMA allocation.
This function fixes the ring buffer sizes (how many struct bufdesc entries each ring can hold).

struct bufdesc_prop {
	int qid;
	/* Address of Rx and Tx buffers */
	struct bufdesc	*base;			//address of the first bufdesc in the ring buffer
	struct bufdesc	*last;			//address of the last bufdesc in the ring buffer
	struct bufdesc	*cur;			//address of the next free bufdesc in the ring buffer
	void __iomem	*reg_desc_active;
	dma_addr_t	dma;
	unsigned short ring_size;
	unsigned char dsize;
	unsigned char dsize_log2;
};

struct fec_enet_priv_tx_q {
	struct bufdesc_prop bd;
	unsigned char *tx_bounce[TX_RING_SIZE];
	struct  sk_buff *tx_skbuff[TX_RING_SIZE];

	unsigned short tx_stop_threshold;
	unsigned short tx_wake_threshold;

	struct bufdesc	*dirty_tx;
	char *tso_hdrs;
	dma_addr_t tso_hdrs_dma;
};

struct fec_enet_priv_rx_q {
	struct bufdesc_prop bd;
	struct  sk_buff *rx_skbuff[RX_RING_SIZE];
};

static int fec_enet_alloc_queue(struct net_device *ndev)
{
	struct fec_enet_private *fep = netdev_priv(ndev);
	int i;
	int ret = 0;
	struct fec_enet_priv_tx_q *txq;

	for (i = 0; i < fep->num_tx_queues; i++) {
		txq = kzalloc(sizeof(*txq), GFP_KERNEL);			//fec_enet_priv_tx_q describes one fec transmit queue
		if (!txq) {
			ret = -ENOMEM;
			goto alloc_failed;
		}

		fep->tx_queue[i] = txq;										//a MAC may have several tx queues
		txq->bd.ring_size = TX_RING_SIZE;							//queue size
		fep->total_tx_ring_size += fep->tx_queue[i]->bd.ring_size;	//sum of all tx queue sizes

		txq->tx_stop_threshold = FEC_MAX_SKB_DESCS;					//TSO fragment limit
		txq->tx_wake_threshold = (txq->bd.ring_size - txq->tx_stop_threshold) / 2;

		txq->tso_hdrs = dma_alloc_coherent(&fep->pdev->dev,			//DMA-allocate the tso headers txq->tso_hdrs
					txq->bd.ring_size * TSO_HEADER_SIZE,
					&txq->tso_hdrs_dma,
					GFP_KERNEL);
		if (!txq->tso_hdrs) {
			ret = -ENOMEM;
			goto alloc_failed;
		}
	}

	for (i = 0; i < fep->num_rx_queues; i++) {
		fep->rx_queue[i] = kzalloc(sizeof(*fep->rx_queue[i]),	//fec_enet_priv_rx_q describes one fec receive queue
					   GFP_KERNEL);
		if (!fep->rx_queue[i]) {
			ret = -ENOMEM;
			goto alloc_failed;
		}
	
		fep->rx_queue[i]->bd.ring_size = RX_RING_SIZE;
		fep->total_rx_ring_size += fep->rx_queue[i]->bd.ring_size;
	}
	return ret;

alloc_failed:
	fec_enet_free_queue(ndev);
	return ret;
}
fec_restart
static void
fec_restart(struct net_device *ndev)
{
	struct fec_enet_private *fep = netdev_priv(ndev);
	u32 val;
	u32 temp_mac[2];
	u32 rcntl = OPT_FRAME_SIZE | 0x04;
	u32 ecntl = 0x2; /* ETHEREN */

	if (fep->quirks & FEC_QUIRK_HAS_AVB) {
		writel(0, fep->hwp + FEC_ECNTRL);
	} else {
		writel(1, fep->hwp + FEC_ECNTRL);
		udelay(10);
	}

	/*
	 * enet-mac reset will reset mac address registers too,
	 * so need to reconfigure it.
	 */
	memcpy(&temp_mac, ndev->dev_addr, ETH_ALEN);
	writel((__force u32)cpu_to_be32(temp_mac[0]),
	       fep->hwp + FEC_ADDR_LOW);
	writel((__force u32)cpu_to_be32(temp_mac[1]),
	       fep->hwp + FEC_ADDR_HIGH);

	/* Clear any outstanding interrupt. */
	writel(0xffffffff, fep->hwp + FEC_IEVENT);			//clear all pending interrupt events

	fec_enet_bd_init(ndev);			//init the rxq bufdesc entries, release the txq bufdesc entries

	fec_enet_enable_ring(ndev);		//program the rxq ring buffer addresses in DDR

	/* Reset tx SKB buffers. */
	fec_enet_reset_skb(ndev);

	/* Enable MII mode */
	if (fep->full_duplex == DUPLEX_FULL) {				//configure full duplex
		/* FD enable */
		writel(0x04, fep->hwp + FEC_X_CNTRL);
	} else {
		/* No Rcv on Xmit */
		rcntl |= 0x02;
		writel(0x0, fep->hwp + FEC_X_CNTRL);
	}

	/* Set MII speed */
	writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);	//set the MII speed

#if !defined(CONFIG_M5272)
	if (fep->quirks & FEC_QUIRK_HAS_RACC) {
		val = readl(fep->hwp + FEC_RACC);
		/* align IP header */
		val |= FEC_RACC_SHIFT16;
		if (fep->csum_flags & FLAG_RX_CSUM_ENABLED)
			/* set RX checksum */
			val |= FEC_RACC_OPTIONS;
		else
			val &= ~FEC_RACC_OPTIONS;
		writel(val, fep->hwp + FEC_RACC);
		writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_FTRL);
	}
#endif

	if (fep->quirks & FEC_QUIRK_ENET_MAC) {		//configure interface mode and speed
		/* Enable flow control and length check */
		rcntl |= 0x40000000 | 0x00000020;

		/* RGMII, RMII or MII */
		if (fep->phy_interface == PHY_INTERFACE_MODE_RGMII ||
		    fep->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
		    fep->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID ||
		    fep->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID)
			rcntl |= (1 << 6);
		else if (fep->phy_interface == PHY_INTERFACE_MODE_RMII)
			rcntl |= (1 << 8);
		else
			rcntl &= ~(1 << 8);

		/* 1G, 100M or 10M */
		if (ndev->phydev) {
			if (ndev->phydev->speed == SPEED_1000)
				ecntl |= (1 << 5);
			else if (ndev->phydev->speed == SPEED_100)
				rcntl &= ~(1 << 9);
			else
				rcntl |= (1 << 9);
		}
	} else {
#ifdef FEC_MIIGSK_ENR
	if (fep->quirks & FEC_QUIRK_USE_GASKET) {
			u32 cfgr;
			/* disable the gasket and wait */
			writel(0, fep->hwp + FEC_MIIGSK_ENR);
			while (readl(fep->hwp + FEC_MIIGSK_ENR) & 4)
				udelay(1);

			/*
			 * configure the gasket:
			 *   RMII, 50 MHz, no loopback, no echo
			 *   MII, 25  MHz, no loopback, no echo
			 */
			cfgr = (fep->phy_interface == PHY_INTERFACE_MODE_RMII)
				? BM_MIIGSK_CFGR_RMII : BM_MIIGSK_CFGR_MII;
			if (ndev->phydev && ndev->phydev->speed == SPEED_10)
				cfgr |= BM_MIIGSK_CFGR_FRCONT_10M;
			writel(cfgr, fep->hwp + FEC_MIIGSK_CFGR);

			/* re-enable the gasket */
			writel(2, fep->hwp + FEC_MIIGSK_ENR);
		}
#endif
	}

#if !defined(CONFIG_M5272)
	/* enable pause frame*/
	if ((fep->pause_flag & FEC_PAUSE_FLAG_ENABLE) ||
	    ((fep->pause_flag & FEC_PAUSE_FLAG_AUTONEG) &&
	     ndev->phydev && ndev->phydev->pause)) {
		rcntl |= FEC_ENET_FCE;

		/* set FIFO threshold parameter to reduce overrun */
		writel(FEC_ENET_RSEM_V, fep->hwp + FEC_R_FIFO_RSEM);
		writel(FEC_ENET_RSFL_V, fep->hwp + FEC_R_FIFO_RSFL);
		writel(FEC_ENET_RAEM_V, fep->hwp + FEC_R_FIFO_RAEM);
		writel(FEC_ENET_RAFL_V, fep->hwp + FEC_R_FIFO_RAFL);

		/* OPD */
		writel(FEC_ENET_OPD_V, fep->hwp + FEC_OPD);
	} else {
		rcntl &= ~FEC_ENET_FCE;
	}
#endif /* !defined(CONFIG_M5272) */

	writel(rcntl, fep->hwp + FEC_R_CNTRL);

	/* Setup multicast filter. */
	set_multicast_list(ndev);
#ifndef CONFIG_M5272
	writel(0, fep->hwp + FEC_HASH_TABLE_HIGH);
	writel(0, fep->hwp + FEC_HASH_TABLE_LOW);
#endif

	if (fep->quirks & FEC_QUIRK_ENET_MAC) {
		/* enable ENET endian swap */
		ecntl |= (1 << 8);
		/* enable ENET store and forward mode */
		writel(1 << 8, fep->hwp + FEC_X_WMRK);
	}

	if (fep->bufdesc_ex)
		ecntl |= (1 << 4);

#ifndef CONFIG_M5272
	/* Enable the MIB statistic event counters */
	writel(0 << 31, fep->hwp + FEC_MIB_CTRLSTAT);
#endif

	/* And last, enable the transmit and receive processing */
	writel(ecntl, fep->hwp + FEC_ECNTRL);
	fec_enet_active_rxring(ndev);					//activate the receive descriptors (Receive Descriptor Active)

	if (fep->bufdesc_ex)
		fec_ptp_start_cyclecounter(ndev);

	/* Enable interrupts we wish to service */
	if (fep->link)
		writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
	else
		writel(FEC_ENET_MII, fep->hwp + FEC_IMASK);

	/* Init the interrupt coalescing */
	fec_enet_itr_coal_init(ndev);

}
fec_enet_bd_init

Initialize the buffer descriptors in the queues.

/* Init RX & TX buffer descriptors
 */
static void fec_enet_bd_init(struct net_device *dev)
{
	struct fec_enet_private *fep = netdev_priv(dev);
	struct fec_enet_priv_tx_q *txq;
	struct fec_enet_priv_rx_q *rxq;
	struct bufdesc *bdp;
	unsigned int i;
	unsigned int q;

	for (q = 0; q < fep->num_rx_queues; q++) {		//iterate over all rxqs
		/* Initialize the receive buffer descriptors. */
		rxq = fep->rx_queue[q];
		bdp = rxq->bd.base;

		for (i = 0; i < rxq->bd.ring_size; i++) {			//iterate over every bufdesc in the rxq

			/* Initialize the BD for every fragment in the page. */
			if (bdp->cbd_bufaddr)									//initialize the descriptor control and status info
				bdp->cbd_sc = cpu_to_fec16(BD_ENET_RX_EMPTY);		//mark the descriptor empty
			else
				bdp->cbd_sc = cpu_to_fec16(0);
			bdp = fec_enet_get_nextdesc(bdp, &rxq->bd);
		}

		/* Set the last buffer to wrap */
		bdp = fec_enet_get_prevdesc(bdp, &rxq->bd);			//flag the last bufdesc of the rxq
		bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);

		rxq->bd.cur = rxq->bd.base;							//reset the free-bufdesc cursor to the start
	}

	for (q = 0; q < fep->num_tx_queues; q++) {
		/* ...and the same for transmit */
		txq = fep->tx_queue[q];
		bdp = txq->bd.base;
		txq->bd.cur = bdp;

		for (i = 0; i < txq->bd.ring_size; i++) {			//release the txq buffer descriptors
			/* Initialize the BD for every fragment in the page. */
			bdp->cbd_sc = cpu_to_fec16(0);
			if (bdp->cbd_bufaddr &&
			    !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
				dma_unmap_single(&fep->pdev->dev,
						 fec32_to_cpu(bdp->cbd_bufaddr),
						 fec16_to_cpu(bdp->cbd_datlen),
						 DMA_TO_DEVICE);
			if (txq->tx_skbuff[i]) {
				dev_kfree_skb_any(txq->tx_skbuff[i]);
				txq->tx_skbuff[i] = NULL;
			}
			bdp->cbd_bufaddr = cpu_to_fec32(0);
			bdp = fec_enet_get_nextdesc(bdp, &txq->bd);
		}

		/* Set the last buffer to wrap */
		bdp = fec_enet_get_prevdesc(bdp, &txq->bd);
		bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);
		txq->dirty_tx = bdp;
	}
}

fec_enet_enable_ring enables the rings: it writes each queue's base address into the ENETx_RDSR register.
The imx series supports up to 3 queues; the imx6ull supports only 1.

static void fec_enet_enable_ring(struct net_device *ndev)
{
	struct fec_enet_private *fep = netdev_priv(ndev);
	struct fec_enet_priv_tx_q *txq;
	struct fec_enet_priv_rx_q *rxq;
	int i;

	for (i = 0; i < fep->num_rx_queues; i++) {	//the imx series supports up to 3 receive queues, the imx6ull only 1; queue 0's base address lives in FEC_R_DES_START_0 (ENETx_RDSR)
		rxq = fep->rx_queue[i];
		writel(rxq->bd.dma, fep->hwp + FEC_R_DES_START(i));		//program the rxq ring buffer address in DDR (dma physical address), ENETx_RDSR
		writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i));

		/* enable DMA1/2 */
		if (i)
			writel(RCMR_MATCHEN | RCMR_CMP(i),		//with multiple queues, enable the other dma channels
			       fep->hwp + FEC_RCMR(i));
	}

	for (i = 0; i < fep->num_tx_queues; i++) {
		txq = fep->tx_queue[i];
		writel(txq->bd.dma, fep->hwp + FEC_X_DES_START(i));		//program the txq ring buffer address in DDR, ENETx_TDSR

		/* enable DMA1/2 */
		if (i)
			writel(DMA_CLASS_EN | IDLE_SLOPE(i),
			       fep->hwp + FEC_DMA_CFG(i));
	}
} 

fec_enet_mii_init

Registers the mdio bus; a Linux driver describes an mdio bus with struct mii_bus.
The most important members of mii_bus are mii_bus->read and mii_bus->write, which read and write the phy registers. NXP provides these two functions, since every SoC's controller differs and the vendor must implement them itself.
It then reads the mdio device from the device tree and registers it, reads the phy devices under the mdio child nodes, and registers each one; a phy device is described by struct phy_device.
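
Once registered, phylib reaches the PHY through these bus ops via the generic accessors. A small usage sketch (not FEC code; example_read_link_status and phy_addr are made up for illustration):

/* Reads the PHY's BMSR through the bus ops installed above,
 * i.e. fec_enet_mdio_read(). phy_addr is assumed to be the
 * PHY address parsed from the device tree. */
static int example_read_link_status(struct fec_enet_private *fep, int phy_addr)
{
	int bmsr = mdiobus_read(fep->mii_bus, phy_addr, MII_BMSR);

	if (bmsr < 0)
		return bmsr;			/* mdio error */
	return !!(bmsr & BMSR_LSTATUS);		/* 1 = link up */
}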

static int fec_enet_mii_init(struct platform_device *pdev)
{
	static struct mii_bus *fec0_mii_bus;
	static bool *fec_mii_bus_share;
	struct net_device *ndev = platform_get_drvdata(pdev);
	struct fec_enet_private *fep = netdev_priv(ndev);
	struct device_node *node;
	int err = -ENXIO;
	u32 mii_speed, holdtime;

	...... some code omitted

	fep->mii_bus = mdiobus_alloc();
	if (fep->mii_bus == NULL) {
		err = -ENOMEM;
		goto err_out;
	}

	fep->mii_bus->name = "fec_enet_mii_bus";
	fep->mii_bus->read = fec_enet_mdio_read;		//install the mii read/write functions (they access the phy chip)
	fep->mii_bus->write = fec_enet_mdio_write;
	snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
		pdev->name, fep->dev_id + 1);
	fep->mii_bus->priv = fep;
	fep->mii_bus->parent = &pdev->dev;

	node = of_get_child_by_name(pdev->dev.of_node, "mdio");	//get the mdio node
	err = of_mdiobus_register(fep->mii_bus, node);			//register the mdio bus with the kernel
	of_node_put(node);
	if (err)
		goto err_out_free_mdiobus;

	mii_cnt++;

	/* save fec0 mii_bus */
	if (fep->quirks & FEC_QUIRK_SINGLE_MDIO) {
		fec0_mii_bus = fep->mii_bus;
		fec_mii_bus_share = &fep->mii_bus_share;
	}

	return 0;

err_out_free_mdiobus:
	mdiobus_free(fep->mii_bus);
err_out:
	return err;
}	
of_mdiobus_register

mdiobus_register(mdio); registers the mii_bus with the kernel.
for_each_available_child_of_node(np, child) {} walks all child nodes of the device-tree mdio node — the phy nodes.
addr = of_mdio_parse_addr(&mdio->dev, child); parses a phy node and reads its reg property — the phy address.
of_mdiobus_register_phy(mdio, child, addr); registers the phy device.

//linux-5.4.47\drivers\of\of_mdio.c
int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
{
	struct device_node *child;
	bool scanphys = false;
	int addr, rc;

	if (!np)
		return mdiobus_register(mdio);

	/* Do not continue if the node is disabled */
	if (!of_device_is_available(np))
		return -ENODEV;

	/*Register the MDIO bus */
	rc = mdiobus_register(mdio);
	if (rc)
		return rc;

	/* Loop over the child nodes and register a phy_device for each phy */
	for_each_available_child_of_node(np, child) {
		addr = of_mdio_parse_addr(&mdio->dev, child);
		if (addr < 0) {
			scanphys = true;
			continue;
		}

		if (of_mdiobus_child_is_phy(child))
			rc = of_mdiobus_register_phy(mdio, child, addr);
		else
			rc = of_mdiobus_register_device(mdio, child, addr);

		if (rc == -ENODEV)
			dev_err(&mdio->dev,
				"MDIO device at address %d is missing.\n",
				addr);
		else if (rc)
			goto unregister;
	}

	if (!scanphys)
		return 0;

	/* auto scan for PHYs with empty reg property */
	for_each_available_child_of_node(np, child) {
		/* Skip PHYs with reg property set */
		if (of_find_property(child, "reg", NULL))
			continue;

		for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
			/* skip already registered PHYs */
			if (mdiobus_is_registered_device(mdio, addr))
				continue;

			/* be noisy to encourage people to set reg property */
			dev_info(&mdio->dev, "scan phy %pOFn at address %i\n",
				 child, addr);

			if (of_mdiobus_child_is_phy(child)) {
				rc = of_mdiobus_register_phy(mdio, child, addr);
				if (rc && rc != -ENODEV)
					goto unregister;
				break;
			}
		}
	}

	return 0;

unregister:
	mdiobus_unregister(mdio);
	return rc;
}
EXPORT_SYMBOL(of_mdiobus_register);
of_mdiobus_register_phy

Registers a phy device.

static int of_mdiobus_register_phy(struct mii_bus *mdio,
				    struct device_node *child, u32 addr)
{
	struct phy_device *phy;
	bool is_c45;
	int rc;
	u32 phy_id;

	is_c45 = of_device_is_compatible(child,
					 "ethernet-phy-ieee802.3-c45");
					 
	if (!is_c45 && !of_get_phy_id(child, &phy_id))
		phy = phy_device_create(mdio, addr, phy_id, 0, NULL);
	else
		phy = get_phy_device(mdio, addr, is_c45);
	if (IS_ERR(phy))
		return PTR_ERR(phy);

	rc = of_irq_get(child, 0);
	if (rc == -EPROBE_DEFER) {
		phy_device_free(phy);
		return rc;
	}
	if (rc > 0) {
		phy->irq = rc;
		mdio->irq[addr] = rc;
	} else {
		phy->irq = mdio->irq[addr];
	}

	if (of_property_read_bool(child, "broken-turn-around"))
		mdio->phy_ignore_ta_mask |= 1 << addr;

	of_property_read_u32(child, "reset-assert-us",
			     &phy->mdio.reset_assert_delay);

	/* Associate the OF node with the device structure so it
	 * can be looked up later */
	of_node_get(child);
	phy->mdio.dev.of_node = child;
	phy->mdio.dev.fwnode = of_fwnode_handle(child);

	/* All data is now stored in the phy struct;
	 * register it */
	rc = phy_device_register(phy);
	if(rc) {
		phy_device_free(phy);
		of_node_put(child);
		return rc;
	}

	dev_dbg(&mdio->dev, "registered phy %pOFn at address %i\n",
		child, addr);
	return 0;
} 

phy = phy_device_create(mdio, addr, phy_id, 0, NULL); creates the phy device and returns a struct phy_device; the Linux kernel describes a phy device with struct phy_device.
rc = phy_device_register(phy); registers the phy device.

net_device_ops

A Linux Ethernet driver exposes net_device_ops to the upper layers so that applications can control the NIC.

static const struct net_device_ops fec_netdev_ops = {
	.ndo_open		= fec_enet_open,
	.ndo_stop		= fec_enet_close,
	.ndo_start_xmit		= fec_enet_start_xmit,
	.ndo_set_rx_mode	= set_multicast_list,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_tx_timeout		= fec_timeout,
	.ndo_set_mac_address	= fec_set_mac_address,
	.ndo_do_ioctl		= fec_enet_ioctl,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= fec_poll_controller,
#endif
	.ndo_set_features	= fec_set_features,
};

fec_enet_open

fec_enet_open mainly does the following:

  1. allocates the rxq bufdesc cbd_bufaddr buffers and the rxq skb array; allocates the txq->tx_bounce array
  2. initializes the MAC: programs some MAC registers, resets the rxq bufdesc entries (marking all buffers empty), clears all pending interrupt events, etc.
  3. connects the phy device, attaching the phy_device to the netdev; once attached, phy_init_hw() calls the phy driver to initialize the phy, and the phy state machine is started
  4. enables napi, allowing napi scheduling
  5. wakes all transmit queues, allowing transmission
static int
fec_enet_open(struct net_device *ndev)
{
	struct fec_enet_private *fep = netdev_priv(ndev);
	int ret;

	ret = pm_runtime_get_sync(&fep->pdev->dev);
	if (ret < 0)
		return ret;

	pinctrl_pm_select_default_state(&fep->pdev->dev);
	ret = fec_enet_clk_enable(ndev, true);
	if (ret)
		goto clk_enable;

	/* I should reset the ring buffers here, but I don't yet know
	 * a simple way to do that.
	 */

	ret = fec_enet_alloc_buffers(ndev);		//DMA-map the rxq bufdesc cbd_bufaddr buffers and allocate the rxq skb array; allocate txq->tx_bounce[] — the memory that actually holds packet data on receive and transmit
	if (ret)
		goto err_enet_alloc;

	/* Init MAC prior to mii bus probe */
	fec_restart(ndev);						//initialize the MAC: program some MAC registers, reset the rxq bufdesc entries (all buffers empty), clear all pending interrupt events, etc.

	/* Probe and connect to PHY when open the interface */
	ret = fec_enet_mii_probe(ndev);			//connect the phy device, attaching the phy_device to the netdev; phy_init_hw() then calls the phy driver to init the phy; start the phy state machine
	if (ret)
		goto err_enet_mii_probe;

	if (fep->quirks & FEC_QUIRK_ERR006687)
		imx6q_cpuidle_fec_irqs_used();

	napi_enable(&fep->napi);				//enable napi, allowing napi scheduling
	phy_start(ndev->phydev);
	netif_tx_start_all_queues(ndev);		//wake all transmit queues, allowing transmission

	device_set_wakeup_enable(&ndev->dev, fep->wol_flag &
				 FEC_WOL_FLAG_ENABLE);

	return 0;
}
fec_enet_alloc_buffers
static int fec_enet_alloc_buffers(struct net_device *ndev)
{
	struct fec_enet_private *fep = netdev_priv(ndev);
	unsigned int i;

	for (i = 0; i < fep->num_rx_queues; i++)
		if (fec_enet_alloc_rxq_buffers(ndev, i))	//allocate rxq bufdesc->cbd_bufaddr and rxq->rx_skbuff[]
			return -ENOMEM;

	for (i = 0; i < fep->num_tx_queues; i++)
		if (fec_enet_alloc_txq_buffers(ndev, i))	//allocate txq->tx_bounce[], presumably where packet data sits during transmission
			return -ENOMEM;
	return 0;
}

fec_enet_alloc_rxq_buffers
Buffers that carry NIC traffic should use streaming DMA mappings; the descriptors that the driver and the NIC's DMA engine exchange (arranged as a ring or chain) normally use a coherent DMA mapping instead.
The streaming mapping covers the memory behind skb->data and records the result in bufdesc->cbd_bufaddr: cbd_bufaddr holds the DMA (bus) address handed to the device, while skb->data is the kernel virtual address used by the driver.
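
As a minimal sketch of that streaming pairing (generic DMA-API calls; example_map_rx, buf and rx_buf_len are placeholders for illustration):

static dma_addr_t example_map_rx(struct device *dev, void *buf, size_t rx_buf_len)
{
	/* Map an RX buffer for the device (as fec_enet_new_rxbdp() below does) */
	dma_addr_t handle = dma_map_single(dev, buf, rx_buf_len, DMA_FROM_DEVICE);

	if (dma_mapping_error(dev, handle))
		return 0;	/* caller treats 0 as failure in this sketch */

	/* Later, to let the CPU read a completed frame without unmapping: */
	dma_sync_single_for_cpu(dev, handle, rx_buf_len, DMA_FROM_DEVICE);
	/* ...copy the frame out, then hand the buffer back to the device: */
	dma_sync_single_for_device(dev, handle, rx_buf_len, DMA_FROM_DEVICE);
	return handle;
}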

static int
fec_enet_alloc_rxq_buffers(struct net_device *ndev, unsigned int queue)
{
	struct fec_enet_private *fep = netdev_priv(ndev);
	unsigned int i;
	struct sk_buff *skb;
	struct bufdesc	*bdp;
	struct fec_enet_priv_rx_q *rxq;

	rxq = fep->rx_queue[queue];
	bdp = rxq->bd.base;
	for (i = 0; i < rxq->bd.ring_size; i++) {
		skb = netdev_alloc_skb(ndev, FEC_ENET_RX_FRSIZE);		//allocate the skb
		if (!skb)
			goto err_alloc;

		if (fec_enet_new_rxbdp(ndev, bdp, skb)) {	//DMA-map bufdesc->cbd_bufaddr and bind it to skb->data; one skb corresponds to one bufdesc
			dev_kfree_skb(skb);						//once the device DMAs data to bufdesc->cbd_bufaddr, the kernel can read the packet through skb->data
			goto err_alloc;
		}

		rxq->rx_skbuff[i] = skb;					//the rxq->rx_skbuff[] array has as many entries as there are bufdesc entries in the ring buffer
		bdp->cbd_sc = cpu_to_fec16(BD_ENET_RX_EMPTY);

		if (fep->bufdesc_ex) {
			struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
			ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT);
		}

		bdp = fec_enet_get_nextdesc(bdp, &rxq->bd);
	}

	/* Set the last buffer to wrap. */
	bdp = fec_enet_get_prevdesc(bdp, &rxq->bd);
	bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);
	return 0;

 err_alloc:
	fec_enet_free_buffers(ndev);
	return -ENOMEM;
}

fec_enet_new_rxbdp

static int
fec_enet_new_rxbdp(struct net_device *ndev, struct bufdesc *bdp, struct sk_buff *skb)
{
	struct  fec_enet_private *fep = netdev_priv(ndev);
	int off;

	off = ((unsigned long)skb->data) & fep->rx_align;
	if (off)
		skb_reserve(skb, fep->rx_align + 1 - off);	//adjust the skb headroom; headroom = skb->data - skb->head

	//map bdp->cbd_bufaddr to the memory behind skb->data, binding the bufdesc to the skb: one skb corresponds to one bufdesc
	//a streaming dma mapping needs a direction: DMA_FROM_DEVICE, from the device into DDR
	bdp->cbd_bufaddr = cpu_to_fec32(dma_map_single(&fep->pdev->dev, skb->data, FEC_ENET_RX_FRSIZE - fep->rx_align, DMA_FROM_DEVICE));
	if (dma_mapping_error(&fep->pdev->dev, fec32_to_cpu(bdp->cbd_bufaddr))) {
		if (net_ratelimit())
			netdev_err(ndev, "Rx DMA memory map failed\n");
		return -ENOMEM;
	}

	return 0;
}

fec_enet_alloc_txq_buffers

static int
fec_enet_alloc_txq_buffers(struct net_device *ndev, unsigned int queue)
{
	struct fec_enet_private *fep = netdev_priv(ndev);
	unsigned int i;
	struct bufdesc  *bdp;
	struct fec_enet_priv_tx_q *txq;

	txq = fep->tx_queue[queue];
	bdp = txq->bd.base;
	for (i = 0; i < txq->bd.ring_size; i++) {
		txq->tx_bounce[i] = kmalloc(FEC_ENET_TX_FRSIZE, GFP_KERNEL);		//txq->tx_bounce[]: presumably where packet data sits during transmission
		if (!txq->tx_bounce[i])
			goto err_alloc;

		bdp->cbd_sc = cpu_to_fec16(0);
		bdp->cbd_bufaddr = cpu_to_fec32(0);

		if (fep->bufdesc_ex) {
			struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
			ebdp->cbd_esc = cpu_to_fec32(BD_ENET_TX_INT);
		}

		bdp = fec_enet_get_nextdesc(bdp, &txq->bd);
	}

	/* Set the last buffer to wrap. */
	bdp = fec_enet_get_prevdesc(bdp, &txq->bd);
	bdp->cbd_sc |= cpu_to_fec16(BD_SC_WRAP);

	return 0;

 err_alloc:
	fec_enet_free_buffers(ndev);
	return -ENOMEM;
}
fec_enet_mii_probe

The of_phy_connect call
of_phy_connect ends up in phy_attach_direct, which binds the phydev to the netdev; phy_init_hw then calls into the phy driver.

static int fec_enet_mii_probe(struct net_device *ndev)
{
	struct fec_enet_private *fep = netdev_priv(ndev);
	struct phy_device *phy_dev = NULL;
	char mdio_bus_id[MII_BUS_ID_SIZE];
	char phy_name[MII_BUS_ID_SIZE + 3];
	int phy_id;
	int dev_id = fep->dev_id;

	if (fep->phy_node) {
		phy_dev = of_phy_connect(ndev, fep->phy_node,	//connect the phy device, attaching the phy_device to the netdev; phy_init_hw() then calls the phy driver to init the phy;
					 &fec_enet_adjust_link, 0,			//start the phy state machine (really a work queue that keeps monitoring the phy link state)
					 fep->phy_interface);
		if (!phy_dev) {
			netdev_err(ndev, "Unable to connect to phy\n");
			return -ENODEV;
		}
	} else {
		/* check for attached phy */
		for (phy_id = 0; (phy_id < PHY_MAX_ADDR); phy_id++) {
			if (!mdiobus_is_registered_device(fep->mii_bus, phy_id))
				continue;
			if (dev_id--)
				continue;
			strlcpy(mdio_bus_id, fep->mii_bus->id, MII_BUS_ID_SIZE);
			break;
		} 

		if (phy_id >= PHY_MAX_ADDR) {
			netdev_info(ndev, "no PHY, assuming direct connection to switch\n");
			strlcpy(mdio_bus_id, "fixed-0", MII_BUS_ID_SIZE);
			phy_id = 0;
		}
		snprintf(phy_name, sizeof(phy_name),
			 PHY_ID_FMT, mdio_bus_id, phy_id);
		phy_dev = phy_connect(ndev, phy_name, &fec_enet_adjust_link,
				      fep->phy_interface);
	}
	if (IS_ERR(phy_dev)) {
		netdev_err(ndev, "could not attach to PHY\n");
		return PTR_ERR(phy_dev);
	}

	/* mask with MAC supported features */
	if (fep->quirks & FEC_QUIRK_HAS_GBIT) {
		phy_set_max_speed(phy_dev, 1000);
		phy_remove_link_mode(phy_dev,
				     ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
#if !defined(CONFIG_M5272)
		phy_support_sym_pause(phy_dev);
#endif
	}
	else
		phy_set_max_speed(phy_dev, 100);

	fep->link = 0;
	fep->full_duplex = 0;

	phy_attached_info(phy_dev);

	return 0;
}

fec_enet_close

fec_enet_close undoes what open did:

  1. disables napi
  2. netif_tx_disable stops the transmit queues
  3. fec_stop programs some MAC registers
  4. phy_disconnect detaches the phydev from the netdev
  5. fec_enet_free_buffers frees the txq/rxq buffers
static int
fec_enet_close(struct net_device *ndev)
{
	struct fec_enet_private *fep = netdev_priv(ndev);

	phy_stop(ndev->phydev);

	if (netif_device_present(ndev)) {
		napi_disable(&fep->napi);			//disable napi
		netif_tx_disable(ndev);				//disable the transmit queues
		fec_stop(ndev);						//shut down the MAC
	}

	phy_disconnect(ndev->phydev);			//stop the phy state machine; detach the phy_device from the netdev

	if (fep->quirks & FEC_QUIRK_ERR006687)
		imx6q_cpuidle_fec_irqs_unused();

	fec_enet_update_ethtool_stats(ndev);

	fec_enet_clk_enable(ndev, false);			//disable the MAC clocks
	pinctrl_pm_select_sleep_state(&fep->pdev->dev);
	pm_runtime_mark_last_busy(&fep->pdev->dev);
	pm_runtime_put_autosuspend(&fep->pdev->dev);

	fec_enet_free_buffers(ndev);			//free the rxq bufdesc cbd_bufaddr buffers and the rxq skb array; free txq->tx_bounce[]

	return 0;
}

The network receive path

Linux receives packets with NAPI — interrupts plus polling.
After the NIC has moved data into the ring buffer in memory, it raises a hardware interrupt to tell the CPU data has arrived. The CPU enters the interrupt handler and dispatches on the event type; when the event is received data, it starts NAPI scheduling: the driver's napi poll function (each NIC design differs, so each has its own receive routine) is added to the CPU's per-CPU network softirq data, and the network-receive softirq bit is raised.
The softirq thread keeps checking whether any softirq bit is raised; each bit maps to a handler function (bound to the bits at kernel boot). When it finds a raised bit, the softirq thread jumps into that handler, and it is there that the driver's napi poll is called to do the real packet reception.
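
The contract a NAPI poll function must satisfy is small. A minimal skeleton (a sketch, not FEC code; my_process_rx and my_enable_rx_irq are hypothetical helpers):

static int my_poll(struct napi_struct *napi, int budget)
{
	int done = my_process_rx(napi, budget);	/* receive at most budget packets */

	if (done < budget) {			/* ring drained: leave polled mode */
		napi_complete_done(napi, done);
		my_enable_rx_irq();		/* re-enable the RX interrupt */
	}
	return done;
}
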
NAPI call flow

//the NIC driver registers its interrupt and napi (usually at init time, in the probe function)
ret = devm_request_irq(&pdev->dev, irq, fec_enet_interrupt,
				       0, pdev->name, ndev);
				       
netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, NAPI_POLL_WEIGHT);

/*
The interrupt handler's job:
1. read the interrupt events
2. clear the interrupt flags
3. mask the NAPI interrupts
4. add napi_struct->poll_list to softnet_data->poll_list
5. kick off NAPI scheduling
*/
static irqreturn_t fec_enet_interrupt(int irq, void *dev_id)
{
	struct net_device *ndev = dev_id;
	struct fec_enet_private *fep = netdev_priv(ndev);
	uint int_events;
	irqreturn_t ret = IRQ_NONE;

	int_events = readl(fep->hwp + FEC_IEVENT);				//read the interrupt events
	writel(int_events, fep->hwp + FEC_IEVENT);				//clear the interrupt flags
	fec_enet_collect_events(fep, int_events);				//mark fep->work_rx or fep->work_tx according to the events

	if ((fep->work_tx || fep->work_rx) && fep->link) {
		ret = IRQ_HANDLED;

		if (napi_schedule_prep(&fep->napi)) {				//check whether napi may be scheduled
			/* Disable the NAPI interrupts */
			writel(FEC_NAPI_IMASK, fep->hwp + FEC_IMASK);	
			__napi_schedule(&fep->napi);					//start napi scheduling
		}
	}

	if (int_events & FEC_ENET_MII) {		//an mdio transfer completed
		ret = IRQ_HANDLED;
		complete(&fep->mdio_done);
	}
	return ret;
}	

void __napi_schedule(struct napi_struct *n)
{
	unsigned long flags;

	local_irq_save(flags);
	____napi_schedule(this_cpu_ptr(&softnet_data), n);		//this_cpu_ptr(&softnet_data) fetches the current cpu's network softirq per-cpu data
	local_irq_restore(flags);
}

static inline void ____napi_schedule(struct softnet_data *sd,
				     struct napi_struct *napi)
{												//sd->poll_list holds the poll_list nodes registered by all kinds of network devices (wifi, eth, ...); the napi_struct can be recovered from the list node
	list_add_tail(&napi->poll_list, &sd->poll_list);	//add the napi list node to softnet_data.poll_list (the softirq can then find the driver's poll function through softnet_data.poll_list)
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);				//raise the NET_RX_SOFTIRQ receive softirq bit; every network device's receive interrupt should end up raising this bit
}

//__raise_softirq_irqoff simply ORs a 1 into the corresponding flag bit
void __raise_softirq_irqoff(unsigned int nr)
{
	trace_softirq_raise(nr);
	or_softirq_pending(1UL << nr);
}
Softirqs

Softirqs: on a multi-core CPU, every core runs a softirq thread that handles the work hardware interrupts don't have time for; the hardware interrupt handler raises the softirq. Whichever CPU handles the hardware interrupt also handles the corresponding softirq. A single-core CPU has only one softirq thread.
Why NAPI helps: it raises receive efficiency. Older kernels received packets purely by interrupts, so handling very long packets kept the CPU stuck in the interrupt handler for a long time, starving other work and hurting CPU performance. Pure polling, on the other hand, handles floods of small packets inefficiently.

Softirq initialization

At boot the kernel calls smpboot_register_percpu_thread from spawn_ksoftirqd to create one softirq thread per CPU.

//file: kernel/softirq.c
static struct smp_hotplug_thread softirq_threads = {
    .store          = &ksoftirqd,
    .thread_should_run  = ksoftirqd_should_run,
    .thread_fn      = run_ksoftirqd,
    .thread_comm        = "ksoftirqd/%u",
};

static __init int spawn_ksoftirqd(void)
{
    register_cpu_notifier(&cpu_nfb);

    BUG_ON(smpboot_register_percpu_thread(&softirq_threads));

    return 0;
}
early_initcall(spawn_ksoftirqd);

smpboot_register_percpu_thread is defined in kernel/smpboot.c.

//kernel/smpboot.c
int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
{
	unsigned int cpu;
	int ret = 0;

	get_online_cpus();
	mutex_lock(&smpboot_threads_lock);
	for_each_online_cpu(cpu) {
		ret = __smpboot_create_thread(plug_thread, cpu);
		if (ret) {
			smpboot_destroy_threads(plug_thread);
			goto out;
		}
		smpboot_unpark_thread(plug_thread, cpu);
	}
	list_add(&plug_thread->list, &hotplug_threads);
out:
	mutex_unlock(&smpboot_threads_lock);
	put_online_cpus();
	return ret;
}

At boot the kernel also calls the network subsystem init function net_dev_init,
which registers the network transmit and receive actions and binds them to their softirq numbers:
open_softirq(NET_TX_SOFTIRQ, net_tx_action);
open_softirq(NET_RX_SOFTIRQ, net_rx_action);

//file: net/core/dev.c
static int __init net_dev_init(void)
{
    ......

    for_each_possible_cpu(i) {
        struct softnet_data *sd = &per_cpu(softnet_data, i);

        memset(sd, 0, sizeof(*sd));
        skb_queue_head_init(&sd->input_pkt_queue);
        skb_queue_head_init(&sd->process_queue);
        sd->completion_queue = NULL;
        INIT_LIST_HEAD(&sd->poll_list);

        ......
    }

    ......

    open_softirq(NET_TX_SOFTIRQ, net_tx_action);
    open_softirq(NET_RX_SOFTIRQ, net_rx_action);
}
subsys_initcall(net_dev_init);

The kernel has softirq handlers for more than just networking; there are other types too.

//file: include/linux/interrupt.h
enum
{
    HI_SOFTIRQ=0,
    TIMER_SOFTIRQ,
    NET_TX_SOFTIRQ,
    NET_RX_SOFTIRQ,
    BLOCK_SOFTIRQ,
    BLOCK_IOPOLL_SOFTIRQ,
    TASKLET_SOFTIRQ,
    SCHED_SOFTIRQ,
    HRTIMER_SOFTIRQ,
    RCU_SOFTIRQ,    /* Preferable RCU should always be the last softirq */

    NR_SOFTIRQS
};
Softirq dispatch

softirq_threads describes a softirq thread; once registered, it repeatedly calls ksoftirqd_should_run to check whether a softirq is pending.
When the hardware interrupt scheduled napi, it called or_softirq_pending to set the softirq flag bit, so local_softirq_pending can now see that a softirq is pending; when one is, run_ksoftirqd is invoked.

#define local_softirq_pending()	(__this_cpu_read(local_softirq_pending_ref))
#define set_softirq_pending(x)	(__this_cpu_write(local_softirq_pending_ref, (x)))
#define or_softirq_pending(x)	(__this_cpu_or(local_softirq_pending_ref, (x)))

static int ksoftirqd_should_run(unsigned int cpu)
{
	return local_softirq_pending();
}

Hardware interrupts are disabled before entering the softirq handling (it is not entirely clear why; in Wei Dongshan's tutorials softirqs can be preempted by hardware interrupts). A plausible reason is that the pending-bit check and the hand-off into __do_softirq must not race with a hardware interrupt raising a new softirq; note that __do_softirq re-enables interrupts itself while the handlers run.

static void run_ksoftirqd(unsigned int cpu)
{
	local_irq_disable();		//disable hardware interrupts
	if (local_softirq_pending()) {
		/*
		 * We can safely run softirq on inline stack, as we are not deep
		 * in the task stack here.
		 */
		__do_softirq();
		local_irq_enable();
		cond_resched();
		return;
	}
	local_irq_enable();
}

__do_softirq
h->action(h); runs the action; here that is net_rx_action, the network receive softirq handler.

asmlinkage void __do_softirq(void)
{
    do {
        if (pending & 1) {
            unsigned int vec_nr = h - softirq_vec;
            int prev_count = preempt_count();

            ...
            trace_softirq_entry(vec_nr);
            h->action(h);
            trace_softirq_exit(vec_nr);
            ...
        }
        h++;
        pending >>= 1;
    } while (pending);
}

The time_limit and budget at the top of net_rx_action exist to make the function bail out voluntarily, so packet reception cannot monopolize the CPU; any remaining packets are handled the next time the NIC raises a hard interrupt. budget can be tuned via a kernel parameter. The rest of the core logic fetches the per-CPU softnet_data, walks its poll_list, and invokes the poll function the NIC driver registered. For the imx6ull that poll function is fec_enet_rx_napi.

static void net_rx_action(struct softirq_action *h)
{
    struct softnet_data *sd = &__get_cpu_var(softnet_data);
    unsigned long time_limit = jiffies + 2;
    int budget = netdev_budget;
    void *have;

    local_irq_disable();

    while (!list_empty(&sd->poll_list)) {
        ......
        n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);	//take the first node off the list

        work = 0;
        if (test_bit(NAPI_STATE_SCHED, &n->state)) {
            work = n->poll(n, weight);											//call the driver's registered poll
            trace_napi_poll(n);
        }

        budget -= work;
    }
}
fec_enet_rx_napi

In the code above, the softirq handler net_rx_action calls the napi poll function registered by the NIC driver; for the imx6ull that is fec_enet_rx_napi. Next, the call flow of the imx6ull driver's receive path:

fec_enet_rx_napi
	->fec_enet_rx
		->fec_enet_rx_queue			//the key function: it copies packets from the ring buffer into sk_buffs and hands them to the protocol stack
static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
{
	struct net_device *ndev = napi->dev;
	struct fec_enet_private *fep = netdev_priv(ndev);
	int pkts;		//packet count

	pkts = fec_enet_rx(ndev, budget);	//budget makes the function bail out voluntarily so packet reception cannot monopolize the CPU

	fec_enet_tx(ndev);

	if (pkts < budget) {
		napi_complete_done(napi, pkts);
		writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
	}
	return pkts;
}
static int
fec_enet_rx(struct net_device *ndev, int budget)
{
	int     pkt_received = 0;
	u16	queue_id;				//queue id
	struct fec_enet_private *fep = netdev_priv(ndev);

	/*
		#define for_each_set_bit(bit, addr, size) \
		for ((bit) = find_first_bit((addr), (size));   (bit) < (size);   (bit) = find_next_bit((addr), (size), (bit) + 1))
		{   }
		
	    the for loop yields the queue_id of each set queue (up to three)
	*/
	for_each_set_bit(queue_id, &fep->work_rx, FEC_ENET_MAX_RX_QS) {	
		int ret;

		ret = fec_enet_rx_queue(ndev,
					budget - pkt_received, queue_id);

		if (ret < budget - pkt_received)
			clear_bit(queue_id, &fep->work_rx);

		pkt_received += ret;	//pkt_received: packets received so far; budget: total allowed
	}
	return pkt_received;
}
static int
fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
{
	struct fec_enet_private *fep = netdev_priv(ndev);
	struct fec_enet_priv_rx_q *rxq;
	struct bufdesc *bdp;
	unsigned short status;
	struct  sk_buff *skb_new = NULL;
	struct  sk_buff *skb;
	ushort	pkt_len;
	__u8 *data;
	int	pkt_received = 0;
	struct	bufdesc_ex *ebdp = NULL;
	bool	vlan_packet_rcvd = false;
	u16	vlan_tag;
	int	index = 0;
	bool	is_copybreak;
	bool	need_swap = fep->quirks & FEC_QUIRK_SWAP_FRAME;

#ifdef CONFIG_M532x
	flush_cache_all();
#endif
	queue_id = FEC_ENET_GET_QUQUE(queue_id);
	rxq = fep->rx_queue[queue_id];						//get the queue

	/* First, grab all of the stats for the incoming packet.
	 * These get messed up if we get called due to a busy condition.
	 */
	bdp = rxq->bd.cur;	//the next descriptor to process

	while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) {	//start receiving once the buffer holds data (EMPTY bit clear)

		if (pkt_received >= budget)			//budget reached, stop receiving
			break;
		pkt_received++;

		writel(FEC_ENET_RXF, fep->hwp + FEC_IEVENT);						//clear the data-arrival event

		/* Check for errors. */
		status ^= BD_ENET_RX_LAST;				//BD_ENET_RX_LAST: is this buffer the last one of the frame (a frame may span several buffers)
		if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO |	//error checks: frame too long/too short, non-octet-aligned frame, CRC error,
			   BD_ENET_RX_CR | BD_ENET_RX_OV | BD_ENET_RX_LAST |		//overrun (received frame overflowed the FIFO)
			   BD_ENET_RX_CL)) {
			ndev->stats.rx_errors++;
			if (status & BD_ENET_RX_OV) {	//FIFO overrun
				/* FIFO overrun */
				ndev->stats.rx_fifo_errors++;
				goto rx_processing_done;
			}
			if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH
						| BD_ENET_RX_LAST)) {
				/* Frame too long or too short. */
				ndev->stats.rx_length_errors++;
				if (status & BD_ENET_RX_LAST)
					netdev_err(ndev, "rcv is not +last\n");
			}
			if (status & BD_ENET_RX_CR)	/* CRC Error */
				ndev->stats.rx_crc_errors++;
			/* Report late collisions as a frame error. */
			if (status & (BD_ENET_RX_NO | BD_ENET_RX_CL))
				ndev->stats.rx_frame_errors++;
			goto rx_processing_done;
		}

		/* Process the incoming frame. */
		ndev->stats.rx_packets++;										//bump the netdev receive counter
		pkt_len = fec16_to_cpu(bdp->cbd_datlen);						//account the byte length
		ndev->stats.rx_bytes += pkt_len;

		index = fec_enet_get_bd_index(bdp, &rxq->bd);	//index of this descriptor: which bufdesc it is
		skb = rxq->rx_skbuff[index];					//the skb bound to this descriptor

		is_copybreak = fec_enet_copybreak(ndev, &skb, bdp, pkt_len - 4,
						  need_swap);					//copy the data from the buffer into a fresh sk_buff, stripping the FCS field
		if (!is_copybreak) {
			skb_new = netdev_alloc_skb(ndev, FEC_ENET_RX_FRSIZE);
			if (unlikely(!skb_new)) {
				ndev->stats.rx_dropped++;
				goto rx_processing_done;
			}
			dma_unmap_single(&fep->pdev->dev,
					 fec32_to_cpu(bdp->cbd_bufaddr),
					 FEC_ENET_RX_FRSIZE - fep->rx_align,
					 DMA_FROM_DEVICE);
		}

		prefetch(skb->data - NET_IP_ALIGN);
		skb_put(skb, pkt_len - 4);	//grow the skb data area; data~tail holds the packet data, pkt_len - 4 bytes of it
		data = skb->data;			//the minus 4 strips the FCS length

		if (!is_copybreak && need_swap)
			swap_buffer(data, pkt_len);

#if !defined(CONFIG_M5272)
		if (fep->quirks & FEC_QUIRK_HAS_RACC)
			data = skb_pull_inline(skb, 2);
#endif

		/* Extract the enhanced buffer descriptor */
		ebdp = NULL;
		if (fep->bufdesc_ex)
			ebdp = (struct bufdesc_ex *)bdp;

		/* If this is a VLAN packet remove the VLAN Tag */
		vlan_packet_rcvd = false;
		if ((ndev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
		    fep->bufdesc_ex &&
		    (ebdp->cbd_esc & cpu_to_fec32(BD_ENET_RX_VLAN))) {
			/* Push and remove the vlan tag */
			struct vlan_hdr *vlan_header =					//locate the packet's vlan header
					(struct vlan_hdr *) (data + ETH_HLEN);
			vlan_tag = ntohs(vlan_header->h_vlan_TCI);

			vlan_packet_rcvd = true;		//vlan tag extracted

			memmove(skb->data + VLAN_HLEN, data, ETH_ALEN * 2);	//remove the vlan header from the ethernet frame
			skb_pull(skb, VLAN_HLEN);
		}
			
		skb->protocol = eth_type_trans(skb, ndev);	//determine the protocol

		/* Get receive timestamp from the skb */
		if (fep->hwts_rx_en && fep->bufdesc_ex)
			fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts),
					  skb_hwtstamps(skb));

		if (fep->bufdesc_ex &&
		    (fep->csum_flags & FLAG_RX_CSUM_ENABLED)) {
			if (!(ebdp->cbd_esc & cpu_to_fec32(FLAG_RX_CSUM_ERROR))) {
				/* don't check it */
				skb->ip_summed = CHECKSUM_UNNECESSARY;
			} else {
				skb_checksum_none_assert(skb);
			}
		}

		/* Handle received VLAN packets */
		if (vlan_packet_rcvd)
			__vlan_hwaccel_put_tag(skb,
					       htons(ETH_P_8021Q),
					       vlan_tag);
		
		/*	napi_gro_receive calls dev_gro_receive.
			dev_gro_receive implements the NIC GRO feature: roughly, related
			small packets are merged into one large packet to cut the number
			of packets handed to the network stack, which reduces CPU usage.
		*/
		napi_gro_receive(&fep->napi, skb);		//from here the packet is handed to the protocol stack

		if (is_copybreak) {
			dma_sync_single_for_device(&fep->pdev->dev,
						   fec32_to_cpu(bdp->cbd_bufaddr),
						   FEC_ENET_RX_FRSIZE - fep->rx_align,
						   DMA_FROM_DEVICE);
		} else {
			rxq->rx_skbuff[index] = skb_new;
			fec_enet_new_rxbdp(ndev, bdp, skb_new);
		}

rx_processing_done:
		/* Clear the status flags for this buffer */
		status &= ~BD_ENET_RX_STATS;

		/* Mark the buffer empty */
		status |= BD_ENET_RX_EMPTY;

		if (fep->bufdesc_ex) {
			struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
			ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT);
			ebdp->cbd_prot = 0;
			ebdp->cbd_bdu = 0;
		}
		/* Make sure the updates to rest of the descriptor are
		 * performed before transferring ownership.
		 */
		wmb();
		bdp->cbd_sc = cpu_to_fec16(status);

		/* Update BD pointer to next entry */
		bdp = fec_enet_get_nextdesc(bdp, &rxq->bd);		

		/* Doing this here will keep the FEC running while we process
		 * incoming frames.  On a heavily loaded network, we should be
		 * able to keep up at the expense of system resources.
		 */
		writel(0, rxq->bd.reg_desc_active);
	}
	rxq->bd.cur = bdp;
	return pkt_received;
}
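
fec_enet_copybreak, used above, decides whether a small frame (at most rx_copybreak bytes) should be copied into a freshly allocated skb so that the original DMA buffer stays mapped and goes straight back to the hardware. Roughly, paraphrased from the same fec_main.c (treat details as approximate):

static bool fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb,
			       struct bufdesc *bdp, u32 length, bool swap)
{
	struct fec_enet_private *fep = netdev_priv(ndev);
	struct sk_buff *new_skb;

	if (length > fep->rx_copybreak)		/* big frame: keep the original skb */
		return false;

	new_skb = netdev_alloc_skb(ndev, length);
	if (!new_skb)
		return false;

	/* hand the buffer to the CPU, copy the frame, leave the mapping alive */
	dma_sync_single_for_cpu(&fep->pdev->dev,
				fec32_to_cpu(bdp->cbd_bufaddr),
				FEC_ENET_RX_FRSIZE - fep->rx_align,
				DMA_FROM_DEVICE);
	if (!swap)
		memcpy(new_skb->data, (*skb)->data, length);
	else
		swap_buffer2(new_skb->data, (*skb)->data, length);
	*skb = new_skb;

	return true;	/* caller re-arms the old buffer via dma_sync_single_for_device */
}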

Summary

fec_probe mainly does the following:

  1. allocates the netdev
  2. DMA-allocates the txq/rxq rings
  3. initializes the hardware
  4. installs net_device_ops and ethtool_ops, sets the transmit timeout, etc.
  5. initializes napi
  6. fills in netdev and fec_enet_private
  7. registers the interrupt
  8. initializes mdio
  9. registers the netdev