ndev = alloc_etherdev(sizeof(struct fec_enet_private));
struct fec_enet_private *fep = netdev_priv(ndev);
Passing private data around through a pointer tucked behind another structure like this is extremely common in drivers; the sketch below shows where netdev_priv() actually points.
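alloc_etherdev(sizeof(struct fec_enet_private)) makes a single allocation that holds the net_device immediately followed by the driver's private area, and netdev_priv() simply returns a pointer to that trailing area. A layout note of mine, not code from the driver:

/*
 *  +------------------------------+  <- ndev
 *  |      struct net_device       |
 *  +------------------------------+  <- netdev_priv(ndev), i.e. ndev +
 *  |   struct fec_enet_private    |     ALIGN(sizeof(struct net_device),
 *  +------------------------------+     NETDEV_ALIGN)
 */

At the start of the function, buffer descriptors are then allocated for the Ethernet controller's DMA engine: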
/* Allocate memory for buffer descriptors. */
cbd_base = dma_alloc_noncacheable(NULL, BUFDES_SIZE, &fep->bd_dma,
        GFP_KERNEL);
if (!cbd_base) {
    printk("FEC: allocate descriptor memory failed?\n");
    return -ENOMEM;
}
The size allocated here is (number of TX buffers + number of RX buffers) × the size of one buffer descriptor:
#define BUFDES_SIZE ((RX_RING_SIZE + TX_RING_SIZE) * sizeof(struct bufdesc))
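To put a number on it: with, say, RX_RING_SIZE and TX_RING_SIZE both equal to 16 (the actual values depend on the fec.h configuration) and the 32-byte enhanced descriptor shown further down, BUFDES_SIZE comes to (16 + 16) × 32 = 1024 bytes, comfortably within a single page.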
Because the buffer descriptors are accessed both by the CPU and by the DMA controller, cache coherency becomes an issue. The driver therefore uses dma_alloc_noncacheable(), i.e. a coherent DMA mapping. A coherent mapping is the right choice here because the CPU and the DMA controller touch this region at unpredictable times. The Linux kernel offers two ways to deal with cache coherency: streaming DMA mappings and coherent DMA mappings. The difference between the two is covered in Understanding the Linux Kernel and in LDD3, and I have also written a blog post giving a first pass at it: http://blog.163.com/thinki_cao/blog/static/83944875201362142939337.
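As a generic illustration of the two approaches (standard kernel DMA API, not code from this driver): a coherent mapping is allocated once and stays synchronized for its whole lifetime, while a streaming mapping hands a buffer to the device for one transfer at a time:

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/gfp.h>

/* Illustration only: the two DMA mapping styles side by side. */
static void dma_mapping_styles(struct device *dev, void *pkt, size_t len)
{
    void *ring;
    dma_addr_t ring_dma, pkt_dma;

    /* Coherent mapping: CPU and device may touch the region at any
     * time, no explicit sync needed -- what the descriptor ring
     * above needs. */
    ring = dma_alloc_coherent(dev, 4096, &ring_dma, GFP_KERNEL);
    if (ring)
        dma_free_coherent(dev, 4096, ring, ring_dma);

    /* Streaming mapping: ownership moves to the device for a single
     * transfer and back again; cache maintenance happens inside
     * map/unmap -- what the xmit/rx paths below do with packet data. */
    pkt_dma = dma_map_single(dev, pkt, len, DMA_TO_DEVICE);
    if (!dma_mapping_error(dev, pkt_dma))
        dma_unmap_single(dev, pkt_dma, len, DMA_TO_DEVICE);
}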
Now a closer look at the DMA side. The i.MX6 Ethernet DMA engine works on a ring of buffer descriptors, and there are two descriptor formats: the legacy buffer descriptor, kept for compatibility with earlier Freescale parts, and the enhanced buffer descriptor, which adds more features. Quoting the figure from the i.MX6Q reference manual:
An enhanced buffer descriptor is 32 bytes (as the struct below shows) and is likewise laid out big-endian; personally I suspect this Ethernet IP was lifted straight from the PowerPC side.
Both descriptor layouts can be found in fec.h:
struct bufdesc {
    unsigned short cbd_datlen;  /* Data length */
    unsigned short cbd_sc;      /* Control and status info */
    unsigned long cbd_bufaddr;  /* Buffer address */
#ifdef CONFIG_ENHANCED_BD
    unsigned long cbd_esc;
    unsigned long cbd_prot;
    unsigned long cbd_bdu;
    unsigned long ts;
    unsigned short res0[4];
#endif
};
If the CONFIG_ENHANCED_BD macro is defined, support for the enhanced buffer descriptor is enabled. Looking across the whole driver, though, the 3.0.35 kernel does not actually use some of the features the enhanced descriptor offers, such as the PINS and IINS bits at offset +8 of the enhanced transmit buffer descriptor, which let the MAC's IP accelerator compute checksums in hardware (the protocol checksum and the IP header checksum). On the Yocto 3.10.17 kernel this is already wired up, which is probably one reason the Ethernet driver on 3.0.35 performs worse than the one on 3.10.17. A rough sketch of how the newer driver uses those bits follows.
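This is paraphrased from memory of the 3.10-era driver, with the bit names taken from the newer fec.h, so treat the exact identifiers as assumptions rather than something present in 3.0.35 (it assumes fec.h's struct bufdesc and <linux/skbuff.h>):

/* Ask the MAC's IP accelerator to insert the IP-header and protocol
 * checksums for a frame the stack left partially checksummed. */
static void fec_txbd_request_csum(struct bufdesc *bdp, struct sk_buff *skb)
{
    unsigned long estatus = BD_ENET_TX_INT;

    if (skb->ip_summed == CHECKSUM_PARTIAL)
        estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;

    bdp->cbd_esc = estatus;
    bdp->cbd_bdu = 0;
}

Now back to the 3.0.35 probe code: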
spin_lock_init(&fep->hw_lock);  /* initialize the spinlock */
fep->netdev = ndev;             /* record the net_device pointer */

/* Get the Ethernet address */
fec_get_mac(ndev);
fec_get_mac() tries to obtain the MAC address from several places, in order:
static void __inline__ fec_get_mac(struct net_device *ndev)
{
    struct fec_enet_private *fep = netdev_priv(ndev);
    struct fec_platform_data *pdata = fep->pdev->dev.platform_data;
    unsigned char *iap, tmpaddr[ETH_ALEN];

    /*
     * try to get mac address in following order:
     *
     * 1) module parameter via kernel command line in form
     *    fec.macaddr=0x00,0x04,0x9f,0x01,0x30,0xe0
     */
    iap = macaddr;

    /*
     * 2) from flash or fuse (via platform data)
     */
    if (!is_valid_ether_addr(iap)) {
        if (pdata)
            memcpy(iap, pdata->mac, ETH_ALEN);
    }

    /*
     * 3) FEC mac registers set by bootloader
     */
    if (!is_valid_ether_addr(iap)) {
        *((unsigned long *) &tmpaddr[0]) =
            be32_to_cpu(readl(fep->hwp + FEC_ADDR_LOW));
        *((unsigned short *) &tmpaddr[4]) =
            be16_to_cpu(readl(fep->hwp + FEC_ADDR_HIGH) >> 16);
        iap = &tmpaddr[0];
    }

    memcpy(ndev->dev_addr, iap, ETH_ALEN);

    /* Adjust MAC if using macaddr */
    if (iap == macaddr)
        ndev->dev_addr[ETH_ALEN-1] = macaddr[ETH_ALEN-1] + fep->pdev->id;
}
1) First it takes the MAC address from the global variable macaddr. The code defining macaddr is:
static unsigned char macaddr[ETH_ALEN];
module_param_array(macaddr, byte, NULL, 0);
MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
__setup("fec_mac=", fec_mac_addr_setup);
The __setup() here captures the fec_mac parameter (i.e. the MAC address) from the boot arguments U-Boot passes to the kernel and hands it to fec_mac_addr_setup(char *mac_addr) for parsing. If U-Boot passes no MAC argument, every byte of the macaddr array stays 0.
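The body of fec_mac_addr_setup() is not reproduced here; a minimal sketch of what such a parser could look like, assuming a colon-separated hex form like fec_mac=00:04:9f:01:30:e0 (the real function may differ in format and error handling):

static int __init fec_mac_addr_setup(char *mac_addr)
{
    char *p = mac_addr;
    unsigned long byte;
    int i;

    /* walk the string one ':'-separated hex byte at a time */
    for (i = 0; i < ETH_ALEN && p && *p; i++) {
        char *next = strchr(p, ':');

        if (next)
            *next++ = '\0';
        if (strict_strtoul(p, 16, &byte) || byte > 0xff)
            break;
        macaddr[i] = byte;
        p = next;
    }
    return 1;
}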
2) Check whether the MAC address obtained in 1) is valid; if not, copy the mac array from the board's struct fec_platform_data (provided the pdata pointer is not NULL).
3) Check whether the MAC address obtained in 2) is valid; if not, read the Ethernet controller's MAC address registers, which the bootloader may have programmed.
Finally, the chosen MAC address is copied into the dev_addr field of the kernel's net_device structure.
/* Set receive and transmit descriptor base. */
fep->rx_bd_base = cbd_base;
fep->tx_bd_base = cbd_base + RX_RING_SIZE;
Then rx_bd_base and tx_bd_base, the RX and TX buffer descriptor base pointers, are set; the resulting split of the coherent block is sketched below:
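Roughly, since cbd_base holds RX_RING_SIZE + TX_RING_SIZE descriptors back to back (my own sketch of the split):

/*
 *  cbd_base == fep->rx_bd_base
 *  +--------------------------------------+
 *  | rx bd[0] ... rx bd[RX_RING_SIZE - 1]  |  <- last RX BD gets
 *  +--------------------------------------+     BD_SC_WRAP below
 *  | tx bd[0] ... tx bd[TX_RING_SIZE - 1]  |  <- fep->tx_bd_base
 *  +--------------------------------------+
 */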
Next, the remaining net_device and fec_enet_private fields are filled in:

/* The FEC Ethernet specific entries in the device structure */
ndev->watchdog_timeo = TX_TIMEOUT;  /* watchdog timer interval */
ndev->netdev_ops = &fec_netdev_ops;
ndev->ethtool_ops = &fec_enet_ethtool_ops;
fep->use_napi = FEC_NAPI_ENABLE;
fep->napi_weight = FEC_NAPI_WEIGHT;
if (fep->use_napi) {
    fec_rx_int_is_enabled(ndev, false);
    netif_napi_add(ndev, &fep->napi, fec_rx_poll, fep->napi_weight);
}
/* Initialize the receive buffer descriptors. */
bdp = fep->rx_bd_base;
for (i = 0; i < RX_RING_SIZE; i++) {
    /* Initialize the BD for every fragment in the page. */
    bdp->cbd_sc = 0;
    bdp->cbd_bufaddr = 0;
    bdp++;
}

/* Set the last buffer to wrap */
bdp--;
bdp->cbd_sc |= BD_SC_WRAP;
fec_restart(ndev, 0);
/* Whack a reset. We should wait for this. */
writel(1, fep->hwp + FEC_ECNTRL);
udelay(10);

/* if uboot don't set MAC address, get MAC address
 * from command line; if command line don't set MAC
 * address, get from OCOTP; otherwise, allocate random
 * address.
 */
memcpy(&temp_mac, dev->dev_addr, ETH_ALEN);
writel(cpu_to_be32(temp_mac[0]), fep->hwp + FEC_ADDR_LOW);
writel(cpu_to_be32(temp_mac[1]), fep->hwp + FEC_ADDR_HIGH);

/* Clear any outstanding interrupt. */
writel(0xffc00000, fep->hwp + FEC_IEVENT);

/* Reset all multicast. */
writel(0, fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
writel(0, fep->hwp + FEC_GRP_HASH_TABLE_LOW);

/* Set maximum receive buffer size. */
writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE);

/* Set receive and transmit descriptor base. */
writel(fep->bd_dma, fep->hwp + FEC_R_DES_START);
writel((unsigned long)fep->bd_dma + sizeof(struct bufdesc) * RX_RING_SIZE,
       fep->hwp + FEC_X_DES_START);

/* Reinit transmit descriptors */
fec_enet_txbd_init(dev);

fep->dirty_tx = fep->cur_tx = fep->tx_bd_base;
fep->cur_rx = fep->rx_bd_base;

/* Reset SKB transmit buffers. */
fep->skb_cur = fep->skb_dirty = 0;
for (i = 0; i <= TX_RING_MOD_MASK; i++) {
    if (fep->tx_skbuff[i]) {
        dev_kfree_skb_any(fep->tx_skbuff[i]);
        fep->tx_skbuff[i] = NULL;
    }
}
Next, half- or full-duplex mode is configured; by default it is half duplex (i.e. no reception while transmitting):

/* Enable MII mode */
if (duplex) {
    /* MII enable / FD enable */
    writel(OPT_FRAME_SIZE | 0x04, fep->hwp + FEC_R_CNTRL);
    writel(0x04, fep->hwp + FEC_X_CNTRL);
} else {
    /* MII enable / No Rcv on Xmit */
    writel(OPT_FRAME_SIZE | 0x06, fep->hwp + FEC_R_CNTRL);
    writel(0x0, fep->hwp + FEC_X_CNTRL);
}
fep->full_duplex = duplex;

/* Set MII speed */
writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
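For reference, my reading of the control bits used above (from the i.MX6 ENET register descriptions; worth double-checking against the reference manual):

/*
 *  FEC_R_CNTRL bit 1: DRT      - disable reception while transmitting
 *  FEC_R_CNTRL bit 2: MII_MODE - MII/RMII interface enable
 *  FEC_X_CNTRL bit 2: FDEN     - full-duplex enable
 *
 *  So 0x06 = MII_MODE | DRT ("no Rcv on Xmit", i.e. half duplex) and
 *  0x04 = MII_MODE only, with FDEN written to FEC_X_CNTRL in the
 *  full-duplex case.
 */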
if (fep->ptimer_present) {
    /* Set Timer count */
    ret = fec_ptp_start(fep->ptp_priv);
    if (ret) {
        fep->ptimer_present = 0;
        reg = 0x0;
    } else
        reg = 0x0;
} else
    reg = 0x0;

static const struct net_device_ops fec_netdev_ops = {
    .ndo_open               = fec_enet_open,
    .ndo_stop               = fec_enet_close,
    .ndo_start_xmit         = fec_enet_start_xmit,
    .ndo_set_multicast_list = set_multicast_list,
    .ndo_change_mtu         = eth_change_mtu,
    .ndo_validate_addr      = eth_validate_addr,
    .ndo_tx_timeout         = fec_timeout,
    .ndo_set_mac_address    = fec_set_mac_address,
    .ndo_do_ioctl           = fec_enet_ioctl,
#ifdef CONFIG_NET_POLL_CONTROLLER
    .ndo_poll_controller    = fec_enet_netpoll,
#endif
};

if (fep->use_napi)
    napi_enable(&fep->napi);
clk_enable(fep->clk);
ret = fec_enet_alloc_buffers(ndev);
if (ret)
    return ret;
/* Probe and connect to PHY when open the interface */
ret = fec_enet_mii_probe(ndev);
if (ret) {
    fec_enet_free_buffers(ndev);
    return ret;
}

phy_start(fep->phy_dev);
netif_start_queue(ndev);
fep->opened = 1;

ret = -EINVAL;
if (pdata->init && pdata->init(fep->phy_dev))
    return ret;

return 0;
spin_lock_irqsave(&fep->hw_lock, flags);
if (!fep->link) {
    /* Link is down or autonegotiation is in progress. */
    netif_stop_queue(ndev);
    spin_unlock_irqrestore(&fep->hw_lock, flags);
    return NETDEV_TX_BUSY;
}

/* Fill in a Tx ring entry */
bdp = fep->cur_tx;

status = bdp->cbd_sc;

if (status & BD_ENET_TX_READY) {
    /* Ooops. All transmit buffers are full. Bail out.
     * This should not happen, since ndev->tbusy should be set.
     */
    printk("%s: tx queue full!.\n", ndev->name);
    netif_stop_queue(ndev);
    spin_unlock_irqrestore(&fep->hw_lock, flags);
    return NETDEV_TX_BUSY;
}

/* Clear all of the status flags */
status &= ~BD_ENET_TX_STATS;

/* Set buffer length and buffer pointer */
bufaddr = skb->data;
bdp->cbd_datlen = skb->len;

/*
 * On some FEC implementations data must be aligned on
 * 4-byte boundaries. Use bounce buffers to copy data
 * and get it aligned. Ugh.
 */
if (((unsigned long)bufaddr) & FEC_ALIGNMENT) {
    unsigned int index;
    index = bdp - fep->tx_bd_base;
    bufaddr = PTR_ALIGN(fep->tx_bounce[index], FEC_ALIGNMENT + 1);
    memcpy(bufaddr, (void *)skb->data, skb->len);
}

if (fep->ptimer_present) {
    if (fec_ptp_do_txstamp(skb)) {
        estatus = BD_ENET_TX_TS;
        status |= BD_ENET_TX_PTP;
    } else
        estatus = 0;
#ifdef CONFIG_ENHANCED_BD
    bdp->cbd_esc = (estatus | BD_ENET_TX_INT);
    bdp->cbd_bdu = 0;
#endif
}

/*
 * Some design made an incorrect assumption on endian mode of
 * the system that it's running on. As the result, driver has to
 * swap every frame going to and coming from the controller.
 */
if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
    swap_buffer(bufaddr, skb->len);

/* Save skb pointer */
fep->tx_skbuff[fep->skb_cur] = skb;

ndev->stats.tx_bytes += skb->len;
fep->skb_cur = (fep->skb_cur + 1) & TX_RING_MOD_MASK;

/* Push the data cache so the CPM does not get stale memory
 * data.
 */
bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,
        FEC_ENET_TX_FRSIZE, DMA_TO_DEVICE);

/* Send it on its way. Tell FEC it's ready, interrupt when done,
 * it's the last BD of the frame, and to put the CRC on the end.
 */
status |= (BD_ENET_TX_READY | BD_ENET_TX_INTR
        | BD_ENET_TX_LAST | BD_ENET_TX_TC);
bdp->cbd_sc = status;

/* Trigger transmission start */
writel(0, fep->hwp + FEC_X_DES_ACTIVE);

bdp_pre = fec_enet_get_pre_txbd(ndev);
if ((id_entry->driver_data & FEC_QUIRK_BUG_TKT168103) &&
    !(bdp_pre->cbd_sc & BD_ENET_TX_READY))
    schedule_delayed_work(&fep->fixup_trigger_tx,
                          msecs_to_jiffies(1));

/* If this was the last BD in the ring, start at the beginning again. */
if (status & BD_ENET_TX_WRAP)
    bdp = fep->tx_bd_base;
else
    bdp++;

if (bdp == fep->dirty_tx) {
    fep->tx_full = 1;
    netif_stop_queue(ndev);
}

fep->cur_tx = bdp;

spin_unlock_irqrestore(&fep->hw_lock, flags);

return NETDEV_TX_OK;
do {
    int_events = readl(fep->hwp + FEC_IEVENT);
    writel(int_events, fep->hwp + FEC_IEVENT);
    ……………………
} while (int_events);
if (int_events & FEC_ENET_RXF) {
    ret = IRQ_HANDLED;
    spin_lock_irqsave(&fep->hw_lock, flags);

    if (fep->use_napi) {
        /* Disable the RX interrupt */
        if (napi_schedule_prep(&fep->napi)) {
            fec_rx_int_is_enabled(ndev, false);
            __napi_schedule(&fep->napi);
        }
    } else
        fec_enet_rx(ndev);

    spin_unlock_irqrestore(&fep->hw_lock, flags);
}
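When an RX interrupt arrives with NAPI enabled, the handler masks further RX interrupts and schedules the poll routine registered earlier (fec_rx_poll), which then drains the ring in softirq context. The driver's own fec_rx_poll is not quoted here; the canonical shape of such a poll callback is roughly (generic skeleton, not the actual FEC code):

static int example_rx_poll(struct napi_struct *napi, int budget)
{
    struct fec_enet_private *fep =
        container_of(napi, struct fec_enet_private, napi);
    int done = 0;

    /* process up to 'budget' frames from the RX ring, e.g. by calling
     * a fec_enet_rx()-style receive routine with a limit */
    /* done += ...; */

    if (done < budget) {
        /* ring drained: leave polled mode, re-enable RX interrupts */
        napi_complete(napi);
        fec_rx_int_is_enabled(fep->netdev, true);
    }
    return done;
}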
/* Transmit OK, or non-fatal error. Update the buffer
 * descriptors. FEC handles all errors, we just discover
 * them as part of the transmit process.
 */
if (int_events & FEC_ENET_TXF) {
    ret = IRQ_HANDLED;
    fec_enet_tx(ndev);
}
bdp = fep->dirty_tx;
while (((status = bdp->cbd_sc) & BD_ENET_TX_READY) == 0) {
    ……………………
    /* Update pointer to next buffer descriptor to be transmitted */
    if (status & BD_ENET_TX_WRAP)
        bdp = fep->tx_bd_base;
    else
        bdp++;
}

if (bdp == fep->cur_tx && fep->tx_full == 0)
    break;

if (bdp->cbd_bufaddr)
    dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
            FEC_ENET_TX_FRSIZE, DMA_TO_DEVICE);
bdp->cbd_bufaddr = 0;

skb = fep->tx_skbuff[fep->skb_dirty];
if (!skb)
    break;

/* Check for errors. */
if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC |
              BD_ENET_TX_RL | BD_ENET_TX_UN |
              BD_ENET_TX_CSL)) {
    ndev->stats.tx_errors++;
    if (status & BD_ENET_TX_HB)  /* No heartbeat */
        ndev->stats.tx_heartbeat_errors++;
    if (status & BD_ENET_TX_LC)  /* Late collision */
        ndev->stats.tx_window_errors++;
    if (status & BD_ENET_TX_RL)  /* Retrans limit */
        ndev->stats.tx_aborted_errors++;
    if (status & BD_ENET_TX_UN)  /* Underrun */
        ndev->stats.tx_fifo_errors++;
    if (status & BD_ENET_TX_CSL) /* Carrier lost */
        ndev->stats.tx_carrier_errors++;
} else {
    ndev->stats.tx_packets++;
}

if (status & BD_ENET_TX_READY)
    printk("HEY! Enet xmit interrupt and TX_READY.\n");

/* Deferred means some collisions occurred during transmit,
 * but we eventually sent the packet OK.
 */
if (status & BD_ENET_TX_DEF)
    ndev->stats.collisions++;

#if defined(CONFIG_ENHANCED_BD)
if (fep->ptimer_present) {
    if (bdp->cbd_esc & BD_ENET_TX_TS)
        fec_ptp_store_txstamp(fpp, skb, bdp);
}
#elif defined(CONFIG_IN_BAND)
if (fep->ptimer_present) {
    if (status & BD_ENET_TX_PTP)
        fec_ptp_store_txstamp(fpp, skb, bdp);
}
#endif

/* Free the sk buffer associated with this last transmit */
dev_kfree_skb_any(skb);
fep->tx_skbuff[fep->skb_dirty] = NULL;
fep->skb_dirty = (fep->skb_dirty + 1) & TX_RING_MOD_MASK;

/* Since we have freed up a buffer, the ring is no longer full */
if (fep->tx_full) {
    fep->tx_full = 0;
    if (netif_queue_stopped(ndev))
        netif_wake_queue(ndev);
}

fep->dirty_tx = bdp;
bdp = fep->cur_rx;

while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) {
    ………………
    /* Update BD pointer to next entry */
    if (status & BD_ENET_RX_WRAP)
        bdp = fep->rx_bd_base;
    else
        bdp++;
}
fep->cur_rx = bdp;

/* Since we have allocated space to hold a complete frame,
 * the last indicator should be set.
 */
if ((status & BD_ENET_RX_LAST) == 0)
    printk("FEC ENET: rcv is not +last\n");

if (!fep->opened)
    goto rx_processing_done;

/* Check for errors. */
if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO |
              BD_ENET_RX_CR | BD_ENET_RX_OV)) {
    ndev->stats.rx_errors++;
    if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH)) {
        /* Frame too long or too short. */
        ndev->stats.rx_length_errors++;
    }
    if (status & BD_ENET_RX_NO)  /* Frame alignment */
        ndev->stats.rx_frame_errors++;
    if (status & BD_ENET_RX_CR)  /* CRC Error */
        ndev->stats.rx_crc_errors++;
    if (status & BD_ENET_RX_OV)  /* FIFO overrun */
        ndev->stats.rx_fifo_errors++;
}

/* Report late collisions as a frame error.
 * On this error, the BD is closed, but we don't know what we
 * have in the buffer. So, just drop this frame on the floor.
 */
if (status & BD_ENET_RX_CL) {
    ndev->stats.rx_errors++;
    ndev->stats.rx_frame_errors++;
    goto rx_processing_done;
}

/* Process the incoming frame. */
ndev->stats.rx_packets++;
pkt_len = bdp->cbd_datlen;
ndev->stats.rx_bytes += pkt_len;

data = (__u8 *)__va(bdp->cbd_bufaddr);
if (bdp->cbd_bufaddr)
    dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
            FEC_ENET_RX_FRSIZE, DMA_FROM_DEVICE);

if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
    swap_buffer(data, pkt_len);

/* This does 16 byte alignment, exactly what we need.
 * The packet length includes FCS, but we don't want to
 * include that when passing upstream as it messes up
 * bridging applications.
 */
skb = dev_alloc_skb(pkt_len - 4 + NET_IP_ALIGN);

if (unlikely(!skb)) {
    printk("%s: Memory squeeze, dropping packet.\n", ndev->name);
    ndev->stats.rx_dropped++;
} else {
    skb_reserve(skb, NET_IP_ALIGN);
    skb_put(skb, pkt_len - 4);  /* Make room */
    skb_copy_to_linear_data(skb, data, pkt_len - 4);
    /* 1588 message TS handle */
    if (fep->ptimer_present)
        fec_ptp_store_rxstamp(fpp, skb, bdp);
    skb->protocol = eth_type_trans(skb, ndev);
    netif_rx(skb);
}

bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data,
        FEC_ENET_TX_FRSIZE, DMA_FROM_DEVICE);

rx_processing_done:
    /* Clear the status flags for this buffer */
    status &= ~BD_ENET_RX_STATS;

    /* Mark the buffer empty */
    status |= BD_ENET_RX_EMPTY;
    bdp->cbd_sc = status;
#ifdef CONFIG_ENHANCED_BD
    bdp->cbd_esc = BD_ENET_RX_INT;
    bdp->cbd_prot = 0;
    bdp->cbd_bdu = 0;
#endif

    /* Update BD pointer to next entry */
    if (status & BD_ENET_RX_WRAP)
        bdp = fep->rx_bd_base;
    else
        bdp++;

/* Doing this here will keep the FEC running while we process
 * incoming frames. On a heavily loaded network, we should be
 * able to keep up at the expense of system resources.
 */
writel(0, fep->hwp + FEC_R_DES_ACTIVE);
fep->cur_rx = bdp;