linux网络子系统分析

/*接下来的几节分析Linux网络*/

/**首先: 分析Linux网络子系统的构成,以及Linux网络子系统的作用*/

/*Linux网络子系统结构图*/

/*从上图可以看出:

用户空间有:
									应用层
内核空间包含的有:
                  系统调用接口: 为应用程序提供访问网络子系统的统一方法
                  协议无关接口: 提供通用的方法来使用传输层协议 
                  网络协议栈:	 实现具体的网络协议
                  设备无关接口: 协议与设备驱动之前通信的通用接口
                  设备驱动:		 驱动设备接受和发送数据

*/


/*在linux内核中, 每个网卡都有一个net_device描述结构来描述
  列举一些重要的成员:
*/

struct net_device {
	
	char			name[IFNAMSIZ];		/*网卡对应的名称。 例如: eth0*/

	/*
	 *	I/O specific fields
	 *	FIXME: Merge these and struct ifmap into one
	 */
	unsigned long		mem_end;	/* shared mem end	*/
	unsigned long		mem_start;	/* shared mem start	*/
	unsigned long		base_addr;	/* device I/O address	*/
	unsigned int		irq;		/* device IRQ number	*/
	
	/* Management operations */
	const struct net_device_ops *netdev_ops;		/*设备操作集合*/
	const struct ethtool_ops *ethtool_ops;

	/* Hardware header description */
	const struct header_ops *header_ops;				/*组装包头的操作集合*/
	
}


/*网络操作集合结构: net_device_ops. 列举几个常用的*/
struct net_device_ops {

	int			(*ndo_open)(struct net_device *dev);
	int			(*ndo_stop)(struct net_device *dev);
	netdev_tx_t		(*ndo_start_xmit) (struct sk_buff *skb,struct net_device *dev);	/*发送操作函数*/
	struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);				/*获取当前网卡的状态*/
	int			(*ndo_set_mac_address)(struct net_device *dev,void *addr);
	int			(*ndo_validate_addr)(struct net_device *dev);
	int			(*ndo_do_ioctl)(struct net_device *dev,struct ifreq *ifr, int cmd);
}

/*还有就是网络的数据包的结构: struct sk_buff. 通常将指向sk_buff的指针成为skb*/
struct sk_buff {
		sk_buff_data_t		transport_header;
	sk_buff_data_t		network_header;
	sk_buff_data_t		mac_header;
	/* These elements must be at the end, see alloc_skb() for details.  */
	sk_buff_data_t		tail;   /*这个包中数据部分的结束地址*/
	sk_buff_data_t		end; 		/*整个包的结束地址包含包头和包尾*/
	unsigned char		*head, *data;  /*head是整个包的开始地址, data是包中数据的开始地址*/
}

/*既然大概熟悉了整个网络的几个重要的结构,那我们就接着分析一个网卡驱动。 注意: 先分析主要干了什么,具体实现先不管*/

/*分析cs89x0.c  路径: linux/driver/net/cs89x0.c*/

/*分析一个驱动程序,首先是找到该驱动的模块入口地址。但是cs89x0.c却没有module_init。 原因是cs89x0.c是早期的驱动程序。早期的驱动程序入口地址是: init_module*/

/*
1. 分配net_device结构
2. 初始化net_device结构。包括中断号,基地址,网络设备的私有数据
3. 调用cs89x0_probe1函数进一步初始化
*/

int __init init_module(void)
{
	struct net_device *dev = alloc_etherdev(sizeof(struct net_local)); /*分配net_device结构*/
	struct net_local *lp;
	int ret = 0;

/*初始化net_device结构。包括中断号,基地址,网络设备的私有数据*/
	dev->irq = irq;
	dev->base_addr = io;
	lp = netdev_priv(dev);


	spin_lock_init(&lp->lock);

        /* boy, they'd better get these right */
        if (!strcmp(media, "rj45"))
		lp->adapter_cnf = A_CNF_MEDIA_10B_T | A_CNF_10B_T;
	else if (!strcmp(media, "aui"))
		lp->adapter_cnf = A_CNF_MEDIA_AUI   | A_CNF_AUI;
	else if (!strcmp(media, "bnc"))
		lp->adapter_cnf = A_CNF_MEDIA_10B_2 | A_CNF_10B_2;
	else
		lp->adapter_cnf = A_CNF_MEDIA_10B_T | A_CNF_10B_T;

        if (duplex==-1)
		lp->auto_neg_cnf = AUTO_NEG_ENABLE;

        if (io == 0) {
                printk(KERN_ERR "cs89x0.c: Module autoprobing not allowed.\n");
                printk(KERN_ERR "cs89x0.c: Append io=0xNNN\n");
                ret = -EPERM;
		goto out;
        } else if (io <= 0x1ff) {
		ret = -ENXIO;
		goto out;
	}

/*调用cs89x0_probe1函数进一步初始化*/
	ret = cs89x0_probe1(dev, io, 1);
	if (ret)
		goto out;

	dev_cs89x0 = dev;
	return 0;

}

/*
* 1. 初始化硬件的相关参数: 比如网卡适配器的设置
* 2. 进一步初始化dev结构: 比如MAC地址, netdev_ops
*    主要的还是初始化硬件的操作,这里先不关心到底如何实现的
* 3. 注册net_dev结构
*/
static int __init cs89x0_probe1(struct net_device *dev, int ioaddr, int modular)
{
	struct net_local *lp = netdev_priv(dev);
	static unsigned version_printed;
	int i;
	int tmp;
	unsigned rev_type = 0;
	int eeprom_buff[CHKSUM_LEN];
	int retval;

	/* Initialize the device structure. */
	if (!modular) {
		memset(lp, 0, sizeof(*lp));
		spin_lock_init(&lp->lock);
#ifndef MODULE
#if ALLOW_DMA
		if (g_cs89x0_dma) {
			lp->use_dma = 1;
			lp->dma = g_cs89x0_dma;
			lp->dmasize = 16;	/* Could make this an option... */
		}
#endif
		lp->force = g_cs89x0_media__force;
#endif

#if defined(CONFIG_MACH_QQ2440)
		lp->force |= FORCE_RJ45 | FORCE_FULL;
#endif
        }

	/* Grab the region so we can find another board if autoIRQ fails. */
	/* WTF is going on here? */
	if (!request_region(ioaddr & ~3, NETCARD_IO_EXTENT, DRV_NAME)) {
		printk(KERN_ERR "%s: request_region(0x%x, 0x%x) failed\n",
				DRV_NAME, ioaddr, NETCARD_IO_EXTENT);
		retval = -EBUSY;
		goto out1;
	}

	/* if they give us an odd I/O address, then do ONE write to
           the address port, to get it back to address zero, where we
           expect to find the EISA signature word. An IO with a base of 0x3
	   will skip the test for the ADD_PORT. */
	if (ioaddr & 1) {
		if (net_debug > 1)
			printk(KERN_INFO "%s: odd ioaddr 0x%x\n", dev->name, ioaddr);
	        if ((ioaddr & 2) != 2)
	        	if ((readword(ioaddr & ~3, ADD_PORT) & ADD_MASK) != ADD_SIG) {
				printk(KERN_ERR "%s: bad signature 0x%x\n",
					dev->name, readword(ioaddr & ~3, ADD_PORT));
		        	retval = -ENODEV;
				goto out2;
			}
	}

	ioaddr &= ~3;
	printk(KERN_DEBUG "PP_addr at %x[%x]: 0x%x\n",
			ioaddr, ADD_PORT, readword(ioaddr, ADD_PORT));
	writeword(ioaddr, ADD_PORT, PP_ChipID);

	tmp = readword(ioaddr, DATA_PORT);
	if (tmp != CHIP_EISA_ID_SIG) {
		printk(KERN_DEBUG "%s: incorrect signature at %x[%x]: 0x%x!="
			CHIP_EISA_ID_SIG_STR "\n",
			dev->name, ioaddr, DATA_PORT, tmp);
  		retval = -ENODEV;
  		goto out2;
	}

	/* Fill in the 'dev' fields. */
	dev->base_addr = ioaddr;

	/* get the chip type */
	rev_type = readreg(dev, PRODUCT_ID_ADD);
	lp->chip_type = rev_type &~ REVISON_BITS;
	lp->chip_revision = ((rev_type & REVISON_BITS) >> 8) + 'A';

	/* Check the chip type and revision in order to set the correct send command
	CS8920 revision C and CS8900 revision F can use the faster send. */
	lp->send_cmd = TX_AFTER_381;
	if (lp->chip_type == CS8900 && lp->chip_revision >= 'F')
		lp->send_cmd = TX_NOW;
	if (lp->chip_type != CS8900 && lp->chip_revision >= 'C')
		lp->send_cmd = TX_NOW;

	if (net_debug  &&  version_printed++ == 0)
		printk(version);

	printk(KERN_INFO "%s: cs89%c0%s rev %c found at %#3lx ",
	       dev->name,
	       lp->chip_type==CS8900?'0':'2',
	       lp->chip_type==CS8920M?"M":"",
	       lp->chip_revision,
	       dev->base_addr);

	reset_chip(dev);

        /* Here we read the current configuration of the chip. If there
	   is no Extended EEPROM then the idea is to not disturb the chip
	   configuration, it should have been correctly setup by automatic
	   EEPROM read on reset. So, if the chip says it read the EEPROM
	   the driver will always do *something* instead of complain that
	   adapter_cnf is 0. */


        if ((readreg(dev, PP_SelfST) & (EEPROM_OK | EEPROM_PRESENT)) ==
	      (EEPROM_OK|EEPROM_PRESENT)) {
	        /* Load the MAC. */
		for (i=0; i < ETH_ALEN/2; i++) {
	                unsigned int Addr;
			Addr = readreg(dev, PP_IA+i*2);
		        dev->dev_addr[i*2] = Addr & 0xFF;
		        dev->dev_addr[i*2+1] = Addr >> 8;
		}

	   	/* Load the Adapter Configuration.
		   Note:  Barring any more specific information from some
		   other source (ie EEPROM+Schematics), we would not know
		   how to operate a 10Base2 interface on the AUI port.
		   However, since we  do read the status of HCB1 and use
		   settings that always result in calls to control_dc_dc(dev,0)
		   a BNC interface should work if the enable pin
		   (dc/dc converter) is on HCB1. It will be called AUI
		   however. */

		lp->adapter_cnf = 0;
		i = readreg(dev, PP_LineCTL);
		/* Preserve the setting of the HCB1 pin. */
		if ((i & (HCB1 | HCB1_ENBL)) ==  (HCB1 | HCB1_ENBL))
			lp->adapter_cnf |= A_CNF_DC_DC_POLARITY;
		/* Save the sqelch bit */
		if ((i & LOW_RX_SQUELCH) == LOW_RX_SQUELCH)
			lp->adapter_cnf |= A_CNF_EXTND_10B_2 | A_CNF_LOW_RX_SQUELCH;
		/* Check if the card is in 10Base-t only mode */
		if ((i & (AUI_ONLY | AUTO_AUI_10BASET)) == 0)
			lp->adapter_cnf |=  A_CNF_10B_T | A_CNF_MEDIA_10B_T;
		/* Check if the card is in AUI only mode */
		if ((i & (AUI_ONLY | AUTO_AUI_10BASET)) == AUI_ONLY)
			lp->adapter_cnf |=  A_CNF_AUI | A_CNF_MEDIA_AUI;
		/* Check if the card is in Auto mode. */
		if ((i & (AUI_ONLY | AUTO_AUI_10BASET)) == AUTO_AUI_10BASET)
			lp->adapter_cnf |=  A_CNF_AUI | A_CNF_10B_T |
			A_CNF_MEDIA_AUI | A_CNF_MEDIA_10B_T | A_CNF_MEDIA_AUTO;

		if (net_debug > 1)
			printk(KERN_INFO "%s: PP_LineCTL=0x%x, adapter_cnf=0x%x\n",
					dev->name, i, lp->adapter_cnf);

		/* IRQ. Other chips already probe, see below. */
		if (lp->chip_type == CS8900)
			lp->isa_config = readreg(dev, PP_CS8900_ISAINT) & INT_NO_MASK;

		printk( "[Cirrus EEPROM] ");
	}

        printk("\n");

	/* First check to see if an EEPROM is attached. */

	if ((readreg(dev, PP_SelfST) & EEPROM_PRESENT) == 0)
		printk(KERN_WARNING "cs89x0: No EEPROM, relying on command line....\n");
	else if (get_eeprom_data(dev, START_EEPROM_DATA,CHKSUM_LEN,eeprom_buff) < 0) {
		printk(KERN_WARNING "\ncs89x0: EEPROM read failed, relying on command line.\n");
        } else if (get_eeprom_cksum(START_EEPROM_DATA,CHKSUM_LEN,eeprom_buff) < 0) {
		/* Check if the chip was able to read its own configuration starting
		   at 0 in the EEPROM*/
		if ((readreg(dev, PP_SelfST) & (EEPROM_OK | EEPROM_PRESENT)) !=
		    (EEPROM_OK|EEPROM_PRESENT))
                	printk(KERN_WARNING "cs89x0: Extended EEPROM checksum bad and no Cirrus EEPROM, relying on command line\n");

        } else {
		/* This reads an extended EEPROM that is not documented
		   in the CS8900 datasheet. */

                /* get transmission control word  but keep the autonegotiation bits */
                if (!lp->auto_neg_cnf) lp->auto_neg_cnf = eeprom_buff[AUTO_NEG_CNF_OFFSET/2];
                /* Store adapter configuration */
                if (!lp->adapter_cnf) lp->adapter_cnf = eeprom_buff[ADAPTER_CNF_OFFSET/2];
                /* Store ISA configuration */
                lp->isa_config = eeprom_buff[ISA_CNF_OFFSET/2];
                dev->mem_start = eeprom_buff[PACKET_PAGE_OFFSET/2] << 8;

                /* eeprom_buff has 32-bit ints, so we can't just memcpy it */
                /* store the initial memory base address */
                for (i = 0; i < ETH_ALEN/2; i++) {
                        dev->dev_addr[i*2] = eeprom_buff[i];
                        dev->dev_addr[i*2+1] = eeprom_buff[i] >> 8;
                }
		if (net_debug > 1)
			printk(KERN_DEBUG "%s: new adapter_cnf: 0x%x\n",
				dev->name, lp->adapter_cnf);
        }

        /* allow them to force multiple transceivers.  If they force multiple, autosense */
        {
		int count = 0;
		if (lp->force & FORCE_RJ45)	{lp->adapter_cnf |= A_CNF_10B_T; count++; }
		if (lp->force & FORCE_AUI) 	{lp->adapter_cnf |= A_CNF_AUI; count++; }
		if (lp->force & FORCE_BNC)	{lp->adapter_cnf |= A_CNF_10B_2; count++; }
		if (count > 1)			{lp->adapter_cnf |= A_CNF_MEDIA_AUTO; }
		else if (lp->force & FORCE_RJ45){lp->adapter_cnf |= A_CNF_MEDIA_10B_T; }
		else if (lp->force & FORCE_AUI)	{lp->adapter_cnf |= A_CNF_MEDIA_AUI; }
		else if (lp->force & FORCE_BNC)	{lp->adapter_cnf |= A_CNF_MEDIA_10B_2; }
        }

	if (net_debug > 1)
		printk(KERN_DEBUG "%s: after force 0x%x, adapter_cnf=0x%x\n",
			dev->name, lp->force, lp->adapter_cnf);

        /* FIXME: We don't let you set dc-dc polarity or low RX squelch from the command line: add it here */

        /* FIXME: We don't let you set the IMM bit from the command line: add it to lp->auto_neg_cnf here */

        /* FIXME: we don't set the Ethernet address on the command line.  Use
           ifconfig IFACE hw ether AABBCCDDEEFF */

	printk(KERN_INFO "cs89x0 media %s%s%s",
	       (lp->adapter_cnf & A_CNF_10B_T)?"RJ-45,":"",
	       (lp->adapter_cnf & A_CNF_AUI)?"AUI,":"",
	       (lp->adapter_cnf & A_CNF_10B_2)?"BNC,":"");

	lp->irq_map = 0xffff;

	/* If this is a CS8900 then no pnp soft */
	if (lp->chip_type != CS8900 &&
	    /* Check if the ISA IRQ has been set  */
		(i = readreg(dev, PP_CS8920_ISAINT) & 0xff,
		 (i != 0 && i < CS8920_NO_INTS))) {
		if (!dev->irq)
			dev->irq = i;
	} else {
		i = lp->isa_config & INT_NO_MASK;
		if (lp->chip_type == CS8900) {
#ifdef CONFIG_CS89x0_NONISA_IRQ
		        i = cs8900_irq_map[0];
#else
			/* Translate the IRQ using the IRQ mapping table. */
			if (i >= ARRAY_SIZE(cs8900_irq_map))
				printk("\ncs89x0: invalid ISA interrupt number %d\n", i);
			else
				i = cs8900_irq_map[i];

			lp->irq_map = CS8900_IRQ_MAP; /* fixed IRQ map for CS8900 */
		} else {
			int irq_map_buff[IRQ_MAP_LEN/2];

			if (get_eeprom_data(dev, IRQ_MAP_EEPROM_DATA,
					    IRQ_MAP_LEN/2,
					    irq_map_buff) >= 0) {
				if ((irq_map_buff[0] & 0xff) == PNP_IRQ_FRMT)
					lp->irq_map = (irq_map_buff[0]>>8) | (irq_map_buff[1] << 8);
			}
#endif
		}

	dev->netdev_ops	= &net_ops;	/*初始化ner_dev*/
	dev->watchdog_timeo = HZ;  /*超时时间*/

	retval = register_netdev(dev);  /*注册net_dev结构*/

	return retval;
}

/*接下来当网卡收到协议栈传输下来的包,那网卡处理的流程是如何的。
* 对应的函数是net_ops中的ndo_start_xmit函数
*/

/*
* 1.  通知协议栈,暂停向驱动传输数据。 因为当前要发送数据,驱动程序为了流控暂停接受数据  
* 2.  将包中的数据也就是skb写入到相应的寄存器中
* 3.  释放skb结构
* 4. 按理说前面停止传输队列,到最后要唤醒队列。但是却没有。 从注释上看,"We DO NOT call netif_wake_queue() here" 也就是在别去唤醒了传输队列.
*    从注释上看,应该在发送数据包函数之前唤醒队列的。那么发送数据包的函数在那里实现了?
*/
static netdev_tx_t net_send_packet(struct sk_buff *skb,struct net_device *dev)
{
	struct net_local *lp = netdev_priv(dev);
	unsigned long flags;


	/* keep the upload from being interrupted, since we
                  ask the chip to start transmitting before the
                  whole packet has been completely uploaded. */

	spin_lock_irqsave(&lp->lock, flags);
	
	//通知协议栈,暂停向驱动传输数据。 因为当前要发送数据,驱动程序为了流控暂停接受数据  
	netif_stop_queue(dev);

	/* initiate a transmit sequence */
	writeword(dev->base_addr, TX_CMD_PORT, lp->send_cmd);
	writeword(dev->base_addr, TX_LEN_PORT, skb->len);

	/* Test to see if the chip has allocated memory for the packet */
	if ((readreg(dev, PP_BusST) & READY_FOR_TX_NOW) == 0) {
		/*
		 * Gasp!  It hasn't.  But that shouldn't happen since
		 * we're waiting for TxOk, so return 1 and requeue this packet.
		 */

		spin_unlock_irqrestore(&lp->lock, flags);
		if (net_debug) printk("cs89x0: Tx buffer not free!\n");
		return NETDEV_TX_BUSY;
	}
	/* Write the contents of the packet */
	// 将包中的数据也就是skb写入到相应的寄存器中
	writewords(dev->base_addr, TX_FRAME_PORT,skb->data,(skb->len+1) >>1);
	spin_unlock_irqrestore(&lp->lock, flags);
	dev->stats.tx_bytes += skb->len;
	//释放skb结构
	dev_kfree_skb (skb);

	/*
	 * We DO NOT call netif_wake_queue() here.
	 * We also DO NOT call netif_start_queue().
	 *
	 * Either of these would cause another bottom half run through
	 * net_send_packet() before this packet has fully gone out.  That causes
	 * us to hit the "Gasp!" above and the send is rescheduled.  it runs like
	 * a dog.  We just return and wait for the Tx completion interrupt handler
	 * to restart the netdevice layer
	 */

	return NETDEV_TX_OK;
}

/*其实当这次传输完成后,会触发一次发生完成的中断。 再那时候唤醒接受时被暂停的队列*/

/* The typical workload of the driver:
   Handle the network interface interrupts. */

static irqreturn_t net_interrupt(int irq, void *dev_id)
{
	struct net_device *dev = dev_id;
	struct net_local *lp;
	int ioaddr, status;
 	int handled = 0;

	ioaddr = dev->base_addr;
	lp = netdev_priv(dev);

	while ((status = readword(dev->base_addr, ISQ_PORT))) {
		if (net_debug > 4)printk("%s: event=%04x\n", dev->name, status);
		handled = 1;
		switch(status & ISQ_EVENT_MASK) {
		case ISQ_RECEIVER_EVENT:
			/* Got a packet(s). */       /*当接受到一个包,也会触发一次中断。然后调用net_rx函数*/
			net_rx(dev);
			break;
		case ISQ_TRANSMITTER_EVENT:
			dev->stats.tx_packets++;
			netif_wake_queue(dev);	 /*这里唤醒队列*/
			if ((status & (	TX_OK |
					TX_LOST_CRS |
					TX_SQE_ERROR |
					TX_LATE_COL |
					TX_16_COL)) != TX_OK) {
				if ((status & TX_OK) == 0)
					dev->stats.tx_errors++;
				if (status & TX_LOST_CRS)
					dev->stats.tx_carrier_errors++;
				if (status & TX_SQE_ERROR)
					dev->stats.tx_heartbeat_errors++;
				if (status & TX_LATE_COL)
					dev->stats.tx_window_errors++;
				if (status & TX_16_COL)
					dev->stats.tx_aborted_errors++;
			}
			break;

	return IRQ_RETVAL(handled);
}

/*
* 1. 读取接受的状态
* 2. 读取接受的长度
* 3. 分频skb结构
* 4. 将数据填充到skb中的daya区
* 5. 取得该包的协议id。 当包交给上层需要知道你的包对应的是那个协议。才能对你这个包进行处理
* 6. 将skb提交到网络协议栈
*/

static void net_rx(struct net_device *dev)
{
	struct sk_buff *skb;
	int status, length;

	int ioaddr = dev->base_addr;
	//读取接受的状态
	//读取接受的长度
	status = readword(ioaddr, RX_FRAME_PORT);
	length = readword(ioaddr, RX_FRAME_PORT);

	if ((status & RX_OK) == 0) {
		count_rx_errors(status, dev);
		return;
	}

	/* Malloc up new buffer. */
	skb = dev_alloc_skb(length + 2);
	if (skb == NULL) {
#if 0		/* Again, this seems a cruel thing to do */
		printk(KERN_WARNING "%s: Memory squeeze, dropping packet.\n", dev->name);
#endif
		dev->stats.rx_dropped++;
		return;
	}
	skb_reserve(skb, 2);	/* longword align L3 header */

  // 将数据填充到skb中的data区
	readwords(ioaddr, RX_FRAME_PORT, skb_put(skb, length), length >> 1);
	if (length & 1)
		skb->data[length-1] = readword(ioaddr, RX_FRAME_PORT);

	if (net_debug > 3) {
		printk(	"%s: received %d byte packet of type %x\n",
			dev->name, length,
			(skb->data[ETH_ALEN+ETH_ALEN] << 8) | skb->data[ETH_ALEN+ETH_ALEN+1]);
	}

	//取得该包的协议id。 当包交给上层需要知道你的包对应的是那个协议。才能对你这个包进行处理
	 skb->protocol=eth_type_trans(skb,dev);
  //将skb提交到网络协议栈
	netif_rx(skb);
	dev->stats.rx_packets++;
	dev->stats.rx_bytes += length;
}

/*这就是网络工作的大致过程,以后还会进一步分析细致的过程**/



  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值