tcp/ip协议栈初始化源码解析

在这里插入图片描述
首先,操作系统初始化时会执行sock_init函数。该函数依次执行proto_init和dev_init,并注册一个下半部(bottom half)处理函数net_bh,用于数据包到达后的处理。

/*
 * Socket layer start-up.  According to the original note this is called
 * from main() while the operating system is initialising.
 */
void sock_init(void)
{
	int slot = 0;

	printk("Swansea University Computer Society NET3.019\n");

	/* Start with an empty pops[] table: no address family registered. */
	while (slot < NPROTO) {
		pops[slot] = NULL;
		slot++;
	}

	/* Run the init hook of every configured protocol. */
	proto_init();

#ifdef CONFIG_NET
	/* Bring up the link-layer devices. */
	dev_init();

	/*
	 * Install net_bh as the NET_BH bottom-half routine and enable it.
	 * net_bh drains the backlog queue of received packets; per the
	 * original note, the receive interrupt only queues the skb and
	 * leaves the real processing to this bottom half.
	 */
	bh_base[NET_BH].routine = net_bh;
	enable_bh(NET_BH);
#endif
}

dev_init函数代码如下

...还有很多设备
/*
 * Four statically defined PPP devices, chained
 * ppp0 -> ppp1 -> ppp2 -> ppp3 -> (whatever NEXT_DEV was before this point).
 * All of them use ppp_init as their init hook.  Afterwards NEXT_DEV is
 * redefined to &ppp0_dev so that the next device defined below links in
 * front of this chain.
 * NOTE(review): the initialisers are positional; going by the field
 * comments on loopback_dev, the 3/2/1/0 value sits in the "base I/O
 * address" slot and presumably carries the unit number - confirm against
 * struct device.
 */
#if defined(CONFIG_PPP)
extern int ppp_init(struct device *);
static struct device ppp3_dev = {
    "ppp3", 0x0, 0x0, 0x0, 0x0, 3, 0, 0, 0, 0, NEXT_DEV,  ppp_init, };
static struct device ppp2_dev = {
    "ppp2", 0x0, 0x0, 0x0, 0x0, 2, 0, 0, 0, 0, &ppp3_dev, ppp_init, };
static struct device ppp1_dev = {
    "ppp1", 0x0, 0x0, 0x0, 0x0, 1, 0, 0, 0, 0, &ppp2_dev, ppp_init, };
static struct device ppp0_dev = {
    "ppp0", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, &ppp1_dev, ppp_init, };
#undef NEXT_DEV
#define NEXT_DEV (&ppp0_dev)
#endif   /* PPP */

/*
 * Optional "dummy" device.  It links to the current NEXT_DEV and then
 * becomes the new NEXT_DEV itself, so it ends up in front of every device
 * defined before it.  dummy_init is its init hook.
 */
#ifdef CONFIG_DUMMY
    extern int dummy_init(struct device *dev);
    static struct device dummy_dev = {
	"dummy", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NEXT_DEV, dummy_init, };
#   undef	NEXT_DEV
#   define	NEXT_DEV	(&dummy_dev)
#endif

/*
 * The loopback device is defined last, so it points at the final value of
 * NEXT_DEV and becomes the head of the whole static device chain.
 */
extern int loopback_init(struct device *dev);
struct device loopback_dev = {
	"lo",			/* Software Loopback interface		*/
	0x0,			/* recv memory end			*/
	0x0,			/* recv memory start			*/
	0x0,			/* memory end				*/
	0x0,			/* memory start				*/
	0,			/* base I/O address			*/
	0,			/* IRQ					*/
	0, 0, 0,		/* flags				*/
	NEXT_DEV,		/* next device				*/
	loopback_init		/* loopback_init should set up the rest	*/
};
/* Head of the device linked list that dev_init() walks. */
struct device *dev_base = &loopback_dev;

	/*
 *	Initialize the DEV module. At boot time this walks the device list and
 *	unhooks any devices that fail to initialise (normally hardware not 
 *	present) and leaves us with a valid list of present and active devices.
 *
 *	The PCMCIA code may need to change this a little, and add a pair
 *	of register_inet_device() unregister_inet_device() calls. This will be
 *	needed for ethernet as modules support.
 */
 
/*
 * Walk the dev_base list once, calling each device's init hook (if it has
 * one).  A non-zero return from the hook means the device failed to come
 * up; such a device is unhooked from the list until the next reboot.
 */
void dev_init(void)
{
	/*
	 * 'link' always addresses the pointer that leads to the device being
	 * examined: &dev_base while no device has been kept yet, otherwise
	 * the 'next' field of the last device that was kept.
	 */
	struct device **link = &dev_base;

	while (*link != NULL) {
		struct device *cur = *link;

		if (cur->init != NULL && cur->init(cur) != 0) {
			/* Init failed: bypass this device. */
			*link = cur->next;
		} else {
			/* Kept: later removals hang off this device. */
			link = &cur->next;
		}
	}
}

由dev_init的代码可以知道,它主要是执行dev_base链表中每个节点的init函数,这里以loopback设备为例。该设备的init函数代码如下,主要是设置发送函数、初始化一些字段和配置。

/*
 * Initialize the rest of the LOOPBACK device.
 *
 * Sets the MTU, the transmit hook, the Ethernet-style header helpers,
 * the interface flags and (with CONFIG_INET) the 127.0.0.1 addresses,
 * allocates a zeroed statistics block in dev->priv and initialises the
 * DEV_NUMBUFFS send queues.
 *
 * Returns 0 on success, or -ENOMEM when the statistics block cannot be
 * allocated.  dev_init() treats any non-zero return as "failed to come
 * up" and unhooks the device.
 */
int loopback_init(struct device *dev)
{
  int i;

  dev->mtu		= 2000;			/* MTU			*/
  dev->tbusy		= 0;
  /* Transmit routine. */
  dev->hard_start_xmit	= loopback_xmit;
  dev->open		= NULL;
#if 1
  dev->hard_header	= eth_header;
  /* Length of the link-layer (MAC) header. */
  dev->hard_header_len	= ETH_HLEN;		/* 14			*/
  /* Length of a hardware address. */
  dev->addr_len		= ETH_ALEN;		/* 6			*/
  dev->type		= ARPHRD_ETHER;		/* 0x0001		*/
  /* Hook that net_bh() calls to obtain the packet's protocol ID. */
  dev->type_trans	= eth_type_trans;
  dev->rebuild_header	= eth_rebuild_header;
  dev->open		= loopback_open;
#else
  dev->hard_header_len	= 0;
  dev->addr_len		= 0;
  dev->type		= 0;			/* loopback_type (0)	*/
  dev->hard_header	= NULL;
  dev->type_trans	= NULL;
  dev->rebuild_header	= NULL;
#endif

  /* New-style flags. */
  dev->flags		= IFF_LOOPBACK|IFF_BROADCAST;
  dev->family		= AF_INET;
#ifdef CONFIG_INET    
  dev->pa_addr		= in_aton("127.0.0.1");
  dev->pa_brdaddr	= in_aton("127.255.255.255");
  dev->pa_mask		= in_aton("255.0.0.0");
  dev->pa_alen		= sizeof(unsigned long);
#endif  
  dev->priv = kmalloc(sizeof(struct enet_statistics), GFP_KERNEL);
  /* Never hand a failed allocation to memset(); report it instead. */
  if (dev->priv == NULL)
	return(-ENOMEM);
  memset(dev->priv, 0, sizeof(struct enet_statistics));
  dev->get_stats = get_stats;

  /* Fill in the generic fields of the device structure. */
  /* Initialise every send queue. */
  for (i = 0; i < DEV_NUMBUFFS; i++)
	skb_queue_head_init(&dev->buffs[i]);
  
  return(0);
}

接下来看一下proto_init函数的代码。proto_init函数遍历protocols数组,然后执行里面注册的函数。

/*
 * Table of the protocol families compiled into this kernel, one
 * { name, init function } pair per entry, selected by the CONFIG_*
 * options.  The { NULL, NULL } entry terminates the table; proto_init()
 * stops at the first entry whose name is NULL.
 */
struct net_proto protocols[] = {
#ifdef	CONFIG_UNIX
  { "UNIX",	unix_proto_init	},
#endif
#if defined(CONFIG_IPX)||defined(CONFIG_ATALK)  
  { "802.2",	p8022_proto_init },
  { "SNAP",	snap_proto_init },
#endif
#ifdef CONFIG_AX25  
  { "AX.25",	ax25_proto_init },
#endif  
#ifdef	CONFIG_INET
  { "INET",	inet_proto_init	},
#endif
#ifdef  CONFIG_IPX
  { "IPX",	ipx_proto_init },
#endif
#ifdef CONFIG_ATALK
  { "DDP",	atalk_proto_init },
#endif
  { NULL,	NULL		}
};

/*
 * Kick all configured protocols: call the init function of every entry
 * in protocols[] up to the NULL-named terminator, passing the entry
 * itself.  The INET init function, for example, calls sock_register(),
 * which records the family's operations in pops[].
 */
void proto_init(void)
{
	/* The original note says this table is defined in protocols.c. */
	extern struct net_proto protocols[];	/* Network protocols */
	struct net_proto *entry;

	for (entry = protocols; entry->name != NULL; entry++)
		entry->init_func(entry);
	/* We're all done... */
}

下面以INET的inet_proto_init函数为例,先列出它用到的sock_register和inet_add_protocol。

/*
 * Registered address families.  Per the original note, the socket layer
 * picks an entry from this table according to the family requested by
 * user space and then calls the operations it finds there.
 */
static struct proto_ops *pops[NPROTO];

/*
 *	Called by a protocol handler that wants to advertise its address
 *	family and have it linked into the SOCKET module.
 *
 *	Stores 'ops' in the first free pops[] slot and stamps it with
 *	'family'.  The table scan is bracketed by cli()/sti().
 *
 *	Returns the slot index used, or -ENOMEM when all NPROTO slots are
 *	already taken.
 */
int sock_register(int family, struct proto_ops *ops)
{
	int slot;
	int result = -ENOMEM;

	cli();
	for (slot = 0; slot < NPROTO; slot++) {
		if (pops[slot] == NULL) {
			/* Free slot found: register here. */
			pops[slot] = ops;
			pops[slot]->family = family;
			result = slot;
			break;
		}
	}
	sti();
	return(result);
}
/*
 * Hash table of the protocols layered on top of IP.  The bucket index is
 * protocol & (MAX_INET_PROTOS - 1), and each bucket is a singly linked
 * chain through inet_protocol.next.  Per the original note, IP input
 * selects an entry by the protocol field of the IP header and calls the
 * handler stored in it.
 * NOTE(review): the mask only works as a hash if MAX_INET_PROTOS is a
 * power of two - confirm against its definition.
 */
struct inet_protocol *inet_protos[MAX_INET_PROTOS] = {
  NULL
};

/*
 * Insert 'prot' at the head of its hash chain.
 *
 * prot->next is overwritten with the old chain head, so a caller that is
 * walking another list through 'next' must save it first.  prot->copy is
 * set to 1 when an entry with the same protocol number is already on the
 * chain, and to 0 otherwise.
 */
void inet_add_protocol(struct inet_protocol *prot)
{
  unsigned char hash;
  struct inet_protocol *p2;

  hash = prot->protocol & (MAX_INET_PROTOS - 1);
  prot->next = inet_protos[hash];	/* head insertion */
  inet_protos[hash] = prot;
  prot->copy = 0;

  /* Set the copy bit if we need to. */
  p2 = (struct inet_protocol *) prot->next;
  while (p2 != NULL) {
	if (p2->protocol == prot->protocol) {
		prot->copy = 1;
		break;
	}
	/*
	 * Advance along the chain.  Stepping from prot->next here would
	 * reload the same node on every pass and never terminate when
	 * that node has a different protocol number.
	 */
	p2 = (struct inet_protocol *) p2->next;
  }
}

/*
 * Initialise the INET family: register its socket operations, seed
 * seq_offset, reset the TCP/UDP/RAW socket tables and counters, add every
 * protocol on the inet_protocol_base list to the IP protocol hash, then
 * bring up ARP and IP.  'pro' is not used by this body.
 */
void inet_proto_init(struct net_proto *pro)
{
	struct inet_protocol *cur;
	struct inet_protocol *following;
	int slot;

	/* Tell SOCKET that we are alive... */
	(void) sock_register(inet_proto_ops.family, &inet_proto_ops);

	seq_offset = CURRENT_TIME*250;

	/* Empty the per-protocol socket tables. */
	slot = 0;
	while (slot < SOCK_ARRAY_SIZE) {
		tcp_prot.sock_array[slot] = NULL;
		udp_prot.sock_array[slot] = NULL;
		raw_prot.sock_array[slot] = NULL;
		slot++;
	}

	/* Reset the usage counters. */
	tcp_prot.inuse = 0;
	tcp_prot.highestinuse = 0;
	udp_prot.inuse = 0;
	udp_prot.highestinuse = 0;
	raw_prot.inuse = 0;
	raw_prot.highestinuse = 0;

	/*
	 * Add all the protocols.  The successor is fetched before the call
	 * because inet_add_protocol() overwrites the 'next' field.
	 */
	printk("IP Protocols: ");
	cur = inet_protocol_base;
	while (cur != NULL) {
		following = (struct inet_protocol *) cur->next;
		inet_add_protocol(cur);
		if (following != NULL)
			printk("%s%s", cur->name, ", ");
		else
			printk("%s%s", cur->name, "\n");
		cur = following;
	}

	/* Set the ARP module up */
	arp_init();

	/* Set the IP module up */
	ip_init();
}

注册完各种协议后,最后执行arp_init和ip_init函数。这两个函数的功能类似,下面只讲arp_init函数的代码。

/*
 * Packet-type entry that arp_init() registers through dev_add_pack().
 * The leading 0 is a placeholder: arp_init() stores htons(ETH_P_ARP) in
 * .type before registering.
 * NOTE(review): the initialiser is positional; net_bh() reads .type,
 * .dev and .func from such entries, so the order here is presumably
 * type, dev, func (arp_rcv), ..., next - confirm against
 * struct packet_type.
 */
static struct packet_type arp_packet_type =
{
	0,	
	NULL,		/* All devices */
	arp_rcv,
	NULL,
	NULL
};

/*
 * Notifier block that arp_init() passes to register_netdevice_notifier();
 * arp_device_event is presumably the callback invoked on device events.
 */
static struct notifier_block arp_dev_notifier={
	arp_device_event,
	NULL,
	0
};
/*
 * Timer that arp_init() starts with add_timer().
 * NOTE(review): positional initialiser; ARP_CHECK_INTERVAL is presumably
 * the expiry and arp_check_expire the function run on expiry - confirm
 * against struct timer_list.
 */
static struct timer_list arp_timer = { NULL, NULL, ARP_CHECK_INTERVAL, 0L, &arp_check_expire };

/*
 * Set the ARP module up: register the ARP packet type, start the ARP
 * timer and register for network-device notifications.
 */
void arp_init (void)
{
	/* Register the packet type */
	// Fill in the ARP ethertype, then push the entry onto the packet-type
	// list (ptype_base) that net_bh() scans to dispatch received packets.
	arp_packet_type.type=htons(ETH_P_ARP);
	dev_add_pack(&arp_packet_type);
	/* Start with the regular checks for expired arp entries. */
	// Start arp_timer.
	add_timer(&arp_timer);
	/* Register for device down reports */
	// Register arp_dev_notifier so ARP is notified of device events.
	register_netdevice_notifier(&arp_dev_notifier);
}

下面看一下dev_add_pack的代码,以及mac层收到数据包时的处理代码(net_bh)。

/*
 * Push a packet-type entry onto the head of the ptype_base list, which
 * net_bh() scans to hand received packets to upper-layer protocols.
 * Entries registered for ETH_P_ALL are also counted in dev_nit.
 */
void dev_add_pack(struct packet_type *pt)
{
	if (htons(ETH_P_ALL) == pt->type) {
		dev_nit++;
	}

	pt->next = ptype_base;
	ptype_base = pt;
}
/*
 *	When we are called the queue is ready to grab, the interrupts are
 *	on and hardware can interrupt and queue to the receive queue a we
 *	run with no problems.
 *	This is run as a bottom half after an interrupt handler that does
 *	mark_bh(NET_BH);
 */
 
/*
 * Network bottom half: drain the backlog queue and deliver each packet to
 * every matching entry on the ptype_base list.
 *
 * For each packet the link-layer header is skipped, the device's
 * type_trans hook supplies the protocol ID, and every packet_type whose
 * type matches (or is ETH_P_ALL) and whose dev is NULL or equal to the
 * receiving device gets the packet.  All matches except the last receive
 * a clone; the last match receives the original skb.  A packet with no
 * match is freed.  dev_transmit() is called before, between and after
 * packets.  The 'tmp' argument is not used by this body.
 */
void net_bh(void *tmp)
{
	struct sk_buff *skb;
	struct packet_type *ptype;
	struct packet_type *pt_prev;
	unsigned short type;

	/*
	 *	Atomically check and mark our BUSY state. 
	 */
	// NOTE(review): set_bit() presumably returns the previous value of the
	// bit, so a non-zero result means net_bh is already running - confirm.

	if (set_bit(1, (void*)&in_bh))
		return;

	/*
	 *	Can we send anything now? We want to clear the
	 *	decks for any more sends that get done as we
	 *	process the input.
	 */

	dev_transmit();
  
	/*
	 *	Any data left to process. This may occur because a
	 *	mark_bh() is done after we empty the queue including
	 *	that from the device which does a mark_bh() just after
	 */

	cli();
	
	/*
	 *	While the queue is not empty
	 */
	 
	while((skb=skb_dequeue(&backlog))!=NULL)
	{
		/*
		 *	We have a packet. Therefore the queue has shrunk
		 */
  		backlog_size--;

		sti();
		
	       /*
		*	Bump the pointer to the next structure.
		*	This assumes that the basic 'skb' pointer points to
		*	the MAC header, if any (as indicated by its "length"
		*	field).  Take care now!
		*/
		// Point h.raw just past the link-layer (MAC) header
		skb->h.raw = skb->data + skb->dev->hard_header_len;
		// Exclude the MAC header from the packet length
		skb->len -= skb->dev->hard_header_len;

	       /*
		* 	Fetch the packet protocol ID.  This is also quite ugly, as
		* 	it depends on the protocol driver (the interface itself) to
		* 	know what the type is, or where to get it from.  The Ethernet
		* 	interfaces fetch the ID from the two bytes in the Ethernet MAC
		*	header (the h_proto field in struct ethhdr), but other drivers
		*	may either use the ethernet ID's or extra ones that do not
		*	clash (eg ETH_P_AX25). We could set this before we queue the
		*	frame. In fact I may change this when I have time.
		*/
		// Ask the receiving device which upper-layer protocol this is
		type = skb->dev->type_trans(skb, skb->dev);

		/*
		 *	We got a packet ID.  Now loop over the "known protocols"
		 *	table (which is actually a linked list, but this will
		 *	change soon if I get my way- FvK), and forward the packet
		 *	to anyone who wants it.
		 *
		 *	[FvK didn't get his way but he is right this ought to be
		 *	hashed so we typically get a single hit. The speed cost
		 *	here is minimal but no doubt adds up at the 4,000+ pkts/second
		 *	rate we can hit flat out]
		 */
		pt_prev = NULL;
		for (ptype = ptype_base; ptype != NULL; ptype = ptype->next) 
		{
			if ((ptype->type == type || ptype->type == htons(ETH_P_ALL)) && (!ptype->dev || ptype->dev==skb->dev))
			{
				/*
				 *	We already have a match queued. Deliver
				 *	to it and then remember the new match
				 */
				// An earlier match is pending: it gets its own clone of the skb
				if(pt_prev)
				{
					struct sk_buff *skb2;

					skb2=skb_clone(skb, GFP_ATOMIC);

					/*
					 *	Kick the protocol handler. This should be fast
					 *	and efficient code.
					 */
					// If the clone failed, the earlier match is silently skipped.

					if(skb2)
						pt_prev->func(skb2, skb->dev, pt_prev);
				}
				/* Remember the current last to do */
				// Record the most recent match
				pt_prev=ptype;
			}
		} /* End of protocol list loop */
		
		/*
		 *	Is there a last item to send to ?
		 */
		// The last match receives the original skb; with no match at all the skb is freed
		if(pt_prev)
			pt_prev->func(skb, skb->dev, pt_prev);
		/*
		 * 	Has an unknown packet has been received ?
		 */
	 
		else
			kfree_skb(skb, FREE_WRITE);

		/*
		 *	Again, see if we can transmit anything now. 
		 *	[Ought to take this out judging by tests it slows
		 *	 us down not speeds us up]
		 */

		dev_transmit();
		cli();
  	}	/* End of queue loop */
  	
  	/*
  	 *	We have emptied the queue
  	 */
  	 
  	in_bh = 0;
	sti();
	
	/*
	 *	One last output flush.
	 */
	 
	dev_transmit();
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值