首先在操作系统初始化的时候会执行sock_init函数,该函数执行proto_init和dev_init,并且注册一个下半部分的处理函数,用于数据包到达后的处理。
/*
 * Boot-time set-up of the socket layer. According to the original note
 * this is invoked from the kernel's main function while the operating
 * system initialises.
 */
void sock_init(void)
{
	int slot = 0;

	printk("Swansea University Computer Society NET3.019\n");

	/* Start with an empty pops[] (address family) table. */
	while (slot < NPROTO) {
		pops[slot] = NULL;
		slot++;
	}

	/* Run the init hook of every configured protocol. */
	proto_init();

#ifdef CONFIG_NET
	/* Bring up the link-layer devices. */
	dev_init();

	/*
	 * Install and enable the network bottom half. net_bh is the routine
	 * that later drains the backlog queue of received packets; per the
	 * original note, the receive interrupt only queues an skb there and
	 * leaves the real processing to this bottom half.
	 */
	bh_base[NET_BH].routine = net_bh;
	enable_bh(NET_BH);
#endif
}
dev_init函数代码如下
...还有很多设备
/*
 * PPP interfaces, compiled in only when CONFIG_PPP is defined.
 * Four static entries are chained ppp0 -> ppp1 -> ppp2 -> ppp3 -> the
 * previous value of NEXT_DEV; NEXT_DEV is then redefined to &ppp0_dev so
 * that whatever entry is declared next links to ppp0.
 * The initialisers are positional. Going by the field comments on
 * loopback_dev, the sixth value (3/2/1/0) occupies the base I/O address
 * slot -- presumably reused here as the unit number; TODO confirm.
 * ppp_init is the per-device init hook that dev_init() calls.
 */
#if defined(CONFIG_PPP)
extern int ppp_init(struct device *);
static struct device ppp3_dev = {
"ppp3", 0x0, 0x0, 0x0, 0x0, 3, 0, 0, 0, 0, NEXT_DEV, ppp_init, };
static struct device ppp2_dev = {
"ppp2", 0x0, 0x0, 0x0, 0x0, 2, 0, 0, 0, 0, &ppp3_dev, ppp_init, };
static struct device ppp1_dev = {
"ppp1", 0x0, 0x0, 0x0, 0x0, 1, 0, 0, 0, 0, &ppp2_dev, ppp_init, };
static struct device ppp0_dev = {
"ppp0", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, &ppp1_dev, ppp_init, };
#undef NEXT_DEV
#define NEXT_DEV (&ppp0_dev)
#endif /* PPP */
/*
 * Dummy interface, compiled in only when CONFIG_DUMMY is defined.
 * It links to the current NEXT_DEV and then becomes the new NEXT_DEV,
 * so it ends up in front of the entries declared before it.
 * dummy_init is the init hook that dev_init() calls for this entry.
 */
#ifdef CONFIG_DUMMY
extern int dummy_init(struct device *dev);
static struct device dummy_dev = {
"dummy", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NEXT_DEV, dummy_init, };
# undef NEXT_DEV
# define NEXT_DEV (&dummy_dev)
#endif
/*
 * The software loopback interface. It is not static because dev_base
 * points at it, which makes it the head of the device chain; its next
 * pointer is whatever NEXT_DEV expands to at this point.
 * Only the leading fields are given here; loopback_init() fills in the rest.
 */
extern int loopback_init(struct device *dev);
struct device loopback_dev = {
"lo", /* Software Loopback interface */
0x0, /* recv memory end */
0x0, /* recv memory start */
0x0, /* memory end */
0x0, /* memory start */
0, /* base I/O address */
0, /* IRQ */
0, 0, 0, /* flags */
NEXT_DEV, /* next device */
loopback_init /* loopback_init should set up the rest */
};
// Head of the singly linked list of devices (linked through ->next); dev_init() walks it from here.
struct device *dev_base = &loopback_dev;
/*
 * Initialize the DEV module.
 *
 * Walks the device chain starting at dev_base and runs each entry's init
 * hook. An entry whose hook returns non-zero (normally: hardware not
 * present) is unlinked from the chain, so that only the devices that came
 * up remain reachable. Entries without an init hook are kept as they are.
 *
 * The PCMCIA code may need to change this a little, and add a pair
 * of register_inet_device() unregister_inet_device() calls. This will be
 * needed for ethernet as modules support.
 */
void dev_init(void)
{
	struct device *cur;
	struct device *last_ok = NULL;	/* most recent entry that was kept */

	for (cur = dev_base; cur != NULL; cur = cur->next) {
		/* No hook, or the hook reported success: keep the entry. */
		if (!cur->init || cur->init(cur) == 0) {
			last_ok = cur;
			continue;
		}

		/*
		 * It failed to come up. Unhook it: bypass it from the last
		 * kept entry, or move the list head when nothing has been
		 * kept so far (one or more leading entries failed).
		 */
		if (last_ok != NULL)
			last_ok->next = cur->next;
		else
			dev_base = cur->next;
	}
}
由dev_init的代码可以知道,主要是执行dev_base链表中每个节点的init函数,这里以loopback设备为例。该设备的init函数代码如下。主要是设置发送函数、初始化一些字段和一些配置。
/*
 * Initialize the rest of the LOOPBACK device.
 *
 * Installs the transmit routine and the Ethernet-style header helpers,
 * sets the interface flags and (with CONFIG_INET) the 127.0.0.1 address
 * triple, allocates a zeroed statistics block in dev->priv and initialises
 * the per-device buffer queues.
 *
 * Returns 0 on success, or -ENOMEM when the statistics block cannot be
 * allocated. dev_init() treats any non-zero return as "failed to come up"
 * and unhooks the device.
 */
int loopback_init(struct device *dev)
{
	int i;

	dev->mtu = 2000;			/* MTU */
	dev->tbusy = 0;
	/* Transmit routine. */
	dev->hard_start_xmit = loopback_xmit;
	dev->open = NULL;
#if 1
	dev->hard_header = eth_header;
	/* Length of the link-layer (MAC) header. */
	dev->hard_header_len = ETH_HLEN;	/* 14 */
	/* Length of a hardware address. */
	dev->addr_len = ETH_ALEN;		/* 6 */
	dev->type = ARPHRD_ETHER;		/* 0x0001 */
	/* Hook that works out the upper-layer protocol of a frame. */
	dev->type_trans = eth_type_trans;
	dev->rebuild_header = eth_rebuild_header;
	dev->open = loopback_open;
#else
	/* Disabled header-less variant; field name corrected to hard_header_len. */
	dev->hard_header_len = 0;
	dev->addr_len = 0;
	dev->type = 0;				/* loopback_type (0) */
	dev->hard_header = NULL;
	dev->type_trans = NULL;
	dev->rebuild_header = NULL;
#endif

	/* New-style flags. */
	dev->flags = IFF_LOOPBACK|IFF_BROADCAST;
	dev->family = AF_INET;
#ifdef CONFIG_INET
	dev->pa_addr = in_aton("127.0.0.1");
	dev->pa_brdaddr = in_aton("127.255.255.255");
	dev->pa_mask = in_aton("255.0.0.0");
	dev->pa_alen = sizeof(unsigned long);
#endif

	/* The allocation can fail; never hand a NULL pointer to memset(). */
	dev->priv = kmalloc(sizeof(struct enet_statistics), GFP_KERNEL);
	if (dev->priv == NULL)
		return -ENOMEM;
	memset(dev->priv, 0, sizeof(struct enet_statistics));
	dev->get_stats = get_stats;

	/* Fill in the generic fields of the device structure: the buffer queues. */
	for (i = 0; i < DEV_NUMBUFFS; i++)
		skb_queue_head_init(&dev->buffs[i]);

	return 0;
}
接下来看一下proto_init函数的代码。proto_init函数遍历protocols数组,然后执行里面注册的函数。
/*
 * Table of the protocol families built into this kernel: a name plus the
 * init function that proto_init() calls for it. Each entry is present only
 * when its CONFIG_* option is defined; 802.2 and SNAP are included when
 * either IPX or AppleTalk is configured. The { NULL, NULL } entry ends the
 * table -- proto_init() stops at the first NULL name.
 */
struct net_proto protocols[] = {
#ifdef CONFIG_UNIX
{ "UNIX", unix_proto_init },
#endif
#if defined(CONFIG_IPX)||defined(CONFIG_ATALK)
{ "802.2", p8022_proto_init },
{ "SNAP", snap_proto_init },
#endif
#ifdef CONFIG_AX25
{ "AX.25", ax25_proto_init },
#endif
#ifdef CONFIG_INET
{ "INET", inet_proto_init },
#endif
#ifdef CONFIG_IPX
{ "IPX", ipx_proto_init },
#endif
#ifdef CONFIG_ATALK
{ "DDP", atalk_proto_init },
#endif
{ NULL, NULL }
};
/*
 * Kick all configured protocols: call the init function of every entry in
 * the protocols[] table, passing the entry itself, until the terminating
 * entry with a NULL name is reached. A protocol's init function is where
 * it announces itself to the socket layer (inet_proto_init, for example,
 * calls sock_register).
 */
void proto_init(void)
{
	/* Defined in protocols.c according to the original note. */
	extern struct net_proto protocols[];	/* Network protocols */
	struct net_proto *entry;

	for (entry = protocols; entry->name != NULL; entry++)
		(*entry->init_func)(entry);

	/* We're all done... */
}
下面以INET的inet_proto_init函数为例。
// Table of registered protocol operation sets, one slot per address family; cleared by sock_init() and
// filled by sock_register(). Per the original note, the entry is later chosen from the protocol family
// passed in from user space -- that lookup code is not part of this excerpt.
static struct proto_ops *pops[NPROTO];
/*
 * Called by a protocol handler that wants to advertise its address family
 * and have it linked into the SOCKET module.
 *
 * Stores ops in the first free slot of pops[] and records family in
 * ops->family. The search and the store run with interrupts disabled
 * (cli/sti).
 *
 * Returns the index of the slot that was used, or -ENOMEM when every
 * slot is already taken.
 */
int sock_register(int family, struct proto_ops *ops)
{
	int slot;
	int result = -ENOMEM;

	cli();
	for (slot = 0; slot < NPROTO; slot++) {
		if (pops[slot] == NULL) {
			/* First empty slot: register here. */
			pops[slot] = ops;
			pops[slot]->family = family;
			result = slot;
			break;
		}
	}
	sti();

	return result;
}
// Hash table of the protocols that sit on top of IP. inet_add_protocol() indexes it with
// protocol & (MAX_INET_PROTOS - 1) and chains entries through ->next. Per the original note, an
// incoming IP packet is dispatched through this table using the protocol field of its IP header.
struct inet_protocol *inet_protos[MAX_INET_PROTOS] = {
NULL
};
/*
 * Register an IP upper-layer protocol.
 *
 * prot is pushed onto the head of its hash chain in inet_protos[], where
 * the bucket is prot->protocol & (MAX_INET_PROTOS - 1). prot->copy is set
 * to 1 when an entry with the same protocol number is already present
 * further down the chain, and to 0 otherwise.
 *
 * The scan advances with p2->next. Re-reading prot->next on every
 * iteration would spin forever as soon as the first chained entry carries
 * a different protocol number.
 */
void inet_add_protocol(struct inet_protocol *prot)
{
	unsigned char hash;
	struct inet_protocol *p2;

	hash = prot->protocol & (MAX_INET_PROTOS - 1);
	prot->next = inet_protos[hash];		/* head insertion into the hash chain */
	inet_protos[hash] = prot;
	prot->copy = 0;

	/* Set the copy bit if we need to. */
	for (p2 = (struct inet_protocol *) prot->next;
	     p2 != NULL;
	     p2 = (struct inet_protocol *) p2->next) {
		if (p2->protocol == prot->protocol) {
			prot->copy = 1;
			break;
		}
	}
}
/*
 * Initialisation hook of the INET family, reached through the protocols[]
 * table. It registers inet_proto_ops with the socket layer, seeds
 * seq_offset, empties the TCP/UDP/RAW socket tables and their usage
 * counters, registers every protocol on the inet_protocol_base list and
 * finally initialises ARP and IP. The pro argument is not used.
 */
void inet_proto_init(struct net_proto *pro)
{
	struct inet_protocol *cur;
	struct inet_protocol *following;
	int slot;

	/* Tell SOCKET that we are alive... */
	(void) sock_register(inet_proto_ops.family, &inet_proto_ops);

	seq_offset = CURRENT_TIME*250;

	/* Empty socket tables for all three transports. */
	for (slot = 0; slot < SOCK_ARRAY_SIZE; slot++) {
		tcp_prot.sock_array[slot] = NULL;
		udp_prot.sock_array[slot] = NULL;
		raw_prot.sock_array[slot] = NULL;
	}

	tcp_prot.inuse = 0;
	tcp_prot.highestinuse = 0;

	udp_prot.inuse = 0;
	udp_prot.highestinuse = 0;

	raw_prot.inuse = 0;
	raw_prot.highestinuse = 0;

	/* Add all the protocols. */
	printk("IP Protocols: ");
	cur = inet_protocol_base;
	while (cur != NULL) {
		/*
		 * Fetch the successor first: inet_add_protocol() rewrites
		 * cur->next when it links cur into its hash chain.
		 */
		following = (struct inet_protocol *) cur->next;
		inet_add_protocol(cur);
		printk("%s%s", cur->name, following ? ", " : "\n");
		cur = following;
	}

	/* Set the ARP module up */
	arp_init();

	/* Set the IP module up */
	ip_init();
}
注册完各种协议后,最后执行arp_init和ip_init函数,这两个函数的功能类似,下面只讲arp_init函数的代码。
/*
 * Packet-type entry through which ARP frames reach arp_rcv. The leading 0
 * is a placeholder: arp_init() stores htons(ETH_P_ARP) in .type before
 * registering the entry. The initialiser is positional; the two trailing
 * NULLs presumably cover a private data pointer and the list link --
 * TODO confirm against struct packet_type.
 */
static struct packet_type arp_packet_type =
{
0,
NULL, /* All devices */
arp_rcv,
NULL,
NULL
};
/*
 * Notifier block handed to register_netdevice_notifier() in arp_init();
 * arp_device_event is the callback.
 */
static struct notifier_block arp_dev_notifier={
arp_device_event,
NULL,
0
};
/*
 * Timer started by arp_init(). Positional initialiser carrying
 * ARP_CHECK_INTERVAL and the arp_check_expire callback; presumably the
 * expiry interval and the function run on expiry -- TODO confirm against
 * struct timer_list.
 */
static struct timer_list arp_timer = { NULL, NULL, ARP_CHECK_INTERVAL, 0L, &arp_check_expire };
/*
 * Set the ARP module up: register the ARP packet type, start the periodic
 * timer and subscribe to device notifications.
 */
void arp_init (void)
{
/* Register the packet type */
// The type must be filled in before registration because dev_add_pack() reads pt->type.
// The entry goes onto the ptype_base list, which net_bh() walks to dispatch received frames.
arp_packet_type.type=htons(ETH_P_ARP);
dev_add_pack(&arp_packet_type);
/* Start with the regular checks for expired arp entries. */
// arp_timer carries arp_check_expire as its callback.
add_timer(&arp_timer);
/* Register for device down reports */
// arp_dev_notifier carries arp_device_event as the callback for device state changes.
register_netdevice_notifier(&arp_dev_notifier);
}
下面看一下dev_add_pack的代码和mac层收到数据包时的处理代码。
/*
 * Register a packet-type handler by pushing pt onto the head of the
 * ptype_base list, the list that holds the upper-layer protocol handlers.
 * Handlers registered for ETH_P_ALL are additionally counted in dev_nit.
 */
void dev_add_pack(struct packet_type *pt)
{
	if (pt->type == htons(ETH_P_ALL)) {
		dev_nit++;
	}

	/* Head insertion. */
	pt->next = ptype_base;
	ptype_base = pt;
}
/*
* When we are called the queue is ready to grab, the interrupts are
* on and hardware can interrupt and queue to the receive queue a we
* run with no problems.
* This is run as a bottom half after an interrupt handler that does
* mark_bh(NET_BH);
*
* Drains the backlog queue. For every dequeued skb it steps over the
* link-layer header, asks the device's type_trans hook for the protocol
* ID and hands the buffer to every matching entry on the ptype_base list.
* The tmp argument is not used.
*/
void net_bh(void *tmp)
{
struct sk_buff *skb;
struct packet_type *ptype;
struct packet_type *pt_prev;
unsigned short type;
/*
* Atomically check and mark our BUSY state.
* A non-zero result means the handler is already running, so bail out.
*/
if (set_bit(1, (void*)&in_bh))
return;
/*
* Can we send anything now? We want to clear the
* decks for any more sends that get done as we
* process the input.
*/
dev_transmit();
/*
* Any data left to process. This may occur because a
* mark_bh() is done after we empty the queue including
* that from the device which does a mark_bh() just after
*/
// Interrupts are disabled around each dequeue and re-enabled while the packet is processed.
cli();
/*
* While the queue is not empty
*/
while((skb=skb_dequeue(&backlog))!=NULL)
{
/*
* We have a packet. Therefore the queue has shrunk
*/
backlog_size--;
sti();
/*
* Bump the pointer to the next structure.
* This assumes that the basic 'skb' pointer points to
* the MAC header, if any (as indicated by its "length"
* field). Take care now!
*/
// Point h.raw just past the link-layer header (at the IP header for IP traffic).
skb->h.raw = skb->data + skb->dev->hard_header_len;
// Take the link-layer header length off the remaining length.
skb->len -= skb->dev->hard_header_len;
/*
* Fetch the packet protocol ID. This is also quite ugly, as
* it depends on the protocol driver (the interface itself) to
* know what the type is, or where to get it from. The Ethernet
* interfaces fetch the ID from the two bytes in the Ethernet MAC
* header (the h_proto field in struct ethhdr), but other drivers
* may either use the ethernet ID's or extra ones that do not
* clash (eg ETH_P_AX25). We could set this before we queue the
* frame. In fact I may change this when I have time.
*/
// Ask the device which upper-layer protocol this frame carries.
type = skb->dev->type_trans(skb, skb->dev);
/*
* We got a packet ID. Now loop over the "known protocols"
* table (which is actually a linked list, but this will
* change soon if I get my way- FvK), and forward the packet
* to anyone who wants it.
*
* [FvK didn't get his way but he is right this ought to be
* hashed so we typically get a single hit. The speed cost
* here is minimal but no doubt adds up at the 4,000+ pkts/second
* rate we can hit flat out]
*/
pt_prev = NULL;
for (ptype = ptype_base; ptype != NULL; ptype = ptype->next)
{
// Match on protocol type (or an ETH_P_ALL handler) and on device (a NULL dev matches every device).
if ((ptype->type == type || ptype->type == htons(ETH_P_ALL)) && (!ptype->dev || ptype->dev==skb->dev))
{
/*
* We already have a match queued. Deliver
* to it and then remember the new match
*/
// A previous match is pending: it receives its own clone of the skb.
if(pt_prev)
{
struct sk_buff *skb2;
skb2=skb_clone(skb, GFP_ATOMIC);
/*
* Kick the protocol handler. This should be fast
* and efficient code.
*/
// If the clone could not be allocated, that handler is skipped for this packet.
if(skb2)
pt_prev->func(skb2, skb->dev, pt_prev);
}
/* Remember the current last to do */
// Delivery to the most recent match is deferred so that the final one can take the original skb.
pt_prev=ptype;
}
} /* End of protocol list loop */
/*
* Is there a last item to send to ?
*/
// At least one match: the last matching handler gets the original skb (no clone).
if(pt_prev)
pt_prev->func(skb, skb->dev, pt_prev);
/*
* Has an unknown packet has been received ?
*/
// No handler matched: release the buffer.
else
kfree_skb(skb, FREE_WRITE);
/*
* Again, see if we can transmit anything now.
* [Ought to take this out judging by tests it slows
* us down not speeds us up]
*/
dev_transmit();
// Disable interrupts again before the next dequeue.
cli();
} /* End of queue loop */
/*
* We have emptied the queue
*/
// Clear the busy marker while interrupts are still disabled, then re-enable them.
in_bh = 0;
sti();
/*
* One last output flush.
*/
dev_transmit();
}