报文是如何从网卡传递到相应协议的
------------------------------------
先来看一看网络部分是如何被初始化的. 下面是函数被调用的过程:
start_kernel -> init -> do_basic_setup -> sock_init -> proto_init ->
inet_proto_init -> ip_init -> dev_add_pack
下面的结构定义了网络协议的初始化入口:
/* Registration record for a network protocol family.  The kernel walks
 * the protocols[] table at boot (from sock_init) and calls each entry's
 * init_func to bootstrap that protocol. */
struct net_proto
{
const char *name; /* Protocol name */
void (*init_func)(struct net_proto *); /* Bootstrap, e.g. inet_proto_init for IP */
} protocols[];
每一个协议提供了一个自己的init_func. 如IP提供了ip_init.
dev_add_pack完成了实际的协议添加过程. 系统维护了两个协议表. 一个是单向链表,
另一个是hash表(使用了桶形的hash表).
每一个协议用一个struct packet_type来描述, 其中的func是其入口函数. 当系统从
网卡读到一个报文就会调用相应协议的func来完成实际的处理工作.
/* Describes one registered packet handler.  func is the entry point the
 * bottom half (net_bh) invokes for every matching received packet; dev
 * restricts the handler to one device when non-NULL. */
struct packet_type
{
unsigned short type; /* This is really htons(ether_type). */
struct device *dev; /* NULL is wildcarded here */
int (*func) (struct sk_buff *, struct device *,
struct packet_type *);
void *data; /* Private to the packet type */
struct packet_type *next;
};
// Hash table of protocol handlers (bucketed; keyed by low 4 bits of type)
struct packet_type *ptype_base[16]; /* Hashed types */
// Head of the singly linked list of ETH_P_ALL taps
struct packet_type *ptype_all = NULL; /* Taps */
dev_add_pack完成的工作其实很简单, 它将一个struct packet_type指针加入到相应
的链表中. 请看源码:
/*
 * Register a protocol handler.  An ETH_P_ALL handler (a "tap") is
 * prepended to the ptype_all list and will see every packet; any other
 * type is hashed into ptype_base[] by the low four bits of the
 * host-order protocol type.
 */
void dev_add_pack(struct packet_type *pt)
{
	int hash;

	/* pt->type decides which list the handler joins */
	if (pt->type == htons(ETH_P_ALL))
	{
		netdev_nit++;
		/* prepend to the singly linked tap list */
		pt->next = ptype_all;
		ptype_all = pt;
	}
	else
	{
		/* insert into the hash table; the hash function is simply
		 * the low 4 bits of the protocol type */
		hash = ntohs(pt->type) & 15;
		pt->next = ptype_base[hash];
		ptype_base[hash] = pt;
	}
}
ip_init使用dev_add_pack完成了实际的添加过程.
下面是核心数据结构sk_buff
/* The core packet buffer.  One sk_buff carries a single packet through
 * the stack; the h / nh / mac unions point at the transport, network
 * and link-layer headers inside the data area (head..end). */
struct sk_buff {
struct sk_buff * next; /* Next buffer in list */
struct sk_buff * prev; /* Previous buffer in list */
struct sk_buff_head * list; /* List we are on */
struct sock *sk; /* Socket we are owned by */
struct timeval stamp; /* Time we arrived */
struct device *dev; /* Device we arrived on/are leaving by */
/* Transport layer header */
union
{
struct tcphdr *th;
struct udphdr *uh;
struct icmphdr *icmph;
struct igmphdr *igmph;
struct iphdr *ipiph;
struct spxhdr *spxh;
unsigned char *raw;
} h;
/* Network layer header */
union
{
struct iphdr *iph;
struct ipv6hdr *ipv6h;
struct arphdr *arph;
struct ipxhdr *ipxh;
unsigned char *raw;
} nh;
/* Link layer header */
union
{
struct ethhdr *ethernet;
unsigned char *raw;
} mac;
struct dst_entry *dst;
char cb[48];
unsigned int len; /* Length of actual data */
unsigned int csum; /* Checksum */
volatile char used; /* Data moved to user and not MSG_PEEK */
unsigned char is_clone, /* We are a clone */
cloned, /* head may be cloned(check refcnt to be sure)*/
pkt_type, /* Packet class */
pkt_bridged, /* Tracker for bridging */
ip_summed; /* Driver fed us an IP checksum */
__u32 priority; /* Packet queueing priority */
atomic_t users; /* User count - see datagram.c,tcp.c */
unsigned short protocol; /* Packet protocol from driver. */
unsigned short security; /* Security level of packet */
unsigned int truesize; /* Buffer size */
unsigned char *head; /* Head of buffer */
unsigned char *data; /* Data head pointer */
unsigned char *tail; /* Tail pointer */
unsigned char *end; /* End pointer */
void (*destructor)(struct sk_buff *); /* Destruct function */
#ifdef CONFIG_IP_FIREWALL
__u32 fwmark; /* Label made by fwchains, used by pktsched */
#endif
#if defined(CONFIG_SHAPER) || defined(CONFIG_SHAPER_MODULE)
__u32 shapelatency; /* Latency on frame */
__u32 shapeclock; /* Time it should go out */
__u32 shapelen; /* Frame length in clocks */
__u32 shapestamp; /* Stamp for shaper */
__u16 shapepend; /* Pending */
#endif
#if defined(CONFIG_HIPPI)
union{
__u32 ifield;
} private;
#endif
};
* arch/i386/kernel/irq.c
/* Top-level hardware interrupt dispatcher: account the IRQ, invoke its
 * registered handler, then run any pending bottom halves before return. */
void do_IRQ(unsigned int irq, struct pt_regs *regs)
{
int cpu = smp_processor_id();
kstat.irqs[cpu][irq]++;
irq_desc[irq].handler->handle(irq, regs);
/*
 * This should be conditional: we should really get
 * a return code from the irq handler to tell us
 * whether the handler wants us to do software bottom
 * half handling or not..
 */
// Bottom halves are run on every interrupt exit here; this is not yet
// the conditional deferral the original "bottom half" design intended.
if (1) {
if (bh_active & bh_mask)
do_bottom_half();
}
__sti();/*VY*/
}
* net/core/dev.c
/*
 * Device drivers call our routines to queue packets here. We empty the
 * queue in the bottom half handler.
 */
// All received packets are first queued here; net_bh later hands them
// to the proper network-layer module, e.g. ip_input.
static struct sk_buff_head backlog;
/*
 * Receive a packet from a device driver and queue it for the upper
 * (protocol) levels. It always succeeds.
 *
 * Called by network card drivers: the new skb is simply appended to
 * the backlog queue and NET_BH is marked so net_bh will process it.
 * Under congestion (queue over netdev_max_backlog, or still draining
 * after an overflow) the packet is counted as dropped and freed.
 */
void netif_rx(struct sk_buff *skb)
{
	/* Stamp arrival time unless the driver already did */
	if (skb->stamp.tv_sec == 0)
		get_fast_time(&skb->stamp);

	/* Queue overflowed: enter drop mode and discard this packet */
	if (backlog.qlen > netdev_max_backlog) {
		netdev_dropping = 1;
		atomic_inc(&netdev_rx_dropped);
		kfree_skb(skb);
		return;
	}

	if (backlog.qlen == 0) {
		/* Queue fully drained: leave drop mode */
		netdev_dropping = 0;
	} else if (netdev_dropping != 0) {
		/* Still draining after an overflow: keep discarding */
		atomic_inc(&netdev_rx_dropped);
		kfree_skb(skb);
		return;
	}

	skb_queue_tail(&backlog, skb);
	mark_bh(NET_BH);
}
/*
* 网络的底半处理程序, 将报文由backlog中取出, 发给相应的网络层模块,
* 如 ip_input. 有一点请大家注意一个报文可以被多个协议层所处理. 这
* 一点很重要.
*/
void net_bh(void)
{
struct packet_type *ptype;
struct packet_type *pt_prev;
unsigned short type;
unsigned long start_time = jiffies;
NET_PROFILE_ENTER(net_bh);
/*
* Can we send anything now? We want to clear the
* decks for any more sends that get done as we
* process the input. This also minimises the
* latency on a transmit interrupt bh.
*/
if (qdisc_head.forw != &qdisc_head)
qdisc_run_queues();
/*
* Any data left to process. This may occur because a
* mark_bh() is done after we empty the queue including
* that from the device which does a mark_bh() just after
*/
/*
* While the queue is not empty..
*
* Note that the queue never shrinks due to
* an interrupt, so we can do this test without
* disabling interrupts.
*/
while (!skb_queue_empty(&backlog))
{
struct sk_buff * skb;
/* Give chance to other bottom halves to run */
if (jiffies - start_time > 1)
goto net_bh_break;
/* We have a packet. Therefore the queue has shrunk */
skb = skb_dequeue(&backlog); //从backlog中取出一个包
/*
* Bump the pointer to the next structure.
*
* On entry to the protocol layer. skb->data and
* skb->nh.raw point to the MAC and encapsulated data
*/
/* XXX until we figure out every place to modify.. */
skb->h.raw = skb->nh.raw = skb->data;
if (skb->mac.raw < skb->head || skb->mac.raw > skb->data) {
printk(KERN_CRIT "%s: wrong mac.raw ptr, proto=%04x
",
skb->dev->name, skb->protocol);
kfree_skb(skb);
continue;
}
/* Fetch the packet protocol ID. */
type = skb->protocol; //这里是指网络层协议
/*
* We got a packet ID. Now loop over the "known protocols"
* list. There are two lists. The ptype_all list of taps (normally empty)
* and the main protocol list which is hashed perfectly for normal protocols.
*/
pt_prev = NULL;
for (ptype = ptype_all; ptype!=NULL; ptype=ptype->next)
{
if (!ptype->dev || ptype->dev == skb->dev) {
if(pt_prev)
{
struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC);
if(skb2)
pt_prev->func(skb2,skb->dev, pt_prev);
}
pt_prev=ptype;
}
}
for (ptype = ptype_base[ntohs(type)&15]; ptype != NULL; ptype = ptype->next)
{
if (ptype->type == type && (!ptype->dev || ptype->dev==skb->dev))
{
/*
* We already have a match queued. Deliver
* to it and then remember the new match
*/
if(pt_prev)
{
struct sk_buff *skb2;
skb2=skb_clone(skb, GFP_ATOMIC);
/*
* Kick the protocol handler. This should be fast
* and efficient code.
*/
if(skb2)
pt_prev->func(skb2, skb->dev, pt_prev);
}
/* Remember the current last to do */
pt_prev=ptype;
}
} /* End of protocol list loop */
/* Is there a last item to send to ? */
if(pt_prev)
pt_prev->func(skb, skb->dev, pt_prev);
/* Has an unknown packet has been received ? */
else {
kfree_skb(skb);
}
} /* End of queue loop */
/* We have emptied the queue */
/* One last output flush. */
if (qdisc_head.forw != &qdisc_head)
qdisc_run_queues();
netdev_dropping = 0;
NET_PROFILE_LEAVE(net_bh);
return;
net_bh_break:
mark_bh(NET_BH);
NET_PROFILE_LEAVE(net_bh);
return;
}
为了理清头绪, 我重新描述一下上面的过程:
网卡驱动程序调用netif_rx将新收到的报文存在backlog队列中.
在底半处理中, net_bh调用相应的协议模块来处理报文. 而目前
linux的实现中, 每次中断都会调用底半处理.
Linux中报文是如何从网卡传递到相应协议的
最新推荐文章于 2021-04-30 07:28:58 发布