Linux TCP/IP protocol stack, kernel version 1.0
转自:http://hi.baidu.com/linux%5Fkernel/blog/item/a2de38124527a151f819b882.html
数据的传递
2006年08月11日 星期五 下午 02:37
由于网卡接收数据的工作流程,及Linux内核的数据接收的处理都有多种方式,所以,网络上的数据包从到达网卡直至被TCP/IP协议栈处理的流程也会有多种方式。在这里,我们并不关心这些不同流程的细节及其差异,我们只给出一个一般性的描述,而把重点放在my_inet模块的数据接收的实现上。
网卡接收到数据后,先申请sk_buff,把数据DMA到sk_buff,然后调用netif_rx。netif_rx只是简单地把数据包放到一个由结构体struct softnet_data描述的输入队列中便返回。随后,在软中断(softirq)处理中,会调用函数net_rx_action,接着调用process_backlog,最后到达netif_receive_skb。
netif_receive_skb做了什么?不急,在这之前,先了解一些东西,以扫除理解上的障碍。
结构体struct packet_type用于在协议栈与网络设备之间构筑一个数据传递的桥梁:
/*
 * Describes one protocol handler registered with the network core: it ties
 * a protocol type (and optionally a device) to the function that receives
 * matching packets.
 */
struct packet_type{
/* Protocol type in network byte order (e.g. ETH_P_IP, ETH_P_ARP, ETH_P_ALL). */
__be16 type;
/* Device the packets are expected to come from; NULL means any interface. */
struct net_device *dev;
/* Handler called for matching packets; the role of each argument is not shown in this excerpt. */
int (*func) (struct sk_buff *, struct net_device *,
struct packet_type *, struct net_device *);
/* Opaque private pointer; presumably used by AF_PACKET (from the name) -- TODO confirm. */
void *af_packet_priv;
/* List linkage: entries are chained into ptype_all or one ptype_base[] hash bucket. */
struct list_head list;
};
type是一个网络字节序的协议类型,在PF_INET域常用的是两种:ETH_P_IP和ETH_P_ARP。dev是期望数据包来自该设备,一般令 dev等于NULL,表示并不关心数据包来自哪个网络接口。func是数据包的处理函数。
有两个全局的变量:
static struct list_head ptype_base[16];
static struct list_head ptype_all;
对于type值为ETH_P_ALL的packet_type,如果要注册到内核中,会被放在ptype_all中构成一个链表;其它的,则以type为哈希键,在ptype_base数组中找到一项,然后加入该项的链表。
在my_inet模块中,我们定义了两个packet_type结构体:myip_packet_type和myarp_packet_type,表示要接收ip包和arp包。其接收函数(func成员)分别为myip_rcv和myarp_rcv。我们把它们注册到全局变量ptype_base中。
我们再来看netif_receive_skb函数。抛开其中的一些细节,它主要做两件事情:第一件是遍历ptype_all,只要设备(dev)匹配,就调用其func;第二件是再查看ptype_base,找到哈希项,然后遍历链表,调用所有设备匹配项的func。
这样一来,我们的myip_rcv和myarp_rcv就会被调用到,并且会传入数据包(skb)。但同时,PF_INET域的ip_rcv和arp_rcv也会被调用到,这是个问题,在接下来的开发过程中需要注意。
/**************************************************************************/
以下代码摘自linux kernel version 1.0!注意:上文的描述针对的是较新的内核(使用struct net_device和netif_receive_skb),而1.0内核的代码中对应的设备结构体是struct device。
/*
 * Network buffer descriptor as quoted from Linux kernel 1.0.
 * One sk_buff carries a packet plus the bookkeeping needed to queue it,
 * attribute it to a socket/device, and locate its protocol header.
 */
struct sk_buff {
/* NOTE(review): presumably a debugging marker value -- confirm against skbuff.c. */
unsigned long magic_debug_cookie;
/* Links to neighbouring buffers; the purpose of link3 is not shown in this excerpt. */
struct sk_buff *volatile next;
struct sk_buff *volatile prev;
struct sk_buff *volatile link3;
/* NOTE(review): presumably points at the head of the list this buffer is queued on -- confirm. */
struct sk_buff *volatile* list;
/* Associated socket (presumably the owner -- confirm). */
struct sock *sk;
volatile unsigned long when; /* used to compute rtt's */
/* Associated network device. */
struct device *dev;
/* NOTE(review): mem_addr/mem_len presumably describe the underlying allocation -- confirm. */
void *mem_addr;
/*
 * Header pointer: a single pointer-sized slot that can be read as a pointer
 * to any of the listed header types, as raw bytes, or as an integer (seq).
 */
union {
struct tcphdr *th;
struct ethhdr *eth;
struct iphdr *iph;
struct udphdr *uh;
struct arphdr *arp;
unsigned char *raw;
unsigned long seq;
#ifdef CONFIG_IPX
ipx_packet *ipx;
#endif
} h;
struct iphdr *ip_hdr; /* For IPPROTO_RAW */
unsigned long mem_len;
/* Length fields; exact units/semantics are not shown in this excerpt. */
unsigned long len;
unsigned long fraglen;
struct sk_buff *fraglist; /* Fragment list */
unsigned long truesize;
/* NOTE(review): presumably source and destination protocol addresses -- confirm. */
unsigned long saddr;
unsigned long daddr;
int magic;
/* Per-buffer state flags. */
volatile char acked,
used,
free,
arp;
unsigned char tries,lock; /* Lock is now unused */
unsigned short users; /* User count - see datagram.c (and soon seqpacket.c/stream.c) */
/*
 * Zero-length trailing arrays (GNU C extension): they take no space in the
 * struct itself; presumably data[] marks where the packet bytes begin.
 */
unsigned long padding[0];
unsigned char data[0];
};
/*
* Another mistake.
* This points to the next device in the "dev" chain. It will
* be moved to the "invisible" part of the structure as soon as
* it has been cleaned up. -FvK
*/
struct device *next;
/* The device initialization function. Called only once. */
int (*init)(struct device *dev);
/* Some hardware also needs these fields, but they are not part of the
usual set specified in Space.c. */
unsigned char if_port; /* Selectable AUI, TP,..*/
unsigned char dma; /* DMA channel */
struct enet_statistics* (*get_stats)(struct device *dev);
/*
* This marks the end of the "visible" part of the structure. All
* fields hereafter are internal to the system, and may change at
* will (read: may be cleaned up at will).
*/
/* These may be needed for future network-power-down code. */
unsigned long trans_start; /* Time (in jiffies) of last Tx */
unsigned long last_rx; /* Time of last Rx */
unsigned short flags; /* interface flags (a la BSD) */
unsigned short family; /* address family ID (AF_INET) */
unsigned short metric; /* routing metric (not used) */
unsigned short mtu; /* interface MTU value */
unsigned short type; /* interface hardware type */
unsigned short hard_header_len; /* hardware hdr length */
void *priv; /* pointer to private data */
/* Interface address info. */
unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */
unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */
unsigned char addr_len; /* hardware address length */
unsigned long pa_addr; /* protocol address */
unsigned long pa_brdaddr; /* protocol broadcast addr */
unsigned long pa_dstaddr; /* protocol P-P other side addr */
unsigned long pa_mask; /* protocol netmask */
unsigned short pa_alen; /* protocol address length */
/* Pointer to the interface buffers. */
struct sk_buff *volatile buffs[DEV_NUMBUFFS];
/* Pointers to interface service routines. */
int (*open)(struct device *dev);
int (*stop)(struct device *dev);
int (*hard_start_xmit) (struct sk_buff *skb,
struct device *dev);
int (*hard_header) (unsigned char *buff,
struct device *dev,
unsigned short type,
unsigned long daddr,
unsigned long saddr,
unsigned len);
void (*add_arp) (unsigned long addr,
struct sk_buff *skb,
struct device *dev);
void (*queue_xmit)(struct sk_buff *skb,
struct device *dev, int pri);
int (*rebuild_header)(void *eth, struct device *dev);
unsigned short (*type_trans) (struct sk_buff *skb,
struct device *dev);
#define HAVE_MULTICAST
void (*set_multicast_list)(struct device *dev,
int num_addrs, void *addrs);
#define HAVE_SET_MAC_ADDR
int (*set_mac_address)(struct device *dev, void *addr);
};
struct device中有sk_buff指针数组成员buffs(上面摘录的struct device定义缺少开头的“struct device {”一行,可能还缺少最前面的若干成员)。网卡驱动开放的接口带有struct device形参,例如8390.c:
extern int ethdev_init(struct device *dev);
extern void NS8390_init(struct device *dev, int startp);
extern int ei_open(struct device *dev);
extern void ei_interrupt(int reg_ptr);
同时要理解网络数据包从网卡到达协议栈的过程,要看dev.c这个文件。
转自:http://hi.baidu.com/linux%5Fkernel/blog/item/a2de38124527a151f819b882.html
数据的传递
2006年08月11日 星期五 下午 02:37
由于网卡接收数据的工作流程,及Linux内核的数据接收的处理都有多种方式,所以,网络上的数据包从到达网卡直至被TCP/IP协议栈处理的流程也会有多种方式。在这里,我们并不关心这些不同流程的细节及其差异,我们只给出一个一般性的描述,而把重点放在my_inet模块的数据接收的实现上。
网卡接收到数据后,先申请sk_buff,把数据DMA到sk_buff,然后调用netif_rx。netif_rx只是简单地把数据包放到一个由结构体struct softnet_data描述的输入队列中便返回。随后,在软中断(softirq)处理中,会调用函数net_rx_action,接着调用process_backlog,最后到达netif_receive_skb。
netif_receive_skb做了什么?不急,在这之前,先了解一些东西,以扫除理解上的障碍。
结构体struct packet_type用于在协议栈与网络设备之间构筑一个数据传递的桥梁:
/*
 * Describes one protocol handler registered with the network core: it ties
 * a protocol type (and optionally a device) to the function that receives
 * matching packets.
 */
struct packet_type{
/* Protocol type in network byte order (e.g. ETH_P_IP, ETH_P_ARP, ETH_P_ALL). */
__be16 type;
/* Device the packets are expected to come from; NULL means any interface. */
struct net_device *dev;
/* Handler called for matching packets; the role of each argument is not shown in this excerpt. */
int (*func) (struct sk_buff *, struct net_device *,
struct packet_type *, struct net_device *);
/* Opaque private pointer; presumably used by AF_PACKET (from the name) -- TODO confirm. */
void *af_packet_priv;
/* List linkage: entries are chained into ptype_all or one ptype_base[] hash bucket. */
struct list_head list;
};
type是一个网络字节序的协议类型,在PF_INET域常用的是两种:ETH_P_IP和ETH_P_ARP。dev是期望数据包来自该设备,一般令 dev等于NULL,表示并不关心数据包来自哪个网络接口。func是数据包的处理函数。
有两个全局的变量:
static struct list_head ptype_base[16];
static struct list_head ptype_all;
对于type值为ETH_P_ALL的packet_type,如果要注册到内核中,会被放在ptype_all中构成一个链表;其它的,则以type为哈希键,在ptype_base数组中找到一项,然后加入该项的链表。
在my_inet模块中,我们定义了两个packet_type结构体:myip_packet_type和myarp_packet_type,表示要接收ip包和arp包。其接收函数(func成员)分别为myip_rcv和myarp_rcv。我们把它们注册到全局变量ptype_base中。
我们再来看netif_receive_skb函数。抛开其中的一些细节,它主要做两件事情:第一件是遍历ptype_all,只要设备(dev)匹配,就调用其func;第二件是再查看ptype_base,找到哈希项,然后遍历链表,调用所有设备匹配项的func。
这样一来,我们的myip_rcv和myarp_rcv就会被调用到,并且会传入数据包(skb)。但同时,PF_INET域的ip_rcv和arp_rcv也会被调用到,这是个问题,在接下来的开发过程中需要注意。
/**************************************************************************/
以下代码摘自linux kernel version 1.0!注意:上文的描述针对的是较新的内核(使用struct net_device和netif_receive_skb),而1.0内核的代码中对应的设备结构体是struct device。
/*
 * Network buffer descriptor as quoted from Linux kernel 1.0.
 * One sk_buff carries a packet plus the bookkeeping needed to queue it,
 * attribute it to a socket/device, and locate its protocol header.
 */
struct sk_buff {
/* NOTE(review): presumably a debugging marker value -- confirm against skbuff.c. */
unsigned long magic_debug_cookie;
/* Links to neighbouring buffers; the purpose of link3 is not shown in this excerpt. */
struct sk_buff *volatile next;
struct sk_buff *volatile prev;
struct sk_buff *volatile link3;
/* NOTE(review): presumably points at the head of the list this buffer is queued on -- confirm. */
struct sk_buff *volatile* list;
/* Associated socket (presumably the owner -- confirm). */
struct sock *sk;
volatile unsigned long when; /* used to compute rtt's */
/* Associated network device. */
struct device *dev;
/* NOTE(review): mem_addr/mem_len presumably describe the underlying allocation -- confirm. */
void *mem_addr;
/*
 * Header pointer: a single pointer-sized slot that can be read as a pointer
 * to any of the listed header types, as raw bytes, or as an integer (seq).
 */
union {
struct tcphdr *th;
struct ethhdr *eth;
struct iphdr *iph;
struct udphdr *uh;
struct arphdr *arp;
unsigned char *raw;
unsigned long seq;
#ifdef CONFIG_IPX
ipx_packet *ipx;
#endif
} h;
struct iphdr *ip_hdr; /* For IPPROTO_RAW */
unsigned long mem_len;
/* Length fields; exact units/semantics are not shown in this excerpt. */
unsigned long len;
unsigned long fraglen;
struct sk_buff *fraglist; /* Fragment list */
unsigned long truesize;
/* NOTE(review): presumably source and destination protocol addresses -- confirm. */
unsigned long saddr;
unsigned long daddr;
int magic;
/* Per-buffer state flags. */
volatile char acked,
used,
free,
arp;
unsigned char tries,lock; /* Lock is now unused */
unsigned short users; /* User count - see datagram.c (and soon seqpacket.c/stream.c) */
/*
 * Zero-length trailing arrays (GNU C extension): they take no space in the
 * struct itself; presumably data[] marks where the packet bytes begin.
 */
unsigned long padding[0];
unsigned char data[0];
};
/*
* Another mistake.
* This points to the next device in the "dev" chain. It will
* be moved to the "invisible" part of the structure as soon as
* it has been cleaned up. -FvK
*/
struct device *next;
/* The device initialization function. Called only once. */
int (*init)(struct device *dev);
/* Some hardware also needs these fields, but they are not part of the
usual set specified in Space.c. */
unsigned char if_port; /* Selectable AUI, TP,..*/
unsigned char dma; /* DMA channel */
struct enet_statistics* (*get_stats)(struct device *dev);
/*
* This marks the end of the "visible" part of the structure. All
* fields hereafter are internal to the system, and may change at
* will (read: may be cleaned up at will).
*/
/* These may be needed for future network-power-down code. */
unsigned long trans_start; /* Time (in jiffies) of last Tx */
unsigned long last_rx; /* Time of last Rx */
unsigned short flags; /* interface flags (a la BSD) */
unsigned short family; /* address family ID (AF_INET) */
unsigned short metric; /* routing metric (not used) */
unsigned short mtu; /* interface MTU value */
unsigned short type; /* interface hardware type */
unsigned short hard_header_len; /* hardware hdr length */
void *priv; /* pointer to private data */
/* Interface address info. */
unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */
unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */
unsigned char addr_len; /* hardware address length */
unsigned long pa_addr; /* protocol address */
unsigned long pa_brdaddr; /* protocol broadcast addr */
unsigned long pa_dstaddr; /* protocol P-P other side addr */
unsigned long pa_mask; /* protocol netmask */
unsigned short pa_alen; /* protocol address length */
/* Pointer to the interface buffers. */
struct sk_buff *volatile buffs[DEV_NUMBUFFS];
/* Pointers to interface service routines. */
int (*open)(struct device *dev);
int (*stop)(struct device *dev);
int (*hard_start_xmit) (struct sk_buff *skb,
struct device *dev);
int (*hard_header) (unsigned char *buff,
struct device *dev,
unsigned short type,
unsigned long daddr,
unsigned long saddr,
unsigned len);
void (*add_arp) (unsigned long addr,
struct sk_buff *skb,
struct device *dev);
void (*queue_xmit)(struct sk_buff *skb,
struct device *dev, int pri);
int (*rebuild_header)(void *eth, struct device *dev);
unsigned short (*type_trans) (struct sk_buff *skb,
struct device *dev);
#define HAVE_MULTICAST
void (*set_multicast_list)(struct device *dev,
int num_addrs, void *addrs);
#define HAVE_SET_MAC_ADDR
int (*set_mac_address)(struct device *dev, void *addr);
};
struct device中有sk_buff指针数组成员buffs(上面摘录的struct device定义缺少开头的“struct device {”一行,可能还缺少最前面的若干成员)。网卡驱动开放的接口带有struct device形参,例如8390.c:
extern int ethdev_init(struct device *dev);
extern void NS8390_init(struct device *dev, int startp);
extern int ei_open(struct device *dev);
extern void ei_interrupt(int reg_ptr);
同时要理解网络数据包从网卡到达协议栈的过程,要看dev.c这个文件。