struct sk_buff与struct socket及struct sock 结构体分析

sk_buff是Linux网络协议栈最重要的数据结构之一,该数据结构贯穿于整个数据包处理的流程。由于协议采用分层结构,上层向下层传递数据时需要增加包头,下层向上层传递数据时又需要去掉包头。sk_buff中保存了L2、L3、L4层的头指针,这样在各层之间传递时只需要修改数据缓冲区中的头部信息,并调整sk_buff中的指针,而不需要拷贝数据,从而大大减少了内存拷贝的开销。

/** 
 *	struct sk_buff - socket buffer
 *	@next: Next buffer in list
 *	@prev: Previous buffer in list
 *	@tstamp: Time we arrived
 *	@sk: Socket we are owned by
 *	@dev: Device we arrived on/are leaving by
 *	@cb: Control buffer. Free for use by every layer. Put private vars here
 *	@_skb_refdst: destination entry (with norefcount bit)
 *	@sp: the security path, used for xfrm
 *	@len: Length of actual data
 *	@data_len: Data length
 *	@mac_len: Length of link layer header
 *	@hdr_len: writable header length of cloned skb
 *	@csum: Checksum (must include start/offset pair)
 *	@csum_start: Offset from skb->head where checksumming should start
 *	@csum_offset: Offset from csum_start where checksum should be stored
 *	@priority: Packet queueing priority
 *	@local_df: allow local fragmentation
 *	@cloned: Head may be cloned (check refcnt to be sure)
 *	@ip_summed: Driver fed us an IP checksum
 *	@nohdr: Payload reference only, must not modify header
 *	@nfctinfo: Relationship of this skb to the connection
 *	@pkt_type: Packet class
 *	@fclone: skbuff clone status
 *	@ipvs_property: skbuff is owned by ipvs
 *	@peeked: this packet has been seen already, so stats have been
 *		done for it, don't do them again
 *	@nf_trace: netfilter packet trace flag
 *	@protocol: Packet protocol from driver
 *	@destructor: Destruct function
 *	@nfct: Associated connection, if any
 *	@nfct_reasm: netfilter conntrack re-assembly pointer
 *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
 *	@skb_iif: ifindex of device we arrived on
 *	@tc_index: Traffic control index
 *	@tc_verd: traffic control verdict
 *	@rxhash: the packet hash computed on receive
 *	@queue_mapping: Queue mapping for multiqueue devices
 *	@ndisc_nodetype: router type (from link layer)
 *	@ooo_okay: allow the mapping of a socket to a queue to be changed
 *	@l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport
 *		ports.
 *	@wifi_acked_valid: wifi_acked was set
 *	@wifi_acked: whether frame was acked on wifi or not
 *	@no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
 *	@dma_cookie: a cookie to one of several possible DMA operations
 *		done by skb DMA functions
 *	@secmark: security marking
 *	@mark: Generic packet mark
 *	@dropcount: total number of sk_receive_queue overflows
 *	@vlan_tci: vlan tag control information
 *	@inner_transport_header: Inner transport layer header (encapsulation)
 *	@inner_network_header: Network layer header (encapsulation)
 *	@transport_header: Transport layer header
 *	@network_header: Network layer header
 *	@mac_header: Link layer header
 *	@tail: Tail pointer
 *	@end: End pointer
 *	@head: Head of buffer
 *	@data: Data head pointer
 *	@truesize: Buffer size
 *	@users: User count - see {datagram,tcp}.c
 */

/*
 * Socket buffer descriptor. The authoritative per-member descriptions are in
 * the kernel-doc block that precedes this definition; the inline notes below
 * restate them next to the members they describe. Member order matters: see
 * the "must be first" and "must be at the end" comments.
 */
struct sk_buff {
	/* These two members must be first. */
	/* Next / previous buffer in the list. */
	struct sk_buff		*next;
	struct sk_buff		*prev;

	/* Time we arrived. */
	ktime_t			tstamp;

	/* Socket we are owned by, and device we arrived on / are leaving by. */
	struct sock		*sk;
	struct net_device	*dev;

	/*
	 * This is the control buffer. It is free to use for every
	 * layer. Please put your private variables there. If you
	 * want to keep them across layers you have to do a skb_clone()
	 * first. This is owned by whoever has the skb queued ATM.
	 */
	char			cb[48] __aligned(8);

	/* Destination entry (with norefcount bit). */
	unsigned long		_skb_refdst;
#ifdef CONFIG_XFRM
	/* The security path, used for xfrm. */
	struct	sec_path	*sp;
#endif
	/* len: length of actual data; data_len: data length. */
	unsigned int		len,
				data_len;
	/*
	 * mac_len: length of link layer header;
	 * hdr_len: writable header length of cloned skb.
	 */
	__u16			mac_len,
				hdr_len;
	/*
	 * csum shares storage with the csum_start/csum_offset pair:
	 * csum_start is the offset from skb->head where checksumming should
	 * start, csum_offset the offset from csum_start where the checksum
	 * should be stored.
	 */
	union {
		__wsum		csum;
		struct {
			__u16	csum_start;
			__u16	csum_offset;
		};
	};
	/* Packet queueing priority. */
	__u32			priority;
	/*
	 * flags1: two bytes of bitfields (each group below sums to 8 bits).
	 *   local_df      - allow local fragmentation
	 *   cloned        - head may be cloned (check refcnt to be sure)
	 *   ip_summed     - driver fed us an IP checksum
	 *   nohdr         - payload reference only, must not modify header
	 *   nfctinfo      - relationship of this skb to the connection
	 *   pkt_type      - packet class
	 *   fclone        - skbuff clone status
	 *   ipvs_property - skbuff is owned by ipvs
	 *   peeked        - packet has been seen already, stats already done
	 *   nf_trace      - netfilter packet trace flag
	 */
	kmemcheck_bitfield_begin(flags1);
	__u8			local_df:1,
				cloned:1,
				ip_summed:2,
				nohdr:1,
				nfctinfo:3;
	__u8			pkt_type:3,
				fclone:2,
				ipvs_property:1,
				peeked:1,
				nf_trace:1;
	kmemcheck_bitfield_end(flags1);
	/* Packet protocol from driver. */
	__be16			protocol;

	/* Destruct function. */
	void			(*destructor)(struct sk_buff *skb);
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	/* Associated connection, if any. */
	struct nf_conntrack	*nfct;
#endif
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
	/* Netfilter conntrack re-assembly pointer. */
	struct sk_buff		*nfct_reasm;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
	/* Saved data about a bridged frame - see br_netfilter.c. */
	struct nf_bridge_info	*nf_bridge;
#endif

	/* ifindex of device we arrived on. */
	int			skb_iif;

	/* The packet hash computed on receive. */
	__u32			rxhash;

	/* VLAN tag control information. */
	__u16			vlan_tci;

#ifdef CONFIG_NET_SCHED
	__u16			tc_index;	/* traffic control index */
#ifdef CONFIG_NET_CLS_ACT
	__u16			tc_verd;	/* traffic control verdict */
#endif
#endif

	/* Queue mapping for multiqueue devices. */
	__u16			queue_mapping;
	/*
	 * flags2 bitfields.
	 *   ndisc_nodetype   - router type (from link layer)
	 *   ooo_okay         - allow the mapping of a socket to a queue to
	 *                      be changed
	 *   l4_rxhash        - rxhash is a canonical 4-tuple hash over
	 *                      transport ports
	 *   wifi_acked_valid - wifi_acked was set
	 *   wifi_acked       - whether frame was acked on wifi or not
	 *   no_fcs           - request NIC to treat last 4 bytes as Ethernet FCS
	 * NOTE(review): pfmemalloc and head_frag have no entry in the
	 * kernel-doc block preceding this struct; their meaning cannot be
	 * confirmed from this file.
	 */
	kmemcheck_bitfield_begin(flags2);
#ifdef CONFIG_IPV6_NDISC_NODETYPE
	__u8			ndisc_nodetype:2;
#endif
	__u8			pfmemalloc:1;
	__u8			ooo_okay:1;
	__u8			l4_rxhash:1;
	__u8			wifi_acked_valid:1;
	__u8			wifi_acked:1;
	__u8			no_fcs:1;
	__u8			head_frag:1;
	/* Encapsulation protocol and NIC drivers should use
	 * this flag to indicate to each other if the skb contains
	 * encapsulated packet or not and maybe use the inner packet
	 * headers if needed
	 */
	__u8			encapsulation:1;
	/* 7/9 bit hole (depending on ndisc_nodetype presence) */
	/*
	 * NOTE(review): the flags2 members above total 8 bits (10 with
	 * ndisc_nodetype); the hole size quoted above may be stale - verify.
	 */
	kmemcheck_bitfield_end(flags2);

#ifdef CONFIG_NET_DMA
	/* Cookie to one of several possible DMA operations done by skb DMA functions. */
	dma_cookie_t		dma_cookie;
#endif
#ifdef CONFIG_NETWORK_SECMARK
	/* Security marking. */
	__u32			secmark;
#endif
	/*
	 * These three share storage.
	 *   mark      - generic packet mark
	 *   dropcount - total number of sk_receive_queue overflows
	 * NOTE(review): reserved_tailroom has no entry in the kernel-doc
	 * block preceding this struct.
	 */
	union {
		__u32		mark;
		__u32		dropcount;
		__u32		reserved_tailroom;
	};

	/*
	 * Header locations: inner (encapsulated) transport and network
	 * headers, then the transport, network and link layer headers.
	 */
	sk_buff_data_t		inner_transport_header;
	sk_buff_data_t		inner_network_header;
	sk_buff_data_t		transport_header;
	sk_buff_data_t		network_header;
	sk_buff_data_t		mac_header;
	/* These elements must be at the end, see alloc_skb() for details.  */
	/* tail: tail pointer; end: end pointer. */
	sk_buff_data_t		tail;
	sk_buff_data_t		end;
	/* head: head of buffer; data: data head pointer. */
	unsigned char		*head,
				*data;
	/* Buffer size. */
	unsigned int		truesize;
	/* User count - see {datagram,tcp}.c. */
	atomic_t		users;
};

struct sk_buff {//介绍 
    struct sk_buff *next, *prev;//双向链表指针
    ktime_t tstamp ;//时间戳
    struct sock *sk;   //对应于传输层,标示属于哪个socket ?
    struct net_device *dev;    //数据来自或者发送自哪个设备
    char cb[48];//控制信息buffer,在每个层都可以用,并且目前为止足够大
    int len;      实际总长度
    int data_len; 数据的长度 //也许是paged的data 
    __u16 mac_len; 数据链路层头的长度
    __u16 hdr_len; writable header length of cloned skb 
    
     sk_buff_data_t   transport_header;   传输层头指针
    sk_buff_data_t   network_header;    网络层头指针
    sk_buff_data_t   mac_header;        数据链路层头

    unsigned char *head; //buffer 头
    unsigned char *data; 数据头
    sk_buff_data_t tail; 数据结尾
    sk_buff_data_t end;  buffer 结尾
    unsigned int truesize; //buffer 大小

    cloned 是不是cloned
    mark 数据包mark
    destructor 销毁函数指针 
    pkt_type : 根据二层头确定的包信息
    __be16 protocol : 三层协议 IP ARP 等,用于和全局数组ptype_base中的数据对比,该数组可以通过dev_add_pack()注册.
}

由于该结构将用于各个层,内核提供了一系列的sk_buff的操作函数
skb_put()  减小tailroom,buffer向后扩展
skb_push() 减小headroom,buffer向上扩张 
skb_trim() cut buffer到一个长度
skb_pull   从数据头cut一定长度的数据 
skb_reserve 增大headroom,减少tailroom,只能用于buffer为空时
skb_headroom headroom的大小
skb_tailroom tailroom的大小

alloc_skb() 分配一个sk_buff结构及buffer区域
kfree_skb() reference 减一,如果不再有引用,则free skb和buffer

dev_alloc_skb() 方便接收数据的sk_buff的分配函数
dev_kfree_skb()  

skb_shinfo() 获得和sk_buff 一块分配的struct skb_shared_info 

skb_clone() //复制sk_buff ,但是buffer不变 
pskb_copy()  //拷贝sk_buff和私有的头部,常用于需要修改sk_buff的头部时
skb_copy() //完全拷贝

skb_queue_head_init()
skb_queue_head()
skb_queue_tail()
skb_dequeue()
skb_dequeue_tail()
skb_queue_purge() //list 清空

skb_queue_walk() //遍历list用

在Linux2.6中,struct sk_buff承担了socket的输入输出的传输缓存的任务。

首先,还是先看struct socket的定义

/**
 * struct socket - general BSD socket
 * @state: socket state (%SS_CONNECTED, etc)
 * @type: socket type (%SOCK_STREAM, etc)
 * @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc)
 * @ops: protocol specific socket operations
 * @file: File back pointer for gc
 * @sk: internal networking protocol agnostic socket representation
 * @wq: wait queue for several uses
 */

/*
 * General BSD socket object. Per-member notes below restate the kernel-doc
 * block that precedes this definition.
 */
struct socket {
    /* Socket state (SS_CONNECTED, etc). */
    socket_state        state;

    /* Socket type (SOCK_STREAM, etc), bracketed by kmemcheck markers. */
    kmemcheck_bitfield_begin(type);
    short            type;
    kmemcheck_bitfield_end(type);

    /* Socket flags (SOCK_ASYNC_NOSPACE, etc). */
    unsigned long        flags;

    /* Wait queue for several uses. */
    struct socket_wq    *wq;

    /* File back pointer for gc. */
    struct file        *file;
    /* Internal networking protocol agnostic socket representation. */
    struct sock        *sk;
    /* Protocol specific socket operations. */
    const struct proto_ops    *ops;
};

代码中的注释对于每一个变量说的都很清楚——看到这里,我先感叹一下,linux2.6的结构体的注释比老版本要清楚得多。到目前为止,我所看到的关键的结构体,都有清晰的注释。我们可以看出struct socket中的sk成员(struct sock类型的指针),是socket的工作核心。

那么现在跳转到struct sock的定义处。由于struct sock的定义过长,所以只展示一部分。

/*
 * struct sock - abridged excerpt of the internal, protocol-agnostic socket
 * representation that struct socket points to through its sk member.
 *
 * Only the members discussed in the surrounding text are listed; every
 * "members omitted" marker stands for members of the full definition that
 * are not reproduced here.
 */
struct sock {
    /*
     * Now struct inet_timewait_sock also uses sock_common, so please just
     * don't add nothing before this first member (__sk_common) --acme
     */
    struct sock_common    __sk_common;

    /* ... members omitted ... */

    /* Size of the receive buffer, in bytes. */
    int sk_rcvbuf;

    /* ... members omitted ... */

    /* Size of the send buffer, in bytes. */
    int sk_sndbuf;

    /* Receive and send queues: doubly linked lists of sk_buff. */
    struct sk_buff_head    sk_receive_queue;
    struct sk_buff_head    sk_write_queue;

    /* ... members omitted ... */
};

其中,sk_rcvbuf和sk_sndbuf分别是接收和发送缓存的字节数。
而struct sk_buff_head的定义如下:

/*
 * Head of a doubly linked list of sk_buff. struct sock uses it for
 * sk_receive_queue and sk_write_queue.
 */
struct sk_buff_head {
    /* These two members must be first. */
    /*
     * NOTE(review): these have the same names and types as the first two
     * members of struct sk_buff, presumably so that a list head can be
     * linked like a buffer - confirm against the list helpers.
     */
    struct sk_buff    *next;
    struct sk_buff    *prev;

    /* NOTE(review): presumably the number of buffers queued - confirm. */
    __u32        qlen;
    /* NOTE(review): presumably serialises access to the list - confirm. */
    spinlock_t    lock;
};

可以看出socket的接收和发送缓存是使用一个双链表将sk_buff组织起来的。

  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值