套接字缓冲区 sk_buff
想要了解网络是怎么进行部署和运行的,首先要做的就是对数据结构进行分析。首先我们对最重要的sk_buff这个数据结构的一些内容查看分析。
一、布局字段
1.表头数据结构
skbuff.h
/* Head of a queue of sk_buffs: a doubly linked list plus bookkeeping. */
struct sk_buff_head {
/* These two members must be first. */
/* Doubly linked list: successor and predecessor pointers. They must
 * mirror the first two members of struct sk_buff so that the head can
 * be treated as just another list element. */
struct sk_buff *next;
struct sk_buff *prev;
/* Number of elements currently in the list. */
__u32 qlen;
/* Protects the list (and qlen) against concurrent access. */
spinlock_t lock;
};
2.sk_buff中的布局字段
本数据结构过于庞大,只挑出部分进行分析
skbuff.h
union {
struct {
/* These two members must be first. */
// 双向链表指针 前驱指针和后继指针
struct sk_buff *next;
struct sk_buff *prev;
union {
// 指向网络设备的指针
struct net_device *dev;
/* Some protocols might use this space to store information,
* while device pointer would be NULL.
* UDP receive path is one user.
*/
unsigned long dev_scratch;
};
};
struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */
// 指向表头的指针
struct list_head list;
};
union {
// 指向拥有此缓冲区的套接字的sock数据结构。只有数据在本地产生或者接收的时候才有用,如果只是被转发,则为NULL
struct sock *sk;
int ip_defrag_offset;
};
// 缓冲区数据块的大小,包括报头
unsigned int len,
// 只表示数据大小
data_len;
// MAC报头的大小
__u16 mac_len;
/* private: */
__u32 headers_end[0];
/* public: */
/* These elements must be at the end, see alloc_skb() for details. */
// 指向数据尾部的指针
sk_buff_data_t tail;
// 指向缓冲区尾部的指针
sk_buff_data_t end;
// 指向缓冲区头部的指针
unsigned char *head,
// 指向数据头部的指针
*data;
// 缓冲区总大小,包括sk_buff结构本身
unsigned int truesize;
// 引用计数,表示使用这个sk_buff缓冲区的实例的数目。避免在某人依然使用此sk_buff的时候,把这个结构释放掉。缓冲区的每个用户都要递增递减这个字段
refcount_t users;
二、通用字段
union {
// 时间戳,表示封包何时被接收
ktime_t tstamp;
u64 skb_mstamp_ns; /* earliest departure time */
};
/*
* This is the control buffer. It is free to use for every
* layer. Please put your private variables there. If you
* want to keep them across layers you have to do a skb_clone()
* first. This is owned by whoever has the skb queued ATM.
*/
控制缓冲区,供每一层内部使用,起维护作用;每一层的代码都是通过宏进行访问。
char cb[48] __aligned(8);
/*
根据帧的L2目的地址进行类型划分
已接收帧的目的地址是本机(该接口)的地址
#define PACKET_HOST 0
已接收帧的目的地址是该接口的广播地址
#define PACKET_BROADCAST 1
已接收帧的目的地址是该接口的多播地址
#define PACKET_MULTICAST 2
已接收帧的目的地址不属于与该接口相匹配的地址。如果转发机制使能,就会转发,否则将丢弃
#define PACKET_OTHERHOST 3
封包正在被发送
#define PACKET_OUTGOING 4
封包正传送至环回设备
#define PACKET_LOOPBACK 5
*/
__u8 pkt_type:3;
__u8 ignore_df:1;
__u8 nf_trace:1;
__u8 ip_summed:2;
__u8 ooo_okay:1;
union {
// 校验和
__wsum csum;
struct {
__u16 csum_start;
__u16 csum_offset;
};
};
// 正被转发或传输的封包的QoS等级
__u32 priority;
管理函数
skbuff.c
注:会发现文件中几乎所有的函数都有两个版本:do_something和__do_something。前者是包裹函数,会增加额外的合理性检查,或者在调用后者前后加入上锁机制;__do_something一般不会被直接调用。
// 分配缓冲区的主要函数
/**
 * __alloc_skb - allocate a network buffer
 * @size: requested size of the data area
 * @gfp_mask: allocation mask (GFP flags)
 * @flags: SKB_ALLOC_* flags; SKB_ALLOC_FCLONE allocates from the
 * "fast clone" cache, SKB_ALLOC_RX marks a receive-path allocation
 * @node: NUMA node to allocate from
 *
 * The sk_buff descriptor and its data area are allocated separately:
 * the descriptor from a slab cache, the data area (plus trailing
 * struct skb_shared_info) via kmalloc_reserve().
 *
 * Return: pointer to the new sk_buff, or NULL if allocation failed.
 */
struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
int flags, int node)
{
struct kmem_cache *cache;
struct skb_shared_info *shinfo;
struct sk_buff *skb;
u8 *data;
bool pfmemalloc;
/* Pick the slab cache: FCLONE requests get a descriptor with an
 * embedded companion clone (struct sk_buff_fclones). */
cache = (flags & SKB_ALLOC_FCLONE)
? skbuff_fclone_cache : skbuff_head_cache;
/* RX allocations on behalf of memalloc sockets may dip into the
 * emergency memory reserves. */
if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
gfp_mask |= __GFP_MEMALLOC;
/* Get the HEAD */
skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
if (!skb)
goto out;
prefetchw(skb);
/* We do our best to align skb_shared_info on a separate cache
 * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
 * Both skb->head and skb_shared_info are cache line aligned.
 */
size = SKB_DATA_ALIGN(size);
size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
if (!data)
goto nodata;
/* kmalloc(size) might give us more room than requested.
 * Put skb_shared_info exactly at the end of allocated zone,
 * to allow max possible filling before reallocation.
 */
size = SKB_WITH_OVERHEAD(ksize(data));
prefetchw(data + size);
/*
 * Only clear those fields we need to clear, not those that we will
 * actually initialise below. Hence, don't put any more fields after
 * the tail pointer in struct sk_buff!
 */
memset(skb, 0, offsetof(struct sk_buff, tail));
/* Account for allocated memory : skb + skb->head */
skb->truesize = SKB_TRUESIZE(size);
skb->pfmemalloc = pfmemalloc;
refcount_set(&skb->users, 1);
/* Empty buffer: head == data == tail; end marks the usable limit. */
skb->head = data;
skb->data = data;
skb_reset_tail_pointer(skb);
skb->end = skb->tail + size;
/* ~0U flags these header offsets as "not set yet". */
skb->mac_header = (typeof(skb->mac_header))~0U;
skb->transport_header = (typeof(skb->transport_header))~0U;
/* make sure we initialize shinfo sequentially */
shinfo = skb_shinfo(skb);
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
atomic_set(&shinfo->dataref, 1);
if (flags & SKB_ALLOC_FCLONE) {
struct sk_buff_fclones *fclones;
fclones = container_of(skb, struct sk_buff_fclones, skb1);
/* Mark this skb as the original; the companion skb2 is flagged
 * as the pre-allocated clone. */
skb->fclone = SKB_FCLONE_ORIG;
refcount_set(&fclones->fclone_ref, 1);
fclones->skb2.fclone = SKB_FCLONE_CLONE;
}
out:
return skb;
nodata:
/* Data area allocation failed: return the descriptor to its cache
 * and report failure with NULL. */
kmem_cache_free(cache, skb);
skb = NULL;
goto out;
}
释放内存
/**
 * __kfree_skb - private function
 * @skb: buffer
 *
 * Free an sk_buff. Release anything attached to the buffer.
 * Clean the state. This is an internal helper function. Users should
 * always call kfree_skb
 */
void __kfree_skb(struct sk_buff *skb)
{
/* First release everything attached to the skb... */
skb_release_all(skb);
/* ...then free the sk_buff descriptor itself. */
kfree_skbmem(skb);
}