[源码分析]-socket的创建
这节主要介绍socket创建的大致流程。我们只关注总体流程;流程中有很多细节,暂不讨论,随着阅读源码量的增加,这些细节问题会迎刃而解。有些机制会在后续其他章节介绍,以免影响此节主要内容的分析。
下面是创建socket的主体流程。
/*
 * Abridged listing of __sock_create(): allocate a socket, record its type,
 * look up the protocol family handler in net_families[] and call its
 * create() hook. On the path shown, *res receives the new socket and 0 is
 * returned.
 *
 * NOTE(review): this listing shows no error handling (the results of
 * sock_alloc() and pf->create() are used unchecked) and never uses @kern;
 * presumably those parts were trimmed for the article - confirm against the
 * full kernel source.
 */
static int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern)
{
struct socket *sock;
const struct net_proto_family *pf;
/* Step 1: allocate the socket object. */
sock = sock_alloc();
sock->type = type;
/* No handler registered for this family: request module "net-pf-<family>". */
if (net_families[family] == NULL)
request_module("net-pf-%d", family);
pf = rcu_dereference(net_families[family]);
/* Step 2: family-specific initialisation through the registered hook. */
pf->create(net, sock, protocol);
*res = sock;
return 0;
}
从上面的内容可以看出,__sock_create 的主体就两个流程:sock_alloc 以及 pf->create 钩子函数。下面具体分析。
1. sock_alloc
/*
 * Abridged listing of sock_alloc(): obtain a new inode from the superblock
 * of sock_mnt and return the struct socket that SOCKET_I() derives from
 * that inode.
 */
static struct socket *sock_alloc(void)
{
struct inode *inode;
struct socket *sock;
/* The inode comes from the superblock of the sock_mnt mount. */
inode = new_inode(sock_mnt->mnt_sb);
/* Map the inode to the socket that goes with it. */
sock = SOCKET_I(inode);
...
return sock;
}
1.1 new_inode
从文件系统中获取socket相关的inode节点。
new_inode->alloc_inode->sock_alloc_inode[sb->s_op->alloc_inode]
/* Container that holds a socket and its inode side by side in one object. */
struct socket_alloc {
struct socket socket; // socket object
struct inode vfs_inode; // inode object; socket_alloc binds the two together
};
/*
 * Allocate a struct socket_alloc from sock_inode_cachep, reset the fields
 * of its embedded socket, and return a pointer to its embedded inode.
 * Returns NULL when the cache allocation fails. @sb is not used in the
 * lines shown.
 */
static struct inode *sock_alloc_inode(struct super_block *sb)
{
struct socket_alloc *ei;
ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
/* Put the embedded socket into a known initial state. */
init_waitqueue_head(&ei->socket.wait);
ei->socket.fasync_list = NULL;
ei->socket.state = SS_UNCONNECTED; // unconnected state
ei->socket.flags = 0;
ei->socket.ops = NULL;
ei->socket.sk = NULL;
ei->socket.file = NULL;
/* The caller gets the inode half; SOCKET_I() recovers the socket half. */
return &ei->vfs_inode;
}
其中sock_inode_cachep变量就是创建的cache对象,在init函数中会通过kmem_cache_create来创建,可以通过下面的命令查看分配的情况:
root@ubuntu:/# cat /proc/slabinfo | grep sock_inode_cache
sock_inode_cache 175 175 640 25 4 : tunables 0 0 0 : slabdata 7 7 0
1.2 SOCKET_I
/*
 * Return the socket that shares a struct socket_alloc with @inode.
 * @inode must be the vfs_inode member of a struct socket_alloc, because
 * container_of() steps back from that member to the enclosing object.
 */
static inline struct socket *SOCKET_I(struct inode *inode)
{
return &container_of(inode, struct socket_alloc, vfs_inode)->socket;
}
/*
 * container_of - get a pointer to the structure that contains a member.
 * @ptr:    pointer to the member
 * @type:   type of the containing structure
 * @member: name of the member inside @type
 *
 * __mptr is declared with the member's own type, so passing a @ptr of the
 * wrong type triggers a compiler diagnostic. The result is @ptr moved back
 * by the member's offset and cast to @type *.
 *
 * Note on typeof: its operand cannot carry a storage-class specifier
 * (extern, static), but it may carry type qualifiers (const, volatile).
 * The kernel spells the operator `typeof`; `__typeof__` is the same GNU
 * operator and also compiles in strict ISO modes such as -std=c11.
 * Explanatory comments must stay outside the macro body: a `//` line
 * without a trailing backslash ends the definition early.
 */
#define container_of(ptr, type, member) ({ \
	const __typeof__(((type *)0)->member) *__mptr = (ptr); \
	(type *)((char *)__mptr - offsetof(type, member)); \
})

/*
 * offsetof - byte offset of @member from the start of @type, computed by
 * taking the member's address in an object imagined at address 0.
 */
#define offsetof(type, member) ((size_t)&((type *)0)->member)
这个技巧用得比较多,常见的如链表:一般会将链表节点的结构体嵌入到对象里面,知道了链表节点的地址,就可以通过container_of将其转换为所属对象的地址。
2. pf->create
2.1 协议族函数注册
以AF_INET(af_inet.c)协议为例
fs_initcall(inet_init); // install inet_init into the initcall mechanism
关于initcall以后再学习介绍。// todo
通过这样的注册,linux在初始化的时候会执行inet_init()函数,下面分析下这个函数:
/*
 * Abridged listing of inet_init(): the only step shown is registering
 * inet_family_ops through sock_register(); its return value is discarded.
 */
static int __init inet_init(void)
{
... // omitted; only the inet_family_ops part matters here
(void)sock_register(&inet_family_ops);
...
}
/*
 * Abridged listing of sock_register(): store @ops in net_families[],
 * indexed by ops->family. __sock_create() later reads the same slot to
 * find the family's create() hook.
 */
int sock_register(const struct net_proto_family *ops)
{
...
net_families[ops->family] = ops;
...
}
/* Protocol family descriptor for PF_INET; its create hook is inet_create(). */
static struct net_proto_family inet_family_ops = {
.family = PF_INET, // i.e. AF_INET (IPv4 address family / protocol family), value 2
.create = inet_create,
.owner = THIS_MODULE,
};
到这里,我们就清楚上面的 pf->create 指的是什么了:对于 AF_INET 协议族,它就是 inet_create。
2.2 协议结构体初始化
/*
 * Abridged listing of inet_create(), first part: walk the inetsw list for
 * the socket's type and pick the entry that matches the requested protocol.
 * IPPROTO_IP acts as a wildcard on either side of the comparison.
 */
static int inet_create(struct net *net, struct socket *sock, int protocol)
{
...
sock->state = SS_UNCONNECTED;
/* Look for the requested type/protocol pair. */
lookup_protocol:
/* This value survives only if the list for this socket type is empty. */
err = -ESOCKTNOSUPPORT;
rcu_read_lock();
list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
err = 0;
/* Check the non-wild match. */
if (protocol == answer->protocol) {
/* Exact match on a concrete (non-IPPROTO_IP) protocol. */
if (protocol != IPPROTO_IP)
break;
} else {
/* Check for the two wild cases. */
/* Caller passed the wildcard: adopt this entry's protocol. */
if (IPPROTO_IP == protocol) {
protocol = answer->protocol;
break;
}
/* The entry itself is the wildcard: it accepts the requested protocol. */
if (IPPROTO_IP == answer->protocol)
break;
}
/* This entry did not match; the value remains if the loop runs out. */
err = -EPROTONOSUPPORT;
}
/* Upon startup we insert all the elements in inetsw_array[] into
* the linked list inetsw.
*
* Each entry pairs a socket type and protocol number with the transport
* protocol (.prot) and the socket-level operations (.ops) to use for it.
*/
static struct inet_protosw inetsw_array[] =
{
/* SOCK_STREAM / IPPROTO_TCP */
{
.type = SOCK_STREAM,
.protocol = IPPROTO_TCP,
.prot = &tcp_prot,
.ops = &inet_stream_ops,
.capability = -1,
.no_check = 0,
.flags = INET_PROTOSW_PERMANENT |
INET_PROTOSW_ICSK,
},
/* SOCK_DGRAM / IPPROTO_UDP */
{
.type = SOCK_DGRAM,
.protocol = IPPROTO_UDP,
.prot = &udp_prot,
.ops = &inet_dgram_ops,
.capability = -1,
.no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_PERMANENT,
},
/* SOCK_RAW: protocol is the IPPROTO_IP wildcard; this entry names CAP_NET_RAW as its capability. */
{
.type = SOCK_RAW,
.protocol = IPPROTO_IP, /* wild card */
.prot = &raw_prot,
.ops = &inet_sockraw_ops,
.capability = CAP_NET_RAW,
.no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_REUSE,
}
};
上面这段代码根据类型和协议在inetsw链表(启动时由inetsw_array数组中的元素插入而成)中找到对应的协议处理接口。这也说明了为什么在创建socket时,第三个参数protocol可以为0(IPPROTO_IP):它作为通配符,会被替换为该类型下匹配到的表项的协议。关于注册逻辑可以查看inet_register_protosw。
/*
 * Abridged listing of inet_create(), second part: on a failed lookup try
 * to load a protocol module (at most two attempts) and retry; on success
 * copy the matched entry's settings, allocate the struct sock and run the
 * protocol's init hook.
 */
if (unlikely(err)) { // no matching protocol was found, so try to load a module for it
if (try_loading_module < 2) {
rcu_read_unlock();
/*
* Be more specific, e.g. net-pf-2-proto-132-type-1
* (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM)
*/
if (++try_loading_module == 1)
request_module("net-pf-%d-proto-%d-type-%d",
PF_INET, protocol, sock->type);
/*
* Fall back to generic, e.g. net-pf-2-proto-132
* (net-pf-PF_INET-proto-IPPROTO_SCTP)
*/
else
request_module("net-pf-%d-proto-%d",
PF_INET, protocol);
/* Run the lookup again now that a module may have registered the protocol. */
goto lookup_protocol;
} else
goto out_rcu_unlock;
}
...
/* Copy what is needed from the matched entry before leaving the RCU read section. */
sock->ops = answer->ops;
answer_prot = answer->prot; // protocol structure used by the transport layer (socket layer -> transport layer interface)
answer_no_check = answer->no_check;
answer_flags = answer->flags;
rcu_read_unlock();
WARN_ON(answer_prot->slab == NULL);
err = -ENOBUFS;
sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot); // returns a struct sock pointer; for TCP the object actually allocated is a tcp_sock (its first member is inet_connection_sock, whose first member is inet_sock, whose first member is sock)
if (sk == NULL)
goto out;
err = 0;
sk->sk_no_check = answer_no_check;
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = 1;
inet = inet_sk(sk); // pointer conversion: sock => inet_sock
...
// initialise the data of the sock structure
sock_init_data(sock, sk);
...
// protocol-specific init: sk_prot = sk->sk_prot_creator = prot (from inetsw_array)
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
/* A failing init hook releases the sock again. */
if (err)
sk_common_release(sk);
}
...
}
这样,socket结构的初始化工作就完成了,其中涉及的结构体有:
struct sock
struct socket
struct sk_buff
struct inet_sock
struct inet_connection_sock
它们之间的关系看下源码也就清楚了,这里也就不展示了。
初始化的总体思路:首先通过sock_alloc进行内存资源分配,创建socket_alloc(包含socket和vfs_inode),之后调用协议族net_families[family]注册的create函数进行协议初始化的工作,依次初始化socket、sock、tcp_sock等结构体信息。
参考:
- 《追踪Linux TCP/IP 代码运行》
- linux-2.6.32