1. 开头
上一篇聊了一下协议族的大致过程,本篇主要仔细聊聊里面的具体细节。
2. 套接字和虚拟文件的接口
套接字也是虚拟文件系统的一种,因此也需要file和inode结构体。socket的inode内嵌了通用的inode(相当于“继承”inode),同时也有自己的私有成员struct socket。这个理念在内核很常见:各个子系统都会把inode转换成自己的真实结构体(比如SOCKET_I函数,通过inode获取对应的socket成员)。具体的代码实现:
/*
 * Superblock operations used for socket inodes: inode allocation and
 * release are routed to the socket-specific helpers, statfs to
 * simple_statfs.
 */
static const struct super_operations sockfs_ops = {
.alloc_inode = sock_alloc_inode,
.free_inode = sock_free_inode,
.statfs = simple_statfs,
};
/*
 * Holds a socket and its VFS inode side by side in one object.
 * SOCKET_I() and SOCK_INODE() convert between the two embedded members.
 */
struct socket_alloc {
struct socket socket;
struct inode vfs_inode;
};
/*
 * Allocate the inode for a socket.
 *
 * A struct socket_alloc is taken from sock_inode_cachep and the address
 * of its embedded vfs_inode is handed back.  Returns NULL when the
 * allocation fails.  @sb is not used.
 */
static struct inode *sock_alloc_inode(struct super_block *sb)
{
	struct socket_alloc *container;

	container = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
	return container ? &container->vfs_inode : NULL;
}
/*
 * Map an inode to the socket stored alongside it.
 *
 * @inode must be the vfs_inode member of a struct socket_alloc.  The
 * enclosing socket_alloc is recovered by subtracting the member offset
 * from the inode address (the open-coded form of container_of()), and a
 * pointer to its embedded socket member is returned.
 */
static inline struct socket *SOCKET_I(struct inode *inode)
{
	void *mptr = (void *)(inode);

	/* The leading '&' matters: we return a pointer, not the struct itself. */
	return &((struct socket_alloc *)(mptr -
			((size_t) &
			 ((struct socket_alloc *)0)->vfs_inode)))->socket;
}
// socket获取inode
static inline struct inode *SOCK_INODE(struct socket *socket)
{
return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
}
// file vfs的适配器
struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
{
file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
O_RDWR | (flags & O_NONBLOCK),
&socket_file_ops);
sock->file = file;
file->private_data = sock;
return file;
}
3. socket的创建过程
上一篇《linux内核套接字协议族》提到,创建套接字时会调用协议族的create回调(对PF_INET来说就是inet_create),所以inet_create之前的套接字创建过程这里就省去了。
/*
 * Protocol-family descriptor for PF_INET: sockets of this family are
 * set up through inet_create.
 */
static const struct net_proto_family inet_family_ops = {
.family = PF_INET,
.create = inet_create,
.owner = THIS_MODULE,
};
/* This is used to register socket interfaces for IP protocols. */
struct inet_protosw {
struct list_head list; /* list linkage */
/* These two fields form the lookup key. */
unsigned short type; /* This is the 2nd argument to socket(2). */
unsigned short protocol; /* This is the L4 protocol number. */
struct proto *prot; // low-level (protocol) interface
const struct proto_ops *ops; // application-facing (socket-level) interface
unsigned char flags; /* See INET_PROTOSW_* below. */
};
static int inet_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
struct sock *sk;
struct inet_protosw *answer;
struct inet_sock *inet;
sock->ops = answer->ops; // 核心成员找到协议族
answer_prot = answer->prot;
sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern); // inet_sock 结构题
sock_init_data(sock, sk); // socket内初始化底层协议的成员
}
4. socket如何被调用
应用层通过fd操作套接字,所以第一步是通过fd获取对应的file。这里file的操作成员是socket_file_ops,再通过socket_file_ops调用socket的proto_ops成员ops(就是inet_create里面用inet_protosw->ops初始化的那个成员)。
/*
 * File operations attached to socket files (see sock_alloc_file());
 * read_iter is routed to sock_read_iter.
 */
static const struct file_operations socket_file_ops = {
.read_iter = sock_read_iter,
};
static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
struct socket *sock = file->private_data;
res = sock_recvmsg(sock, &msg, msg.msg_flags);
}
/*
 * Dispatch a receive to the socket's proto_ops.
 *
 * Calls sock->ops->recvmsg with the amount of data left in @msg as the
 * length.  INDIRECT_CALL_INET is given inet6_recvmsg and inet_recvmsg as
 * the expected targets of that pointer.
 */
static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
				     int flags)
{
	size_t remaining = msg_data_left(msg);

	return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
				  inet_recvmsg, sock, msg, remaining, flags);
}
/*
 * Socket-level operations table.  A struct socket reaches these through
 * its ops pointer (for example sock->ops->recvmsg).
 */
struct proto_ops {
	int		family;
	struct module	*owner;
	int		(*release)   (struct socket *sock);
	int		(*bind)	     (struct socket *sock,
				      struct sockaddr *myaddr,
				      int sockaddr_len);
	int		(*connect)   (struct socket *sock,
				      struct sockaddr *vaddr,
				      int sockaddr_len, int flags);
	int		(*sendmsg)   (struct socket *sock, struct msghdr *m,
				      size_t total_len);
	/* Notes for implementing recvmsg:
	 * ===============================
	 * msg->msg_namelen should get updated by the recvmsg handlers
	 * iff msg_name != NULL. It is by default 0 to prevent
	 * returning uninitialized memory to user space.  The recvfrom
	 * handlers can assume that msg.msg_name is either NULL or has
	 * a minimum size of sizeof(struct sockaddr_storage).
	 */
	int		(*recvmsg)   (struct socket *sock, struct msghdr *m,
				      size_t total_len, int flags);
	int		(*mmap)	     (struct file *file, struct socket *sock,
				      struct vm_area_struct * vma);
};
5. 继续找个tcp协议例子查看
tcp的inet_protosw实现如下代码。tcp内部实现也非常复杂,后续再找个时间继续讨论。
/*
 * Table of inet_protosw entries.  The entry shown binds
 * SOCK_STREAM / IPPROTO_TCP to tcp_prot (the struct proto) and
 * inet_stream_ops (the struct proto_ops).
 */
static struct inet_protosw inetsw_array[] =
{
{
.type = SOCK_STREAM,
.protocol = IPPROTO_TCP,
.prot = &tcp_prot,
.ops = &inet_stream_ops,
.flags = INET_PROTOSW_PERMANENT |
INET_PROTOSW_ICSK,
},
};
/*
 * proto_ops table referenced by the SOCK_STREAM entry of inetsw_array.
 * Most handlers are the generic inet_* or sock_* helpers; poll is routed
 * to tcp_poll.
 */
const struct proto_ops inet_stream_ops = {
.connect = inet_stream_connect,
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = inet_getname,
.poll = tcp_poll,
.ioctl = inet_ioctl,
.gettstamp = sock_gettstamp,
.listen = inet_listen,
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
.sendmsg = inet_sendmsg,
.recvmsg = inet_recvmsg,
};
/*
 * TCP's struct proto: the protocol-level handlers, referenced as .prot
 * by the SOCK_STREAM / IPPROTO_TCP entry of inetsw_array.
 */
struct proto tcp_prot = {
.pre_connect = tcp_v4_pre_connect,
.connect = tcp_v4_connect,
.disconnect = tcp_disconnect,
.accept = inet_csk_accept,
.ioctl = tcp_ioctl,
.init = tcp_v4_init_sock,
.destroy = tcp_v4_destroy_sock,
.shutdown = tcp_shutdown,
.setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt,
.keepalive = tcp_set_keepalive,
.recvmsg = tcp_recvmsg,
.sendmsg = tcp_sendmsg,
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v4_do_rcv,
};
/*
 * Connect a stream socket by delegating to the protocol-level handler.
 *
 * The call goes through sk->sk_prot->connect, i.e. the low-level prot
 * interface of the inet_protosw selected in inet_create().  Its result
 * is returned to the caller unchanged.
 *
 * NOTE(review): only the dispatch is shown here; @flags and @is_sendmsg
 * are not consulted in this excerpt.
 */
int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			  int addr_len, int flags, int is_sendmsg)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->connect(sk, uaddr, addr_len);
}