目录
1. 开头
【上一篇】linux 虚拟文件系统设计模式_byrcoder的博客-CSDN博客。目录:1. 为什么有vfs;2. 高级语言如何设计vfs(2.1 先定义一套标准接口;2.2 创建接口的方法);3. linux内核是如何设计vfs(3.1 IFILE如何设计;3.2 IFILE如何返回应用层;3.3 可扩展的vfs;3.3.1 区分文件系统)。摘要:1. 为什么有vfs:多样的外部设备,如磁盘、网络,假如没有vfs,我们对每种设备都需要设置一套打开、读写、关闭操作,对于应用层的开发者是一个巨大的负担。vfs的职责就是简化应用层,把复杂性在内核里包装起来。这个和我们设计模式的接口抽象思……(原文链接:https://blog.csdn.net/weixin_41374218/article/details/121723421?spm=1001.2014.3001.5501)
上一次写了一篇关于linux内核虚拟文件系统设计的文章,本文来介绍linux套接字的整体设计,主要包括主流的协议族、协议族的设计,以及不同协议族和vfs的关系。
2. linux的常用协议族
协议族 | 地址族 | 地址 |
---|---|---|
PF_UNIX | AF_UNIX | 文件系统路径名(sockaddr_un.sun_path) |
PF_INET | AF_INET | (host, port) |
PF_NETLINK | AF_NETLINK | (pid, groups) |
3. 协议族对外的接口
/*
 * Socket entry points declared for in-kernel callers.  Pointers tagged
 * __user refer to user-space memory.
 */

/* Single-message and multi-message receive/send. */
long __sys_recvmsg(int fd, struct user_msghdr __user *msg,
		   unsigned int flags, bool forbid_cmsg_compat);
long __sys_sendmsg(int fd, struct user_msghdr __user *msg,
		   unsigned int flags, bool forbid_cmsg_compat);
int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
		   unsigned int vlen, unsigned int flags,
		   struct __kernel_timespec __user *timeout,
		   struct old_timespec32 __user *timeout32);
int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
		   unsigned int vlen, unsigned int flags,
		   bool forbid_cmsg_compat);

/* helpers which do the actual work for syscalls */
int __sys_recvfrom(int fd, void __user *ubuf, size_t size,
		   unsigned int flags, struct sockaddr __user *addr,
		   int __user *addr_len);
int __sys_sendto(int fd, void __user *buff, size_t len,
		 unsigned int flags, struct sockaddr __user *addr,
		 int addr_len);
int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
		  int __user *upeer_addrlen, int flags);
int __sys_socket(int family, int type, int protocol);
int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
int __sys_connect(int fd, struct sockaddr __user *uservaddr,
		  int addrlen);
int __sys_listen(int fd, int backlog);
int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
		      int __user *usockaddr_len);
int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
		      int __user *usockaddr_len);
int __sys_socketpair(int family, int type, int protocol,
		     int __user *usockvec);
int __sys_shutdown(int fd, int how);
4. 协议族定义和管理
协议族的接口在net_proto_family定义,通过sock_register来注册。
/*
 * Abstract interface of a protocol family.  Instances are registered
 * with sock_register() and looked up by family number when a socket is
 * created.
 */
struct net_proto_family {
	/* Family number; used as the index into net_families[]. */
	int		family;
	/* Family-specific initialisation of a freshly allocated socket. */
	int		(*create)(struct net *net, struct socket *sock,
				  int protocol, int kern);
	/* Module providing this family. */
	struct module	*owner;
};
/*
 * Protocol family descriptor for PF_INET: sockets of this family are
 * initialised by inet_create().
 */
static const struct net_proto_family inet_family_ops = {
	.family	= PF_INET,
	.create	= inet_create,
	.owner	= THIS_MODULE,
};
// 注册协议族
int sock_register(const struct net_proto_family *ops)
{
spin_lock(&net_family_lock);
if (rcu_dereference_protected(net_families[ops->family],
lockdep_is_held(&net_family_lock)))
err = -EEXIST;
else {
rcu_assign_pointer(net_families[ops->family], ops);
err = 0;
}
spin_unlock(&net_family_lock);
}
5. 套接字接口和协议族/vfs虚拟文件的关系
套接字接口通过family找到协议族,协议族内部实现初始化套接字的操作(net_proto_family的create方法)。创建套接字成功后,会通过sock_map_fd,用适配器的方法将sock转为vfs的接口。
// 系统调用接口
int __sys_socket(int family, int type, int protocol)
{
struct socket *sock;
retval = sock_create(family, type, protocol, &sock);
if (retval < 0)
return retval;
return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
}
/*
 * Create a socket in the network namespace reached through
 * current->nsproxy->net_ns.  Thin wrapper around __sock_create() that
 * passes kern = 0.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	struct net *net = current->nsproxy->net_ns;

	return __sock_create(net, family, type, protocol, res, 0);
}
// 和协议族的关系,省去若干无关代码
int __sock_create(struct net *net, int family, int type, int protocol,
struct socket **res, int kern)
{
const struct net_proto_family *pf;
sock = sock_alloc();
pf = rcu_dereference(net_families[family]);
err = pf->create(net, sock, protocol, kern);
*res = sock;
}
套接字和vfs的适配器方法,把sock封装成file。这样操作sock,就和操作文件一样。
/*
 * Bind a socket to a new file descriptor.
 *
 * Reserves an unused fd, wraps the socket in a struct file through
 * sock_alloc_file() and installs that file under the fd.
 *
 * Returns the fd on success or a negative error.  On both failure paths
 * the socket is released: here when no fd is available, and inside
 * sock_alloc_file() when the file cannot be allocated.
 */
static int sock_map_fd(struct socket *sock, int flags)
{
	struct file *filp;
	int fd;

	fd = get_unused_fd_flags(flags);
	if (unlikely(fd < 0)) {
		sock_release(sock);
		return fd;
	}

	filp = sock_alloc_file(sock, flags, NULL);
	if (IS_ERR(filp)) {
		/* Give the reserved descriptor back; it was never installed. */
		put_unused_fd(fd);
		return PTR_ERR(filp);
	}

	fd_install(fd, filp);
	return fd;
}
/*
 * Wrap a socket in a pseudo file that uses socket_file_ops.
 *
 * @sock:  socket to wrap; released here if the file cannot be allocated
 * @flags: only O_NONBLOCK is carried over; the file is always O_RDWR
 * @dname: name for the pseudo file, or NULL to derive one from the socket
 *
 * Returns the new file, or the ERR_PTR value from alloc_file_pseudo().
 */
struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
{
	struct file *filp;

	/* No name given: use the creating protocol's name when there is an sk. */
	if (!dname) {
		if (sock->sk)
			dname = sock->sk->sk_prot_creator->name;
		else
			dname = "";
	}

	filp = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
				 O_RDWR | (flags & O_NONBLOCK),
				 &socket_file_ops);
	if (IS_ERR(filp)) {
		sock_release(sock);
		return filp;
	}

	/* Link both directions: socket -> file and file -> socket. */
	sock->file = filp;
	filp->private_data = sock;
	return filp;
}
/*
 * File operations attached to every socket file by sock_alloc_file().
 * This table is the adapter that routes file-level calls to the sock_*
 * handlers.
 */
static const struct file_operations socket_file_ops = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,
	.read_iter	= sock_read_iter,
	.write_iter	= sock_write_iter,
	.poll		= sock_poll,
	.unlocked_ioctl	= sock_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= compat_sock_ioctl,
#endif
	.mmap		= sock_mmap,
	.release	= sock_close,
	.fasync		= sock_fasync,
	.sendpage	= sock_sendpage,
	.splice_write	= generic_splice_sendpage,
	.splice_read	= sock_splice_read,
};
/*
 * read_iter handler for socket files (socket read path).
 *
 * Recovers the socket from file->private_data, builds a msghdr around
 * the caller's iov_iter and hands it to sock_recvmsg().
 *
 * Returns -ESPIPE for a non-zero file position, 0 for an empty
 * iterator, otherwise whatever sock_recvmsg() returns.
 */
static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *filp = iocb->ki_filp;
	struct socket *sock = filp->private_data;
	struct msghdr msg = {
		.msg_iter = *to,
		.msg_iocb = iocb,
	};
	ssize_t nread;

	if (iocb->ki_pos != 0)
		return -ESPIPE;

	if (iov_iter_count(to) == 0)	/* Match SYS5 behaviour */
		return 0;

	/* A non-blocking file turns into a non-blocking receive. */
	if (filp->f_flags & O_NONBLOCK)
		msg.msg_flags = MSG_DONTWAIT;

	/*
	 * Author's note: this eventually calls
	 * sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags).
	 */
	nread = sock_recvmsg(sock, &msg, msg.msg_flags);

	/* Hand the advanced iterator state back to the caller. */
	*to = msg.msg_iter;
	return nread;
}
6. socket结构体
底层的socket有几个核心的结构:file(vfs使用),ops(socket 提供给vfs调用的上层接口),sk(struct sock,底层的协议族实现)。socket协议族内容非常多,后期再针对tcp详细说明具体的实现。
/*
 * Generic socket object: links the VFS file, the upper-level operations
 * table and the protocol-level sock.
 */
struct socket {
	socket_state		state;
	short			type;
	unsigned long		flags;
	struct file		*file;	/* VFS wrapper; set by sock_alloc_file() */
	struct sock		*sk;
	const struct proto_ops	*ops;	/* upper-level interface of the protocol family */
	struct socket_wq	wq;
};
/* Common leading part of a sock (excerpt: only two members are listed). */
struct sock_common {
	unsigned short	skc_family;
	struct proto	*skc_prot;	/* low-level protocol operations */
};
/* Protocol-level socket (excerpt: only the embedded sock_common is listed). */
struct sock {
	struct sock_common	__sk_common;
};