sock文件系统

//Definition of the sock file system type ("sockfs").
//.init_fs_context is the hook alloc_fs_context() calls while building the
//mount context; .kill_sb is set to kill_anon_super.
static struct file_system_type sock_fs_type = {
	.name = "sockfs",
	.init_fs_context = sockfs_init_fs_context,
	.kill_sb = kill_anon_super,
};

//The sock file system is set up during kernel initialisation: sock_init()
//is registered through the initcall mechanism (core_initcall level).
core_initcall(sock_init);

//Entry point of the sock file system, registered with core_initcall().
//Returns 0 on success, or the negative error code of the first step that
//failed.
static int __init sock_init(void)
{
	int err;

	//Initialise the network sysctl infrastructure.
	err = net_sysctl_init();
	if (err)
		goto out;

	//Create the slab caches used for socket buffers. Per the original notes
	//three caches are created: skbuff_head_cache, skbuff_fclone_cache and
	//skbuff_ext_cache.
	skb_init();

	//Create the inode cache: sock_inode_cache.
	init_inodecache();

	//Register the sock file system. Nothing needs to be undone if this
	//fails, so leave through the plain exit label (the previous text jumped
	//to a label, "out_fs", that does not exist in this function).
	err = register_filesystem(&sock_fs_type);
	if (err)
		goto out;

	//Mount the sock file system inside the kernel.
	sock_mnt = kern_mount(&sock_fs_type);
	if (IS_ERR(sock_mnt)) {
		err = PTR_ERR(sock_mnt);
		goto out_mount;
	}

#ifdef CONFIG_NETFILTER
	err = netfilter_init();
	if (err)
		goto out;
#endif

	ptp_classifier_init();

out:
	return err;

out_mount:
	//Mounting failed: drop the registration done above.
	unregister_filesystem(&sock_fs_type);
	goto out;
}

//通过代码,可知sock_fs_type文件系统注册完成后,内核会对其进行挂载。内核对sock文件系统的挂载采用了虚拟文件系统内核挂载接口,即vfs_kern_mount。

//Register a file system type.
//Returns 0 on success, -EINVAL when the parameter description fails
//validation, and -EBUSY when fs is already linked into a list or a type with
//the same name is already registered.
int register_filesystem(struct file_system_type *fs)
{
	struct file_system_type **slot;
	int ret;

	if (fs->parameters) {
		if (!fs_validate_description(fs->name, fs->parameters))
			return -EINVAL;
	}

	//A '.' in the name is treated as a fatal programming error.
	BUG_ON(strchr(fs->name, '.'));

	if (fs->next)
		return -EBUSY;

	write_lock(&file_systems_lock);

	//Look the name up. Per the original notes, find_filesystem() walks the
	//global "struct file_system_type *file_systems" list and, when the name
	//is absent, yields the tail slot so the new type is appended at the end.
	slot = find_filesystem(fs->name, strlen(fs->name));
	if (!*slot) {
		*slot = fs;
		ret = 0;
	} else {
		ret = -EBUSY;
	}

	write_unlock(&file_systems_lock);
	return ret;
}

//Once the sock file system has been registered it is mounted with this
//helper. Returns the new vfsmount, or the ERR_PTR produced by
//vfs_kern_mount().
struct vfsmount *kern_mount(struct file_system_type *type)
{
	//vfs_kern_mount() does the work: it creates the file-system context
	//(fs_context_for_mount()) and then the mount itself (fc_mount()).
	struct vfsmount *mount = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);

	if (IS_ERR(mount))
		return mount;

	//Tag the mount's namespace pointer with MNT_NS_INTERNAL.
	real_mount(mount)->mnt_ns = MNT_NS_INTERNAL;
	return mount;
}

//Mount a file system of the given type from inside the kernel.
//  type  - file system type; NULL yields ERR_PTR(-EINVAL)
//  flags - superblock flags handed to fs_context_for_mount()
//  name  - optional value parsed as the "source" parameter
//  data  - monolithic mount data passed to parse_monolithic_mount_data()
//Returns the vfsmount created by fc_mount() or an ERR_PTR on failure.
struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct fs_context *fc;
	struct vfsmount *result;
	int err = 0;

	if (!type)
		return ERR_PTR(-EINVAL);

	//Create the file-system context.
	fc = fs_context_for_mount(type, flags);
	if (IS_ERR(fc))
		return ERR_CAST(fc);

	if (name)
		err = vfs_parse_fs_string(fc, "source", name, strlen(name));

	if (err == 0)
		err = parse_monolithic_mount_data(fc, data);

	//Create the mount from the context only if every parsing step succeeded.
	if (err)
		result = ERR_PTR(err);
	else
		result = fc_mount(fc);

	//The context reference is dropped on success and failure alike.
	put_fs_context(fc);
	return result;
}

//在代码中,首先申请用于描述文件系统的上下文,即struct fs_context结构体,并对其进行初始化。接着,利用该结构体来完成挂载的动作。

//Build a file-system context for a new mount of fs_type. This only delegates
//to alloc_fs_context() with no reference dentry, a zero sb_flags mask and the
//purpose FS_CONTEXT_FOR_MOUNT; the return value is passed through unchanged.
struct fs_context *fs_context_for_mount(struct file_system_type *fs_type, unsigned int sb_flags)
{
	return alloc_fs_context(fs_type, NULL, sb_flags, 0, FS_CONTEXT_FOR_MOUNT);
}

static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
				      struct dentry *reference,
				      unsigned int sb_flags,
				      unsigned int sb_flags_mask,
				      enum fs_context_purpose purpose)
{
	int (*init_fs_context)(struct fs_context *);
	struct fs_context *fc;
	int ret = -ENOMEM;

	fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
	//申请fs_context结构体地址空间

	if (!fc)
		return ERR_PTR(-ENOMEM);

	//对fc_context结构体进行赋值
	fc->purpose = purpose;
	fc->sb_flags = sb_flags;
	fc->sb_flags_mask = sb_flags_mask;
	fc->fs_type = get_filesystem_type(fs_type);
	fc->cred = get_current_cred();
	fc->net_ns = get_net(current->nsproxy->net_ns);
	//网络命名空间隔离

	fc->log.prefix = fs_type->name;

	mutex_init(&fc->uapi_mutex);

	switch (purpose) {
	case FS_CONTEXT_FOR_MOUNT:
		fc->user_ns = get_user_ns(fc->cred->user_ns);
		//用户命名空间隔离

		break;
	case FS_CONTEXT_FOR_SUBMOUNT:
		fc->user_ns = get_user_ns(reference->d_sb->s_user_ns)
	case FS_CONTEXT_FOR_RECONFIGURE:
		atomic_inc(&reference->d_sb->s_active);
		fc->user_ns = get_user_ns(reference->d_sb->s_user_ns);
		fc->root = dget(reference);
		break;
	}

	init_fs_context = fc->fs_type->init_fs_context;
	//指针函数的赋值

	if (!init_fs_context)
		init_fs_context = legacy_init_fs_context;
	
	ret = init_fs_context(fc);
	//执行指针函数,为:sockfs_init_fs_context()接口

	if (ret < 0)
		goto err_fc;
	fc->need_free = true;
	return fc;
}

static int sockfs_init_fs_context(struct fs_context *fc)
{
	struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFA_MAGIC);
	//创建pseudo_fs_context结构体对象

	if (!ctx)
		return -ENOMEM;
	ctx->ops = &sockfs_ops;
	//该操作结构体在套接字申请inode节点时被使用

	ctx->dops = &sockfs_dentry_operation;
	ctx->xattr = sockfs_xattr_handlers;
	return 0;
}

//Allocate a pseudo_fs_context, record the magic number in it and attach it
//to fc. Returns the new context, or NULL when the allocation fails (fc is
//left untouched in that case).
struct pseudo_fs_context *init_pseudo(struct fs_context *fc, unsigned long magic)
{
	struct pseudo_fs_context *pctx;

	//Create the zero-initialised pseudo_fs_context object.
	pctx = kzalloc(sizeof(*pctx), GFP_KERNEL);
	if (!likely(pctx))
		return pctx;

	pctx->magic = magic;

	//Associate the pseudo context with the generic fs_context.
	fc->fs_private = pctx;
	//These context operations are used later (their get_tree is invoked by
	//vfs_get_tree()).
	fc->ops = &pseudo_fs_context_ops;
	fc->sb_flags |= SB_NOUSER;
	fc->global = true;

	return pctx;
}

//Create a mount from a file-system context.
//Returns the vfsmount built by vfs_create_mount(), or ERR_PTR(err) when
//vfs_get_tree() fails.
struct vfsmount *fc_mount(struct fs_context *fc)
{
	//Obtain the superblock (and root dentry) for this context.
	int ret = vfs_get_tree(fc);

	if (ret)
		return ERR_PTR(ret);

	//Release the write side of s_umount on the superblock behind fc->root.
	up_write(&fc->root->d_sb->s_umount);

	//Create the mount node for the file system.
	return vfs_create_mount(fc);
}

int vfs_get_tree(struct fs_context *fc)
{
	struct super_block *sb;
	int error;

	if (fc->root)
		return -EBUSY;

	error = fc->ops->get_tree(fc);
	//执行get_tree指针函数

	if (error < 0)
		return error;

	if (!fc->root) {
		pr_err("Filesystem %s get_tree() didn't set fc->root\n", fc->fs_type->name);
		BUG();
	}

	sb = fc->root->d_sb;
	WARN_ON(!sb->s_bdi);

	smp_wmb();
	sb->s_flags |= SB_BORN;

	error = secturity_sb_set_mnt_opts(sb, fc->security, 0, NULL);
	if (unlikely(error)) {
		fc_drop_locked(fc);
		return error;
	}

	WARN((sb->s_maxbytes < 0), "%s set sb->s maxbytes to negative value (%lld)\n", fc->fs_type->name, sb->s_maxbytes);
	return 0;
}

//在上述代码分析过程中(见init_pseudo()),可知:
//	fc->ops = &pseudo_fs_context_ops;

static const struct fs_context_operations pseudo_fs_context_ops = {
	.free = pseudo_fs_free;
	.get_tree = pseudo_fs_get_tree;
};

//get_tree operation of pseudo_fs_context_ops: delegates to get_tree_nodev()
//with pseudo_fs_fill_super() as the superblock fill callback and returns its
//result unchanged.
static int pseudo_fs_get_tree(struct fs_context *fc)
{
	return get_tree_nodev(fc, pseudo_fs_fill_super);
}

//Get a superblock using the vfs_get_independent_super keying, i.e.
//vfs_get_super() is asked for a superblock of its own rather than a shared
//one. fill_super is forwarded as the callback that populates a new superblock.
int get_tree_nodev(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc))
{
	return vfs_get_super(fc, vfs_get_independent_super, fill_super);
}

int vfs_get_super(struct fs_context *fc, enum vfs_get_super_keying keying, int (*fill_super)(struct super_block *sb, struct fs_context *fc))
{
	int (*test)(struct super_block *, struct fs_context *);
	struct super_block *sb;
	int err;

	switch (keying) {
		case vfs_get_single_super:
		case vfs_get_signle_refconf_super:
			test = test_signle_super;
			break;
		case vfs_get_keyed_super:
			test = test_keyed_super;
			break;
		case vfs_get_independent_super:	
		//该函数主要为虚拟文件系统获取独立的超级块
			test = NULL;
			break;
		default:
			BUG();
	}

	sb = sget_fc(fc, test, set_anon_super_fc);
	//获取超级块

	if (IS_ERR(sb))
		return PTR_ERR(sb);
	
	if (!sb->s_root) {
		err = fill_super(sb, fc);
		if (err)
			goto error;

		sb->s_flags |= SB_ACTIVE;
		fc->root = dget(sb->s_root);
	} else {
		fc->root = dget(sb->s_root);
		if (keying == vfs_get_signle_refconf_super) {
			err = reconfigure_super(fc);
			if (err < 0) {
				dput(fc->root);
				fc->root = NULL;
				goto error;
			}
		}
	}

	return 0;

error:
	deactivate_locked_super(sb);
	return err;
}

struct super_block *sget_fc(struct fs_context *fc, int (*test)(struct super_block *, struct fs_context *), int (*set)(struct super_block *, struct fs_context))
{
	struct super_block *s = NULL;
	struct super_block *old;
	struct user_namespace *user_ns = fc->global ? &init_user_ns : fc->user_ns;
	//通过上边的分析,可知此时fc->global为true,user_ns为&init_user_ns	

	int err;
	
retry:
	spin_lock(&sb_lock);
	if (test) {
	//test此时为空
		hlist_for_each_entry(old, &fc->fs_type->fs_supers, s_instances) {
			if (test(old, fc))
				goto share_extant_sb;
		}
	}
	if (!s) {
		spin_unlock(&sb_lock);
		s = alloc_super(fc->fs_type, fc->sb_flags, user_ns);
		//创建超级块结构体对象

		if (!s)
			return ERR_PTR(-ENOMEM);
		goto retry;
	}

	s->s_fs_info = fc->s_fs_info;
	err = set(s, fc);
	//执行set指针指向的函数,该函数主要为super_block中的dev属性指定设备号

	if (err) {
		s->s_fs_info = NULL;
		spin_unlock(&sb_lock);
		destroy_unused_super(s);
		return ERR_PTR(err);
	}
	fc->s_fs_info = NULL;
	s->s_type = fc->fs_type;
	s->s_iflags |= fc->s_iflags;
	strlcpy(s->s_id, s->s_type->name, sizeof(s->s_id));
	list_add_tail(&s->s_list, &super_blocks);
	//将该超级块添加到全局super_blocks链表当中。

	hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
	spin_unlock(&sb_lock);
	get_filesystem(s->s_type);
	register_shrinker_prepared(&s->s_shrink);
	return s;

share_extant_sb:
	if (user_ns != old->s_user_ns) {
		spin_unlock(&sb_lock);
		destroy_unused_super(s);
		return ERR_PTR(-EBUSY);
	}
	if (!grab_super(old))
		goto retry;
	destroy_unused_super(s);
	return old;
}

//Allocate and initialise a new super_block.
//  type    - file system type, supplies the lockdep keys
//  flags   - initial value of s_flags
//  user_ns - user namespace the superblock belongs to
//Returns the new superblock with s_umount held for writing, or NULL if the
//allocation or any initialisation step fails.
static struct super_block *alloc_super(struct file_system_type *type, int flags, struct user_namespace *user_ns)
{
	//Create the zero-initialised super_block object.
	struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
	static const struct super_operations default_op;
	int i;

	if (!s)
		return NULL;

	//Initialise the list of mounts of this super_block.
	INIT_LIST_HEAD(&s->s_mounts);

	s->s_user_ns = get_user_ns(user_ns);
	init_rwsem(&s->s_umount);
	lockdep_set_class(&s->s_umount, &type->s_umount_key);

	//s_umount is taken for writing here and not released in this function.
	down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);

	if (security_sb_alloc(s))
		goto fail;

	for (i = 0; i < SB_FREEZE_LEVELS; i++) {
		if (__percpu_init_rwsem(&s->s_writers.rw_sem[i], sb_writers_name[i], &type->s_writers_key[i]))
			goto fail;
	}
	//Initialise the wait queue.
	init_waitqueue_head(&s->s_writers.wait_unfrozen);

	s->s_bdi = &noop_backing_dev_info;
	s->s_flags = flags;
	if (s->s_user_ns != &init_user_ns)
		s->s_iflags |= SB_I_NODEV;
	INIT_HLIST_NODE(&s->s_instances);
	INIT_HLIST_BL_HEAD(&s->s_roots);
	mutex_init(&s->s_sync_lock);
	INIT_LIST_HEAD(&s->s_inodes);
	spin_lock_init(&s->s_inode_list_lock);
	INIT_LIST_HEAD(&s->s_inodes_wb);
	spin_lock_init(&s->s_inode_wblist_lock);

	s->s_count = 1;
	atomic_set(&s->s_active, 1);
	mutex_init(&s->s_vfs_rename_mutex);
	lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
	init_rwsem(&s->s_dquot.dqio_sem);
	s->s_maxbytes = MAX_NON_LFS;
	s->s_op = &default_op;
	s->s_time_gran = 1000000000;
	s->s_time_min = TIME64_MIN;
	s->s_time_max = TIME64_MAX;
	s->cleancache_poolid = CLEANCACHE_NO_POOL;

	//Shrinker setup for the superblock's dentry and inode LRU lists.
	s->s_shrink.seeks = DEFAULT_SEEKS;
	s->s_shrink.scan_objects = super_cache_scan;
	s->s_shrink.count_objects = super_cache_count;
	s->s_shrink.batch = 1024;
	s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE;
	if (prealloc_shrinker(&s->s_shrink))
		goto fail;
	if (list_lru_init_memcg(&s->s_dentry_lru, &s->s_shrink))
		goto fail;
	if (list_lru_init_memcg(&s->s_inode_lru, &s->s_shrink))
		goto fail;
	return s;

fail:
	destroy_unused_super(s);
	return NULL;
}

//The "set" callback passed to sget_fc() by vfs_get_super(). The context is
//not used; the call is forwarded to set_anon_super() with NULL data.
int set_anon_super_fc(struct super_block *sb, struct fs_context *fc)
{
	return set_anon_super(sb, NULL);
}

//Assign a device number to the superblock by letting get_anon_bdev() fill
//in s->s_dev. The data argument is ignored. Returns get_anon_bdev()'s result.
int set_anon_super(struct super_block *s, void *data)
{
	return get_anon_bdev(&s->s_dev);
}

//Produce the device number stored in super_block.s_dev.
//  p - where the device number is written on success
//Returns 0 on success, -EMFILE when no ID is left (ida_alloc_range()
//reported -ENOSPC), or any other negative error from ida_alloc_range().
int get_anon_bdev(dev_t *p)
{
	int dev;

	//Allocate an unused minor ID in the range 1 .. (1 << MINORBITS) - 1.
	dev = ida_alloc_range(&unnamed_dev_ida, 1, (1 << MINORBITS) - 1, GFP_ATOMIC);

	if (dev == -ENOSPC)
		dev = -EMFILE;
	if (dev < 0)
		return dev;

	//Build the device number: major 0, minor = allocated ID.
	*p = MKDEV(0, dev);

	return 0;
}

//Populate a freshly created super_block from the pseudo file-system context
//stored in fc->fs_private, and create its root inode and root dentry.
//Returns 0 on success, -ENOMEM if the root inode or root dentry cannot be
//created.
static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc)
{
	struct pseudo_fs_context *ctx = fc->fs_private;
	struct inode *root;

	s->s_maxbytes = MAX_LFS_FILESIZE;
	s->s_blocksize = PAGE_SIZE;
	s->s_blocksize_bits = PAGE_SHIFT;
	s->s_magic = ctx->magic;
	//Use the context's super operations if set, the simple defaults otherwise.
	s->s_op = ctx->ops ?: &simple_super_operations;
	s->s_xattr = ctx->xattr;
	s->s_time_gran = 1;

	//Create the root inode.
	root = new_inode(s);
	if (!root)
		return -ENOMEM;

	root->i_ino = 1;
	//The file type and permission bits belong in i_mode.
	root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
	root->i_atime = root->i_mtime = root->i_ctime = current_time(root);

	//Create the root dentry from the root inode.
	s->s_root = d_make_root(root);
	if (!s->s_root)
		return -ENOMEM;
	s->s_d_op = ctx->dops;
	return 0;
}

//Allocate a new inode for sb and, on success, add it to the superblock's
//inode list. Returns the inode, or NULL if allocation fails.
struct inode *new_inode(struct super_block *sb)
{
	struct inode *node;

	//Prefetch the list lock before the allocation.
	spin_lock_prefetch(&sb->s_inode_list_lock);

	node = new_inode_pseudo(sb);
	if (!node)
		return node;

	inode_sb_list_add(node);
	return node;
}

//Allocate an inode through alloc_inode(), clear its state under i_lock and
//initialise its i_sb_list node; the inode is not added to any list here.
//Returns the inode or NULL.
struct inode *new_inode_pseudo(struct super_block *sb)
{
	struct inode *node = alloc_inode(sb);

	if (!node)
		return node;

	spin_lock(&node->i_lock);
	node->i_state = 0;
	spin_unlock(&node->i_lock);

	INIT_LIST_HEAD(&node->i_sb_list);
	return node;
}

//Allocate an inode for sb, preferring the superblock's own alloc_inode
//operation, and run the common initialisation on it.
//Returns the inode, or NULL if allocation or initialisation fails.
static struct inode *alloc_inode(struct super_block *sb)
{
	const struct super_operations *ops = sb->s_op;
	struct inode *inode;

	if (ops->alloc_inode)
		//For sockfs this calls sock_alloc_inode().
		inode = ops->alloc_inode(sb);
	else
		inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);

	if (!inode)
		return NULL;

	if (unlikely(inode_init_always(sb, inode))) {
		//Initialisation failed: undo the allocation through the
		//superblock's destroy_inode/free_inode operations.
		if (ops->destroy_inode) {
			ops->destroy_inode(inode);
			if (!ops->free_inode)
				return NULL;
		}
		inode->free_inode = ops->free_inode;
		i_callback(&inode->i_rcu);
		return NULL;
	}

	return inode;
}

//在上边的分析过程中,可知super_block->s_op为sockfs_ops:
//	s->s_op = ctx->ops ?: &simple_super_operations;	(见pseudo_fs_fill_super())
//	ctx->ops = &sockfs_ops;				(见sockfs_init_fs_context())

//Super operations of sockfs, installed through ctx->ops in
//sockfs_init_fs_context(). alloc_inode() calls .alloc_inode when a new inode
//is needed on this superblock.
static const struct super_operations sockfs_ops = {
	.alloc_inode = sock_alloc_inode,
	.free_inode = sock_free_inode,
	.statfs = simple_statfs,
};

//alloc_inode operation of sockfs: allocate a socket_alloc object from
//sock_inode_cachep, reset its embedded socket and hand back the embedded
//inode. Returns NULL if the cache allocation fails.
static struct inode *sock_alloc_inode(struct super_block *sb)
{
	//A socket_alloc couples a socket with its inode:
	//struct socket_alloc {
	//	struct socket socket;
	//	struct inode vfs_inode;
	//};
	struct socket_alloc *pair = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);

	if (pair == NULL)
		return NULL;

	//Put the socket into its initial, unconnected state.
	pair->socket.state = SS_UNCONNECTED;
	pair->socket.flags = 0;
	pair->socket.ops = NULL;
	pair->socket.sk = NULL;
	pair->socket.file = NULL;

	//Set up the socket's wait queue bookkeeping.
	init_waitqueue_head(&pair->socket.wq.wait);
	pair->socket.wq.fasync_list = NULL;
	pair->socket.wq_flags = 0;

	return &pair->vfs_inode;
}

//通过该函数,可知,socket与inode同时创建,且两者在同一个socket_alloc结构体中。

//Create the root dentry for a root inode.
//Returns the new dentry, or NULL if root_inode is NULL or the dentry cannot
//be allocated; in the latter case the inode reference is dropped with iput().
struct dentry *d_make_root(struct inode *root_inode)
{
	struct dentry *res = NULL;

	if (root_inode) {
		res = d_alloc_anon(root_inode->i_sb);
		if (res)
			d_instantiate(res, root_inode);
		else
			iput(root_inode);
	}
	return res;
}

struct dentry *d_alloc_anon(strcut super_block *sb)
{
	return __d_alloc(sb, NULL);
}

//Allocate and initialise a dentry.
//  sb   - superblock the dentry belongs to
//  name - name to give the dentry; NULL selects slash_name
//Returns the new dentry (reference count 1, no inode, its own parent), or
//NULL if an allocation fails or the d_init operation reports an error.
struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
{
	struct dentry *dentry;
	char *dname;
	int err;

	//Create the dentry object.
	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
	if (!dentry)
		return NULL;

	//Decide where the name is stored: inline in the dentry when it fits,
	//in a separately allocated external_name otherwise.
	dentry->d_iname[DNAME_INLINE_LEN - 1] = 0;
	if (unlikely(!name)) {
		name = &slash_name;
		dname = dentry->d_iname;
	} else if (name->len > DNAME_INLINE_LEN - 1) {
		size_t size = offsetof(struct external_name, name[1]);
		struct external_name *p = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
		if (!p) {
			kmem_cache_free(dentry_cache, dentry);
			return NULL;
		}
		atomic_set(&p->u.count, 1);
		dname = p->name;
	} else {
		dname = dentry->d_iname;
	}

	//Fill in the name: length, hash and the NUL-terminated bytes.
	dentry->d_name.len = name->len;
	dentry->d_name.hash = name->hash;
	memcpy(dname, name->name, name->len);
	dname[name->len] = 0;

	//Publish the name pointer only after the bytes have been written.
	smp_store_release(&dentry->d_name.name, dname);

	dentry->d_lockref.count = 1;
	dentry->d_flags = 0;
	spin_lock_init(&dentry->d_lock);
	seqcount_spinlock_init(&dentry->d_seq, &dentry->d_lock);
	dentry->d_inode = NULL;
	dentry->d_parent = dentry;
	dentry->d_sb = sb;
	dentry->d_op = NULL;
	dentry->d_fsdata = NULL;
	INIT_HLIST_BL_NODE(&dentry->d_hash);
	INIT_LIST_HEAD(&dentry->d_lru);
	INIT_LIST_HEAD(&dentry->d_subdirs);
	INIT_HLIST_NODE(&dentry->d_u.d_alias);
	INIT_LIST_HEAD(&dentry->d_child);
	d_set_d_op(dentry, dentry->d_sb->s_d_op);

	//If the dentry has operations with a d_init hook, run it and unwind
	//the allocations if it fails.
	if (dentry->d_op && dentry->d_op->d_init) {
		err = dentry->d_op->d_init(dentry);
		if (err) {
			if (dname_external(dentry))
				kfree(external_name(dentry));
			kmem_cache_free(dentry_cache, dentry);
			return NULL;
		}
	}

	//Increment the nr_dentry counter on this CPU.
	this_cpu_inc(nr_dentry);

	return dentry;
}

//Create a mount for the superblock and root already attached to fc.
//Returns the embedded vfsmount, ERR_PTR(-EINVAL) if fc->root is not set, or
//ERR_PTR(-ENOMEM) if the mount structure cannot be allocated.
struct vfsmount *vfs_create_mount(struct fs_context *fc)
{
	struct mount *mnt;

	if (!fc->root)
		return ERR_PTR(-EINVAL);

	//The mount is named after the context's source, "none" when unset.
	mnt = alloc_vfsmnt(fc->source ?: "none");
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	if (fc->sb_flags & SB_KERNMOUNT)
		mnt->mnt.mnt_flags = MNT_INTERNAL;

	//The mount holds an active reference on the superblock and a reference
	//on the root dentry.
	atomic_inc(&fc->root->d_sb->s_active);
	mnt->mnt.mnt_sb = fc->root->d_sb;
	mnt->mnt.mnt_root = dget(fc->root);
	//Not attached anywhere yet: the mount is its own parent and its mount
	//point is its own root.
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;

	//Link the mount into the superblock's s_mounts list.
	lock_mount_hash();
	list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
	unlock_mount_hash();
	return &mnt->mnt;
}

此时,便可以获得所需的超级块,随后通过该超级块申请所要使用的inode节点。

至此,sock文件系统基本已经完成了注册与挂载。

从上述代码中,可以看出文件系统中,前期所要使用的结构体有:struct file_system_type, struct fs_context, struct pseudo_fs_context, struct super_block。
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值