EXT4文件系统学习（14）VFS之VFS inode

最新推荐文章于 2023-08-04 17:05:23 发布

王二车

最新推荐文章于 2023-08-04 17:05:23 发布

阅读量914

点赞数

分类专栏： Linux EXT4文件系统

本文链接：https://blog.csdn.net/TSZ0000/article/details/87634583

版权

Linux 同时被 2 个专栏收录

158 篇文章 24 订阅

订阅专栏

EXT4文件系统

18 篇文章 22 订阅

订阅专栏

不同的文件系统inode也不一样，对inode的操作函数也不一样，VFS inode的作用就是隐藏下面具体文件系统的inode差异，向上层提供统一的接口。分享Linux爱好者的一篇文章：深度剖析 Linux cp 命令的秘密，里面介绍了稀疏文件。

inode

struct inode {
	umode_t			i_mode;
	unsigned short		i_opflags;
	kuid_t			i_uid;
	kgid_t			i_gid;
	unsigned int		i_flags;

#ifdef CONFIG_FS_POSIX_ACL
	struct posix_acl	*i_acl;
	struct posix_acl	*i_default_acl;
#endif

	const struct inode_operations	*i_op; 指定一组对inode的操作函数
	struct super_block	*i_sb;
	struct address_space	*i_mapping;

#ifdef CONFIG_SECURITY
	void			*i_security;
#endif

	/* Stat data, not accessed from path walking */
	unsigned long		i_ino;
	/*
	 * Filesystems may only read i_nlink directly.  They shall use the
	 * following functions for modification:
	 *
	 *    (set|clear|inc|drop)_nlink 硬链接数量
	 *    inode_(inc|dec)_link_count
	 */
	union {
		const unsigned int i_nlink;
		unsigned int __i_nlink;
	};
	dev_t			i_rdev;
	loff_t			i_size;
	struct timespec		i_atime;
	struct timespec		i_mtime;
	struct timespec		i_ctime;
	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
	unsigned short          i_bytes;
	unsigned int		i_blkbits;
	blkcnt_t		i_blocks;

#ifdef __NEED_I_SIZE_ORDERED
	seqcount_t		i_size_seqcount;
#endif

	/* Misc */
	unsigned long		i_state;
	struct mutex		i_mutex;

	unsigned long		dirtied_when;	/* jiffies of first dirtying */
	unsigned long		dirtied_time_when;

	struct hlist_node	i_hash;
	struct list_head	i_wb_list;	/* backing dev IO list */
	struct list_head	i_lru;		/* inode LRU list */
	struct list_head	i_sb_list;
	union {
		struct hlist_head	i_dentry;
		struct rcu_head		i_rcu;
	};
	u64			i_version;
	atomic_t		i_count; 引用计数
	atomic_t		i_dio_count;
	atomic_t		i_writecount;
#ifdef CONFIG_IMA
	atomic_t		i_readcount; /* struct files open RO */
#endif
	const struct file_operations	*i_fop;	指定对文件内容本身的操作函数
	struct file_lock_context	*i_flctx;
	struct address_space	i_data;
	struct list_head	i_devices;
	union {
		struct pipe_inode_info	*i_pipe;特殊文件系统，如字符设备
		struct block_device	*i_bdev;
		struct cdev		*i_cdev;
	};

	__u32			i_generation;

#ifdef CONFIG_FSNOTIFY
	__u32			i_fsnotify_mask; /* all events this inode cares about */
	struct hlist_head	i_fsnotify_marks;
#endif

	void			*i_private; /* fs or device private pointer */
};

结构体大部分成员都是根据磁盘的inode初始化的，详细见上一章。

根据inode编号获取inode结构操作是很繁琐的，因此内核使用hash表让每一个inode通过i_hash链接到hash表。

VFS inode的i_op和i_fop指针操作具体文件系统的inode，但是目录文件、链接文件等是分开的，具体可分为4种情况：

普通文件和目录的inode_operations

普通文件

/*
 * inode_operations table that the article presents for ext4 regular files.
 * It only wires up attribute, extended-attribute, ACL and fiemap handlers;
 * the xattr get/set/remove slots point at the generic_* helpers rather than
 * ext4-specific functions.
 */
const struct inode_operations ext4_file_inode_operations = {
	.setattr	= ext4_setattr,
	.getattr	= ext4_getattr,
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= ext4_listxattr,
	.removexattr	= generic_removexattr,
	.get_acl	= ext4_get_acl,
	.set_acl	= ext4_set_acl,
	.fiemap		= ext4_fiemap,
};

const struct inode_operations ext4_dir_inode_operations = {
	.create		= ext4_create, 创建文件
	.lookup		= ext4_lookup,
	.link		= ext4_link, 硬链接
	.unlink		= ext4_unlink,
	.symlink	= ext4_symlink, 软链接
	.mkdir		= ext4_mkdir, 创建目录
	.rmdir		= ext4_rmdir,
	.mknod		= ext4_mknod, 创建设备节点
	.tmpfile	= ext4_tmpfile,
	.rename2	= ext4_rename2,
	.setattr	= ext4_setattr,
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= ext4_listxattr,
	.removexattr	= generic_removexattr,
	.get_acl	= ext4_get_acl,
	.set_acl	= ext4_set_acl,
	.fiemap         = ext4_fiemap,
};

普通文件和目录都有一个对应的inode结构，在某个目录下创建一个文件，会调用目录对应的inode结构的ext4_create()函数，ext4_create()会调用__ext4_new_inode从磁盘上分配一个空闲的inode，同时初始化ext4内存中的inode结构。

创建一个子目录，使用函数ext4_mkdir，也会调用__ext4_new_inode从磁盘上分配一个空闲的inode，同时初始化ext4内存中的inode结构。

链接文件的inode_operations

普通文件的定位：找到direntry，然后从direntry中的文件名读出inode号，最后读出inode信息。

硬链接文件：因为硬链接文件的inode号与源文件一致的，所以定位比较简单。

软链接文件：需要特殊处理，由于inode号不一致，所以需要先根据软链接自身的inode读出源文件的路径，再根据路径定位出目标文件。如果目标文件路径小于60字节，那么称为快速符号链接（Fast Symbolic Link），此时路径信息直接保存在inode内部的i_block区域（共60字节），不需要额外分配数据块。

const struct inode_operations ext4_fast_symlink_inode_operations = {
	.readlink	= generic_readlink, 在inode的block中读出目标文件路径
	.follow_link    = ext4_follow_fast_link, 把对链接文件的操作直接转到目标文件
	.setattr	= ext4_setattr,
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= ext4_listxattr,
	.removexattr	= generic_removexattr,
};

目标文件路径不小于60字节的，称为普通符号链接，需要根据inode中的数据块地址读出目标文件的路径：

/*
 * inode_operations table that the article presents for ordinary (non-fast)
 * ext4 symbolic links. Compared with the fast-symlink table it uses
 * ext4_follow_link instead of ext4_follow_fast_link and additionally
 * registers a put_link handler.
 */
const struct inode_operations ext4_symlink_inode_operations = {
	.readlink	= generic_readlink,
	.follow_link    = ext4_follow_link,
	.put_link       = ext4_put_link,	/* paired with follow_link; per the article it releases the memory follow_link used */
	.setattr	= ext4_setattr,
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= ext4_listxattr,
	.removexattr	= generic_removexattr,
};

多出一个put_link函数，且follow_link与快速链接不同：follow_link把inode中数据块地址对应的数据读出到内存中解析，处理完成后由put_link来释放内存。

根据前面分析的ext4_fill_super函数中ext4_iget对文件和目录的i_fop赋值也是不一样的，下面继续分析i_fop：

文件的file_operations

const struct file_operations ext4_file_operations = {
	.llseek		= ext4_llseek, 调整文件读写指针
	.read_iter	= generic_file_read_iter,
	.write_iter	= ext4_file_write_iter,
	.unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext4_compat_ioctl, 向设备文件发送ioctl命令
#endif
	.mmap		= ext4_file_mmap, 
	.open		= ext4_file_open,
	.release	= ext4_release_file,
	.fsync		= ext4_sync_file,
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= ext4_fallocate,
};

read使用的是通用文件系统读函数generic_file_read_iter，支持IOCB_DIRECT直接读模式和缓存模式：IOCB_DIRECT模式由open文件时传入的O_DIRECT标志启用，绕过缓存直接对磁盘进行读写；缓存模式在do_generic_file_read函数中体现，函数中会检查数据是否已经缓存，如果没有就会预读取，将数据加载入缓存页。

mmap函数把一个文件内容映射到进程的虚拟地址空间中（利用页表），这样可以通过内存指针p[n]来访问文件内容。

open函数打开文件操作，建立相关的内存管理结构，如inode对象。

release函数减少文件的引用计数，当引用计数为0时，会关闭文件对象，同时释放相关的内存管理结构。

fsync函数把内存中文件内容数据写入磁盘，splice_read/write用于管道操作。

目录的dir_operations

/*
 * file_operations table that the article presents for ext4 directories.
 * .read is wired to the generic generic_read_dir helper, while directory
 * entries are enumerated through .iterate (ext4_readdir).
 */
const struct file_operations ext4_dir_operations = {
	.llseek		= ext4_dir_llseek,
	.read		= generic_read_dir,
	.iterate	= ext4_readdir,		/* per the article: reads the entries out of the directory's data blocks */
	.unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext4_compat_ioctl,
#endif
	.fsync		= ext4_sync_file,
	.release	= ext4_release_dir,
};

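read是一个空函数，为啥呢？因为目录不能像普通文件那样用read()读取内容：generic_read_dir只是直接返回-EISDIR错误，目录内容必须通过readdir/getdents系统调用（即下面的iterate）来读取。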

iterate是readdir函数，从目录的数据块中把目录项读出来，目录项direntry数据块中保存了目录下存储的文件名信息等。