前面四篇我们探索了Linux核心系统——内存管理系统,接下来我们探索Linux文件系统。文件系统提供了一种抽象机制,这种机制将二进制数据抽象为文件的概念,这些二进制数据可以是代码、元数据、音视频数据等。从这个层面看,文件系统像是一种存储架构:通过将存储器上的各种数据抽象分类管理,为上层应用提供统一的存取接口,这简直太棒了!从另一个层面看,由内存管理系统可知,在缓解内存压力时,文件系统将作为内存页的后备存储系统。当然,将文件系统的概念用于内存,便形成了诸如ramfs、procfs等内存文件系统,例如initramfs作为临时文件系统解决了操作系统启动问题,Nice!但是,文件系统各种各样,我们能否以多态形式将诸多文件系统纳入一个统一的框架呢?当然可以:通过VFS(虚拟文件系统)抽象各文件系统的最大共性,我们将众多文件系统纳入统一的框架中。VFS做得还不止这些,它还为字符设备和块设备、进程管道、交互式输入输出等提供了统一的访问方式,虚拟文件系统使得Linux保持良好的包容性。那么Linux是如何做到的,或者说Linux是如何抽象的?
一.关键结构
通过将数据结构inode赋予多重角色来拓展VFS的抽象能力,inode是一组元数据,可以描述文件,目录,链接,管道,字符设备文件,块设备文件等,通过这个顶层抽象将文件,内存,进程,设备驱动等统一起来。
linux_kernel/include/linux/fs.h
/*
 * VFS inode (excerpt; "......" marks members left out of this listing).
 * A single inode holds the metadata for a file, directory, link, pipe, or
 * character/block device node; the anonymous union near the end carries the
 * role-specific pointer.
 */
struct inode {
umode_t i_mode; // file type and permission bits
unsigned short i_opflags;
kuid_t i_uid; // owner user ID
kgid_t i_gid; // owner group ID
unsigned int i_flags;
const struct inode_operations *i_op; // inode operation table; which one depends on the role this inode plays
struct super_block *i_sb; // superblock of the filesystem this inode belongs to
struct address_space *i_mapping; // address-space mapping for this inode's pages
unsigned long i_ino; // inode number
union {
const unsigned int i_nlink; // link count (read-only view)
unsigned int __i_nlink; // same storage, writable view
};
dev_t i_rdev;
loff_t i_size;
struct timespec64 i_atime; // last access time
struct timespec64 i_mtime; // last modification time
struct timespec64 i_ctime; // last inode change time (not creation time)
unsigned short i_bytes;
......
/* Misc */
unsigned long i_state; // inode state
struct hlist_node i_hash;
struct list_head i_io_list; // backing-device I/O list
struct list_head i_lru; /* inode LRU list */
struct list_head i_sb_list;
struct list_head i_wb_list; /* backing dev writeback list */
union {
struct hlist_head i_dentry;
struct rcu_head i_rcu;
};
union {
const struct file_operations *i_fop; // file operation table
void (*free_inode)(struct inode *);
};
struct file_lock_context *i_flctx;
struct address_space i_data;
struct list_head i_devices;
union {
struct pipe_inode_info *i_pipe;
struct block_device *i_bdev; // used when the inode is a block device file
struct cdev *i_cdev; // used when the inode is a character device file
char *i_link;
unsigned i_dir_seq;
};
......
} __randomize_layout;
linux_kernel/include/linux/fs.h
文件系统的各种常规操作,如文件创建,链接,重命名,创建删除目录,文件映射,访问权限等
struct inode_operations {
struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
int (*create) (struct inode *,struct dentry *, umode_t, bool);
int (*link) (struct dentry *,struct inode *,struct dentry *);
int (*unlink) (struct inode *,struct dentry *);
int (*symlink) (struct inode *,struct dentry *,const char *);
int (*mkdir) (struct inode *,struct dentry *,umode_t);
int (*rmdir) (struct inode *,struct dentry *);
int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
int (*rename) (struct inode *, struct dentry *,
......
} ____cacheline_aligned;
二.特定于文件系统
linux_kernel/include/linux/fs.h
超级块对应于特定文件系统的超级块,保存了相关元数据
/*
 * In-memory superblock of one mounted filesystem instance (excerpt;
 * "......" marks members left out of this listing). Holds the
 * filesystem-wide metadata and the lists of its inodes.
 */
struct super_block {
struct list_head s_list; /* Keep this first */
dev_t s_dev; /* search index; _not_ kdev_t */
unsigned char s_blocksize_bits; // block size expressed in bits (log2 of s_blocksize)
unsigned long s_blocksize; // block size in bytes
loff_t s_maxbytes; // maximum file size this filesystem can handle
struct file_system_type *s_type; // filesystem type
const struct super_operations *s_op; // superblock operation table
const struct dquot_operations *dq_op;
const struct quotactl_ops *s_qcop;
const struct export_operations *s_export_op;
unsigned long s_flags;
unsigned long s_iflags; /* internal SB_I_* flags */
unsigned long s_magic;
struct dentry *s_root;
struct rw_semaphore s_umount;
int s_count;
struct hlist_bl_head s_roots; // alternate root dentries (upstream notes these are for NFS)
struct list_head s_mounts; /* list of mounts; _not_ for fs use */
struct block_device *s_bdev; // block device the filesystem lives on
struct backing_dev_info *s_bdi;
......
struct list_head s_inodes; /* all inodes */
struct list_head s_inodes_wb; /* writeback inodes */
} __randomize_layout;
linux_kernel/include/linux/fs.h
超级块操作结构体,由各文件系统实现
/*
 * Superblock operation table (excerpt; "......" marks members left out of
 * this listing). Implemented by each concrete filesystem.
 */
struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb);
void (*destroy_inode)(struct inode *);
void (*free_inode)(struct inode *);
void (*dirty_inode) (struct inode *, int flags);
......
long (*free_cached_objects)(struct super_block *,
struct shrink_control *);
};
linux_kernel/include/linux/fs.h
文件系统类型,同时维护多种文件系统关系
/*
 * Descriptor of one filesystem type. Types are chained through `next`,
 * and `fs_supers` heads the list of superblocks of this type.
 */
struct file_system_type {
const char *name; // filesystem type name
int fs_flags;
int (*init_fs_context)(struct fs_context *); // initialisation hook
const struct fs_parameter_description *parameters;
struct dentry *(*mount) (struct file_system_type *, int,
const char *, void *); // mount operation
void (*kill_sb) (struct super_block *);
struct module *owner; // module that provides this filesystem
struct file_system_type * next; // next entry in the singly linked list of types
struct hlist_head fs_supers;
struct lock_class_key s_lock_key;
struct lock_class_key s_umount_key;
struct lock_class_key s_vfs_rename_key;
struct lock_class_key s_writers_key[SB_FREEZE_LEVELS];
struct lock_class_key i_lock_key;
struct lock_class_key i_mutex_key;
struct lock_class_key i_mutex_dir_key;
};
linux_kernel/fs/mount.h
维护多个文件系统之间的挂载关系
/*
 * One mounted filesystem instance and its place in the mount tree
 * (excerpt; the run of dots marks members left out of this listing).
 */
struct mount {
struct hlist_node mnt_hash;
struct mount *mnt_parent; // parent mount
struct dentry *mnt_mountpoint; // dentry of the mount point
struct vfsmount mnt;
union {
struct rcu_head mnt_rcu;
struct llist_node mnt_llist;
};
struct list_head mnt_mounts; /* list of children, anchored here */
struct list_head mnt_child; // linkage in the child-mount list
struct list_head mnt_instance; /* mount instance on sb->s_mounts */
const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
struct list_head mnt_list;
struct list_head mnt_expire; /* link in fs-specific expiry list */
struct list_head mnt_share; /* circular list of shared mounts */
struct list_head mnt_slave_list; // list of slave mounts
struct list_head mnt_slave; /* slave list entry */
struct mount *mnt_master; /* slave is on master->mnt_slave_list */
struct mnt_namespace *mnt_ns; /* containing namespace */
struct mountpoint *mnt_mp; /* where is it mounted */
.......
} __randomize_layout;
linux_kernel/include/linux/mount.h
每个挂载点对应一个dentry和超级块
/*
 * Public part of a mount: every mount has one root dentry and one
 * superblock.
 */
struct vfsmount {
struct dentry *mnt_root; // root dentry of the mounted tree
struct super_block *mnt_sb; // superblock of the mounted filesystem
int mnt_flags;
} __randomize_layout;
三.特定于内存系统
linux_kernel/include/linux/fs.h
/*
 * Link between an inode and the memory-management side: tracks the pages
 * and memory mappings that belong to `host`.
 */
struct address_space {
struct inode *host; // owning inode
struct xarray i_pages;
gfp_t gfp_mask;
atomic_t i_mmap_writable;
struct rb_root_cached i_mmap; // tree of memory mappings
struct rw_semaphore i_mmap_rwsem;
unsigned long nrpages; // number of pages
unsigned long nrexceptional;
pgoff_t writeback_index;
const struct address_space_operations *a_ops; // address-space operation table
struct list_head private_list;
void *private_data;
} __attribute__((aligned(sizeof(long)))) __randomize_layout;
linux_kernel/include/linux/fs.h
与内存系统交互,如将内存中暂时不用的内存页写到后备缓冲器中,将内存页写到交换区中,以及内存页释放,回写等操作
/*
 * Operations through which an address space interacts with memory
 * management: writing pages out, reading them in, marking them dirty and
 * swap-file support (excerpt; "......" marks members left out of this
 * listing).
 */
struct address_space_operations {
int (*writepage)(struct page *page, struct writeback_control *wbc);
int (*readpage)(struct file *, struct page *);
/* Write back some dirty pages from this mapping. */
int (*writepages)(struct address_space *, struct writeback_control *);
/* Set a page dirty. Return true if this dirtied it */
int (*set_page_dirty)(struct page *page);
......
/* swapfile support */
int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
sector_t *span);
void (*swap_deactivate)(struct file *file);
};
四.特定于进程
linux_kernel/include/linux/sched.h
/*
 * Process descriptor (excerpt; "......" marks members left out of this
 * listing). Only the two filesystem-related members are shown.
 */
struct task_struct {
	......
	struct fs_struct *fs; // filesystem information of the process
	struct files_struct *files; // open-file information of the process
	......
};
linux_kernel/include/linux/fdtable.h
/*
 * Per-process open-file table (excerpt; "......" marks members left out
 * of this listing).
 */
struct files_struct {
atomic_t count;
bool resize_in_progress;
wait_queue_head_t resize_wait;
struct fdtable __rcu *fdt; // RCU-annotated pointer to the descriptor table, for lock-free reads
struct fdtable fdtab; // embedded file descriptor table
......
};
linux_kernel/include/linux/fdtable.h
/*
 * File descriptor table: maps descriptor numbers to open files and tracks
 * per-descriptor state.
 */
struct fdtable {
unsigned int max_fds;
struct file __rcu **fd; // array of pointers; each entry points to one struct file
unsigned long *close_on_exec; // descriptors marked close-on-exec
unsigned long *open_fds; // descriptors that are currently open
unsigned long *full_fds_bits;
struct rcu_head rcu;
};
Linux使用结构体file表示文件。文件就是字节序列,仅此而已;每个I/O设备,包括磁盘、键盘、显示器,都可看作是文件。
linux_kernel/include/linux/fs.h
/*
 * An open file (excerpt; "......" marks members left out of this
 * listing).
 */
struct file {
union {
struct llist_node fu_llist;
struct rcu_head fu_rcuhead;
} f_u;
struct path f_path; // path of the file
struct inode *f_inode; // inode holding the file's metadata
const struct file_operations *f_op; // file operation table
......
struct address_space *f_mapping;
errseq_t f_wb_err;
} __randomize_layout
__attribute__((aligned(4)));
结构体file_operations是文件的操作结构,具体由各类型文件去实现,例如块设备文件、ext2文件;在VFS层面只有宏观的操作结构定义。
linux_kernel/include/linux/fs.h
/*
 * File operation table (excerpt; "......" marks members left out of this
 * listing). The VFS only defines the shape; each file type, such as block
 * device files or ext2 files, fills in its own handlers.
 */
struct file_operations {
struct module *owner;
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
int (*iopoll)(struct kiocb *kiocb, bool spin);
......
int (*fadvise)(struct file *, loff_t, loff_t, int);
} __randomize_layout;
linux_kernel/fs/block_dev.c
特定于块设备的操作
/*
 * File operation table used for block device files. Entries are grouped
 * by purpose; with designated initializers the order does not affect the
 * resulting object.
 */
const struct file_operations def_blk_fops = {
	/* open / close */
	.open		= blkdev_open,
	.release	= blkdev_close,

	/* positioning and data transfer */
	.llseek		= block_llseek,
	.read_iter	= blkdev_read_iter,
	.write_iter	= blkdev_write_iter,
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.iopoll		= blkdev_iopoll,

	/* mapping, persistence and space management */
	.mmap		= generic_file_mmap,
	.fsync		= blkdev_fsync,
	.fallocate	= blkdev_fallocate,

	/* ioctl entry points */
	.unlocked_ioctl	= block_ioctl,
#if defined(CONFIG_COMPAT)
	.compat_ioctl	= compat_blkdev_ioctl,
#endif
};
linux_kernel/fs/ext2/file.c
特定于ext2文件系统中普通文件的操作
/*
 * File operation table used for ext2 files. Entries are grouped by
 * purpose; with designated initializers the order does not affect the
 * resulting object.
 */
const struct file_operations ext2_file_operations = {
	/* open / close */
	.open			= dquot_file_open,
	.release		= ext2_release_file,

	/* positioning and data transfer */
	.llseek			= generic_file_llseek,
	.read_iter		= ext2_file_read_iter,
	.write_iter		= ext2_file_write_iter,
	.splice_read		= generic_file_splice_read,
	.splice_write		= iter_file_splice_write,

	/* mapping and persistence */
	.mmap			= ext2_file_mmap,
	.get_unmapped_area	= thp_get_unmapped_area,
	.fsync			= ext2_fsync,

	/* ioctl entry point */
	.unlocked_ioctl		= ext2_ioctl,
};
五.特定于缓存
linux_kernel/include/linux/dcache.h
结构体dentry 用于描述文件目录,充当缓存,加快VFS处理速度
struct dentry {
/* RCU lookup touched fields */
unsigned int d_flags; /* protected by d_lock */
seqcount_t d_seq; /* per dentry seqlock */
struct hlist_bl_node d_hash; //用于查找散列表
struct dentry *d_parent; //父目录
struct qstr d_name; //名称
struct inode *d_inode; //目录对应的inode元信息
* negative */
unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
/* Ref lookup also touches following */
struct lockref d_lockref; /* per-dentry lock and refcount */
const struct dentry_operations *d_op; //目录操作
struct super_block *d_sb; //文件系统超级块
unsigned long d_time; /* used by d_revalidate */
void *d_fsdata; //特定于文件系统
......
} __randomize_layout;
linux_kernel/include/linux/dcache.h
dentry_operations是特定于文件系统的操作结构,由具体的文件系统实现
/*
 * Dentry operation table. The hooks are filesystem-specific and are
 * implemented by each concrete filesystem.
 */
struct dentry_operations {
int (*d_revalidate)(struct dentry *, unsigned int);
int (*d_weak_revalidate)(struct dentry *, unsigned int);
int (*d_hash)(const struct dentry *, struct qstr *);
int (*d_compare)(const struct dentry *,
unsigned int, const char *, const struct qstr *);
int (*d_delete)(const struct dentry *);
int (*d_init)(struct dentry *);
void (*d_release)(struct dentry *);
void (*d_prune)(struct dentry *);
void (*d_iput)(struct dentry *, struct inode *);
char *(*d_dname)(struct dentry *, char *, int);
struct vfsmount *(*d_automount)(struct path *);
int (*d_manage)(const struct path *, bool);
struct dentry *(*d_real)(struct dentry *, const struct inode *);
} ____cacheline_aligned;
UNIX操作系统的核心概念之一就是:几乎每个资源都可以表示为一个文件。显然Linux继承了这一观点,文件是对资源的抽象,VFS通过一组数据结构提供了这样一个抽象框架,具体的实现便是去填充这个框架结构。这样,VFS统一了访问接口,降低了与内核其他子系统交互的复杂度,同时也方便了维护管理。