接下来我们以 ext4 文件系统的 mount 过程为例,讲解文件系统的
几种数据结构之间的关联。
如果 Linux 内核支持 ext4 文件系统,那么在内核初始化时会调用 static int __init ext4_init_fs(void),
这个函数会通过 register_filesystem(&ext4_fs_type) 把 ext4 文件系统注册到全局的 file_systems 链表中。
注册之后内核就可以识别此文件系统;当要使用 ext4 时,通过 mount 系统调用加载 ext4 的 super block、inode 等信息,
之后就可以进行 ext4 读写了。
/*
 * mount(2) system-call entry point.
 *
 * Copies the four user-space arguments (filesystem type, mount point,
 * device name, option data) into kernel memory, hands them to do_mount(),
 * and then releases the kernel copies in reverse order of acquisition.
 *
 * Returns do_mount()'s result, or the negative error reported by whichever
 * copy step failed.
 */
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
		char __user *, type, unsigned long, flags, void __user *, data)
{
	char *fstype_copy;
	struct filename *mountpoint;
	char *device_copy;
	unsigned long options_page;
	int rc;

	/* Filesystem type name: nothing has been acquired yet on failure. */
	rc = copy_mount_string(type, &fstype_copy);
	if (rc < 0)
		return rc;

	/* Path name of the mount point. */
	mountpoint = getname(dir_name);
	if (IS_ERR(mountpoint)) {
		rc = PTR_ERR(mountpoint);
		goto free_fstype;
	}

	/* Name of the device holding the filesystem, e.g. /dev/sda1. */
	rc = copy_mount_string(dev_name, &device_copy);
	if (rc < 0)
		goto put_mountpoint;

	/* Mount option data. */
	rc = copy_mount_options(data, &options_page);
	if (rc < 0)
		goto free_device;

	/* The actual mount work is done by do_mount(). */
	rc = do_mount(device_copy, mountpoint->name, fstype_copy, flags,
		      (void *)options_page);

	free_page(options_page);
free_device:
	kfree(device_copy);
put_mountpoint:
	putname(mountpoint);
free_fstype:
	kfree(fstype_copy);
	return rc;
}
/*
 * do_mount - validate a mount request and dispatch it to the right handler.
 * @dev_name:  device (source) name, handed through to the handlers
 * @dir_name:  path of the mount point; must be a non-empty string that is
 *             NUL-terminated within PAGE_SIZE bytes
 * @type_page: filesystem type name
 * @flags:     MS_* flags supplied by the caller
 * @data_page: filesystem-specific option page, may be NULL
 *
 * Resolves @dir_name, runs the security and permission checks, converts the
 * per-mountpoint MS_* bits into MNT_* flags, and then calls exactly one of
 * do_remount(), do_loopback(), do_change_type(), do_move_mount() or
 * do_new_mount() depending on @flags.
 *
 * Returns the selected handler's result, or a negative errno from the
 * validation steps (-EINVAL, -EPERM, or whatever kern_path() /
 * security_sb_mount() reported).
 */
long do_mount(const char *dev_name, const char *dir_name,
		const char *type_page, unsigned long flags, void *data_page)
{
	struct path path;
	int retval = 0;
	int mnt_flags = 0;

	/* Discard magic */
	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
		flags &= ~MS_MGC_MSK;

	/* Basic sanity checks */
	if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
		return -EINVAL;

	/* Force NUL termination of the option page. */
	if (data_page)
		((char *)data_page)[PAGE_SIZE - 1] = 0;

	/*
	 * Trace the request at debug level only, and only once dir_name has
	 * been validated: this is not an error condition, so it must not be
	 * logged at KERN_ERR on every mount.
	 */
	pr_debug("dev_name:%s dir_name:%s \n", dev_name, dir_name);

	/* ... and get the mountpoint: resolve dir_name into a struct path */
	retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
	if (retval)
		return retval;

	/* Security-module hook first, then the may_mount() permission check. */
	retval = security_sb_mount(dev_name, &path,
				   type_page, flags, data_page);
	if (!retval && !may_mount())
		retval = -EPERM;
	if (retval)
		goto dput_out;

	/* Default to relatime unless overridden */
	if (!(flags & MS_NOATIME))
		mnt_flags |= MNT_RELATIME;

	/* Separate the per-mountpoint flags */
	if (flags & MS_NOSUID)
		mnt_flags |= MNT_NOSUID;
	if (flags & MS_NODEV)
		mnt_flags |= MNT_NODEV;
	if (flags & MS_NOEXEC)
		mnt_flags |= MNT_NOEXEC;
	if (flags & MS_NOATIME)
		mnt_flags |= MNT_NOATIME;
	if (flags & MS_NODIRATIME)
		mnt_flags |= MNT_NODIRATIME;
	if (flags & MS_STRICTATIME)
		mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
	if (flags & MS_RDONLY)
		mnt_flags |= MNT_READONLY;

	/* The default atime for remount is preservation */
	if ((flags & MS_REMOUNT) &&
	    ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
		       MS_STRICTATIME)) == 0)) {
		mnt_flags &= ~MNT_ATIME_MASK;
		mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
	}

	/*
	 * Strip the bits that were either translated into mnt_flags above or
	 * are cleared here before the flags reach the handlers.
	 */
	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME | MS_KERNMOUNT |
		   MS_STRICTATIME);

	/* Dispatch: the first matching request type wins. */
	if (flags & MS_REMOUNT)
		retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
				    data_page);
	else if (flags & MS_BIND)
		retval = do_loopback(&path, dev_name, flags & MS_REC);
	else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		retval = do_change_type(&path, flags);
	else if (flags & MS_MOVE)
		retval = do_move_mount(&path, dev_name);
	else
		retval = do_new_mount(&path, type_page, flags, mnt_flags,
				      dev_name, data_page);
dput_out:
	path_put(&path);
	return retval;
}
/*
 * do_new_mount - mount a filesystem that is not yet attached at @path.
 * @path:      resolved mount point
 * @fstype:    filesystem type name; NULL is rejected with -EINVAL
 * @flags:     MS_* flags forwarded to vfs_kern_mount()
 * @mnt_flags: MNT_* flags to set on the new mount
 * @name:      device (source) name forwarded to vfs_kern_mount()
 * @data:      filesystem-specific option data
 *
 * Looks up the filesystem type, lets vfs_kern_mount() create the mount
 * (which calls the filesystem's own ->mount() via mount_fs()), and finally
 * attaches the new mount at @path with do_add_mount().
 *
 * Returns 0 on success or a negative errno (-EINVAL, -ENODEV, -EPERM, or
 * the error reported by vfs_kern_mount() / do_add_mount()).
 */
static int do_new_mount(struct path *path, const char *fstype, int flags,
			int mnt_flags, const char *name, void *data)
{
	struct file_system_type *type;
	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
	struct vfsmount *mnt;
	int err;

	if (!fstype)
		return -EINVAL;

	/* Look the filesystem type up by name. */
	type = get_fs_type(fstype);
	if (!type)
		return -ENODEV;

	/* Debug-level trace only: a successful type lookup is not an error. */
	pr_debug("fs type:%s\n", type->name);

	if (user_ns != &init_user_ns) {
		if (!(type->fs_flags & FS_USERNS_MOUNT)) {
			put_filesystem(type);
			return -EPERM;
		}
		/* Only in special cases allow devices from mounts
		 * created outside the initial user namespace.
		 */
		if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
			flags |= MS_NODEV;
			mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
		}
	}

	/*
	 * Allocate the mount and call the filesystem-specific mount routine
	 * (see vfs_kern_mount() / mount_fs()).
	 */
	mnt = vfs_kern_mount(type, flags, name, data);

	/* The type supports subtypes and the superblock has none set yet. */
	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
	    !mnt->mnt_sb->s_subtype)
		mnt = fs_set_subtype(mnt, fstype);

	/* Drop the type reference on both the success and the error path. */
	put_filesystem(type);
	if (IS_ERR(mnt))
		return PTR_ERR(mnt);

	/* Attach the new mount at the mount point. */
	err = do_add_mount(real_mount(mnt), path, mnt_flags);
	if (err)
		mntput(mnt);
	return err;
}
/*
 * vfs_kern_mount - create a mount for a filesystem of the given type.
 * @type:  filesystem type; NULL yields ERR_PTR(-ENODEV)
 * @flags: MS_* flags; MS_KERNMOUNT marks the mount MNT_INTERNAL
 * @name:  device (source) name, passed to alloc_vfsmnt() and mount_fs()
 * @data:  filesystem-specific option data, passed to mount_fs()
 *
 * Allocates a struct mount, asks mount_fs() for the filesystem's root
 * dentry, wires the mount to that root and its superblock, and links the
 * mount into the superblock's s_mounts list.
 *
 * Returns the embedded struct vfsmount, or an ERR_PTR() on failure.
 */
struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct mount *new_mnt;
	struct dentry *fs_root;

	if (!type)
		return ERR_PTR(-ENODEV);

	/* Allocate and initialise the struct mount. */
	new_mnt = alloc_vfsmnt(name);
	if (!new_mnt)
		return ERR_PTR(-ENOMEM);

	if (flags & MS_KERNMOUNT)
		new_mnt->mnt.mnt_flags = MNT_INTERNAL;

	/* Run the filesystem-specific mount routine. */
	fs_root = mount_fs(type, flags, name, data);
	if (IS_ERR(fs_root)) {
		free_vfsmnt(new_mnt);
		return ERR_CAST(fs_root);
	}

	/*
	 * A fresh mount is its own parent and its mount point is its own
	 * root; both are set here before anything attaches it elsewhere.
	 */
	new_mnt->mnt.mnt_root = fs_root;
	new_mnt->mnt.mnt_sb = fs_root->d_sb;
	new_mnt->mnt_mountpoint = new_mnt->mnt.mnt_root;
	new_mnt->mnt_parent = new_mnt;

	/* Link the mount into the superblock's s_mounts list. */
	br_write_lock(&vfsmount_lock);
	list_add_tail(&new_mnt->mnt_instance, &fs_root->d_sb->s_mounts);
	br_write_unlock(&vfsmount_lock);

	return &new_mnt->mnt;
}
/*
 * mount_fs - call a filesystem type's ->mount() and run the security hooks.
 * @type:  filesystem type whose ->mount() is invoked
 * @flags: MS_* flags, passed to ->mount() and security_sb_kern_mount()
 * @name:  device (source) name, passed to ->mount()
 * @data:  filesystem-specific option data, may be NULL
 *
 * Returns the root dentry produced by ->mount() on success, or an
 * ERR_PTR() carrying the error from the security hooks or ->mount().
 */
struct dentry *
mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
{
struct dentry *root;
struct super_block *sb;
char *secdata = NULL;
int error = -ENOMEM;
/*
 * Unless the type declares its mount data as binary, give the security
 * layer a copy of it. A failed allocation leaves error at -ENOMEM.
 */
if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
secdata = alloc_secdata();
if (!secdata)
goto out;
error = security_sb_copy_data(data, secdata);
if (error)
goto out_free_secdata;
}
// Filesystem-specific mount routine; for ext4 this is the ->mount member of
// the ext4_fs_type that was registered at initialisation time.
root = type->mount(type, flags, name, data);// root dentry of the mounted filesystem
if (IS_ERR(root)) {
error = PTR_ERR(root);
goto out_free_secdata;
}
sb = root->d_sb;
/* Sanity checks on the superblock returned by ->mount(). */
BUG_ON(!sb);
WARN_ON(!sb->s_bdi);
WARN_ON(sb->s_bdi == &default_backing_dev_info);
sb->s_flags |= MS_BORN;
error = security_sb_kern_mount(sb, flags, secdata);
if (error)
goto out_sb;
/*
 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
 * but s_maxbytes was an unsigned long long for many releases. Throw
 * this warning for a little while to try and catch filesystems that
 * violate this rule.
 */
WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
"negative value (%lld)\n", type->name, sb->s_maxbytes);
/*
 * Release the write side of s_umount; presumably ->mount() returned with
 * it held (the error path below uses deactivate_locked_super()).
 */
up_write(&sb->s_umount);
free_secdata(secdata);
return root;
/* Error unwinding, in reverse order of acquisition. */
out_sb:
dput(root);
deactivate_locked_super(sb);
out_free_secdata:
free_secdata(secdata);
out:
return ERR_PTR(error);
}
/*
 * do_add_mount - attach a newly created mount at a mount point.
 * @newmnt:    the newly created mount instance
 * @path:      the mount point; may be updated by lock_mount()
 * @mnt_flags: MNT_* flags to store in @newmnt (MNT_SHARED, MNT_WRITE_HOLD
 *             and MNT_INTERNAL are masked off first)
 *
 * Returns 0 on success; -EBUSY when the same superblock is already mounted
 * at this exact point; -EINVAL when the parent mount fails check_mnt()
 * (outside the shrinkable-automount exception) or the new root is a
 * symlink; otherwise the error from lock_mount() or graft_tree().
 */
static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
{
struct mountpoint *mp;
struct mount *parent;
int err;
mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
// This is more than taking a lock: if filesystems are already stacked on
// path, lock_mount() walks to the root of the most recently mounted one,
// which is the mount point the new filesystem must be attached to.
mp = lock_mount(path);
if (IS_ERR(mp))
return PTR_ERR(mp);
parent = real_mount(path->mnt);// struct mount that the mount point belongs to
err = -EINVAL;
if (unlikely(!check_mnt(parent))) {
/* that's acceptable only for automounts done in private ns */
if (!(mnt_flags & MNT_SHRINKABLE))
goto unlock;
/* ... and for those we'd better have mountpoint still alive */
if (!parent->mnt_ns)
goto unlock;
}
/* Refuse the same filesystem on the same mount point */
err = -EBUSY;
// Same superblock, and the mount point is the root of the mount it lives in.
if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
path->mnt->mnt_root == path->dentry)
goto unlock;
/* A mount whose root is a symbolic link is rejected. */
err = -EINVAL;
if (S_ISLNK(newmnt->mnt.mnt_root->d_inode->i_mode))
goto unlock;
newmnt->mnt.mnt_flags = mnt_flags;
// Graft newmnt into the mount tree under parent at mp.
err = graft_tree(newmnt, parent, mp);
unlock:
unlock_mount(mp);
return err;
}
/*
 * lock_mount - lock the topmost mount point reachable from @path.
 * @path: mount point to start from; updated in place when something is
 *        already mounted on it
 *
 * Loops until it reaches a dentry on which lookup_mnt() finds nothing
 * mounted, then returns a mountpoint for that dentry.
 *
 * On success the function returns with that dentry's inode i_mutex and the
 * namespace lock still held. On failure it returns an ERR_PTR() with both
 * released (-ENOENT when cant_mount() rejects the dentry, or the error from
 * new_mountpoint()).
 */
static struct mountpoint *lock_mount(struct path *path)
{
struct vfsmount *mnt;
struct dentry *dentry = path->dentry;
retry:
mutex_lock(&dentry->d_inode->i_mutex);
if (unlikely(cant_mount(dentry))) {
mutex_unlock(&dentry->d_inode->i_mutex);
return ERR_PTR(-ENOENT);
}
namespace_lock();
mnt = lookup_mnt(path);
if (likely(!mnt)) {// nothing is mounted on dentry: create a mountpoint for it and return
struct mountpoint *mp = new_mountpoint(dentry);
if (IS_ERR(mp)) {
namespace_unlock();
mutex_unlock(&dentry->d_inode->i_mutex);
return mp;
}
/* Success: both locks stay held for the caller. */
return mp;
}
/* Something is mounted here: drop both locks and the old path reference. */
namespace_unlock();
mutex_unlock(&path->dentry->d_inode->i_mutex);
path_put(path);
// lookup_mnt() returned non-NULL, i.e. it found a child mount sitting on
// this dentry; make that child mount the new path->mnt.
path->mnt = mnt;
// The root dentry of the child mount becomes the new candidate mount point.
dentry = path->dentry = dget(mnt->mnt_root);
// Jump back to retry and repeat the lookup with the updated path, in case
// further mounts are stacked on top. The resulting chain is
// p->C1->C2->C3: seen globally, a later mount covers the earlier ones.
goto retry;
}
/*
 * __lookup_mnt - find the mount attached at a given <mount, dentry> pair.
 * @mnt:    parent mount that owns the mount point
 * @dentry: dentry of the mount point
 * @dir:    walk direction through the hash chain: non-zero follows ->next,
 *          zero follows ->prev
 *
 * Walks the mount_hashtable chain selected by hash(@mnt, @dentry) and
 * returns the first entry whose parent is @mnt and whose mount point is
 * @dentry, i.e. a child mount of @mnt sitting on @dentry. Returns NULL
 * when the walk comes back to the chain head without a match.
 */
struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
			      int dir)
{
	struct list_head *bucket = mount_hashtable + hash(mnt, dentry);
	struct list_head *pos;

	for (pos = dir ? bucket->next : bucket->prev;
	     pos != bucket;
	     pos = dir ? pos->next : pos->prev) {
		/* Entries are chained into the hash table through mnt_hash. */
		struct mount *cand = list_entry(pos, struct mount, mnt_hash);

		if (&cand->mnt_parent->mnt == mnt &&
		    cand->mnt_mountpoint == dentry)
			return cand;
	}

	/* Went all the way round the chain without a match. */
	return NULL;
}