文件系统挂载过程浅析（1）

最新推荐文章于 2024-05-27 17:25:20 发布

weixin_34336292

最新推荐文章于 2024-05-27 17:25:20 发布

阅读量269

点赞数

文章标签：操作系统数据结构与算法

原文链接：http://blog.51cto.com/inkfish/563234

版权

===================================================
作者：ietf AT doit.com.cn
源文件来自于glibc-2.1.2，Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. 和 linux kernel 2.6.20
请在GNU Library General Public License下参考。
引用请注明出处。
===================================================

所有的注释由如下标记标识：
-------ietf add start-------
-------ietf add end -------

一直想跟踪从敲入mount命令开始，命令的参数通过mount工具、经由sys_mount系统调用进入内核，最终实现文件系统挂载的全过程。可惜找了整整一天，竟然没能找到mount这个命令的源代码（mount命令的用户态实现位于util-linux软件包中，不在glibc和内核源码里），只好从系统调用开始了。

1、系统调用的头文件

glibc-2.1.2的sysdeps/unix/sysv/linux/sys/mount.h是用于挂载和卸载Linux文件系统的头文件，其内容如下：

/* Header file for mounting/unmount Linux filesystems.
Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the GNU C Library; see the file COPYING.LIB. If not,
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA. */

/* This is taken from /usr/include/linux/fs.h. */
-------ietf add start-------
给出/usr/include/linux/fs.h中定义方式（2.6.20），以供参考
-------ietf add end -------

#ifndef _SYS_MOUNT_H
#define _SYS_MOUNT_H 1

#include <features.h>
#include <sys/ioctl.h>

-------ietf add start-------
#define BLOCK_SIZE_BITS 10
#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
-------ietf add end -------
#define BLOCK_SIZE 1024
#define BLOCK_SIZE_BITS 10

/* These are the fs-independent mount-flags: up to 16 flags are
   supported */
-------ietf add start-------
#define MS_RDONLY 1 /* Mount read-only */
#define MS_NOSUID 2 /* Ignore suid and sgid bits */
#define MS_NODEV 4 /* Disallow access to device special files */
#define MS_NOEXEC 8 /* Disallow program execution */
#define MS_SYNCHRONOUS 16 /* Writes are synced at once */
#define MS_REMOUNT 32 /* Alter flags of a mounted FS */
#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */
#define MS_DIRSYNC 128 /* Directory modifications are synchronous */
#define MS_NOATIME 1024 /* Do not update access times. */
#define MS_NODIRATIME 2048 /* Do not update directory access times */
-------ietf add end -------
enum
{
MS_RDONLY = 1,  /* Mount read-only. */
#define MS_RDONLY MS_RDONLY
MS_NOSUID = 2,  /* Ignore suid and sgid bits. */
#define MS_NOSUID MS_NOSUID
MS_NODEV = 4,   /* Disallow access to device special files. */
#define MS_NODEV MS_NODEV
MS_NOEXEC = 8,  /* Disallow program execution. */
#define MS_NOEXEC MS_NOEXEC
MS_SYNCHRONOUS = 16,  /* Writes are synced at once. */
#define MS_SYNCHRONOUS MS_SYNCHRONOUS
MS_REMOUNT = 32,  /* Alter flags of a mounted FS. */
#define MS_REMOUNT MS_REMOUNT
MS_MANDLOCK = 64,  /* Allow mandatory locks on an FS. */
#define MS_MANDLOCK MS_MANDLOCK
S_WRITE = 128,  /* Write on file/directory/symlink. */
#define S_WRITE  S_WRITE
S_APPEND = 256,  /* Append-only file. */
#define S_APPEND S_APPEND
S_IMMUTABLE = 512,  /* Immutable file. */
#define S_IMMUTABLE S_IMMUTABLE
MS_NOATIME = 1024,  /* Do not update access times. */
#define MS_NOATIME MS_NOATIME
MS_NODIRATIME = 2048  /* Do not update directory access times. */
#define MS_NODIRATIME MS_NODIRATIME
};

-------ietf add start-------
#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK)
-------ietf add end -------
/* Flags that can be altered by MS_REMOUNT */
#define MS_RMT_MASK (MS_RDONLY | MS_MANDLOCK)

/* Magic mount flag number. Has to be or-ed to the flag values. */
-------ietf add start-------
#define MS_MGC_VAL 0xC0ED0000
#define MS_MGC_MSK 0xffff0000
-------ietf add end -------
#define MS_MGC_VAL 0xc0ed0000 /* Magic flag number to indicate "new" flags */
#define MS_MGC_MSK 0xffff0000 /* Magic flag number mask */

/* The read-only stuff doesn't really belong here, but any other place
   is probably as bad and I don't want to create yet another include
   file. */
-------ietf add start-------
#define BLKROSET   _IO(0x12,93) /* set device read-only (0 = read-write) */
#define BLKROGET   _IO(0x12,94) /* get read-only status (0 = read_write) */
#define BLKRRPART _IO(0x12,95) /* re-read partition table */
#define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */
#define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */
#define BLKRASET   _IO(0x12,98) /* set read ahead for block device */
#define BLKRAGET   _IO(0x12,99) /* get current read ahead setting */
-------ietf add end -------
#define BLKROSET   _IO(0x12, 93) /* Set device read-only (0 = read-write). */
#define BLKROGET   _IO(0x12, 94) /* Get read-only status (0 = read_write). */
#define BLKRRPART _IO(0x12, 95) /* Re-read partition table. */
#define BLKGETSIZE _IO(0x12, 96) /* Return device size. */
#define BLKFLSBUF _IO(0x12, 97) /* Flush buffer cache. */
#define BLKRASET   _IO(0x12, 98) /* Set read ahead for block device. */
#define BLKRAGET   _IO(0x12, 99) /* Get current read ahead setting. */

/* Possible value for FLAGS parameter of `umount2'. */
enum
{
MNT_FORCE = 1 /* Force unmounting. */
#define MNT_FORCE MNT_FORCE
};

__BEGIN_DECLS

/* Mount a filesystem. */
extern int mount __P ((__const char *__special_file, __const char *__dir,
__const char *__fstype, unsigned long int __rwflag,
__const void *__data));

/* Unmount a filesystem. */
extern int umount __P ((__const char *__special_file));

/* Unmount a filesystem. Force unmounting if FLAGS is set to MNT_FORCE. */
extern int umount2 __P ((__const char *__special_file, int __flags));

__END_DECLS

#endif /* _SYS_MOUNT_H */

该文件是从fs.h中摘取的一部分，不过新版的内核有了稍许的更改。

2、系统调用号

根据系统调用名mount和umount，查找对应的系统调用号。以MIPS体系结构为例，在glibc-2.1.2的sysdeps/unix/sysv/linux/mips/sys/syscall.h中定义如下：
/*
* Linux syscalls are in the range from 4000 to 4999
* Hopefully these syscall numbers are unused ... If not everyone using
* statically linked binaries is pretty <censored - the government>. You've
* been warned.
*/
#define SYS_Linux   4000
...... ......
#define SYS_mount   (SYS_Linux + 21)
#define SYS_umount   (SYS_Linux + 22)
即在MIPS上分别为4021和4022（其他体系结构的系统调用号不同）。

3、系统调用在内核的声明

系统调用的函数声明在内核源码的src/include/linux/syscalls.h中，具体如下：

asmlinkage long sys_mount(char __user *dev_name, char __user *dir_name,
    char __user *type, unsigned long flags,
    void __user *data);
-------ietf add start-------
dev_name: 字符串类型，挂载的设备名，如，/dev/sda1
dir_name: 挂载点，如，/mnt/usb
type:  在/proc/filesystems中列出的内核所支持的文件系统格式，如，"ext2", "nfs", "proc"等
flags:  标志参数，参考1中列出的MS_XXXXX类型宏定义
data:  不同文件系统特定的参数表，通常以逗号分隔
返回的错误类型请参考专门的文档详细介绍。
-------ietf add end -------

asmlinkage long sys_umount(char __user *name, int flags);
-------ietf add start-------
name: 同sys_mount中dir_name
flags: 卸载标志，如MNT_FORCE（参考1中umount2的FLAGS参数），与sys_mount中的MS_XXXXX标志不同
-------ietf add end -------

asmlinkage long sys_oldumount(char __user *name);
-------ietf add start-------
name: 同sys_mount中dir_name
-------ietf add end -------
以上三个系统调用分别与1中的mount、umount2、umount三个函数相对应。

4、系统调用的实现

具体实现在内核源码的src/fs/namespace.c中，下面就三个调用分别参考其流程。

4.1 sys_mount
sys_mount主要功能是将用户的命令行参数从用户空间传递到内核空间，并调用do_mount解析参数，完成mount过程。其实现源码如下：

asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name,
char __user * type, unsigned long flags,
void __user * data)
{
int retval;
unsigned long data_page;
unsigned long type_page;
unsigned long dev_page;
char *dir_page;
-------ietf add start-------
将文件系统类型参数传入内核空间，在namespace.c中实现如下：
/*
 * Copy up to one page of mount option data from user space into a newly
 * allocated kernel page.
 *
 * data:  user-space pointer to the option data; may be NULL.
 * where: receives the address of the kernel page, or 0 when data is NULL
 *        or an error occurs.
 *
 * Returns 0 on success (including the data == NULL case), -ENOMEM if no
 * page could be allocated, -EFAULT if not a single byte could be copied.
 * On success with non-NULL data the page is handed to the caller, which is
 * responsible for releasing it (sys_mount does so with free_page()).
 */
int copy_mount_options(const void __user * data, unsigned long *where)
{
int i;
unsigned long page;
unsigned long size;

/* Default result: no page. Stays 0 for NULL data and on every error path. */
*where = 0;
if (!data)
return 0;

if (!(page = __get_free_page(GFP_KERNEL)))
return -ENOMEM;

/* We only care that *some* data at the address the user
* gave us is valid. Just in case, we'll zero
* the remainder of the page.
*/
/* copy_from_user cannot cross TASK_SIZE ! */
/* Clamp the copy length to the end of the user address range and to one page. */
size = TASK_SIZE - (unsigned long)data;
if (size > PAGE_SIZE)
size = PAGE_SIZE;

/*
 * i = number of bytes actually copied, assuming exact_copy_from_user()
 * returns the number of bytes it could NOT copy (its definition is not
 * shown here -- confirm against namespace.c).
 */
i = size - exact_copy_from_user((void *)page, data, size);
if (!i) {
  /* Nothing was readable at the user address: release the page and fail. */
  free_page(page);
  return -EFAULT;
}
/* Partial copy: zero the tail so the caller never sees stale page contents. */
if (i != PAGE_SIZE)
  memset((char *)page + i, 0, PAGE_SIZE - i);
*where = page;
return 0;
}
-------ietf add end -------
retval = copy_mount_options(type, &type_page);
if (retval < 0)
  return retval;
-------ietf add start-------
将挂载点名传入内核空间，并在系统配置了audit（审计）功能时，通过audit_getname对该名字进行审计记录。该函数在namei.c中实现，如下：
/*
 * Copy a path name from user space into a kernel buffer.
 *
 * filename: user-space pointer to the path string.
 *
 * Returns the kernel buffer on success, or an ERR_PTR() value on failure:
 * ERR_PTR(-ENOMEM) when __getname() cannot provide a buffer, otherwise
 * ERR_PTR() of the negative value returned by do_getname().
 * A successful result is released by the caller with putname(), as
 * sys_mount does.
 */
char * getname(const char __user * filename)
{
char *tmp, *result;

/* Assume allocation failure until a buffer has actually been obtained. */
result = ERR_PTR(-ENOMEM);
tmp = __getname();
if (tmp) {
int retval = do_getname(filename, tmp);

  result = tmp;
  if (retval < 0) {
   /* Copy failed: give the buffer back and report do_getname()'s error. */
   __putname(tmp);
   result = ERR_PTR(retval);
  }
}
/* Called on every path, so it may receive an ERR_PTR value as well. */
audit_getname(result);
return result;
}
-------ietf add end -------
dir_page = getname(dir_name);
retval = PTR_ERR(dir_page);
if (IS_ERR(dir_page))
  goto out1;
-------ietf add start-------
将设备名参数传入内核空间
-------ietf add end -------
retval = copy_mount_options(dev_name, &dev_page);
if (retval < 0)
  goto out2;
-------ietf add start-------
将文件系统特定参数表传入内核空间
-------ietf add end -------
retval = copy_mount_options(data, &data_page);
if (retval < 0)

goto out3;

lock_kernel();

-------ietf add start-------
调用do_mount，解析参数，实现挂载
-------ietf add end -------
retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
flags, (void *)data_page);
unlock_kernel();
free_page(data_page);

out3:
free_page(dev_page);
out2:
putname(dir_page);
out1:
free_page(type_page);
return retval;
}

4.2 do_mount

do_mount根据sys_mount传递的内核空间的参数，选择对应的执行过程，在namespace.c中，其源码具体如下：

/*
* Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
* be given to the mount() call (ie: read-only, no-dev, no-suid etc).
*
* data is a (void *) that can point to any structure up to
* PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
* information (or be NULL).
*
* Pre-0.97 versions of mount() didn't have a flags word.
* When the flags word was introduced its top half was required
* to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
* Therefore, if this magic number is present, it carries no information
* and must be discarded.
*/
long do_mount(char *dev_name, char *dir_name, char *type_page,
unsigned long flags, void *data_page)
{
struct nameidata nd;
int retval = 0;
int mnt_flags = 0;

/* Discard magic */
if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
flags &= ~MS_MGC_MSK;

/* Basic sanity checks */
-------ietf add start-------
如果挂载点为空指针或空字符串，或者挂载点、设备名字符串过长（在PAGE_SIZE范围内找不到字符串结束符），则返回-EINVAL；设备名本身允许为NULL
-------ietf add end -------
if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
  return -EINVAL;
if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
  return -EINVAL;
-------ietf add start-------
将data_page所在页的最后一个字节强制置0，保证参数数据在PAGE_SIZE范围内以0结尾（超出部分被截断）
-------ietf add end -------
if (data_page)
  ((char *)data_page)[PAGE_SIZE - 1] = 0;

-------ietf add start-------
根据用户传入的flags，提取出针对单个挂载点的标志，构建mnt_flags值
-------ietf add end -------
/* Separate the per-mountpoint flags */
if (flags & MS_NOSUID)
  mnt_flags |= MNT_NOSUID;
if (flags & MS_NODEV)
  mnt_flags |= MNT_NODEV;
if (flags & MS_NOEXEC)
  mnt_flags |= MNT_NOEXEC;
if (flags & MS_NOATIME)
  mnt_flags |= MNT_NOATIME;
if (flags & MS_NODIRATIME)
  mnt_flags |= MNT_NODIRATIME;
if (flags & MS_RELATIME)
  mnt_flags |= MNT_RELATIME;

-------ietf add start-------
根据dir_name查找挂载点路径，将该挂载点引用计数增1，将挂载点数据保存在nd中
方法调用了namei.c中的path_lookup，再通过path_lookup调用do_path_lookup实现，源码如下：
/*
 * Look up the path 'name' and fill in 'nd'. Thin wrapper around
 * do_path_lookup() that passes AT_FDCWD as the directory descriptor, so a
 * relative name is resolved starting from the current working directory
 * (see the dfd == AT_FDCWD branch of do_path_lookup()).
 * Returns whatever do_path_lookup() returns.
 */
int fastcall path_lookup(const char *name, unsigned int flags,
struct nameidata *nd)
{
return do_path_lookup(AT_FDCWD, name, flags, nd);
}

/*
 * Resolve the path 'name' into 'nd'.
 *
 * dfd:   AT_FDCWD, or a file descriptor of the directory that a relative
 *        name is resolved against.
 * name:  path to look up (kernel-space string).
 * flags: lookup flags, stored in nd->flags.
 * nd:    receives the starting mnt/dentry pair, then is passed on to
 *        link_path_walk().
 *
 * Returns 0 and nd will be valid on success; returns error, otherwise.
 * Errors produced here: -EBADF if dfd is not an open file, -ENOTDIR if dfd
 * does not refer to a directory, or the result of file_permission() /
 * link_path_walk().
 */
static int fastcall do_path_lookup(int dfd, const char *name,
unsigned int flags, struct nameidata *nd)
{
int retval = 0;
int fput_needed;
struct file *file;
struct fs_struct *fs = current->fs;

nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags;
nd->depth = 0;

/* Pick the starting point: root, current directory, or the directory behind dfd. */
if (*name=='/') {
  /* Absolute path: try the alternate root first, unless LOOKUP_NOALT is set. */
  read_lock(&fs->lock);
  if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
   nd->mnt = mntget(fs->altrootmnt);
   nd->dentry = dget(fs->altroot);
   read_unlock(&fs->lock);
   /* retval is still 0 here, so jumping to 'out' reports success. */
   if (__emul_lookup_dentry(name,nd))
    goto out; /* found in altroot */
   read_lock(&fs->lock);
  }
  nd->mnt = mntget(fs->rootmnt);
  nd->dentry = dget(fs->root);
  read_unlock(&fs->lock);
} else if (dfd == AT_FDCWD) {
  /* Relative path, no explicit directory: start at the current working directory. */
  read_lock(&fs->lock);
  nd->mnt = mntget(fs->pwdmnt);
  nd->dentry = dget(fs->pwd);
  read_unlock(&fs->lock);
} else {
  /* Relative path resolved against the directory referred to by dfd. */
  struct dentry *dentry;

  file = fget_light(dfd, &fput_needed);
  retval = -EBADF;
  if (!file)
   goto out_fail;

dentry = file->f_path.dentry;

  /* dfd must refer to a directory ... */
  retval = -ENOTDIR;
  if (!S_ISDIR(dentry->d_inode->i_mode))
   goto fput_fail;

  /* ... and the caller needs MAY_EXEC permission on it. */
  retval = file_permission(file, MAY_EXEC);
  if (retval)
   goto fput_fail;

nd->mnt = mntget(file->f_path.mnt);
nd->dentry = dget(dentry);

  /* nd now holds its own references, so the file reference can be dropped. */
  fput_light(file, fput_needed);
}
current->total_link_count = 0;
retval = link_path_walk(name, nd);
out:
/* On success, hand the resolved inode to the audit code unless the audit context is a dummy. */
if (likely(retval == 0)) {
  if (unlikely(!audit_dummy_context() && nd && nd->dentry &&
    nd->dentry->d_inode))
  audit_inode(name, nd->dentry->d_inode);
}
out_fail:
return retval;

/* Error exit for the dfd branch: drop the file reference taken by fget_light(). */
fput_fail:
fput_light(file, fput_needed);
goto out_fail;
}

持续跟踪该操作，会进入很深层次。它先查看该路径是否在dcache中，如果不在，将重新从硬盘上将该路径加入dcache。
-------ietf add end -------
/* ... and get the mountpoint */
retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
if (retval)
return retval;

-------ietf add start-------
如果没有配置安全属性，该函数为空；否则使用系统初始化时设定的安全框架函数。这一部分需要进一步分析
-------ietf add end -------
retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page);
if (retval)
  goto dput_out;
if (flags & MS_REMOUNT)
-------ietf add start-------
重新挂载（remount）：不改变挂载点，只修改已挂载文件系统的挂载标志和参数。该操作中，内核先将该sb对应的dcache数据缩减，以减小remount时的开销，并将所有的dirty inode写回硬盘，最后调用该文件系统sb中的remount_fs方法。
-------ietf add end -------
  retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
        data_page);
else if (flags & MS_BIND)
-------ietf add start-------
太复杂，还没深入看，其中分两种情况，一种copy方式，一种clone方式。
-------ietf add end -------
  retval = do_loopback(&nd, dev_name, flags & MS_REC);
else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
-------ietf add start-------
更改挂载点的传播类型（shared/private/slave/unbindable）
-------ietf add end -------
  retval = do_change_type(&nd, flags);
else if (flags & MS_MOVE)
-------ietf add start-------
移动挂载点：将一个已有的挂载（由dev_name指定）移动到新的挂载点，并不是卸载
-------ietf add end -------
  retval = do_move_mount(&nd, dev_name);
else
-------ietf add start-------
这里是初次挂载一个新设备时的入口，也是俺所感兴趣的部分。
该入口函数do_new_mount分别调用了do_kern_mount()和do_add_mount()两个函数，代码如下：
/*
* create a new mount for userspace and request it to be added into the
* namespace's tree
*/
static int do_new_mount(struct nameidata *nd, char *type, int flags,
   int mnt_flags, char *name, void *data)
{
struct vfsmount *mnt;

if (!type || !memchr(type, 0, PAGE_SIZE))
return -EINVAL;

/* we need capabilities... */
if (!capable(CAP_SYS_ADMIN))
  return -EPERM;
-------ietf add start-------
分配挂载点空间，具体见4.3
-------ietf add end -------
mnt = do_kern_mount(type, flags, name, data);
if (IS_ERR(mnt))
  return PTR_ERR(mnt);
-------ietf add start-------
见下
-------ietf add end -------
return do_add_mount(mnt, nd, mnt_flags, NULL);
}
-------ietf add end -------
  retval = do_new_mount(&nd, type_page, flags, mnt_flags,
          dev_name, data_page);
dput_out:
path_release(&nd);
return retval;
}

4.3 do_kern_mount & vfs_kern_mount

这两个函数都在/src/fs/super.c中实现

struct vfsmount *
do_kern_mount(const char *fstype, int flags, const char *name, void *data)
{
-------ietf add start-------
根据文件系统类型名在文件系统链表中查找对应的模块，如果模块不在内核中，则尝试加载模块
-------ietf add end -------
struct file_system_type *type = get_fs_type(fstype);
struct vfsmount *mnt;
if (!type)
return ERR_PTR(-ENODEV);
mnt = vfs_kern_mount(type, flags, name, data);
put_filesystem(type);
return mnt;
}

struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
struct vfsmount *mnt;
char *secdata = NULL;
int error;

if (!type)
return ERR_PTR(-ENODEV);

error = -ENOMEM;
-------ietf add start-------
为设备分配挂载点数据结构（struct vfsmount），设置引用计数，初始化各参数，并将mnt_devname域赋值为name（即dev_name）。
-------ietf add end -------
mnt = alloc_vfsmnt(name);
if (!mnt)
goto out;

-------ietf add start-------
传入的data参数不为空，又是和security框架相关的操作
-------ietf add end -------
if (data) {
  secdata = alloc_secdata();
  if (!secdata)
   goto out_mnt;

  error = security_sb_copy_data(type, data, secdata);
  if (error)
   goto out_free_secdata;
}

-------ietf add start-------
调用该文件系统类型定义的get_sb()方法，判断name指定的块设备上是否包含该文件系统类型支持的文件系统。执行成功返回0，此时superblock保存在mnt->mnt_sb中；出错则返回负的错误码。具体功能实现在分析特定文件系统时详细介绍。
-------ietf add end -------
error = type->get_sb(type, flags, name, data, mnt);
if (error < 0)
goto out_free_secdata;

  error = security_sb_kern_mount(mnt->mnt_sb, secdata);
  if (error)
   goto out_sb;

mnt->mnt_mountpoint = mnt->mnt_root;
mnt->mnt_parent = mnt;
up_write(&mnt->mnt_sb->s_umount);
free_secdata(secdata);
return mnt;
out_sb:
dput(mnt->mnt_root);
up_write(&mnt->mnt_sb->s_umount);
deactivate_super(mnt->mnt_sb);
out_free_secdata:
free_secdata(secdata);
out_mnt:
free_vfsmnt(mnt);
out:
return ERR_PTR(error);
}

4.4 do_add_mount

既然已经为新的设备分配挂载点，就该把该挂载点加入到系统的文件系统中来了。

/*
* add a mount into a namespace's mount tree
* - provide the option of adding the new mount to an expiration list
*/
int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
   int mnt_flags, struct list_head *fslist)
{
int err;
-------ietf add start-------
先加锁（获取namespace_sem的写锁）
-------ietf add end -------
down_write(&namespace_sem);
/* Something was mounted here while we slept */
while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
  ;
err = -EINVAL;
if (!check_mnt(nd->mnt))
  goto unlock;

/* Refuse the same filesystem on the same mount point */
-------ietf add start-------
相同的设备，相同的挂载点，不必再挂
-------ietf add end -------
err = -EBUSY;
if (nd->mnt->mnt_sb == newmnt->mnt_sb &&
nd->mnt->mnt_root == nd->dentry)
goto unlock;

err = -EINVAL;
-------ietf add start-------
如果新挂载的文件系统的根是符号链接（link），则返回-EINVAL
-------ietf add end -------
if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
goto unlock;

newmnt->mnt_flags = mnt_flags;
-------ietf add start-------
挂载的操作，其中调用了attach_recursive_mnt()，实现挂载，具体见4.5
-------ietf add end -------
if ((err = graft_tree(newmnt, nd)))
  goto unlock;
-------ietf add start-------
第一次加载时为空，忽略该操作
-------ietf add end -------
if (fslist) {
  /* add to the specified expiration list */
  spin_lock(&vfsmount_lock);
  list_add_tail(&newmnt->mnt_expire, fslist);
  spin_unlock(&vfsmount_lock);
}
up_write(&namespace_sem);
return 0;

unlock:
up_write(&namespace_sem);
mntput(newmnt);
return err;
}

4.5 attach_recursive_mnt

这是目前看到的注释最长的一个函数，瞻仰一下：
/*
* @source_mnt : mount tree to be attached
* @nd         : place the mount tree @source_mnt is attached
* @parent_nd : if non-null, detach the source_mnt from its parent and
*      store the parent mount and mountpoint dentry.
*      (done when source_mnt is moved)
*
* NOTE: in the table below explains the semantics when a source mount
* of a given type is attached to a destination mount of a given type.
* ---------------------------------------------------------------------------
* |         BIND MOUNT OPERATION                                            |
* |**************************************************************************
* | source-->| shared        |       private |       slave    | unbindable |
* | dest     |               |                |                |            |
* |   |      |               |                |                |            |
* |   v      |               |                |                |            |
* |**************************************************************************
* | shared | shared (++)   |     shared (+) |     shared(+++)| invalid   |
* |          |               |                |                |            |
* |non-shared| shared (+)    |      private   |      slave (*) | invalid   |
* ***************************************************************************
* A bind operation clones the source mount and mounts the clone on the
* destination mount.
*
* (++) the cloned mount is propagated to all the mounts in the propagation
* tree of the destination mount and the cloned mount is added to
* the peer group of the source mount.
* (+)   the cloned mount is created under the destination mount and is marked
*       as shared. The cloned mount is added to the peer group of the source
*       mount.
* (+++) the mount is propagated to all the mounts in the propagation tree
*       of the destination mount and the cloned mount is made slave
*       of the same master as that of the source mount. The cloned mount
*       is marked as 'shared and slave'.
* (*)   the cloned mount is made a slave of the same master as that of the
* source mount.
*
* ---------------------------------------------------------------------------
* |           MOVE MOUNT OPERATION                                 |
* |**************************************************************************
* | source-->| shared        |       private |       slave    | unbindable |
* | dest     |               |                |                |            |
* |   |      |               |                |                |            |
* |   v      |               |                |                |            |
* |**************************************************************************
* | shared | shared (+)    |     shared (+) |    shared(+++) | invalid   |
* |          |               |                |                |            |
* |non-shared| shared (+*)   |      private   |    slave (*)   | unbindable |
* ***************************************************************************
*
* (+) the mount is moved to the destination. And is then propagated to
* all the mounts in the propagation tree of the destination mount.
* (+*) the mount is moved to the destination.
* (+++) the mount is moved to the destination and is then propagated to
* all the mounts belonging to the destination mount's propagation tree.
* the mount is marked as 'shared and slave'.
* (*) the mount continues to be a slave at the new location.
*
* if the source mount is a tree, the operations explained above is
* applied to each mount in the tree.
* Must be called without spinlocks held, since this function can sleep
* in allocations.
*/
static int attach_recursive_mnt(struct vfsmount *source_mnt,
   struct nameidata *nd, struct nameidata *parent_nd)
{
/* Filled in by propagate_mnt() (definition not shown); its entries are committed below. */
LIST_HEAD(tree_list);
struct vfsmount *dest_mnt = nd->mnt;
struct dentry *dest_dentry = nd->dentry;
struct vfsmount *child, *p;

/* Any propagation failure is reported to the caller as -EINVAL. */
if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list))
return -EINVAL;

/* Attaching under a shared mount: mark every mount in the source tree as shared. */
if (IS_MNT_SHARED(dest_mnt)) {
for (p = source_mnt; p; p = next_mnt(p, source_mnt))
set_mnt_shared(p);
}

/* The tree manipulation below is done under vfsmount_lock. */
spin_lock(&vfsmount_lock);
if (parent_nd) {
  /* Move: detach source_mnt from its old parent (recorded in parent_nd), then attach it at nd. */
  detach_mnt(source_mnt, parent_nd);
  attach_mnt(source_mnt, nd);
  touch_mnt_namespace(current->nsproxy->mnt_ns);
} else {
  /* New attachment: set the mountpoint of source_mnt and commit its tree. */
  mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
  commit_tree(source_mnt);
}

/* Commit each entry of tree_list; the _safe iterator is needed because entries are unlinked while walking. */
list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
list_del_init(&child->mnt_hash);
commit_tree(child);
}
spin_unlock(&vfsmount_lock);
return 0;
}
上面解释得很清楚，就不啰嗦了。
呵呵，有点虎头蛇尾。

转载于:https://blog.51cto.com/inkfish/563234

weixin_34336292

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
文件系统挂载过程浅析（1）

===================================================作者：ietf AT doit.com.cn源文件来自于glibc-2.1.2，Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. 和 linux kernel 2.6.20请在GNU Lib...
复制链接

扫一扫