===================================================
作者:ietf AT doit.com.cn
源文件来自于glibc-2.1.2,Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc. 和 linux kernel 2.6.20
请在GNU Library General Public License下参考。
引用请注明出处。
===================================================

所有的注释由如下标记标识:
-------ietf add start-------
-------ietf add end  -------

一直想跟踪从敲入mount命令开始,命令的参数通过mount工具经由sys_mount系统调用进入内核最终实现文件系统挂载的全过程。可惜找了整整一天,竟然没有能找到mount这个命令的源代码是怎么实现,只好从系统调用开始了。

1、系统调用的头文件

在glibc-2.1.2的sysdeps/unix/sysv/linux/sys/mount.h中,定义了为实现挂载和卸载Linux文件系统的头文件,其内容如下:

/* Header file for mounting/unmount Linux filesystems.
   Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the GNU C Library; see the file COPYING.LIB.  If not,
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */

/* This is taken from /usr/include/linux/fs.h.  */
-------ietf add start-------
给出/usr/include/linux/fs.h中定义方式(2.6.20),以供参考
-------ietf add end  -------

#ifndef _SYS_MOUNT_H
#define _SYS_MOUNT_H 1

#include <features.h>
#include <sys/ioctl.h>

-------ietf add start-------
#define BLOCK_SIZE_BITS 10
#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
-------ietf add end  -------
#define BLOCK_SIZE 1024
#define BLOCK_SIZE_BITS 10


/* These are the fs-independent mount-flags: up to 16 flags are
   supported  */
-------ietf add start-------
#define MS_RDONLY  1 /* Mount read-only */
#define MS_NOSUID  2 /* Ignore suid and sgid bits */
#define MS_NODEV  4 /* Disallow access to device special files */
#define MS_NOEXEC  8 /* Disallow program execution */
#define MS_SYNCHRONOUS 16 /* Writes are synced at once */
#define MS_REMOUNT 32 /* Alter flags of a mounted FS */
#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */
#define MS_DIRSYNC 128 /* Directory modifications are synchronous */
#define MS_NOATIME 1024 /* Do not update access times. */
#define MS_NODIRATIME 2048 /* Do not update directory access times */
-------ietf add end  -------
enum
{
  MS_RDONLY = 1,  /* Mount read-only.  */
#define MS_RDONLY MS_RDONLY
  MS_NOSUID = 2,  /* Ignore suid and sgid bits.  */
#define MS_NOSUID MS_NOSUID
  MS_NODEV = 4,   /* Disallow access to device special files.  */
#define MS_NODEV MS_NODEV
  MS_NOEXEC = 8,  /* Disallow program execution.  */
#define MS_NOEXEC MS_NOEXEC
  MS_SYNCHRONOUS = 16,  /* Writes are synced at once.  */
#define MS_SYNCHRONOUS MS_SYNCHRONOUS
  MS_REMOUNT = 32,  /* Alter flags of a mounted FS.  */
#define MS_REMOUNT MS_REMOUNT
  MS_MANDLOCK = 64,  /* Allow mandatory locks on an FS.  */
#define MS_MANDLOCK MS_MANDLOCK
  S_WRITE = 128,  /* Write on file/directory/symlink.  */
#define S_WRITE  S_WRITE
  S_APPEND = 256,  /* Append-only file.  */
#define S_APPEND S_APPEND
  S_IMMUTABLE = 512,  /* Immutable file.  */
#define S_IMMUTABLE S_IMMUTABLE
  MS_NOATIME = 1024,  /* Do not update access times.  */
#define MS_NOATIME MS_NOATIME
  MS_NODIRATIME = 2048  /* Do not update directory access times.  */
#define MS_NODIRATIME MS_NODIRATIME
};

-------ietf add start-------
#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK)
-------ietf add end  -------
/* Flags that can be altered by MS_REMOUNT  */
#define MS_RMT_MASK (MS_RDONLY | MS_MANDLOCK)


/* Magic mount flag number. Has to be or-ed to the flag values.  */
-------ietf add start-------
#define MS_MGC_VAL 0xC0ED0000
#define MS_MGC_MSK 0xffff0000
-------ietf add end  -------
#define MS_MGC_VAL 0xc0ed0000 /* Magic flag number to indicate "new" flags */
#define MS_MGC_MSK 0xffff0000 /* Magic flag number mask */


/* The read-only stuff doesn't really belong here, but any other place
   is probably as bad and I don't want to create yet another include
   file.  */
-------ietf add start-------
#define BLKROSET   _IO(0x12,93) /* set device read-only (0 = read-write) */
#define BLKROGET   _IO(0x12,94) /* get read-only status (0 = read_write) */
#define BLKRRPART  _IO(0x12,95) /* re-read partition table */
#define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */
#define BLKFLSBUF  _IO(0x12,97) /* flush buffer cache */
#define BLKRASET   _IO(0x12,98) /* set read ahead for block device */
#define BLKRAGET   _IO(0x12,99) /* get current read ahead setting */
-------ietf add end  -------
#define BLKROSET   _IO(0x12, 93) /* Set device read-only (0 = read-write).  */
#define BLKROGET   _IO(0x12, 94) /* Get read-only status (0 = read_write).  */
#define BLKRRPART  _IO(0x12, 95) /* Re-read partition table.  */
#define BLKGETSIZE _IO(0x12, 96) /* Return device size.  */
#define BLKFLSBUF  _IO(0x12, 97) /* Flush buffer cache.  */
#define BLKRASET   _IO(0x12, 98) /* Set read ahead for block device.  */
#define BLKRAGET   _IO(0x12, 99) /* Get current read ahead setting.  */


/* Possible value for FLAGS parameter of `umount2'.  */
enum
{
  MNT_FORCE = 1   /* Force unmounting.  */
#define MNT_FORCE MNT_FORCE
};


__BEGIN_DECLS

/* Mount a filesystem.  */
extern int mount __P ((__const char *__special_file, __const char *__dir,
         __const char *__fstype, unsigned long int __rwflag,
         __const void *__data));

/* Unmount a filesystem.  */
extern int umount __P ((__const char *__special_file));

/* Unmount a filesystem.  Force unmounting if FLAGS is set to MNT_FORCE.  */
extern int umount2 __P ((__const char *__special_file, int __flags));

__END_DECLS

#endif /* _SYS_MOUNT_H */

该文件是从fs.h中摘取的一部分,不过新版的内核有了稍许的更改。

2、系统调用号

通过给定系统调用mount和umount,查找对应的系统调用表。在文件glibc-2.1.2的sysdeps/unix/sysv/linux/mips/sys/syscall.h中如下:
/*
 * Linux syscalls are in the range from 4000 to 4999
 * Hopefully these syscall numbers are unused ...  If not everyone using
 * statically linked binaries is pretty <censored - the government>.  You've
 * been warned.
 */
#define SYS_Linux   4000
...... ......
#define SYS_mount   (SYS_Linux +  21)
#define SYS_umount   (SYS_Linux +  22)
即分别为4021和4022。

3、系统调用在内核的声明

系统调用的函数声明在内核源码的src/include/linux/syscalls.h中,具体如下:

asmlinkage long sys_mount(char __user *dev_name, char __user *dir_name,
    char __user *type, unsigned long flags,
    void __user *data);
-------ietf add start-------
dev_name: 字符串类型,挂载的设备名,如,/dev/sda1
dir_name: 挂载点,如,/mnt/usb
type:  在/proc/filesystems中列出的内核所支持的文件系统格式,如,"ext2", "nfs", "proc"等
flags:  标志参数,参考1中列出的MS_XXXXX类型宏定义
data:  不同文件系统特定的参数表,通常以逗号分隔
返回的错误类型请参考专门的文档详细介绍。
-------ietf add end  -------

asmlinkage long sys_umount(char __user *name, int flags);
-------ietf add start-------
name:  同sys_mount中dir_name
flags:  卸载标志,如1中列出的MNT_FORCE(注意:与sys_mount中的MS_XXXXX标志不是同一组)
-------ietf add end  -------

asmlinkage long sys_oldumount(char __user *name);
-------ietf add start-------
name:  同sys_mount中dir_name
-------ietf add end  -------
与1中的三个函数相对应。

4、系统调用的实现

具体实现在内核源码的src/fs/namespace.c中,下面就三个调用分别参考其流程。

4.1 sys_mount
sys_mount主要功能是将用户的命令行参数从用户空间传递到内核空间,并调用do_mount解析参数,完成mount过程。其实现源码如下:

asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name,
     char __user * type, unsigned long flags,
     void __user * data)
{
 int retval;
 unsigned long data_page;
 unsigned long type_page;
 unsigned long dev_page;
 char *dir_page;
-------ietf add start-------
将文件系统类型参数传入内核空间,在namespace.c中实现如下:
/*
 * Copy at most one page of mount data from user space into a newly
 * allocated kernel page.
 *
 * data:  user-space address of the data; NULL is accepted.
 * where: receives the address of the kernel page; it is left at 0 when
 *        data is NULL or when an error is returned.
 *
 * Returns 0 on success (also for NULL data), -ENOMEM when no page can be
 * allocated, and -EFAULT when the computed copy count is zero.  On success
 * the page is not freed here; sys_mount() releases it with free_page().
 */
int copy_mount_options(const void __user * data, unsigned long *where)
{
 int i;
 unsigned long page;
 unsigned long size;

 /* Default result: no page. */
 *where = 0;
 if (!data)
  return 0;

 if (!(page = __get_free_page(GFP_KERNEL)))
  return -ENOMEM;

 /* We only care that *some* data at the address the user
  * gave us is valid.  Just in case, we'll zero
  * the remainder of the page.
  */
 /* copy_from_user cannot cross TASK_SIZE ! */
 /* Clamp the request to one page and to the end of the user address range. */
 size = TASK_SIZE - (unsigned long)data;
 if (size > PAGE_SIZE)
  size = PAGE_SIZE;

 /*
  * i = number of bytes actually copied, assuming exact_copy_from_user()
  * returns the number of bytes it could NOT copy -- TODO confirm against
  * its definition (not shown here).
  */
 i = size - exact_copy_from_user((void *)page, data, size);
 if (!i) {
  free_page(page);
  return -EFAULT;
 }
 /* Zero everything after the copied bytes so the page holds no stale data. */
 if (i != PAGE_SIZE)
  memset((char *)page + i, 0, PAGE_SIZE - i);
 *where = page;
 return 0;
}
-------ietf add end  -------
 retval = copy_mount_options(type, &type_page);
 if (retval < 0)
  return retval;
-------ietf add start-------
将挂载点名传入内核空间,并在系统配置了audit属性时,通过audit_getname对该名字进行审计记录(并非访问控制),其实现在namei.c中,如下:
/*
 * Obtain a kernel-side buffer for the user-space path name `filename`.
 *
 * Returns the buffer obtained from __getname() after do_getname() has
 * succeeded on it, or an ERR_PTR() value: -ENOMEM when __getname() returns
 * NULL, otherwise the negative value returned by do_getname() (in which
 * case the buffer is released with __putname()).
 * The result -- valid pointer or error pointer -- is always handed to
 * audit_getname() before returning.
 */
char * getname(const char __user * filename)
{
 char *tmp, *result;

 /* Assume allocation failure until __getname() proves otherwise. */
 result = ERR_PTR(-ENOMEM);
 tmp = __getname();
 if (tmp)  {
  /* Presumably copies the user string into tmp -- do_getname() is not shown here. */
  int retval = do_getname(filename, tmp);

  result = tmp;
  if (retval < 0) {
   __putname(tmp);
   result = ERR_PTR(retval);
  }
 }
 audit_getname(result);
 return result;
}
-------ietf add end  -------
 dir_page = getname(dir_name);
 retval = PTR_ERR(dir_page);
 if (IS_ERR(dir_page))
  goto out1;
-------ietf add start-------
将设备名参数传入内核空间
-------ietf add end  -------
 retval = copy_mount_options(dev_name, &dev_page);
 if (retval < 0)
  goto out2;
-------ietf add start-------
将文件系统特定参数表传入内核空间
-------ietf add end  -------
 retval = copy_mount_options(data, &data_page);
 if (retval < 0)

  goto out3;

 lock_kernel();

-------ietf add start-------
调用do_mount,解析参数,实现挂载
-------ietf add end  -------
 retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
     flags, (void *)data_page);
 unlock_kernel();
 free_page(data_page);

out3:
 free_page(dev_page);
out2:
 putname(dir_page);
out1:
 free_page(type_page);
 return retval;
}

4.2 do_mount

do_mount根据sys_mount传递的内核空间的参数,选择对应的执行过程,在namespace.c中,其源码具体如下:

/*
 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 *
 * data is a (void *) that can point to any structure up to
 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 * information (or be NULL).
 *
 * Pre-0.97 versions of mount() didn't have a flags word.
 * When the flags word was introduced its top half was required
 * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
 * Therefore, if this magic number is present, it carries no information
 * and must be discarded.
 */
long do_mount(char *dev_name, char *dir_name, char *type_page,
    unsigned long flags, void *data_page)
{
 struct nameidata nd;
 int retval = 0;
 int mnt_flags = 0;

 /* Discard magic */
 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
  flags &= ~MS_MGC_MSK;

 /* Basic sanity checks */
-------ietf add start-------
如果挂载点名为空指针、空字符串,或者在PAGE_SIZE范围内找不到字符串结束符,则返回-EINVAL;设备名允许为NULL,但不为NULL时同样必须在PAGE_SIZE范围内结束,否则返回-EINVAL
-------ietf add end  -------
 if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
  return -EINVAL;
 if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
  return -EINVAL;
-------ietf add start-------
如果data_page不为空,将该页的最后一个字节置0,保证数据在PAGE_SIZE范围内以0结尾(超出部分相当于被截断)
-------ietf add end  -------
 if (data_page)
  ((char *)data_page)[PAGE_SIZE - 1] = 0;

-------ietf add start-------
根据用户参数flags,分离出针对挂载点的标志,构建mnt_flags值,并将这些标志位从flags中清除
-------ietf add end  -------
 /* Separate the per-mountpoint flags */
 if (flags & MS_NOSUID)
  mnt_flags |= MNT_NOSUID;
 if (flags & MS_NODEV)
  mnt_flags |= MNT_NODEV;
 if (flags & MS_NOEXEC)
  mnt_flags |= MNT_NOEXEC;
 if (flags & MS_NOATIME)
  mnt_flags |= MNT_NOATIME;
 if (flags & MS_NODIRATIME)
  mnt_flags |= MNT_NODIRATIME;
 if (flags & MS_RELATIME)
  mnt_flags |= MNT_RELATIME;

 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
     MS_NOATIME | MS_NODIRATIME | MS_RELATIME);

-------ietf add start-------
根据dir_name查找挂载点路径,将该挂载点引用计数增1,将挂载点数据保存在nd中
方法调用了namei.c中的path_lookup,再通过path_lookup调用do_path_lookup实现,源码如下:
/*
 * Look up `name` and fill in `nd`.  Thin wrapper that calls
 * do_path_lookup() with AT_FDCWD, so a relative name starts at the
 * current working directory.  Returns do_path_lookup()'s result.
 */
int fastcall path_lookup(const char *name, unsigned int flags,
   struct nameidata *nd)
{
 return do_path_lookup(AT_FDCWD, name, flags, nd);
}

/*
 * Returns 0 and nd will be valid on success; returns error, otherwise.
 *
 * Chooses the starting (mnt, dentry) pair for the walk and then lets
 * link_path_walk() resolve `name` from there:
 *   - name begins with '/': start at the process root (fs->root), after
 *     first trying the alternate root when one is set and LOOKUP_NOALT
 *     is not requested;
 *   - dfd == AT_FDCWD: start at the current working directory (fs->pwd);
 *   - otherwise: start at the directory referred to by descriptor dfd.
 * Errors produced here: -EBADF (no file for dfd), -ENOTDIR (dfd is not a
 * directory), or whatever file_permission()/link_path_walk() return.
 */
static int fastcall do_path_lookup(int dfd, const char *name,
    unsigned int flags, struct nameidata *nd)
{
 int retval = 0;
 int fput_needed;
 struct file *file;
 struct fs_struct *fs = current->fs;

 nd->last_type = LAST_ROOT; /* if there are only slashes... */
 nd->flags = flags;
 nd->depth = 0;

 if (*name=='/') {
  /* Absolute path: fs->lock guards the root/altroot fields while we take references. */
  read_lock(&fs->lock);
  if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
   nd->mnt = mntget(fs->altrootmnt);
   nd->dentry = dget(fs->altroot);
   read_unlock(&fs->lock);
   /* Non-zero means the lookup was satisfied here; retval is still 0. */
   if (__emul_lookup_dentry(name,nd))
    goto out; /* found in altroot */
   read_lock(&fs->lock);
  }
  nd->mnt = mntget(fs->rootmnt);
  nd->dentry = dget(fs->root);
  read_unlock(&fs->lock);
 } else if (dfd == AT_FDCWD) {
  /* Relative path, no descriptor given: start at the working directory. */
  read_lock(&fs->lock);
  nd->mnt = mntget(fs->pwdmnt);
  nd->dentry = dget(fs->pwd);
  read_unlock(&fs->lock);
 } else {
  /* Relative path resolved against the directory open on dfd. */
  struct dentry *dentry;

  file = fget_light(dfd, &fput_needed);
  retval = -EBADF;
  if (!file)
   goto out_fail;

  dentry = file->f_path.dentry;

  retval = -ENOTDIR;
  if (!S_ISDIR(dentry->d_inode->i_mode))
   goto fput_fail;

  /* The starting directory is checked with MAY_EXEC before the walk. */
  retval = file_permission(file, MAY_EXEC);
  if (retval)
   goto fput_fail;

  nd->mnt = mntget(file->f_path.mnt);
  nd->dentry = dget(dentry);

  fput_light(file, fput_needed);
 }
 current->total_link_count = 0;
 retval = link_path_walk(name, nd);
out:
 /* On success, report the resolved inode to audit unless the audit context is a dummy. */
 if (likely(retval == 0)) {
  if (unlikely(!audit_dummy_context() && nd && nd->dentry &&
    nd->dentry->d_inode))
  audit_inode(name, nd->dentry->d_inode);
 }
out_fail:
 return retval;

fput_fail:
 /* Error after fget_light() succeeded: drop the file reference, then return retval. */
 fput_light(file, fput_needed);
 goto out_fail;
}

持续跟踪该操作,会进入很深层次。它先查看该路径是否在dcache中,如果不在,将重新从硬盘上将该路径加入dcache。
-------ietf add end  -------
 /* ... and get the mountpoint */
 retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
 if (retval)
  return retval;

-------ietf add start-------
如果没有配置安全属性,该函数为空;否则使用系统初始化时设定的安全框架函数。这一部分需要进一步分析
-------ietf add end  -------
 retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page);
 if (retval)
  goto dput_out;
 if (flags & MS_REMOUNT)
-------ietf add start-------
修改给定路径上已挂载文件系统的挂载标志(remount并不是把设备重新挂载到新的路径点)。该操作中,内核先将该sb对应的dcache数据缩减,以减小remount时的开销,并将所有的dirty inode写回硬盘,最后调用该文件系统sb中的remount_fs方法。
-------ietf add end  -------
  retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
        data_page);
 else if (flags & MS_BIND)
-------ietf add start-------
太复杂,还没深入看,其中分两种情况,一种copy方式,一种clone方式。
-------ietf add end  -------
  retval = do_loopback(&nd, dev_name, flags & MS_REC);
 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
-------ietf add start-------
更改挂载点的传播类型(shared/private/slave/unbindable)
-------ietf add end  -------
  retval = do_change_type(&nd, flags);
 else if (flags & MS_MOVE)
-------ietf add start-------
将一个已有的挂载移动到新的挂载点(并非卸载)
-------ietf add end  -------
  retval = do_move_mount(&nd, dev_name);
 else
-------ietf add start-------
这里是初次挂载一个新设备时的入口,也是俺所感兴趣的部分。
该函数分别调用了do_kern_mount()和do_add_mount()两个函数,代码如下:
/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */
static int do_new_mount(struct nameidata *nd, char *type, int flags,
   int mnt_flags, char *name, void *data)
{
 struct vfsmount *mnt;

 if (!type || !memchr(type, 0, PAGE_SIZE))
  return -EINVAL;

 /* we need capabilities... */
 if (!capable(CAP_SYS_ADMIN))
  return -EPERM;
-------ietf add start-------
分配挂载点空间,具体见4.3
-------ietf add end  -------
 mnt = do_kern_mount(type, flags, name, data);
 if (IS_ERR(mnt))
  return PTR_ERR(mnt);
-------ietf add start-------
见下
-------ietf add end  -------
 return do_add_mount(mnt, nd, mnt_flags, NULL);
}
-------ietf add end  -------

  retval = do_new_mount(&nd, type_page, flags, mnt_flags,
          dev_name, data_page);
dput_out:
 path_release(&nd);
 return retval;
}

4.3 do_kern_mount & vfs_kern_mount

这两个程序都在/src/fs/super.c中实现

struct vfsmount *
do_kern_mount(const char *fstype, int flags, const char *name, void *data)
{
-------ietf add start-------
根据文件系统类型名在文件系统链表中查找对应的模块,如果模块不在内核中,则尝试加载模块
-------ietf add end  -------
 struct file_system_type *type = get_fs_type(fstype);
 struct vfsmount *mnt;
 if (!type)
  return ERR_PTR(-ENODEV);
 mnt = vfs_kern_mount(type, flags, name, data);
 put_filesystem(type);
 return mnt;
}

struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
 struct vfsmount *mnt;
 char *secdata = NULL;
 int error;

 if (!type)
  return ERR_PTR(-ENODEV);

 error = -ENOMEM;
-------ietf add start-------
为设备分配挂载点数据结构,设置引用计数,初始化各参数,并将mnt_devname域赋值为name(即dev_name),
-------ietf add end  -------
 mnt = alloc_vfsmnt(name);
 if (!mnt)
  goto out;

-------ietf add start-------
传入的data参数不为空,又是和security框架相关的操作
-------ietf add end  -------
 if (data) {
  secdata = alloc_secdata();
  if (!secdata)
   goto out_mnt;

  error = security_sb_copy_data(type, data, secdata);
  if (error)
   goto out_free_secdata;
 }

-------ietf add start-------
调用该文件系统类型定义的get_sb()方法,判断指定的块设备是否包含该方法支持的文件系统。执行成功返回0,并把superblock等信息填入mnt(之后可通过mnt->mnt_sb访问);出错则返回负的错误码。具体功能实现在分析特定文件系统时详细介绍。
-------ietf add end  -------
 error = type->get_sb(type, flags, name, data, mnt);
 if (error < 0)
  goto out_free_secdata;

  error = security_sb_kern_mount(mnt->mnt_sb, secdata);
  if (error)
   goto out_sb;

 mnt->mnt_mountpoint = mnt->mnt_root;
 mnt->mnt_parent = mnt;
 up_write(&mnt->mnt_sb->s_umount);
 free_secdata(secdata);
 return mnt;
out_sb:
 dput(mnt->mnt_root);
 up_write(&mnt->mnt_sb->s_umount);
 deactivate_super(mnt->mnt_sb);
out_free_secdata:
 free_secdata(secdata);
out_mnt:
 free_vfsmnt(mnt);
out:
 return ERR_PTR(error);
}

4.4 do_add_mount

既然已经为新的设备分配挂载点,就该把该挂载点加入到系统的文件系统中来了。

/*
 * add a mount into a namespace's mount tree
 * - provide the option of adding the new mount to an expiration list
 */
int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
   int mnt_flags, struct list_head *fslist)
{
 int err;
-------ietf add start-------
先加锁(获取namespace_sem的写锁)
-------ietf add end  -------

 down_write(&namespace_sem);
 /* Something was mounted here while we slept */
 while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
  ;
 err = -EINVAL;
 if (!check_mnt(nd->mnt))
  goto unlock;

 /* Refuse the same filesystem on the same mount point */
-------ietf add start-------
相同的设备,相同的挂载点,不必再挂
-------ietf add end  -------
 err = -EBUSY;
 if (nd->mnt->mnt_sb == newmnt->mnt_sb &&
     nd->mnt->mnt_root == nd->dentry)
  goto unlock;

 err = -EINVAL;
-------ietf add start-------
如果新挂载的文件系统的根是符号链接,则不允许挂载,返回-EINVAL
-------ietf add end  -------
 if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
  goto unlock;

 newmnt->mnt_flags = mnt_flags;
-------ietf add start-------
挂载的操作,其中调用了attach_recursive_mnt(),实现挂载,具体见4.5
-------ietf add end  -------
 if ((err = graft_tree(newmnt, nd)))
  goto unlock;
-------ietf add start-------
第一次加载时为空,忽略该操作
-------ietf add end  -------
 if (fslist) {
  /* add to the specified expiration list */
  spin_lock(&vfsmount_lock);
  list_add_tail(&newmnt->mnt_expire, fslist);
  spin_unlock(&vfsmount_lock);
 }
 up_write(&namespace_sem);
 return 0;

unlock:
 up_write(&namespace_sem);
 mntput(newmnt);
 return err;
}

4.5 attach_recursive_mnt

这是目前看到的注释最长的一个函数,瞻仰一下:
/*
 *  @source_mnt : mount tree to be attached
 *  @nd         : place the mount tree @source_mnt is attached
 *  @parent_nd  : if non-null, detach the source_mnt from its parent and
 *       store the parent mount and mountpoint dentry.
 *       (done when source_mnt is moved)
 *
 *  NOTE: in the table below explains the semantics when a source mount
 *  of a given type is attached to a destination mount of a given type.
 * ---------------------------------------------------------------------------
 * |         BIND MOUNT OPERATION                                            |
 * |**************************************************************************
 * | source-->| shared        |       private  |       slave    | unbindable |
 * | dest     |               |                |                |            |
 * |   |      |               |                |                |            |
 * |   v      |               |                |                |            |
 * |**************************************************************************
 * |  shared  | shared (++)   |     shared (+) |     shared(+++)|  invalid   |
 * |          |               |                |                |            |
 * |non-shared| shared (+)    |      private   |      slave (*) |  invalid   |
 * ***************************************************************************
 * A bind operation clones the source mount and mounts the clone on the
 * destination mount.
 *
 * (++)  the cloned mount is propagated to all the mounts in the propagation
 *   tree of the destination mount and the cloned mount is added to
 *   the peer group of the source mount.
 * (+)   the cloned mount is created under the destination mount and is marked
 *       as shared. The cloned mount is added to the peer group of the source
 *       mount.
 * (+++) the mount is propagated to all the mounts in the propagation tree
 *       of the destination mount and the cloned mount is made slave
 *       of the same master as that of the source mount. The cloned mount
 *       is marked as 'shared and slave'.
 * (*)   the cloned mount is made a slave of the same master as that of the
 *   source mount.
 *
 * ---------------------------------------------------------------------------
 * |           MOVE MOUNT OPERATION                                 |
 * |**************************************************************************
 * | source-->| shared        |       private  |       slave    | unbindable |
 * | dest     |               |                |                |            |
 * |   |      |               |                |                |            |
 * |   v      |               |                |                |            |
 * |**************************************************************************
 * |  shared  | shared (+)    |     shared (+) |    shared(+++) |  invalid   |
 * |          |               |                |                |            |
 * |non-shared| shared (+*)   |      private   |    slave (*)   | unbindable |
 * ***************************************************************************
 *
 * (+)  the mount is moved to the destination. And is then propagated to
 *  all the mounts in the propagation tree of the destination mount.
 * (+*)  the mount is moved to the destination.
 * (+++)  the mount is moved to the destination and is then propagated to
 *  all the mounts belonging to the destination mount's propagation tree.
 *  the mount is marked as 'shared and slave'.
 * (*) the mount continues to be a slave at the new location.
 *
 * if the source mount is a tree, the operations explained above is
 * applied to each mount in the tree.
 * Must be called without spinlocks held, since this function can sleep
 * in allocations.
 */
static int attach_recursive_mnt(struct vfsmount *source_mnt,
   struct nameidata *nd, struct nameidata *parent_nd)
{
 /* Mounts collected by propagate_mnt(); committed at the end. */
 LIST_HEAD(tree_list);
 struct vfsmount *dest_mnt = nd->mnt;
 struct dentry *dest_dentry = nd->dentry;
 struct vfsmount *child, *p;

 /* Any propagation failure is reported to the caller as -EINVAL. */
 if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list))
  return -EINVAL;

 /* A shared destination makes every mount in the source tree shared. */
 if (IS_MNT_SHARED(dest_mnt)) {
  for (p = source_mnt; p; p = next_mnt(p, source_mnt))
   set_mnt_shared(p);
 }

 spin_lock(&vfsmount_lock);
 if (parent_nd) {
  /* Move: unhook source_mnt from its old parent, then attach it at nd. */
  detach_mnt(source_mnt, parent_nd);
  attach_mnt(source_mnt, nd);
  touch_mnt_namespace(current->nsproxy->mnt_ns);
 } else {
  /* New attachment: record the mountpoint and commit the source tree. */
  mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
  commit_tree(source_mnt);
 }

 /*
  * Commit each mount left on tree_list, unlinking it from the list first.
  * Here p is reused as the look-ahead cursor of the _safe iterator.
  */
 list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
  list_del_init(&child->mnt_hash);
  commit_tree(child);
 }
 spin_unlock(&vfsmount_lock);
 return 0;
}
上面解释得很清楚,就不啰嗦了。
呵呵,有点虎头蛇尾。