mount命令常用于挂载文件系统,它调用的内核函数是sys_mount,sys_mount主要功能是将用户的命令行参数从用户空间传递到内核空间,并调用do_mount解析参数,完成mount过程。其实现源码如下:
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
char __user *, type, unsigned long, flags, void __user *, data)
{
int ret;
char *kernel_type;
char *kernel_dir;
char *kernel_dev;
unsigned long data_page;
ret = copy_mount_string(type, &kernel_type);//拷贝用户参数
if (ret < 0)
goto out_type;
kernel_dir = getname(dir_name);
if (IS_ERR(kernel_dir)) {
ret = PTR_ERR(kernel_dir);
goto out_dir;
}
ret = copy_mount_string(dev_name, &kernel_dev);
if (ret < 0)
goto out_dev;
ret = copy_mount_options(data, &data_page);
if (ret < 0)
goto out_data;
ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,//实际执行挂载操作
(void *) data_page);
free_page(data_page);
out_data:
kfree(kernel_dev);
out_dev:
putname(kernel_dir);
out_dir:
kfree(kernel_type);
out_type:
return ret;
}
这里主要调用了do_mount来执行具体的挂载操作
/*
* Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
* be given to the mount() call (ie: read-only, no-dev, no-suid etc).
*
* data is a (void *) that can point to any structure up to
* PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
* information (or be NULL).
*
* Pre-0.97 versions of mount() didn't have a flags word.
* When the flags word was introduced its top half was required
* to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
* Therefore, if this magic number is present, it carries no information
* and must be discarded.
*/
long do_mount(char *dev_name, char *dir_name, char *type_page,
unsigned long flags, void *data_page)
{
struct path path;
int retval = 0;
int mnt_flags = 0;
/* Discard magic */
if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
flags &= ~MS_MGC_MSK;
/* Basic sanity checks */
if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
return -EINVAL;
if (data_page)
((char *)data_page)[PAGE_SIZE - 1] = 0;
/* ... and get the mountpoint */
retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);//查找挂载到目录的path对象?
if (retval)
return retval;
retval = security_sb_mount(dev_name, &path,//安全检查
type_page, flags, data_page);
if (retval)
goto dput_out;
/* Default to relatime unless overriden */
if (!(flags & MS_NOATIME))//参数设置
mnt_flags |= MNT_RELATIME;
/* Separate the per-mountpoint flags */
if (flags & MS_NOSUID)
mnt_flags |= MNT_NOSUID;
if (flags & MS_NODEV)
mnt_flags |= MNT_NODEV;
if (flags & MS_NOEXEC)
mnt_flags |= MNT_NOEXEC;
if (flags & MS_NOATIME)
mnt_flags |= MNT_NOATIME;
if (flags & MS_NODIRATIME)
mnt_flags |= MNT_NODIRATIME;
if (flags & MS_STRICTATIME)
mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
if (flags & MS_RDONLY)
mnt_flags |= MNT_READONLY;
flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
MS_STRICTATIME);
if (flags & MS_REMOUNT)
retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
data_page);
else if (flags & MS_BIND)
retval = do_loopback(&path, dev_name, flags & MS_REC);
else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
retval = do_change_type(&path, flags);
else if (flags & MS_MOVE)
retval = do_move_mount(&path, dev_name);
else
retval = do_new_mount(&path, type_page, flags, mnt_flags,//一般是执行一个新的挂载操作
dev_name, data_page);
dput_out:
path_put(&path);
return retval;
}
在do_mount执行一些参数的检查,以及挂载标志的设置, 函数会根据调用参数 flags 来决定调用以下四个函数之一:do_remount()、 do_loopback()、do_move_mount()、do_add_mount()。这里调用do_new_mount来执行一新的挂载操作:
/*
* create a new mount for userspace and request it to be added into the
* namespace's tree
*/
static int do_new_mount(struct path *path, char *type, int flags,
int mnt_flags, char *name, void *data)
{
struct vfsmount *mnt;
if (!type)
return -EINVAL;
/* we need capabilities... */
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
lock_kernel();
mnt = do_kern_mount(type, flags, name, data);//执行真正的挂载操作
unlock_kernel();
if (IS_ERR(mnt))
return PTR_ERR(mnt);
return do_add_mount(mnt, path, mnt_flags, NULL);//添加到挂载树
}
struct vfsmount *
do_kern_mount(const char *fstype, int flags, const char *name, void *data)
{
struct file_system_type *type = get_fs_type(fstype);
struct vfsmount *mnt;
if (!type)
return ERR_PTR(-ENODEV);
mnt = vfs_kern_mount(type, flags, name, data);/*初始化vfsmount结构,并对其进行赋值*/
if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
!mnt->mnt_sb->s_subtype)
mnt = fs_set_subtype(mnt, fstype);//赋值文件系统类型
put_filesystem(type);
return mnt;
}
do_kern_mount()处理实际的安装操作并返回一个新安装文件系统描述符的地址, 这里定义了一脸上vfsmount指针,vfsmount结构描述的是一个独立文件系统的挂载信息,每个不同挂载点对应一个独立的vfsmount结构,属于同一文件系统的所有目录和文件隶属于同一个vfsmount,该vfsmount结构对应于该文件系统顶层目录,即挂载目录。
主要工作有二:一是获得一个新的安装区域块,二是将该新的安装区域块加入了安装系统链表
看一下vfs_kern_mount:
struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
struct vfsmount *mnt;
char *secdata = NULL;
int error;
if (!type)
return ERR_PTR(-ENODEV);
error = -ENOMEM;
mnt = alloc_vfsmnt(name);//申请了一块该类型的内存空间(struct vfsmount *mnt),并初始化其部分成员变量
if (!mnt)
goto out;
if (flags & MS_KERNMOUNT)
mnt->mnt_flags = MNT_INTERNAL;
if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {//文件系统使用的二进制安装数据
secdata = alloc_secdata();
if (!secdata)
goto out_mnt;
error = security_sb_copy_data(data, secdata);
if (error)
goto out_free_secdata;
}
error = type->get_sb(type, flags, name, data, mnt);/*调用文件系统控制结构体的get_sb()*/
if (error < 0)
goto out_free_secdata;
BUG_ON(!mnt->mnt_sb);
WARN_ON(!mnt->mnt_sb->s_bdi);
mnt->mnt_sb->s_flags |= MS_BORN;
error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
if (error)
goto out_sb;
/*
* filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
* but s_maxbytes was an unsigned long long for many releases. Throw
* this warning for a little while to try and catch filesystems that
* violate this rule. This warning should be either removed or
* converted to a BUG() in 2.6.34.
*/
WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
"negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes);
mnt->mnt_mountpoint = mnt->mnt_root;//设置挂载点的目录项
mnt->mnt_parent = mnt;// 把新的 vfsmount 结构赋给自身的 parent 这样可以 通过 parent 遍历出所有 mount 的文件系统
up_write(&mnt->mnt_sb->s_umount);
free_secdata(secdata);
return mnt;
out_sb:
dput(mnt->mnt_root);
deactivate_locked_super(mnt->mnt_sb);
out_free_secdata:
free_secdata(secdata);
out_mnt:
free_vfsmnt(mnt);
out:
return ERR_PTR(error);
}
get_sb函数主要是获取文件系统的超级块对象
回到do_new_mount函数,接着调用do_add_mount添加到命令空间挂载树上去
/*
* add a mount into a namespace's mount tree
* - provide the option of adding the new mount to an expiration list
*/
int do_add_mount(struct vfsmount *newmnt, struct path *path,
int mnt_flags, struct list_head *fslist)
{
int err;
mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
down_write(&namespace_sem);
/* Something was mounted here while we slept */
while (d_mountpoint(path->dentry) &&
follow_down(path))
;
err = -EINVAL;
if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
goto unlock;
/* Refuse the same filesystem on the same mount point */
err = -EBUSY;
if (path->mnt->mnt_sb == newmnt->mnt_sb &&
path->mnt->mnt_root == path->dentry)
goto unlock;
err = -EINVAL;
if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
goto unlock;
newmnt->mnt_flags = mnt_flags;
if ((err = graft_tree(newmnt, path)))//主要是这个函数
goto unlock;
if (fslist) /* add to the specified expiration list */
list_add_tail(&newmnt->mnt_expire, fslist);
up_write(&namespace_sem);
return 0;
unlock:
up_write(&namespace_sem);
mntput(newmnt);
return err;
}
这里主要调用 graft_tree()把vfsmnt结构加入到安装系统链表中,同时 graft_tree() 还要将新分配的 struct vfsmount 类型的变量加入到一个hash表中
static int graft_tree(struct vfsmount *mnt, struct path *path)
{
int err;
if (mnt->mnt_sb->s_flags & MS_NOUSER)
return -EINVAL;
if (S_ISDIR(path->dentry->d_inode->i_mode) !=
S_ISDIR(mnt->mnt_root->d_inode->i_mode))
return -ENOTDIR;
err = -ENOENT;
mutex_lock(&path->dentry->d_inode->i_mutex);
if (cant_mount(path->dentry))
goto out_unlock;
if (!d_unlinked(path->dentry))
err = attach_recursive_mnt(mnt, path, NULL);//
out_unlock:
mutex_unlock(&path->dentry->d_inode->i_mutex);
return err;
}
这里主要调用attach_recursive_mnt
/*
* @source_mnt : mount tree to be attached
* @nd : place the mount tree @source_mnt is attached
* @parent_nd : if non-null, detach the source_mnt from its parent and
* store the parent mount and mountpoint dentry.
* (done when source_mnt is moved)
*
* NOTE: in the table below explains the semantics when a source mount
* of a given type is attached to a destination mount of a given type.
* ---------------------------------------------------------------------------
* | BIND MOUNT OPERATION |
* |**************************************************************************
* | source-->| shared | private | slave | unbindable |
* | dest | | | | |
* | | | | | | |
* | v | | | | |
* |**************************************************************************
* | shared | shared (++) | shared (+) | shared(+++)| invalid |
* | | | | | |
* |non-shared| shared (+) | private | slave (*) | invalid |
* ***************************************************************************
* A bind operation clones the source mount and mounts the clone on the
* destination mount.
*
* (++) the cloned mount is propagated to all the mounts in the propagation
* tree of the destination mount and the cloned mount is added to
* the peer group of the source mount.
* (+) the cloned mount is created under the destination mount and is marked
* as shared. The cloned mount is added to the peer group of the source
* mount.
* (+++) the mount is propagated to all the mounts in the propagation tree
* of the destination mount and the cloned mount is made slave
* of the same master as that of the source mount. The cloned mount
* is marked as 'shared and slave'.
* (*) the cloned mount is made a slave of the same master as that of the
* source mount.
*
* ---------------------------------------------------------------------------
* | MOVE MOUNT OPERATION |
* |**************************************************************************
* | source-->| shared | private | slave | unbindable |
* | dest | | | | |
* | | | | | | |
* | v | | | | |
* |**************************************************************************
* | shared | shared (+) | shared (+) | shared(+++) | invalid |
* | | | | | |
* |non-shared| shared (+*) | private | slave (*) | unbindable |
* ***************************************************************************
*
* (+) the mount is moved to the destination. And is then propagated to
* all the mounts in the propagation tree of the destination mount.
* (+*) the mount is moved to the destination.
* (+++) the mount is moved to the destination and is then propagated to
* all the mounts belonging to the destination mount's propagation tree.
* the mount is marked as 'shared and slave'.
* (*) the mount continues to be a slave at the new location.
*
* if the source mount is a tree, the operations explained above is
* applied to each mount in the tree.
* Must be called without spinlocks held, since this function can sleep
* in allocations.
*/
/*将文件系统添加到父文件系统的命名空间中*/
static int attach_recursive_mnt(struct vfsmount *source_mnt,
struct path *path, struct path *parent_path)
{
LIST_HEAD(tree_list);
struct vfsmount *dest_mnt = path->mnt;
struct dentry *dest_dentry = path->dentry;
struct vfsmount *child, *p;
int err;
if (IS_MNT_SHARED(dest_mnt)) {
err = invent_group_ids(source_mnt, true);
if (err)
goto out;
}
err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list);
if (err)
goto out_cleanup_ids;
spin_lock(&vfsmount_lock);
if (IS_MNT_SHARED(dest_mnt)) {
for (p = source_mnt; p; p = next_mnt(p, source_mnt))
set_mnt_shared(p);
}
if (parent_path) {
detach_mnt(source_mnt, parent_path);
attach_mnt(source_mnt, path);
touch_mnt_namespace(parent_path->mnt->mnt_ns);
} else {
/*确保新的vfsmount实例的mnt_parent成员指向父文件系统
的vfsmount实例,而mnt_mountpoint成员指向装载点在父文件
系统中的dentry实例*/
mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
commit_tree(source_mnt);
}
list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
list_del_init(&child->mnt_hash);
/*将新的mnt添加到全局散列表以及父文件系统mnt实例中
的子文件系统链表*/
commit_tree(child);
}
spin_unlock(&vfsmount_lock);
return 0;
out_cleanup_ids:
if (IS_MNT_SHARED(dest_mnt))
cleanup_group_ids(source_mnt, NULL);
out:
return err;
}
这里看一下mnt_set_mountpoint
void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
struct vfsmount *child_mnt)
{
child_mnt->mnt_parent = mntget(mnt);
child_mnt->mnt_mountpoint = dget(dentry);
dentry->d_mounted++;
}
这里关系到我们在进行路径名查找了,参考一下 http://blog.csdn.net/new_abc/article/details/7688880。在文章的最后,如果找到的某个path是安装点就会找到其最近一次被安装文件系统的指针。当找到该指针后,便将 nd 中的 mnt 成员换成该安装区域块指针,同时将 nd 中的 dentry 成员换成安装区域块中的 dentry 指针。
再来看一 下commit_tree
static void commit_tree(struct vfsmount *mnt)
{
struct vfsmount *parent = mnt->mnt_parent;
struct vfsmount *m;
LIST_HEAD(head);
struct mnt_namespace *n = parent->mnt_ns;
BUG_ON(parent == mnt);
list_add_tail(&head, &mnt->mnt_list);
list_for_each_entry(m, &head, mnt_list)
m->mnt_ns = n;
list_splice(&head, n->list.prev);
list_add_tail(&mnt->mnt_hash, mount_hashtable +
hash(parent, mnt->mnt_mountpoint));//添加到父节点链表末
list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);//添加到父节点mnt_mounts链表末
touch_mnt_namespace(n);
}
这里会把它添加到一个全局的mount_hashtable,这里的插入点是通过hash(parent, mnt->mnt_mountpoint),即挂载目录先前的vfsmount结构和dentry结构
到这里,文件系统就已经挂载完成了,当下次再次访问到这个挂载点时,就会使用这个新的安装的文件系统指针。
参考:
http://www.ibm.com/developerworks/cn/linux/l-vfs/