http://www.deepin.org/thread-1140687-1-1.html
linux文件系统之路径查找与文件系统的挂载 |
为了避免陷入无限的递归,linux内核对递归层次做出了限制.每嵌套一次,current->link_count加1.每次退出嵌套时,current->link_count减1.当current->link_count达到MAX_NESTED_LINKS时,退出递归.
另外,为了避免恶意用户设置大量的符号链接,linux内核对符号链接的总数也进行限制.current->total_link_count存放了遇到的符号链接的数目,如果这个数目超过40,也会停止解析.
解析一个符号链接,首先要读取这个符号链接所指向的对象,这是由inode->i_op->follow_link()完成的.
读取到链接对象之后,会调用__vfs_follow_link().转入看下这个操作.
/*
 * Continue a path walk through the target string of a symbolic link.
 *
 * @nd:   lookup state; it is released with path_release() on the error
 *        paths handled here
 * @link: the link target string, or an ERR_PTR-encoded error
 *
 * Returns 0 on success, the error encoded in @link, the non-zero result of
 * link_path_walk(), or -ENOMEM when the copy of the last component cannot
 * be allocated.
 */
static inline int __vfs_follow_link(struct nameidata *nd, const char *link)
{
	int res = 0;
	char *copy;

	/* The caller passed an encoded error instead of a target string. */
	if (IS_ERR(link)) {
		path_release(nd);
		return PTR_ERR(link);
	}

	/*
	 * A target beginning with '/' is an absolute path: drop the current
	 * position and re-seat nd at the root.  When walk_init_root() returns
	 * zero the walk below is skipped and res stays 0.
	 */
	if (*link == '/') {
		path_release(nd);
		if (!walk_init_root(link, nd))
			/* weird __emul_prefix() stuff did it */
			goto walked;
	}

	/* Resolve the target; this is where symlink resolution recurses. */
	res = link_path_walk(link, nd);

walked:
	if (nd->depth || res || nd->last_type != LAST_NORM)
		return res;

	/*
	 * If it is an iterative symlinks resolution in open_namei() we
	 * have to copy the last component. And all that crap because of
	 * bloody create() on broken symlinks. Furrfu...
	 */
	copy = __getname();
	if (unlikely(!copy)) {
		path_release(nd);
		return -ENOMEM;
	}
	strcpy(copy, nd->last.name);
	nd->last.name = copy;
	return 0;
}
同理,在上述代码中遇到的依赖于特定文件系统的操作,我们先把它搁开. *^_^*
三:文件系统的挂载
在实际应用中,通常会将特定格式的硬件设备挂载到linux里,使linux能够对其进行读写.其实,在linux中,文件系统也包含有虚拟文件系统,这些文件系统一般都是存放在RAM中的。例如:ramfs , sysfs等.
文件系统会挂载到一个特定的点上,那这个点又是怎么生成的呢?我们先从系统初始化时,总根的挂载说起.
3.1:根文件系统的挂载
内核启动到start_kernel()会调用mnt_init().从这个函数说起:
/*
 * Boot-time initialisation of the mount machinery: creates the vfsmount
 * slab cache, sets up the one-page mount hash table, initialises sysfs,
 * registers fs_subsys, registers rootfs and builds the initial mount tree.
 *
 * @mempages: not used by the code shown here.
 */
void __init mnt_init(unsigned long mempages)
{
	unsigned int nr_hash;
	unsigned int slot;
	int err;

	init_rwsem(&namespace_sem);

	/* Slab cache for struct vfsmount. */
	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	/* The mount hash table occupies exactly one page. */
	mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
	if (!mount_hashtable)
		panic("Failed to allocate mount hash table\n");

	/*
	 * Find the power-of-two list-heads that can fit into the allocation..
	 * We don't guarantee that "sizeof(struct list_head)" is necessarily
	 * a power-of-two.
	 */
	nr_hash = PAGE_SIZE / sizeof(struct list_head);

	/* hash_bits ends up as the index of the highest set bit of nr_hash. */
	for (hash_bits = 1; (nr_hash >> hash_bits) != 0; hash_bits++)
		;
	hash_bits--;

	/*
	 * Re-calculate the actual number of entries and the mask
	 * from the number of bits we can fit.  This rounds the entry count
	 * DOWN to a power of two.
	 */
	nr_hash = 1UL << hash_bits;
	hash_mask = nr_hash - 1;

	printk("Mount-cache hash table entries: %d\n", nr_hash);

	/* And initialize the newly allocated array */
	for (slot = 0; slot < nr_hash; slot++)
		INIT_LIST_HEAD(&mount_hashtable[slot]);

	/* sysfs setup; a failure is only logged. */
	err = sysfs_init();
	if (err)
		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
			__FUNCTION__, err);

	err = subsystem_register(&fs_subsys);
	if (err)
		printk(KERN_WARNING "%s: subsystem_register error: %d\n",
			__FUNCTION__, err);

	/* Register the rootfs filesystem type, then mount it. */
	init_rootfs();
	init_mount_tree();
}
转到init_rootfs()中:
/*
 * Register the rootfs filesystem type.
 * Returns whatever register_filesystem() returns.
 */
int __init init_rootfs(void)
{
	int err = register_filesystem(&rootfs_fs_type);

	return err;
}
其实它是注册了rootfs的文件系统。转进去看下代码:
/*
 * Add a filesystem type to the file_systems list.
 *
 * @fs: the type to register; its name must not contain '.' (BUG otherwise)
 *
 * Returns 0 on success, or -EBUSY if @fs is already linked (fs->next is
 * set) or a type with the same name is already on the list.
 */
int register_filesystem(struct file_system_type * fs)
{
	struct file_system_type **slot;
	int res;

	BUG_ON(strchr(fs->name, '.'));
	if (fs->next)
		return -EBUSY;

	/* Start with an empty list of superblocks for this type. */
	INIT_LIST_HEAD(&fs->fs_supers);

	write_lock(&file_systems_lock);
	/*
	 * find_filesystem() yields either the link that points at an entry
	 * with the same name, or the terminating NULL link of the list.
	 */
	slot = find_filesystem(fs->name, strlen(fs->name));
	if (*slot == NULL) {
		/* Not registered yet: append at the tail. */
		*slot = fs;
		res = 0;
	} else {
		res = -EBUSY;
	}
	write_unlock(&file_systems_lock);

	return res;
}
其中find_filesystem()的代码如下:
/*
 * Walk the file_systems list looking for a type named @name (@len bytes).
 *
 * Returns the address of the link that points at the matching entry, or
 * the address of the list's terminating NULL link when nothing matches.
 */
static struct file_system_type **find_filesystem(const char *name, unsigned len)
{
	struct file_system_type **link = &file_systems;

	while (*link) {
		const char *candidate = (*link)->name;

		/* Same length and same bytes: this is the entry. */
		if (strlen(candidate) == len &&
		    strncmp(candidate, name, len) == 0)
			return link;
		link = &(*link)->next;
	}
	return link;
}
综合上面所说的,init_rootfs()仅是将rootfs_fs_type挂到了file_systems链表上.
init_mount_tree()的代码如下:
/*
 * Build the initial mount tree: mount rootfs, create the first mount
 * namespace around it, attach that namespace to init_task and make the
 * rootfs root both the working directory and the root directory of the
 * current task.  Any failure here panics.
 */
static void __init init_mount_tree(void)
{
struct vfsmount *mnt;
struct mnt_namespace *ns;
/* Mount the rootfs filesystem. */
mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
if (IS_ERR(mnt))
panic("Can't create rootfs");
ns = kmalloc(sizeof(*ns), GFP_KERNEL);
if (!ns)
panic("Can't allocate initial namespace");
/* Initialise the namespace and make the rootfs mount its only member. */
atomic_set(&ns->count, 1);
INIT_LIST_HEAD(&ns->list);
init_waitqueue_head(&ns->poll);
ns->event = 0;
list_add(&mnt->mnt_list, &ns->list);
ns->root = mnt;
mnt->mnt_ns = ns;
init_task.nsproxy->mnt_ns = ns;
/* NOTE(review): presumably takes the reference held through init_task's nsproxy - confirm. */
get_mnt_ns(ns);
/* Both the current directory and the root directory become the filesystem root. */
set_fs_pwd(current->fs, ns->root, ns->root->mnt_root);
set_fs_root(current->fs, ns->root, ns->root->mnt_root);
}
注意在这里的current进程是init进程。后续的进程全是它的子进程,也就是说后面进程继承了它的当前目录与root目录信息.
Rootfs的挂载是在do_kern_mount()中完成的。它的代码如下:
/*
 * Mount a filesystem identified by its type name.
 *
 * @fstype: filesystem type name, looked up with get_fs_type()
 * @flags, @name, @data: passed through to vfs_kern_mount()
 *
 * Returns the new vfsmount, or an ERR_PTR: -ENODEV when the type is
 * unknown, otherwise whatever vfs_kern_mount()/fs_set_subtype() return.
 */
struct vfsmount *
do_kern_mount(const char *fstype, int flags, const char *name, void *data)
{
	struct file_system_type *type;
	struct vfsmount *mnt;

	/* Resolve the type name to its file_system_type. */
	type = get_fs_type(fstype);
	if (!type)
		return ERR_PTR(-ENODEV);

	/* The actual mount work. */
	mnt = vfs_kern_mount(type, flags, name, data);
	if (!IS_ERR(mnt)) {
		/* Fill in the subtype only if the fs wants one and has none yet. */
		if ((type->fs_flags & FS_HAS_SUBTYPE) &&
		    !mnt->mnt_sb->s_subtype)
			mnt = fs_set_subtype(mnt, fstype);
	}

	/* Per the original note: drops the filesystem type's reference count. */
	put_filesystem(type);
	return mnt;
}
vfs_kern_mount()的代码如下:
/*
 * Create a vfsmount for the given filesystem type.
 *
 * @type:  filesystem type; NULL yields ERR_PTR(-ENODEV)
 * @flags: mount flags handed to type->get_sb()
 * @name:  device name, handed to alloc_vfsmnt() and type->get_sb()
 * @data:  filesystem-specific mount data, may be NULL
 *
 * Returns the new vfsmount with mnt_mountpoint set to its own root and
 * mnt_parent set to itself, or an ERR_PTR on failure.  The labels at the
 * end undo, in reverse order, whatever had been set up before the failure.
 */
struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
struct vfsmount *mnt;
char *secdata = NULL;
int error;
if (!type)
return ERR_PTR(-ENODEV);
error = -ENOMEM;
/* Allocate the vfsmount (original note: this also sets mnt->mnt_devname = name). */
mnt = alloc_vfsmnt(name);
if (!mnt)
goto out;
/* Security data is only allocated and copied when mount data was supplied. */
if (data) {
secdata = alloc_secdata();
if (!secdata)
goto out_mnt;
error = security_sb_copy_data(type, data, secdata);
if (error)
goto out_free_secdata;
}
/*
 * type->get_sb() creates the super_block together with the root dentry
 * and inode of the filesystem (per the original note).
 */
error = type->get_sb(type, flags, name, data, mnt);
if (error < 0)
goto out_free_secdata;
BUG_ON(!mnt->mnt_sb);
error = security_sb_kern_mount(mnt->mnt_sb, secdata);
if (error)
goto out_sb;
/* The mount point is, for now, the filesystem's own root directory. */
mnt->mnt_mountpoint = mnt->mnt_root;
/* The parent mount is, for now, the mount itself. */
mnt->mnt_parent = mnt;
up_write(&mnt->mnt_sb->s_umount);
free_secdata(secdata);
return mnt;
out_sb:
dput(mnt->mnt_root);
up_write(&mnt->mnt_sb->s_umount);
deactivate_super(mnt->mnt_sb);
out_free_secdata:
free_secdata(secdata);
out_mnt:
free_vfsmnt(mnt);
out:
return ERR_PTR(error);
}
在上面会调用type->get_sb()来填充一些关键的信息。Rootfs的file_system_type定义如下:
/*
 * Filesystem type descriptor for rootfs: superblocks are obtained through
 * rootfs_get_sb() and torn down with kill_litter_super().
 */
static struct file_system_type rootfs_fs_type = {
	.name		= "rootfs",
	.get_sb		= rootfs_get_sb,
	.kill_sb	= kill_litter_super,
};
Rootfs_get_sb()代码如下:
/*
 * get_sb callback of rootfs: a device-less superblock filled in by
 * ramfs_fill_super(), with MS_NOUSER added to the caller's flags.
 * @dev_name is not used.
 */
static int rootfs_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
	int rootfs_flags = flags | MS_NOUSER;

	return get_sb_nodev(fs_type, rootfs_flags, data, ramfs_fill_super,
			    mnt);
}
内核为get_sb提供了一个统一的初始化接口,具体有关文件系统的消息填充放在一个回调函数里。在内核中get_sb()有以下几个API:
get_sb_bdev():挂载一个基于块设备的文件系统
get_sb_nodev():挂载一个不存在于磁盘的文件系统
get_sb_single():挂载一个与其它挂载共享的文件系统
get_sb_nodev()的代码如下:
/*
 * Obtain a superblock for a filesystem that has no backing device.
 *
 * @fs_type:    filesystem type the superblock belongs to
 * @flags:      mount flags, stored in s_flags before @fill_super runs
 * @data:       opaque mount data handed to @fill_super
 * @fill_super: callback that fills in the filesystem-specific parts; its
 *              third argument is 1 when MS_VERBOSE is set in @flags, else 0
 *
 * Returns the superblock with MS_ACTIVE set, or an ERR_PTR.
 *
 * NOTE(review): this is a 4-argument variant returning a pointer, while
 * rootfs_get_sb() in this document calls a 5-argument variant returning
 * int - the snippets appear to come from different kernel versions.
 */
struct super_block *get_sb_nodev(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	struct super_block *s;
	int verbose;
	int error;

	/*
	 * Allocate and initialise a super_block; per the original note,
	 * set_anon_super also assigns it an anonymous device number.
	 */
	s = sget(fs_type, NULL, set_anon_super, NULL);
	if (IS_ERR(s))
		return s;

	s->s_flags = flags;
	verbose = (flags & MS_VERBOSE) ? 1 : 0;

	/* Let the filesystem fill in its own details. */
	error = fill_super(s, data, verbose);
	if (!error) {
		s->s_flags |= MS_ACTIVE;
		return s;
	}

	up_write(&s->s_umount);
	deactivate_super(s);
	return ERR_PTR(error);
}
Rootfs调用get_sb_nodev()所用的回调函数为ramfs_fill_super().代码如下:
/*
 * fill_super callback used by ramfs and rootfs: sets the generic
 * superblock fields and creates the root directory inode and dentry.
 * @data and @silent are not used.
 *
 * Returns 0 on success or -ENOMEM if the root inode or dentry cannot be
 * allocated.
 */
static int ramfs_fill_super(struct super_block * sb, void * data, int silent)
{
	struct inode *root_inode;
	struct dentry *root_dentry;

	/* Generic superblock parameters. */
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = RAMFS_MAGIC;
	sb->s_op = &ramfs_ops;

	/* Root directory inode, mode 0755. */
	root_inode = ramfs_get_inode(sb, S_IFDIR | 0755, 0);
	if (!root_inode)
		return -ENOMEM;

	/* Root dentry; on failure the inode reference is dropped again. */
	root_dentry = d_alloc_root(root_inode);
	if (root_dentry) {
		sb->s_root = root_dentry;
		return 0;
	}

	iput(root_inode);
	return -ENOMEM;
}
在这里,需要留意sb->s_op的赋值。Rootfs的inode节点是在ramfs_get_inode()分配并初始化的。代码如下:
/*
 * Allocate and initialise a ramfs inode.
 *
 * @sb:   superblock the inode belongs to
 * @mode: file type and permission bits
 * @dev:  device number, only used for special files
 *
 * Returns the new inode, or NULL if new_inode() fails.
 */
struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev)
{
	struct inode *inode = new_inode(sb);

	if (!inode)
		return NULL;

	/* Fields common to every file type. */
	inode->i_mode = mode;
	inode->i_uid = current->fsuid;
	inode->i_gid = current->fsgid;
	inode->i_blksize = PAGE_CACHE_SIZE;
	inode->i_blocks = 0;
	inode->i_mapping->a_ops = &ramfs_aops;
	inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;

	/* Operation tables depend on the file type. */
	switch (mode & S_IFMT) {
	case S_IFREG:
		/* Regular file. */
		inode->i_op = &ramfs_file_inode_operations;
		inode->i_fop = &ramfs_file_operations;
		break;
	case S_IFDIR:
		/* Directory. */
		inode->i_op = &ramfs_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;
		/* directory inodes start off with i_nlink == 2 (for "." entry) */
		inode->i_nlink++;
		break;
	case S_IFLNK:
		/* Symbolic link. */
		inode->i_op = &page_symlink_inode_operations;
		break;
	default:
		/* Everything else is set up as a special file. */
		init_special_inode(inode, mode, dev);
		break;
	}

	return inode;
}
Rootfs文件系统根结点的dentry是在d_alloc_root()中分配并初始化的。代码如下:
/*
 * Allocate the root dentry ("/") for a filesystem and bind it to
 * @root_inode.  The dentry is its own parent.
 *
 * Returns the dentry, or NULL when @root_inode is NULL or d_alloc() fails.
 */
struct dentry * d_alloc_root(struct inode * root_inode)
{
	/* This is the "/" seen at the top of every path. */
	static const struct qstr name = { .name = "/", .len = 1 };
	struct dentry *root;

	if (!root_inode)
		return NULL;

	root = d_alloc(NULL, &name);
	if (!root)
		return NULL;

	/* Same superblock as the inode; parent points back at itself. */
	root->d_sb = root_inode->i_sb;
	root->d_parent = root;
	d_instantiate(root, root_inode);
	return root;
}
注意上面rootfs的文件系统根目录是’/’.这也是我们平时在shell里看到的’/’了.
3.2:ext2文件系统的挂载
挂载完根目录之后,我们就可以进行具体的文件系统的挂载了。以ext2为例做说明.mount对应的系统调用入口是sys_mount().它的代码如下:
/*
 * mount(2) system call entry.  Copies the four user-space arguments into
 * kernel memory, calls do_mount() under the big kernel lock, and then
 * releases the copies.  The out3/out2/out1 labels release the copies in
 * reverse order of acquisition, so a failure part-way through only frees
 * what was already obtained.
 *
 * Returns the result of do_mount(), or the error from a failed copy.
 */
asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name,
char __user * type, unsigned long flags,
void __user * data)
{
int retval;
unsigned long data_page;
unsigned long type_page;
unsigned long dev_page;
char *dir_page;
/* Copy the arguments from user space to kernel space. */
/* Filesystem type. */
retval = copy_mount_options (type, &type_page);
if (retval < 0)
return retval;
/* Mount point path. */
dir_page = getname(dir_name);
retval = PTR_ERR(dir_page);
if (IS_ERR(dir_page))
goto out1;
/* Device name. */
retval = copy_mount_options (dev_name, &dev_page);
if (retval < 0)
goto out2;
/* Filesystem-specific mount data. */
retval = copy_mount_options (data, &data_page);
if (retval < 0)
goto out3;
lock_kernel();
/* The actual mount work. */
retval = do_mount((char*)dev_page, dir_page, (char*)type_page,
flags, (void*)data_page);
unlock_kernel();
/* Release everything that was allocated above. */
free_page(data_page);
out3:
free_page(dev_page);
out2:
putname(dir_page);
out1:
free_page(type_page);
return retval;
}
Do_mount()的代码如下:
/*
 * Kernel-side implementation of mount: validates the arguments, splits the
 * per-mountpoint flags out of @flags, looks up the mount point and then
 * dispatches to remount, bind, move or new-mount handling.
 *
 * @dev_name:  device name, may be NULL
 * @dir_name:  mount point path; must be a non-empty string terminated
 *             within PAGE_SIZE bytes
 * @type_page: filesystem type string
 * @flags:     MS_* mount flags
 * @data_page: filesystem-specific data; its last byte is forced to 0
 *
 * Returns 0 or a negative error code.
 */
long do_mount(char * dev_name, char * dir_name, char *type_page,
unsigned long flags, void *data_page)
{
struct nameidata nd;
int retval = 0;
int mnt_flags = 0;
/* Discard magic */
if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
flags &= ~MS_MGC_MSK;
/* Basic sanity checks */
/* Every string must be NUL-terminated within one page. */
if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
return -EINVAL;
if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
return -EINVAL;
if (data_page)
((char *)data_page)[PAGE_SIZE - 1] = 0;
/* Separate the per-mountpoint flags */
if (flags & MS_NOSUID)
mnt_flags |= MNT_NOSUID;
if (flags & MS_NODEV)
mnt_flags |= MNT_NODEV;
if (flags & MS_NOEXEC)
mnt_flags |= MNT_NOEXEC;
flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE);
/* ... and get the mountpoint */
/* Look up the mount point, following symlinks. */
retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
/* Lookup failed (e.g. the mount directory does not exist): bail out. */
if (retval)
return retval;
retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page);
if (retval)
goto dput_out;
/* Remount an existing mount. */
if (flags & MS_REMOUNT)
retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
data_page);
/* Bind mount. */
else if (flags & MS_BIND)
retval = do_loopback(&nd, dev_name, flags & MS_REC);
/* Move an existing mount to another location. */
else if (flags & MS_MOVE)
retval = do_move_mount(&nd, dev_name);
else
/* A brand-new mount. */
retval = do_new_mount(&nd, type_page, flags, mnt_flags,
dev_name, data_page);
dput_out:
path_release(&nd);
return retval;
}
通常执行的是一个全新的安装,即会转入到do_new_mount().代码如下:
/*
 * Create a new mount of filesystem @type and attach it at @nd.
 *
 * Returns -EINVAL when @type is NULL or not NUL-terminated within
 * PAGE_SIZE bytes, -EPERM without CAP_SYS_ADMIN, the error from
 * do_kern_mount(), or the result of do_add_mount().
 */
static int do_new_mount(struct nameidata *nd, char *type, int flags,
			int mnt_flags, char *name, void *data)
{
	struct vfsmount *mnt;

	/* Argument validation. */
	if (!type || !memchr(type, 0, PAGE_SIZE))
		return -EINVAL;

	/* we need capabilities... */
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* Build the vfsmount, then graft it into the mount tree. */
	mnt = do_kern_mount(type, flags, name, data);
	return IS_ERR(mnt) ? PTR_ERR(mnt)
			   : do_add_mount(mnt, nd, mnt_flags, NULL);
}
Do_kern_mount()的代码在根目录挂载一节已经分析过了.do_add_mount()代码如下示:
int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
int mnt_flags, struct list_head *fslist)
{
int err;
down_write(¤t->namespace->sem);
/* Something was mounted here while we slept */
//可能在安装的时候又有设备挂到下面去了
//重新遍历一下挂载目录下的安装结点
while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
;
err = -EINVAL;
if (!check_mnt(nd->mnt))
goto unlock;
/* Refuse the same filesystem on the same mount point */
err = -EBUSY;
//在同一个挂载点上挂载相同的文件系统
if (nd->mnt->mnt_sb == newmnt->mnt_sb &&
nd->mnt->mnt_root == nd->dentry)
goto unlock;
err = -EINVAL;
//文件系统的根目录是一个链接?
if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
goto unlock;
newmnt->mnt_flags = mnt_flags;
//将newmnt加至namespace->list ,将mnt加至hash表,加至父文件系统的子链表
err = graft_tree(newmnt, nd);
if (err == 0 && fslist) {
/* add to the specified expiration list */
spin_lock(&vfsmount_lock);
list_add_tail(&newmnt->mnt_fslink, fslist);
spin_unlock(&vfsmount_lock);
}
unlock:
up_write(¤t->namespace->sem);
mntput(newmnt);
return err;
}
如果挂载的是ext2文件系统,在do_kern_mount()中就会调用file_system_type -> get_sb().
Ext2的file_system_type定义如下:
/*
 * Filesystem type descriptor for ext2.  FS_REQUIRES_DEV is set in
 * fs_flags; superblocks are obtained through ext2_get_sb() and torn down
 * with kill_block_super().
 */
static struct file_system_type ext2_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "ext2",
	.get_sb		= ext2_get_sb,
	.kill_sb	= kill_block_super,
	.fs_flags	= FS_REQUIRES_DEV,
};
相应的get_sb入口为ext2_get_sb():
/*
 * get_sb callback of ext2: a block-device backed superblock whose
 * filesystem-specific parts are filled in by ext2_fill_super().
 * Returns whatever get_sb_bdev() returns.
 */
static struct super_block *ext2_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	struct super_block *sb;

	sb = get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super);
	return sb;
}
get_sb_bdev()涉及到块设备方面的东西,先把它放开。
ext2_get_sb()的代码比较简单.相应的回调函数为ext2_fill_super():
/*
 * fill_super callback of ext2: reads the on-disk superblock and the group
 * descriptors and initialises the in-memory superblock and its private
 * ext2_sb_info.
 *
 * @sb:     superblock to fill in
 * @data:   mount option string (may carry the superblock location)
 * @silent: when non-zero, suppresses the "can't find ext2" style messages
 *
 * Returns 0 on success, -ENOMEM if the ext2_sb_info cannot be allocated,
 * and -EINVAL on every other failure.
 *
 * Fix relative to the quoted listing: the "[i]" subscript had been lost in
 * the group-descriptor read loop and in the failed_mount2 cleanup loop, so
 * each sb_bread() result overwrote the array pointer itself.  The
 * subscripts are restored here.
 */
static int ext2_fill_super(struct super_block *sb, void *data, int silent)
{
	struct buffer_head * bh;
	struct ext2_sb_info * sbi;
	struct ext2_super_block * es;
	struct inode *root;
	unsigned long block;
	/* Superblock location taken from the mount options. */
	unsigned long sb_block = get_sb_block(&data);
	unsigned long logic_sb_block;
	unsigned long offset = 0;
	unsigned long def_mount_opts;
	int blocksize = BLOCK_SIZE;
	int db_count;
	int i, j;
	__le32 features;

	/* sbi is the ext2-private part hanging off the super_block. */
	sbi = kmalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;
	sb->s_fs_info = sbi;
	memset(sbi, 0, sizeof(*sbi));

	/*
	 * See what the current blocksize for the device is, and
	 * use that as the blocksize.  Otherwise (or if the blocksize
	 * is smaller than the default) use the default.
	 * This is important for devices that have a hardware
	 * sectorsize that is larger than the default.
	 */
	blocksize = sb_min_blocksize(sb, BLOCK_SIZE);
	if (!blocksize) {
		printk ("EXT2-fs: unable to set blocksize\n");
		goto failed_sbi;
	}

	/*
	 * If the superblock doesn't start on a hardware sector boundary,
	 * calculate the offset.
	 */
	/* sb_block is expressed in BLOCK_SIZE units; convert if needed. */
	if (blocksize != BLOCK_SIZE) {
		logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize;
		offset = (sb_block*BLOCK_SIZE) % blocksize;
	} else {
		logic_sb_block = sb_block;
	}

	/* Read the block that holds the superblock into a buffer. */
	if (!(bh = sb_bread(sb, logic_sb_block))) {
		printk ("EXT2-fs: unable to read superblock\n");
		goto failed_sbi;
	}
	/*
	 * Note: s_es must be initialized as soon as possible because
	 *       some ext2 macro-instructions depend on its value
	 */
	es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
	sbi->s_es = es;
	sb->s_magic = le16_to_cpu(es->s_magic);
	sb->s_flags |= MS_ONE_SECOND;

	/* Wrong magic number: this is not an ext2 filesystem. */
	if (sb->s_magic != EXT2_SUPER_MAGIC) {
		if (!silent)
			printk ("VFS: Can't find ext2 filesystem on dev %s.\n",
				sb->s_id);
		goto failed_mount;
	}

	/* Set defaults before we parse the mount options */
	/* Defaults come from the on-disk superblock. */
	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
	if (def_mount_opts & EXT2_DEFM_DEBUG)
		set_opt(sbi->s_mount_opt, DEBUG);
	if (def_mount_opts & EXT2_DEFM_BSDGROUPS)
		set_opt(sbi->s_mount_opt, GRPID);
	if (def_mount_opts & EXT2_DEFM_UID16)
		set_opt(sbi->s_mount_opt, NO_UID32);
	if (def_mount_opts & EXT2_DEFM_XATTR_USER)
		set_opt(sbi->s_mount_opt, XATTR_USER);
	if (def_mount_opts & EXT2_DEFM_ACL)
		set_opt(sbi->s_mount_opt, POSIX_ACL);

	if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_PANIC)
		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_RO)
		set_opt(sbi->s_mount_opt, ERRORS_RO);

	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);

	/* Now apply the options given on the mount command. */
	if (!parse_options ((char *) data, sbi))
		goto failed_mount;

	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
		((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
		 MS_POSIXACL : 0);

	if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV &&
	    (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) ||
	     EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
	     EXT2_HAS_INCOMPAT_FEATURE(sb, ~0U)))
		printk("EXT2-fs warning: feature flags set on rev 0 fs, "
		       "running e2fsck is recommended\n");
	/*
	 * Check feature flags regardless of the revision level, since we
	 * previously didn't change the revision level when setting the flags,
	 * so there is a chance incompat flags are set on a rev 0 filesystem.
	 */
	features = EXT2_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP);
	if (features) {
		printk("EXT2-fs: %s: couldn't mount because of "
		       "unsupported optional features (%x).\n",
		       sb->s_id, le32_to_cpu(features));
		goto failed_mount;
	}
	if (!(sb->s_flags & MS_RDONLY) &&
	    (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){
		printk("EXT2-fs: %s: couldn't mount RDWR because of "
		       "unsupported optional features (%x).\n",
		       sb->s_id, le32_to_cpu(features));
		goto failed_mount;
	}

	/* Filesystem block size = BLOCK_SIZE shifted left by s_log_block_size. */
	blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);

	/* If the blocksize doesn't match, re-read the thing.. */
	/* The superblock buffer must be re-read with the new block size. */
	if (sb->s_blocksize != blocksize) {
		brelse(bh);

		if (!sb_set_blocksize(sb, blocksize)) {
			printk(KERN_ERR "EXT2-fs: blocksize too small for device.\n");
			goto failed_sbi;
		}

		logic_sb_block = (sb_block*BLOCK_SIZE) / blocksize;
		offset = (sb_block*BLOCK_SIZE) % blocksize;
		bh = sb_bread(sb, logic_sb_block);
		if(!bh) {
			printk("EXT2-fs: Couldn't read superblock on "
			       "2nd try.\n");
			goto failed_sbi;
		}
		es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
		sbi->s_es = es;
		if (es->s_magic != cpu_to_le16(EXT2_SUPER_MAGIC)) {
			printk ("EXT2-fs: Magic mismatch, very weird !\n");
			goto failed_mount;
		}
	}

	/* Largest file size this filesystem allows. */
	sb->s_maxbytes = ext2_max_size(sb->s_blocksize_bits);

	if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV) {
		sbi->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE;
		sbi->s_first_ino = EXT2_GOOD_OLD_FIRST_INO;
	} else {
		sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
		sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
		/* Inode size must be a power of two within [old size, blocksize]. */
		if ((sbi->s_inode_size < EXT2_GOOD_OLD_INODE_SIZE) ||
		    (sbi->s_inode_size & (sbi->s_inode_size - 1)) ||
		    (sbi->s_inode_size > blocksize)) {
			printk ("EXT2-fs: unsupported inode size: %d\n",
				sbi->s_inode_size);
			goto failed_mount;
		}
	}

	sbi->s_frag_size = EXT2_MIN_FRAG_SIZE <<
				   le32_to_cpu(es->s_log_frag_size);
	if (sbi->s_frag_size)
		sbi->s_frags_per_block = sb->s_blocksize /
						  sbi->s_frag_size;
	else
		/* Clearing the magic makes the magic re-check below fail. */
		sb->s_magic = 0;
	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
	sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);

	/* Inodes per block = block size / inode size. */
	sbi->s_inodes_per_block = sb->s_blocksize /
					   EXT2_INODE_SIZE(sb);
	/* Inode-table blocks per group = inodes per group / inodes per block. */
	sbi->s_itb_per_group = sbi->s_inodes_per_group /
					sbi->s_inodes_per_block;
	/* Group descriptors per block = block size / descriptor size. */
	sbi->s_desc_per_block = sb->s_blocksize /
					 sizeof (struct ext2_group_desc);
	/* Buffer head that holds the on-disk superblock. */
	sbi->s_sbh = bh;
	/* Filesystem state recorded on disk. */
	sbi->s_mount_state = le16_to_cpu(es->s_state);
	sbi->s_addr_per_block_bits =
		log2 (EXT2_ADDR_PER_BLOCK(sb));
	sbi->s_desc_per_block_bits =
		log2 (EXT2_DESC_PER_BLOCK(sb));

	if (sb->s_magic != EXT2_SUPER_MAGIC) {
		if (!silent)
			printk ("VFS: Can't find an ext2 filesystem on dev "
				"%s.\n",
				sb->s_id);
		goto failed_mount;
	}

	if (sb->s_blocksize != bh->b_size) {
		if (!silent)
			printk ("VFS: Unsupported blocksize on dev "
				"%s.\n", sb->s_id);
		goto failed_mount;
	}

	if (sb->s_blocksize != sbi->s_frag_size) {
		printk ("EXT2-fs: fragsize %lu != blocksize %lu (not supported yet)\n",
			sbi->s_frag_size, sb->s_blocksize);
		goto failed_mount;
	}

	if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
		printk ("EXT2-fs: #blocks per group too big: %lu\n",
			sbi->s_blocks_per_group);
		goto failed_mount;
	}
	if (sbi->s_frags_per_group > sb->s_blocksize * 8) {
		printk ("EXT2-fs: #fragments per group too big: %lu\n",
			sbi->s_frags_per_group);
		goto failed_mount;
	}
	if (sbi->s_inodes_per_group > sb->s_blocksize * 8) {
		printk ("EXT2-fs: #inodes per group too big: %lu\n",
			sbi->s_inodes_per_group);
		goto failed_mount;
	}

	/* Number of block groups, rounded up. */
	sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) -
				        le32_to_cpu(es->s_first_data_block) +
				       EXT2_BLOCKS_PER_GROUP(sb) - 1) /
				       EXT2_BLOCKS_PER_GROUP(sb);
	/*
	 * Blocks needed for the group descriptors (one descriptor per group),
	 * rounded up to whole blocks.
	 */
	db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
		   EXT2_DESC_PER_BLOCK(sb);
	/* One buffer_head pointer per descriptor block. */
	sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL);
	if (sbi->s_group_desc == NULL) {
		printk ("EXT2-fs: not enough memory\n");
		goto failed_mount;
	}
	percpu_counter_init(&sbi->s_freeblocks_counter);
	percpu_counter_init(&sbi->s_freeinodes_counter);
	percpu_counter_init(&sbi->s_dirs_counter);
	bgl_lock_init(&sbi->s_blockgroup_lock);

	/* s_debts is an array with one entry per block group. */
	sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts),
			       GFP_KERNEL);
	if (!sbi->s_debts) {
		printk ("EXT2-fs: not enough memory\n");
		goto failed_mount_group_desc;
	}
	memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(*sbi->s_debts));

	for (i = 0; i < db_count; i++) {
		/* Locate and read the i-th group descriptor block. */
		block = descriptor_loc(sb, logic_sb_block, i);
		sbi->s_group_desc[i] = sb_bread(sb, block);
		if (!sbi->s_group_desc[i]) {
			/* Release the descriptor blocks read so far. */
			for (j = 0; j < i; j++)
				brelse (sbi->s_group_desc[j]);
			printk ("EXT2-fs: unable to read group descriptors\n");
			goto failed_mount_group_desc;
		}
	}
	if (!ext2_check_descriptors (sb)) {
		printk ("EXT2-fs: group descriptors corrupted!\n");
		db_count = i;
		goto failed_mount2;
	}
	sbi->s_gdb_count = db_count;
	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
	spin_lock_init(&sbi->s_next_gen_lock);
	/*
	 * set up enough so that it can read an inode
	 */
	sb->s_op = &ext2_sops;
	sb->s_export_op = &ext2_export_ops;
	sb->s_xattr = ext2_xattr_handlers;

	/* The ext2 root directory is inode number EXT2_ROOT_INO. */
	root = iget(sb, EXT2_ROOT_INO);
	/* Create the root dentry and point it at the root inode. */
	sb->s_root = d_alloc_root(root);
	if (!sb->s_root) {
		iput(root);
		printk(KERN_ERR "EXT2-fs: get root inode failed\n");
		goto failed_mount2;
	}
	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
		dput(sb->s_root);
		sb->s_root = NULL;
		printk(KERN_ERR "EXT2-fs: corrupt root inode, run e2fsck\n");
		goto failed_mount2;
	}
	if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
		ext2_warning(sb, __FUNCTION__,
			"mounting ext3 filesystem as ext2\n");
	ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY);
	percpu_counter_mod(&sbi->s_freeblocks_counter,
				ext2_count_free_blocks(sb));
	percpu_counter_mod(&sbi->s_freeinodes_counter,
				ext2_count_free_inodes(sb));
	percpu_counter_mod(&sbi->s_dirs_counter,
				ext2_count_dirs(sb));
	return 0;

failed_mount2:
	/* Release every descriptor block that was read. */
	for (i = 0; i < db_count; i++)
		brelse(sbi->s_group_desc[i]);
failed_mount_group_desc:
	kfree(sbi->s_group_desc);
	if (sbi->s_debts)
		kfree(sbi->s_debts);
failed_mount:
	brelse(bh);
failed_sbi:
	sb->s_fs_info = NULL;
	kfree(sbi);
	return -EINVAL;
}
这段代码里关于具体设备的读值部份将放在块设备驱动中做专题讲述。上面的操作主要是初始化了super_block中指向具体文件系统的信息部份。其中的信息如下图所示:
在上面的代码中,需要注意的是,文件系统的根目录对应的inode是在第二个inode.inode的设置是下列代码完成的:
root = iget(sb, EXT2_ROOT_INO);
跟踪进iget():
/*
 * Get the inode numbered @ino on @sb.  When iget_locked() hands back an
 * inode still flagged I_NEW, the filesystem's read_inode() fills it in and
 * it is then unlocked.
 *
 * Returns the inode, or NULL if iget_locked() fails.
 */
static inline struct inode *iget(struct super_block *sb, unsigned long ino)
{
	struct inode *inode = iget_locked(sb, ino);

	if (!inode)
		return inode;

	/* Anything not flagged I_NEW needs no filling in. */
	if (!(inode->i_state & I_NEW))
		return inode;

	/* Freshly allocated: let the filesystem read the on-disk inode. */
	sb->s_op->read_inode(inode);
	unlock_new_inode(inode);
	return inode;
}
而ext2的super_block的操作是由下列代码设置的:
sb->s_op = &ext2_sops;
sb->s_export_op = &ext2_export_ops;
sb->s_xattr = ext2_xattr_handlers;
相应的,ext2_sops如下所示:
/*
 * Superblock operations of ext2.  iget() reaches ext2_read_inode()
 * through the .read_inode entry.
 */
static struct super_operations ext2_sops = {
	.alloc_inode	= ext2_alloc_inode,
	.destroy_inode	= ext2_destroy_inode,
	.read_inode	= ext2_read_inode,
	.write_inode	= ext2_write_inode,
	.put_inode	= ext2_put_inode,
	.delete_inode	= ext2_delete_inode,
	.put_super	= ext2_put_super,
	.write_super	= ext2_write_super,
	.statfs		= ext2_statfs,
	.remount_fs	= ext2_remount,
	.clear_inode	= ext2_clear_inode,
};
其read_inode对应的接口为ext2_read_inode().代码如下:
/*
 * Fill in an in-memory inode from its on-disk ext2 inode.
 *
 * The raw inode is fetched with ext2_get_inode(); its fields are converted
 * from little-endian and copied into the VFS inode and the ext2-private
 * ext2_inode_info.  The operation tables are then chosen by file type.
 * If the raw inode cannot be read, or the inode turns out to be deleted,
 * the inode is marked bad with make_bad_inode().
 */
void ext2_read_inode (struct inode * inode)
{
struct ext2_inode_info *ei = EXT2_I(inode);
/* Inode number of the inode being read. */
ino_t ino = inode->i_ino;
struct buffer_head * bh;
/* Read the on-disk inode; bh receives the buffer that contains it. */
struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh);
int n;
#ifdef CONFIG_EXT2_FS_POSIX_ACL
ei->i_acl = EXT2_ACL_NOT_CACHED;
ei->i_default_acl = EXT2_ACL_NOT_CACHED;
#endif
if (IS_ERR(raw_inode))
goto bad_inode;
/* Initialise the inode from the on-disk data. */
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
/* Unless NO_UID32 is set, the high 16 bits of uid/gid are used as well. */
if (!(test_opt (inode->i_sb, NO_UID32))) {
inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
}
inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
inode->i_size = le32_to_cpu(raw_inode->i_size);
inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime);
inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime);
inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime);
inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
/* We now have enough fields to check if the inode was active or not.
* This is needed because nfsd might try to access dead inodes
* the test is that same one that e2fsck uses
* NeilBrown 1999oct15
*/
if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) {
/* this inode is deleted */
brelse (bh);
goto bad_inode;
}
inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */
inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
ei->i_frag_no = raw_inode->i_frag;
ei->i_frag_size = raw_inode->i_fsize;
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
ei->i_dir_acl = 0;
/* Regular files use i_size_high as the upper 32 bits of the size; other types read i_dir_acl. */
if (S_ISREG(inode->i_mode))
inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
else
ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
/* The deletion time was only needed for the liveness check above. */
ei->i_dtime = 0;
inode->i_generation = le32_to_cpu(raw_inode->i_generation);
ei->i_state = 0;
ei->i_next_alloc_block = 0;
ei->i_next_alloc_goal = 0;
ei->i_prealloc_count = 0;
ei->i_block_group = (ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
ei->i_dir_start_lookup = 0;
/*
* NOTE! The in-memory inode i_data array is in little-endian order
* even on big-endian machines: we do NOT byteswap the block numbers!
*/
for (n = 0; n < EXT2_N_BLOCKS; n++)
ei->i_data[n] = raw_inode->i_block[n];
/* Regular file. */
if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext2_file_inode_operations;
inode->i_fop = &ext2_file_operations;
if (test_opt(inode->i_sb, NOBH))
inode->i_mapping->a_ops = &ext2_nobh_aops;
else
inode->i_mapping->a_ops = &ext2_aops;
}
/* Directory. */
else if (S_ISDIR(inode->i_mode)) {
inode->i_op = &ext2_dir_inode_operations;
inode->i_fop = &ext2_dir_operations;
if (test_opt(inode->i_sb, NOBH))
inode->i_mapping->a_ops = &ext2_nobh_aops;
else
inode->i_mapping->a_ops = &ext2_aops;
}
/* Symbolic link. */
else if (S_ISLNK(inode->i_mode)) {
if (ext2_inode_is_fast_symlink(inode))
inode->i_op = &ext2_fast_symlink_inode_operations;
else {
inode->i_op = &ext2_symlink_inode_operations;
if (test_opt(inode->i_sb, NOBH))
inode->i_mapping->a_ops = &ext2_nobh_aops;
else
inode->i_mapping->a_ops = &ext2_aops;
}
} else {
/* Special file: the device number is decoded from i_block[0] if non-zero, else from i_block[1]. */
inode->i_op = &ext2_special_inode_operations;
if (raw_inode->i_block[0])
init_special_inode(inode, inode->i_mode,
old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
else
init_special_inode(inode, inode->i_mode,
new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
}
brelse (bh);
ext2_set_inode_flags(inode);
return;
bad_inode:
make_bad_inode(inode);
return;
}
从文件系统中读取索引结点号对应的信息是通过ext2_get_inode()完成的。它的代码如下:
/*
 * Locate the on-disk inode numbered @ino.
 *
 * @sb:  superblock of the filesystem
 * @ino: inode number
 * @p:   receives the buffer_head containing the inode on success, and is
 *       set to NULL on failure
 *
 * Returns a pointer into the buffer's data, ERR_PTR(-EINVAL) for an inode
 * number outside the valid range, or ERR_PTR(-EIO) when the group
 * descriptor or the inode block cannot be obtained.
 */
static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino,
					struct buffer_head **p)
{
	struct buffer_head * bh;
	unsigned long group;
	unsigned long block;
	unsigned long byte_off;
	struct ext2_group_desc * gdp;

	*p = NULL;

	/* Only the root inode and inodes from EXT2_FIRST_INO up to the total count are valid. */
	if ((ino != EXT2_ROOT_INO && ino < EXT2_FIRST_INO(sb)) ||
	    ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count)) {
		ext2_error(sb, "ext2_get_inode", "bad inode number: %lu",
			   (unsigned long) ino);
		return ERR_PTR(-EINVAL);
	}

	/* Block group holding the inode: (ino - 1) / inodes per group. */
	group = (ino - 1) / EXT2_INODES_PER_GROUP(sb);
	gdp = ext2_get_group_desc(sb, group, &bh);
	if (!gdp)
		return ERR_PTR(-EIO);

	/*
	 * Figure out the offset within the block group inode table
	 */
	byte_off = ((ino - 1) % EXT2_INODES_PER_GROUP(sb)) * EXT2_INODE_SIZE(sb);

	/* Block of the inode table that contains this inode. */
	block = le32_to_cpu(gdp->bg_inode_table) +
		(byte_off >> EXT2_BLOCK_SIZE_BITS(sb));

	bh = sb_bread(sb, block);
	if (!bh) {
		ext2_error(sb, "ext2_get_inode",
			   "unable to read inode block - inode=%lu, block=%lu",
			   (unsigned long) ino, block);
		return ERR_PTR(-EIO);
	}

	*p = bh;
	/* Reduce the table offset to an offset within the block just read. */
	byte_off &= (EXT2_BLOCK_SIZE(sb) - 1);
	return (struct ext2_inode *) (bh->b_data + byte_off);
}
// 取得对应块组号的组描述符
struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
unsigned int block_group,
struct buffer_head ** bh)
{
unsigned long group_desc;
unsigned long offset;
struct ext2_group_desc * desc;
struct ext2_sb_info *sbi = EXT2_SB(sb);
//块组号大于块组总数,出错退出
if (block_group >= sbi->s_groups_count) {
ext2_error (sb, "ext2_get_group_desc",
"block_group >= groups_count - "
"block_group = %d, groups_count = %lu",
block_group, sbi->s_groups_count);
return NULL;
}
//块组号/每块中的组描述符数 可以计算出是在那一个BH
group_desc = block_group / EXT2_DESC_PER_BLOCK(sb);
//在BH中的偏程
offset = block_group % EXT2_DESC_PER_BLOCK(sb);
//对应的BH为空.出错退出
if (!sbi->s_group_desc[group_desc]) {
ext2_error (sb, "ext2_get_group_desc",
"Group descriptor not loaded - "
"block_group = %d, group_desc = %lu, desc = %lu",
block_group, group_desc, offset);
return NULL;
}
//取得组描述符所在的BH
desc = (struct ext2_group_desc *) sbi->s_group_desc[group_desc]->b_data;
if (bh)
*bh = sbi->s_group_desc[group_desc];
//加上在组中的偏移量即为所求的组描述符
return desc + offset;
}
至此,对文件系统的挂载就完成了。关于文件系统的其它操作的实现。请继续关注本站更新。
四:ext2中文件的查找
现在,就可以来看下文件查找所遗留的问题了.在文件查找里曾分析到。如果文件所对应的目录不在缓存里的话,就会调用文件系统对应的lookup 操作。在上面看到对inode的设置。如果是目录的话就会将i_op设置为ext2_dir_inode_operations.它的定义如下:
/*
 * Inode operations installed on ext2 directories.  A lookup that misses
 * the dentry cache ends up in ext2_lookup() through the .lookup entry.
 */
struct inode_operations ext2_dir_inode_operations = {
	.create		= ext2_create,
	.lookup		= ext2_lookup,
	.link		= ext2_link,
	.unlink		= ext2_unlink,
	.symlink	= ext2_symlink,
	.mkdir		= ext2_mkdir,
	.rmdir		= ext2_rmdir,
	.mknod		= ext2_mknod,
	.rename		= ext2_rename,
#ifdef CONFIG_EXT2_FS_XATTR
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= ext2_listxattr,
	.removexattr	= generic_removexattr,
#endif
	.setattr	= ext2_setattr,
	.permission	= ext2_permission,
};
相应的lookup接口为 ext2_lookup().代码如下:
/*
 * lookup operation for ext2 directories: find the entry named by @dentry
 * in directory @dir and attach the matching inode to the dentry.
 * @nd is not used.
 *
 * Returns ERR_PTR(-ENAMETOOLONG) for an over-long name, ERR_PTR(-EACCES)
 * if the inode cannot be obtained, the result of d_splice_alias() when
 * the entry exists, and NULL after adding a negative dentry when it does
 * not.
 */
static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
{
	struct inode *inode;
	ino_t ino;

	/* Reject names longer than ext2 can store. */
	if (dentry->d_name.len > EXT2_NAME_LEN)
		return ERR_PTR(-ENAMETOOLONG);

	/* Inode number of the directory entry, 0 when there is no such entry. */
	ino = ext2_inode_by_name(dir, dentry);
	if (!ino) {
		/* No such name: bind the dentry to no inode. */
		d_add(dentry, NULL);
		return NULL;
	}

	inode = iget(dir->i_sb, ino);
	if (!inode)
		return ERR_PTR(-EACCES);

	/* Tie the inode and the dentry together. */
	return d_splice_alias(inode, dentry);
}
具体的查找过程是在ext2_inode_by_name()完成的。代码如下:
/*
 * Look up @dentry's name in directory @dir.
 *
 * Returns the inode number stored in the matching directory entry, or 0
 * when ext2_find_entry() finds nothing.  The page handed back by
 * ext2_find_entry() is unmapped and released here.
 */
ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry)
{
	struct page *page;
	struct ext2_dir_entry_2 *de = ext2_find_entry (dir, dentry, &page);
	ino_t ino;

	if (!de)
		return 0;

	ino = le32_to_cpu(de->inode);
	kunmap(page);
	page_cache_release(page);
	return ino;
}
转入ext2_find_entry():
/*
 * Search directory @dir for an entry whose name matches @dentry.
 *
 * @dir:      directory inode to search
 * @dentry:   supplies the name and name length to look for
 * @res_page: receives the page containing the entry on success, NULL
 *            otherwise
 *
 * Returns the matching directory entry, or NULL if there is none or a
 * zero-length entry is encountered.  On success the page is NOT released
 * here: ext2_inode_by_name(), the caller shown in this document, unmaps
 * and releases it.
 *
 * The search starts at the page remembered in i_dir_start_lookup and wraps
 * around, so every page is visited at most once.
 *
 * Change from the quoted listing: *res_page is set to NULL before the
 * empty-directory check, so the out-parameter is defined on every return
 * path.
 */
struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
			struct dentry *dentry, struct page ** res_page)
{
	const char *name = dentry->d_name.name;
	int namelen = dentry->d_name.len;
	unsigned reclen = EXT2_DIR_REC_LEN(namelen);
	unsigned long start, n;
	/* Number of pages the directory occupies. */
	unsigned long npages = dir_pages(dir);
	struct page *page = NULL;
	struct ext2_inode_info *ei = EXT2_I(dir);
	ext2_dirent * de;

	/* OFFSET_CACHE */
	*res_page = NULL;

	if (npages == 0)
		goto out;

	/* Page at which the previous successful lookup ended. */
	start = ei->i_dir_start_lookup;
	/* Out of range: start from the first page. */
	if (start >= npages)
		start = 0;
	n = start;

	/* Scan the directory one page at a time. */
	do {
		char *kaddr;

		page = ext2_get_page(dir, n);
		if (!IS_ERR(page)) {
			kaddr = page_address(page);
			de = (ext2_dirent *) kaddr;
			/* Last offset at which an entry of reclen bytes could still start. */
			kaddr += ext2_last_byte(dir, n) - reclen;
			/* Walk every entry in this page. */
			while ((char *) de <= kaddr) {
				/* A zero record length would never advance: bail out. */
				if (de->rec_len == 0) {
					ext2_error(dir->i_sb, __FUNCTION__,
						"zero-length directory entry");
					ext2_put_page(page);
					goto out;
				}
				if (ext2_match (namelen, name, de))
					goto found;
				de = ext2_next_entry(de);
			}
			/* Nothing in this page: release it. */
			ext2_put_page(page);
		}
		if (++n >= npages)
			n = 0;
	} while (n != start);
out:
	return NULL;

found:
	*res_page = page;
	/* Remember the page so the next lookup starts here. */
	ei->i_dir_start_lookup = n;
	return de;
}
至此,就完成了一个搜索的过程。如果搜索成功,ext2_inode_by_name()就会返回文件所在的索引号.
找到相应的inode之后,会调用d_splice_alias()使dentry与inode关联起来.这部份代码比较简单,可以自行了解.
五:小结
本节主要以ext2文件系统为例来描述了文件系统的挂载。读者需要自行了解有关ext2布局的知识.文件系统是一个非常繁杂的子系统。里面涉及到的东西需要分解成很多的专题。不可能在一节中完全讲述的很清楚。后续专题请关注本站更新.转自:http://blog.chinaunix.net/u3/97267/showart_1952948.html