本文先叙述打开文件的大致流程,再补充流程细节。本文主要以 VFS(虚拟文件系统)为核心进行介绍,ext2、sysfs 等具体文件系统的实现细节超出本文范围。
Linux 打开一个文件,需要两个步骤:
- (1) 找到文件
- (2) 打开文件
C库调用open函数时,对应的系统调用入口是sys_open函数.
/*
 * sys_open - system-call entry point reached when the C library calls open().
 *
 * @filename: user-space pointer to the path string
 * @flags:    open flags supplied by the caller
 * @mode:     creation mode supplied by the caller
 *
 * Forwards to do_sys_open() with AT_FDCWD as the directory fd and returns
 * whatever do_sys_open() returns.
 */
asmlinkage long sys_open(const char __user *filename, int flags, int mode)
{
	long fd;

	/*
	 * Add O_LARGEFILE whenever force_o_largefile() says so.
	 * NOTE(review): the original note described this as a "32-bit system"
	 * test; force_o_largefile() is presumably true on 64-bit kernels
	 * instead -- confirm against its definition.
	 */
	if (force_o_largefile())
		flags = flags | O_LARGEFILE;

	fd = do_sys_open(AT_FDCWD, filename, flags, mode);

	/* avoid REGPARM breakage on x86: */
	asmlinkage_protect(3, fd, filename, flags, mode);

	return fd;
}
/*
 * do_sys_open - copy the path into the kernel, reserve a descriptor and open.
 *
 * @dfd:      directory fd the lookup is relative to (sys_open passes AT_FDCWD)
 * @filename: user-space pointer to the path string
 * @flags:    open flags
 * @mode:     creation mode
 *
 * Returns the new file descriptor on success, or a negative error value
 * taken from getname(), get_unused_fd_flags() or do_filp_open().
 */
long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
{
	struct file *filp;
	char *kname;
	int fd;

	/* Copy the path (directory and file name) into kernel space. */
	kname = getname(filename);
	if (IS_ERR(kname)) {
		fd = PTR_ERR(kname);
		return fd;
	}

	/* Reserve an unused file descriptor; a negative value is an error. */
	fd = get_unused_fd_flags(flags);
	if (fd < 0)
		goto out_putname;

	/* The actual lookup + open work happens in do_filp_open(). */
	filp = do_filp_open(dfd, kname, flags, mode);
	if (IS_ERR(filp)) {
		/* Give the reserved descriptor back and report the error. */
		put_unused_fd(fd);
		fd = PTR_ERR(filp);
		goto out_putname;
	}

	fsnotify_open(filp->f_path.dentry);
	/* Bind the descriptor number to the opened file. */
	fd_install(fd, filp);

out_putname:
	putname(kname);
	return fd;
}
/*
 * do_filp_open (excerpt; "……" marks code omitted from this listing).
 *
 * Only the path without O_CREAT is shown: path_lookup_open() resolves
 * pathname into nd (step 1, find the file), then nameidata_to_filp()
 * turns nd into a struct file (step 2, open the file).
 * The declarations of flag, error, nd and filp are in the omitted part.
 */
struct file *do_filp_open(int dfd, const char *pathname,
int open_flag, int mode)
{
……
/*
* The simplest case - just a plain lookup.
*/
if (!(flag & O_CREAT)) {
// Step 1: this call locates the file.
error = path_lookup_open(dfd, pathname, lookup_flags(flag),
&nd, flag);
if (error)
return ERR_PTR(error);
goto ok;
}
……
ok:
// Step 2: this call opens the file that was found.
filp = nameidata_to_filp(&nd, open_flag);
……
}
// 寻找文件的过程如下
path_lookup_open
->__path_lookup_intent_open
->do_path_lookup
->path_walk
->link_path_walk
->__link_path_walk
->do_lookup
->real_lookup
->result = dir->i_op->lookup(dir, dentry, nd); //底层文件系统相关的lookup函数
/*
 * ext2_lookup (excerpt; "……" marks omitted code).
 * Shown here only to highlight that a non-zero inode number is turned
 * into an inode via ext2_iget(), and that an error pointer from
 * ext2_iget() is passed back to the caller.
 */
static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
{
……
if (ino) {
// Fetch the inode with number ino from dir's superblock.
inode = ext2_iget(dir->i_sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
}
……
}
/*
 * ext2_iget (excerpt; "……" marks omitted code).
 * Dispatches on the inode's file type. Only the final branch is shown in
 * full: anything that is not a regular file, directory or symlink is
 * handed to init_special_inode() together with a decoded device number.
 */
struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
{
……
if (S_ISREG(inode->i_mode)) {
……
} else if (S_ISDIR(inode->i_mode)) {
……
} else if (S_ISLNK(inode->i_mode)) {
……
} else {
……
// The device number comes from i_block[0] (old encoding) when that
// slot is non-zero, otherwise from i_block[1] (new encoding).
if (raw_inode->i_block[0])
init_special_inode(inode, inode->i_mode,
old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
else
init_special_inode(inode, inode->i_mode,
new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
}
……
}
/*
 * init_special_inode - install default file operations for a special file.
 *
 * @inode: inode being initialised
 * @mode:  file mode; stored in inode->i_mode and used to pick the type
 * @rdev:  device number; recorded only for character and block devices
 *
 * Character devices get def_chr_fops, block devices def_blk_fops, FIFOs
 * def_fifo_fops and sockets bad_sock_fops. Any other mode only produces a
 * debug message and leaves i_fop untouched.
 */
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	inode->i_mode = mode;

	if (S_ISCHR(mode)) {
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = rdev;
		return;
	}

	if (S_ISBLK(mode)) {
		inode->i_fop = &def_blk_fops;
		inode->i_rdev = rdev;
		return;
	}

	if (S_ISFIFO(mode)) {
		inode->i_fop = &def_fifo_fops;
		return;
	}

	if (S_ISSOCK(mode)) {
		inode->i_fop = &bad_sock_fops;
		return;
	}

	/* None of the known special types matched. */
	printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n",
	       mode);
}
- 对于字符设备文件,init_special_inode函数把inode->i_fop设置为def_chr_fops,因此path_lookup_open函数返回后,也就得到了该设备文件的默认open函数(即chrdev_open).
/*
 * Default file operations that init_special_inode() installs on
 * character-device inodes; the only member set here is .open.
 */
const struct file_operations def_chr_fops = {
.open = chrdev_open,
};
再看do_filp_open函数调用的另一个函数nameidata_to_filp
nameidata_to_filp
->__dentry_open
/*
 * __dentry_open (excerpt; "……" marks omitted code).
 * Copies the inode's file operations into the struct file and then calls
 * an open routine: the one passed in by the caller, or f->f_op->open when
 * the caller passed none.
 */
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
int flags, struct file *f,
int (*open)(struct inode *, struct file *))
{
……
// The file inherits the operations table stored in the inode (i_fop).
f->f_op = fops_get(inode->i_fop);
……
// No explicit open callback given: fall back to the one in f_op.
if (!open && f->f_op)
open = f->f_op->open;
if (open) {
error = open(inode, f); // calls the default open found during lookup (e.g. chrdev_open for a character device)
if (error)
goto cleanup_all;
}
……
}
/*
 * chrdev_open (excerpt; "……" marks omitted code).
 * Default open routine for character-device inodes (see def_chr_fops):
 * finds the cdev registered for the inode's device number and then calls
 * the open routine in filp->f_op.
 */
static int chrdev_open(struct inode *inode, struct file *filp)
{
……
// Look up the kobject registered in cdev_map for this device number
// (inode->i_rdev).
kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
// Recover the enclosing struct cdev from its embedded kobject.
// The cdev carries the file_operations defined by the driver.
new = container_of(kobj, struct cdev, kobj);
……
if (filp->f_op->open) {
lock_kernel();
// Calls the driver's own open routine.
// NOTE(review): this presumes filp->f_op was switched to the cdev's
// file_operations in the omitted code -- confirm against the full source.
ret = filp->f_op->open(inode,filp);
unlock_kernel();
}
……
}
下面详细分析查找过程
下面以C函数调用open("/home/book/test.c", O_RDONLY)为例,基于Linux 2.6.26源码进行分析
/*
 * __path_lookup_intent_open (excerpt; "……" marks omitted code).
 * Records the open intent (file, flags, create mode) in nd and then runs
 * the path lookup with LOOKUP_OPEN added to the lookup flags.
 * nd is the nameidata instance used by do_filp_open().
 * Returns -ENFILE when no struct file could be obtained; the visible code
 * otherwise returns the do_path_lookup() result, though the omitted part
 * may change it.
 */
static int __path_lookup_intent_open(int dfd, const char *name,
unsigned int lookup_flags, struct nameidata *nd,
int open_flags, int create_mode)
{
// Obtain an empty struct file.
// NOTE(review): per the original note this comes from the filp_cachep
// slab cache -- the allocator is not shown here.
struct file *filp = get_empty_filp();
int err;
if (filp == NULL)
return -ENFILE;
nd->intent.open.file = filp;
nd->intent.open.flags = open_flags;
nd->intent.open.create_mode = create_mode;
err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);
……
return err;
}
/*
 * do_path_lookup (excerpt; "……" marks omitted code).
 * Initialises nd, chooses the directory the walk starts from, and then
 * calls path_walk(). Only the absolute-path branch is shown in full.
 */
static int do_path_lookup(int dfd, const char *name,
unsigned int flags, struct nameidata *nd)
{
int retval = 0;
int fput_needed;
struct file *file;
struct fs_struct *fs = current->fs;
nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags;
nd->depth = 0;
// The example path /home/book/test takes this branch. The other branches
// are similar: they only exist to pick the starting directory.
if (*name=='/') {
read_lock(&fs->lock);
// An alternate root, if set and not disabled by LOOKUP_NOALT, is tried first.
if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) {
nd->path = fs->altroot;
path_get(&fs->altroot);
read_unlock(&fs->lock);
if (__emul_lookup_dentry(name,nd))
goto out; /* found in altroot */
read_lock(&fs->lock);
}
nd->path = fs->root; // start the walk at the process's root (fs->root); for this example that is '/'
path_get(&fs->root);
read_unlock(&fs->lock);
} else if (dfd == AT_FDCWD) {
……
} else {
……
}
retval = path_walk(name, nd);
……
return retval;
}
/*
 * __link_path_walk (excerpt; "……" marks omitted code).
 * Walks the path one component at a time. Each loop iteration checks
 * permission on the current directory, cuts out the next component into
 * a qstr (name, length, hash), looks it up with do_lookup() and moves nd
 * to the result. The last_component / last_with_slashes handling and the
 * labels reached by goto (return_reval, out_dput, return_err) are in the
 * omitted parts.
 */
static int __link_path_walk(const char *name, struct nameidata *nd)
{
struct path next;
struct inode *inode;
int err;
unsigned int lookup_flags = nd->flags;
// Skip every leading '/', so open("//home/book/test.c", O_RDONLY) is
// parsed correctly as well.
// Afterwards name points at "home/book/test.c".
while (*name=='/')
name++;
if (!*name)
goto return_reval;
// Inode of the directory the walk starts from; for the absolute-path
// example this is the inode of the root directory '/'.
inode = nd->path.dentry->d_inode;
if (nd->depth)
lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
/* At this point we know we have a real path component. */
for(;;) {
unsigned long hash;
struct qstr this;
unsigned int c;
nd->flags |= LOOKUP_CONTINUE;
// Permission check on the current directory: try exec_permission_lite()
// first and fall back to vfs_permission(nd, MAY_EXEC) when it returns
// -EAGAIN.
err = exec_permission_lite(inode, nd);
if (err == -EAGAIN)
err = vfs_permission(nd, MAY_EXEC);
if (err)
break;
// "this" describes the current path component; its name field points
// into the path string (unrelated to the C++ keyword of the same name).
this.name = name;
c = *(const unsigned char *)name; // first character of the component
hash = init_name_hash(); // start a fresh hash for this component
// This do/while loop advances name to the next '/' or to the end of the
// string, folding every character of the component into the hash.
do {
name++;
hash = partial_name_hash(c, hash); // mix character c into the running hash
c = *(const unsigned char *)name;
} while (c && (c != '/'));
// Length of the component just scanned: 4 for "home" on the first pass,
// then the length of "book", then the length of "test.c".
this.len = name - (const char *) this.name;
this.hash = end_name_hash(hash);
/* remove trailing slashes? */
// Decide how far the walk has progressed. The code at last_component and
// last_with_slashes is similar to what follows and is omitted here.
if (!c) // end of string: this is the final component, i.e. "test.c"
goto last_component;
while (*++name == '/'); // skip any further '/' after the component, so e.g. open("/home//book/test.c", O_RDONLY) is parsed correctly too
if (!*name)// the final component was followed by '/'
goto last_with_slashes;
……
/* This does the actual lookups.. */
/* Find the dentry for the directory named by this.name. */
err = do_lookup(nd, &this, &next);
if (err)
break;
// No inode behind the dentry: the component does not exist.
err = -ENOENT;
inode = next.dentry->d_inode;
if (!inode)
goto out_dput;
err = -ENOTDIR;
if (!inode->i_op)
goto out_dput;
if (inode->i_op->follow_link) {
// The inode provides follow_link: resolve the link, then continue from
// wherever nd points afterwards.
err = do_follow_link(&next, nd);
if (err)
goto return_err;
err = -ENOENT;
inode = nd->path.dentry->d_inode;
if (!inode)
break;
err = -ENOTDIR;
if (!inode->i_op)
break;
} else
path_to_nameidata(&next, nd); // make the dentry just found the current position in nd
// An intermediate component must support lookup, otherwise it cannot be
// walked into.
err = -ENOTDIR;
if (!inode->i_op->lookup)
break;
continue;
/* here ends the main loop */
……
}
path_put(&nd->path);
……
}
/*
 * do_lookup (abridged listing).
 * Finds the dentry for one path component: first via __d_lookup(), and
 * via real_lookup() when that finds nothing.
 * NOTE(review): the labels need_revalidate, fail and done targeted by the
 * gotos below are not part of this listing.
 */
static int do_lookup(struct nameidata *nd, struct qstr *name,
struct path *path)
{
// nd->path is the parent directory of name.
struct vfsmount *mnt = nd->path.mnt;
// Try to find an existing dentry for name under the parent. Per the
// original note this usually fails on the first open, because no dentry
// has been created for the name yet.
struct dentry *dentry = __d_lookup(nd->path.dentry, name);
if (!dentry)
goto need_lookup;
if (dentry->d_op && dentry->d_op->d_revalidate)
goto need_revalidate;
need_lookup:
// Nothing found above: take the slow path in real_lookup().
dentry = real_lookup(nd->path.dentry, name, nd);
if (IS_ERR(dentry))
goto fail;
goto done;
}
/*
 * real_lookup (excerpt; "……" marks omitted code).
 * Slow-path lookup: when d_lookup() finds no dentry for name, allocate
 * one and ask the parent directory's inode to look the name up.
 * NOTE(review): dir->i_mutex is released below, so it is presumably taken
 * in the omitted code before d_lookup() -- confirm against the full source.
 */
static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
{
struct dentry * result;
struct inode *dir = parent->d_inode;
……
// Look once more using d_lookup(); in this example it still finds nothing.
result = d_lookup(parent, name);
if (!result) {
// Allocate a dentry initialised from name, with parent as its parent.
// Per the original note, the filesystem's lookup routine is what links
// the new dentry into the dentry_hashtable array.
struct dentry * dentry = d_alloc(parent, name);
result = ERR_PTR(-ENOMEM);
if (dentry) {
// Call the lookup routine of the parent directory's inode.
result = dir->i_op->lookup(dir, dentry, nd);
// If ->lookup() returned non-NULL, return that and drop our reference to
// the dentry allocated above; if it returned NULL, the allocated dentry
// is the result.
if (result)
dput(dentry);
else
result = dentry;
}
mutex_unlock(&dir->i_mutex);
return result;
}
……
return result;
}
/*
 * In this example the root directory '/' is the mount point of an ext2
 * filesystem, so the lookup routine that gets called is ext2's.
 */
// Inode operations that ext2 uses for directories:
const struct inode_operations ext2_dir_inode_operations = {
.create = ext2_create,
.lookup = ext2_lookup,
.link = ext2_link,
.unlink = ext2_unlink,
.symlink = ext2_symlink,
.mkdir = ext2_mkdir,
.rmdir = ext2_rmdir,
.mknod = ext2_mknod,
.rename = ext2_rename,
// Extended-attribute operations are only present when
// CONFIG_EXT2_FS_XATTR is defined.
#ifdef CONFIG_EXT2_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ext2_listxattr,
.removexattr = generic_removexattr,
#endif
.setattr = ext2_setattr,
.permission = ext2_permission,
};
/*
 * ext2_lookup - look up one name in an ext2 directory.
 *
 * @dir:    inode of the parent directory
 * @dentry: dentry created for the name being looked up
 * @nd:     lookup state (not used in this function)
 *
 * Returns ERR_PTR(-ENAMETOOLONG) when the name is longer than
 * EXT2_NAME_LEN, the error from ext2_iget() when loading the inode fails,
 * and otherwise the result of d_splice_alias(). When no inode number is
 * found for the name, d_splice_alias() is called with a NULL inode.
 */
static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
{
	struct inode *found = NULL;
	ino_t inum;

	if (dentry->d_name.len > EXT2_NAME_LEN)
		return ERR_PTR(-ENAMETOOLONG);

	/* Ask ext2 for the inode number that belongs to this name. */
	inum = ext2_inode_by_name(dir, dentry);
	if (inum != 0) {
		found = ext2_iget(dir->i_sb, inum);
		if (IS_ERR(found))
			return ERR_CAST(found);
	}

	/* Associate the inode (possibly NULL) with the dentry. */
	return d_splice_alias(found, dentry);
}
/*
 * ext2_iget (abridged listing).
 * Obtains the inode numbered ino on superblock sb and installs the
 * operation tables that match its file type. Once this has run, the
 * operations for the current name are known; after the dentry is attached
 * to nd, they can be reached through nd.
 *
 * NOTE(review): this listing omits code without marking it. As shown,
 * raw_inode is never assigned before it is read, ei/bh/ret/n are unused,
 * and the function has no return statement -- consult the full kernel
 * source for the missing parts.
 */
struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
{
struct ext2_inode_info *ei;
struct buffer_head * bh;
struct ext2_inode *raw_inode;
struct inode *inode;
long ret = -EIO;
int n;
// Get the inode with number ino that belongs to this ext2 superblock.
inode = iget_locked(sb, ino);
// Regular file: the address-space and file operations depend on whether
// XIP or the NOBH mount option is in use.
if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext2_file_inode_operations;
if (ext2_use_xip(inode->i_sb)) {
inode->i_mapping->a_ops = &ext2_aops_xip;
inode->i_fop = &ext2_xip_file_operations;
} else if (test_opt(inode->i_sb, NOBH)) {
inode->i_mapping->a_ops = &ext2_nobh_aops;
inode->i_fop = &ext2_file_operations;
} else {
inode->i_mapping->a_ops = &ext2_aops;
inode->i_fop = &ext2_file_operations;
}
} else if (S_ISDIR(inode->i_mode)) { // directory
inode->i_op = &ext2_dir_inode_operations; // directories get ext2_dir_inode_operations again, so this inode can act as the parent for the next component's lookup
inode->i_fop = &ext2_dir_operations;
if (test_opt(inode->i_sb, NOBH))
inode->i_mapping->a_ops = &ext2_nobh_aops;
else
inode->i_mapping->a_ops = &ext2_aops;
} else if (S_ISLNK(inode->i_mode)) {
// Symbolic link: fast symlinks only need inode operations; other
// symlinks also get address-space operations.
if (ext2_inode_is_fast_symlink(inode))
inode->i_op = &ext2_fast_symlink_inode_operations;
else {
inode->i_op = &ext2_symlink_inode_operations;
if (test_opt(inode->i_sb, NOBH))
inode->i_mapping->a_ops = &ext2_nobh_aops;
else
inode->i_mapping->a_ops = &ext2_aops;
}
} else {
// Everything else is handed to init_special_inode() with a device number
// decoded from i_block[0] (old encoding) when that slot is non-zero,
// otherwise from i_block[1] (new encoding).
inode->i_op = &ext2_special_inode_operations;
if (raw_inode->i_block[0])
init_special_inode(inode, inode->i_mode,
old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
else
init_special_inode(inode, inode->i_mode,
new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
}
}
//打开文件,就是通过nd结构得到相应的open函数,再调用open函数.