Linux虚拟文件系统之文件打开（sys_open()）

最新推荐文章于 2021-05-25 12:33:19 发布

Tommy_wxie

最新推荐文章于 2021-05-25 12:33:19 发布

阅读量2.5k

点赞数

分类专栏： Linux kernel 文章标签： linux path file struct transition permissions

本文链接：https://blog.csdn.net/tommy_wxie/article/details/7583322

版权

Linux kernel 专栏收录该内容

151 篇文章 25 订阅

订阅专栏

在文件读写之前，我们必须先打开文件。从应用程序的角度来看，这是通过标准库的open函数完成的，该函数返回一个文件描述符。内核中是由系统调用sys_open()函数完成。

/*
 * open(2) system call entry point.
 *
 * ORs O_LARGEFILE into flags when force_o_largefile() reports true, then
 * forwards the request to do_sys_open() with AT_FDCWD as the directory
 * descriptor. The value returned by do_sys_open() is returned unchanged.
 */
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
{
	long result;

	if (force_o_largefile())
		flags = flags | O_LARGEFILE;

	result = do_sys_open(AT_FDCWD, filename, flags, mode);

	/* avoid REGPARM breakage on x86: */
	asmlinkage_protect(3, result, filename, flags, mode);

	return result;
}

实际的打开工作由do_sys_open()完成：

# long do_sys_open(int dfd, const char __user *filename, int flags, int mode)  
# {  
#     /*从进程地址空间读取该文件的路径名*/  
#     char *tmp = getname(filename);  
#     int fd = PTR_ERR(tmp);  
#   
#     if (!IS_ERR(tmp)) {  
#         /*在内核中，每个打开的文件由一个文件描述符表示 
#         该描述符在特定于进程的数组中充当位置索引(数组是 
#         task_struct->files->fd_arry)，该数组的元素包含了file结构，其中 
#         包括每个打开文件的所有必要信息。因此，调用下面 
#         函数查找一个未使用的文件描述符,返回的是上面 
#         说的数组的下标*/  
#         fd = get_unused_fd_flags(flags);  
#         if (fd >= 0) {  
#             /*fd获取成功则开始打开文件，此函数是主要完成打开功能的函数*/  
#             struct file *f = do_filp_open(dfd, tmp, flags, mode, 0);  
#             if (IS_ERR(f)) {  
#                 put_unused_fd(fd);  
#                 fd = PTR_ERR(f);  
#             } else {  
#                 fsnotify_open(f->f_path.dentry);  
#                 fd_install(fd, f);  
#             }  
#         }  
#         putname(tmp);  
#     }  
#     return fd;  
# }

打开文件的主体实现是do_filp_open()：

# struct file *do_filp_open(int dfd, const char *pathname,
#         int open_flag, int mode, int acc_mode)
# {   /*
#      * Open pathname and return its struct file, or an ERR_PTR()-encoded
#      * negative errno on failure.
#      *
#      * dfd, pathname: handed to the path lookup helpers.
#      * open_flag:     caller's open flags; converted once into the
#      *                namei-style "flag" that every test below uses.
#      * mode:          creation mode, used only on the O_CREAT path.
#      * acc_mode:      access mask; 0 means "derive it from flag".
#      *
#      * Without O_CREAT this is a single lookup followed by the common
#      * "ok" tail. With O_CREAT the parent directory is looked up first,
#      * then the last component is looked up under the parent's i_mutex
#      * (do_last) and is either created or opened. A last component that
#      * has a follow_link operation is handled at do_link, which loops
#      * back to do_last.
#      */
#     struct file *filp;
#     struct nameidata nd;
#     int error;
#     struct path path;
#     struct dentry *dir;
#     int count = 0; /* number of passes through do_link so far */
#     int will_write;
#       /* Convert the open flags to namei form. The article's author notes
#        * this amounts to flag + 1; open_to_namei_flags() is not shown
#        * here - TODO confirm. */
#     int flag = open_to_namei_flags(open_flag);
#     /* Derive the access mode when the caller did not supply one */
#     if (!acc_mode)
#         acc_mode = MAY_OPEN | ACC_MODE(flag);
#
#     /* O_TRUNC implies we need access checks for write permissions */
#
#     /* O_TRUNC: additionally require write permission */
#     if (flag & O_TRUNC)
#         acc_mode |= MAY_WRITE;
#
#     /* Allow the LSM permission hook to distinguish append
#        access from general write access. */
#        /* O_APPEND: additionally request MAY_APPEND */
#     if (flag & O_APPEND)
#         acc_mode |= MAY_APPEND;
#
#     /*
#      * The simplest case - just a plain lookup.
#      */
#       /* Not creating a file */
#     if (!(flag & O_CREAT)) {
#         /* Plain open: path_lookup_open() is given the path name and nd.
#         On success control jumps to ok, where nd.path is used as the
#         object being opened (presumably the lookup left the resolved
#         target there - helper not shown, TODO confirm). */
#         error = path_lookup_open(dfd, pathname, lookup_flags(flag),
#                      &nd, flag);
#         if (error)
#             return ERR_PTR(error);
#         goto ok;/* skip the creation path below */
#     }
#
#     /*
#      * Create - we need to know the parent.
#      */
#      /* From here on the file may have to be created */
#     /* path_init() prepares the lookup and path_walk() performs it. With
#     LOOKUP_PARENT the result is the parent plus the last component. */
#     error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
#     if (error)
#         return ERR_PTR(error);
#     error = path_walk(pathname, &nd);
#     if (error) {
#         if (nd.root.mnt)
#             path_put(&nd.root);
#         return ERR_PTR(error);
#     }
#     if (unlikely(!audit_dummy_context()))
#         /* hand the parent dentry to the audit code */
#         audit_inode(pathname, nd.path.dentry);
#
#     /*
#      * We have the parent and last component. First of all, check
#      * that we are not asked to creat(2) an obvious directory - that
#      * will not do.
#      */
#     error = -EISDIR;
#     /* Reject unless the last component is LAST_NORM and its name is
#        followed by a NUL byte right after nd.last.len characters */
#     if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len])
#         goto exit_parent;
#
#     error = -ENFILE;
#      /* get an empty struct file */
#     filp = get_empty_filp();
#     if (filp == NULL)
#         goto exit_parent;
#     /* fill in the open intent carried inside nd */
#     nd.intent.open.file = filp;
#     nd.intent.open.flags = flag;
#     nd.intent.open.create_mode = mode;
#     dir = nd.path.dentry;
#     nd.flags &= ~LOOKUP_PARENT;
#     nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN;
#     if (flag & O_EXCL)
#         nd.flags |= LOOKUP_EXCL;
#     mutex_lock(&dir->d_inode->i_mutex);
#     /* Look up the dentry of the target itself. The walk above stopped at
#     the parent directory, so nd holds the parent dentry and the last
#     component; lookup_hash() is given both through nd. The parent's
#     i_mutex is held across this lookup. */
#     path.dentry = lookup_hash(&nd);
#     path.mnt = nd.path.mnt;
#     /* path now describes the target, or path.dentry holds an ERR_PTR */
# do_last:
#     error = PTR_ERR(path.dentry);
#     if (IS_ERR(path.dentry)) {
#         mutex_unlock(&dir->d_inode->i_mutex);
#         goto exit;
#     }
#
#     if (IS_ERR(nd.intent.open.file)) {
#         error = PTR_ERR(nd.intent.open.file);
#         goto exit_mutex_unlock;
#     }
#
#     /* Negative dentry, just create the file */
#     /* A dentry with no inode attached means the file does not exist yet,
#        so it is created here */
#     if (!path.dentry->d_inode) {
#         /*
#          * This write is needed to ensure that a
#          * ro->rw transition does not occur between
#          * the time when the file is created and when
#          * a permanent write count is taken through
#          * the 'struct file' in nameidata_to_filp().
#          */
#          /* write access to the mount is required */
#         error = mnt_want_write(nd.path.mnt);
#         if (error)
#             goto exit_mutex_unlock;
#         /* Create the file using the namei-style flags.
#            NOTE(review): the error path below jumps to exit, not
#            exit_mutex_unlock - presumably __open_namei_create() releases
#            dir's i_mutex itself; helper not shown, confirm. */
#         error = __open_namei_create(&nd, &path, flag, mode);
#         if (error) {
#             mnt_drop_write(nd.path.mnt);
#             goto exit;
#         }
#         /* build the struct file from nd */
#         filp = nameidata_to_filp(&nd, open_flag);
#         if (IS_ERR(filp))
#             ima_counts_put(&nd.path,
#                        acc_mode & (MAY_READ | MAY_WRITE |
#                            MAY_EXEC));
#         /* drop the mount write access taken above */
#         mnt_drop_write(nd.path.mnt);
#         if (nd.root.mnt)
#             path_put(&nd.root);
#         return filp;
#     }
#
#     /*
#      * It already exists.
#      */
#       /* The file to open already exists */
#     mutex_unlock(&dir->d_inode->i_mutex);
#     /* hand the target dentry to the audit code */
#     audit_inode(pathname, path.dentry);
#
#     error = -EEXIST;
#     if (flag & O_EXCL)
#         goto exit_dput;
#     /* Per the article's author: if a filesystem is mounted on path, step
#     down through the mounts until one has nothing mounted on it and make
#     path refer to that filesystem's root (__follow_mount() is not shown
#     here). If any mount was followed and O_NOFOLLOW is set, fail with
#     -ELOOP. */
#     if (__follow_mount(&path)) {
#         error = -ELOOP;
#         if (flag & O_NOFOLLOW)
#             goto exit_dput;
#     }
#
#     error = -ENOENT;
#     if (!path.dentry->d_inode)
#         goto exit_dput;
#     if (path.dentry->d_inode->i_op->follow_link)
#         goto do_link;/* inode has follow_link: handle it as a link */
#     /* transfer path into nd */
#     path_to_nameidata(&path, &nd);
#     error = -EISDIR;
#     /* refuse to open a directory on this (O_CREAT) path */
#     if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
#         goto exit;
#     /* from here on nd describes the final target */
# ok:
#     /*
#      * Consider:
#      * 1. may_open() truncates a file
#      * 2. a rw->ro mount transition occurs
#      * 3. nameidata_to_filp() fails due to
#      *    the ro mount.
#      * That would be inconsistent, and should
#      * be avoided. Taking this mnt write here
#      * ensures that (2) can not occur.
#      */
#     will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
#     if (will_write) {
#         error = mnt_want_write(nd.path.mnt);
#         if (error)
#             goto exit;
#     }
#     /* may_open() checks acc_mode against the target; per the comment
#        above it may also truncate the file */
#     error = may_open(&nd.path, acc_mode, flag);
#     if (error) {
#         if (will_write)
#             mnt_drop_write(nd.path.mnt);
#         goto exit;
#     }
#     /* convert nd into a struct file */
#     filp = nameidata_to_filp(&nd, open_flag);
#     if (IS_ERR(filp))
#         ima_counts_put(&nd.path,
#                    acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
#     /*
#      * It is now safe to drop the mnt write
#      * because the filp has had a write taken
#      * on its behalf.
#      */
#     if (will_write)
#         /* release the temporary mount write access */
#         mnt_drop_write(nd.path.mnt);
#     if (nd.root.mnt)
#         /* drop the reference held on nd.root */
#         path_put(&nd.root);
#     return filp;
#
# exit_mutex_unlock:
#     mutex_unlock(&dir->d_inode->i_mutex);
# exit_dput:
#     path_put_conditional(&path, &nd);
# exit:
#     if (!IS_ERR(nd.intent.open.file))
#         release_open_intent(&nd);
# exit_parent:
#     if (nd.root.mnt)
#         path_put(&nd.root);
#     path_put(&nd.path);
#     return ERR_PTR(error);
# /* Link as last component: unless O_NOFOLLOW, resolve it by hand */
# do_link:
#     error = -ELOOP;
#     if (flag & O_NOFOLLOW)
#         goto exit_dput;/* following links is not allowed: -ELOOP */
#     /*
#      * This is subtle. Instead of calling do_follow_link() we do the
#      * thing by hands. The reason is that this way we have zero link_count
#      * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
#      * After that we have the parent and last component, i.e.
#      * we are in the same situation as after the first path_walk().
#      * Well, almost - if the last component is normal we get its copy
#      * stored in nd->last.name and we will have to putname() it when we
#      * are done. Procfs-like symlinks just set LAST_BIND.
#      */
#      /* The code below finds the dentry the link points to, by hand
#           */
#           /* request a parent lookup again */
#     nd.flags |= LOOKUP_PARENT;
#     /* security hook: may this link be followed? */
#     error = security_inode_follow_link(path.dentry, &nd);
#     if (error)
#         goto exit_dput;
#     /* follow the link; the lookup result is left in nd */
#     error = __do_follow_link(&path, &nd);
#     if (error) {
#         /* Does someone understand code flow here? Or it is only
#          * me so stupid? Anathema to whoever designed this non-sense
#          * with "intent.open".
#          */
#         release_open_intent(&nd);
#         if (nd.root.mnt)
#             path_put(&nd.root);
#         return ERR_PTR(error);
#     }
#     nd.flags &= ~LOOKUP_PARENT;
#     /* inspect the type of the last component left in nd */
#     if (nd.last_type == LAST_BIND)
#         goto ok;
#     error = -EISDIR;
#     if (nd.last_type != LAST_NORM)
#         goto exit;
#     if (nd.last.name[nd.last.len]) {
#         __putname(nd.last.name);
#         goto exit;
#     }
#     error = -ELOOP;
#     /* loop guard: fail with -ELOOP once count has already reached 32 */
#     if (count++==32) {
#         __putname(nd.last.name);
#         goto exit;
#     }
#     dir = nd.path.dentry;
#     mutex_lock(&dir->d_inode->i_mutex);
#     /* look up the new last component under its parent and retry */
#     path.dentry = lookup_hash(&nd);
#     path.mnt = nd.path.mnt;
#     __putname(nd.last.name);
#     goto do_last;
# }

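在内核中要打开一个文件，首先应该找到这个文件，而查找文件的过程在VFS里面是由do_path_lookup或者path_lookup_open等函数来完成的（上面的代码中用到的是path_lookup_open()，以及创建文件时的path_init()加path_walk()），关于文件路径查找在前面已经分析过相关的代码了。这些函数将用户传进来的字符串表示的文件路径解析为nameidata中的dentry；随后nameidata_to_filp()据此建立相应的file结构，do_sys_open()再通过fd_install()把file结构与文件描述符关联起来，并将文件描述符返回给用户。用户随后通过文件描述符，来访问这些数据结构。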