在读写文件之前,必须先打开文件。从应用程序的角度来看,这是通过标准库的open函数完成的,该函数返回一个文件描述符;在内核中,则由系统调用sys_open()完成。
/*
 * sys_open - kernel entry point of the open() system call.
 *
 * @filename: user-space pointer to the path name
 * @flags:    open flags supplied by the caller
 * @mode:     mode forwarded unchanged to do_sys_open()
 *
 * Optionally forces O_LARGEFILE, then delegates to do_sys_open() with
 * AT_FDCWD as the directory fd and returns its result as-is.
 */
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
{
	long result;

	/*
	 * Decide whether O_LARGEFILE must be set no matter what user space
	 * passed in.  According to the original annotation this is the case
	 * when the processor word size is not 32 bits (force_o_largefile()
	 * is not shown here).
	 */
	if (force_o_largefile())
		flags |= O_LARGEFILE;

	/* The real work happens in do_sys_open(). */
	result = do_sys_open(AT_FDCWD, filename, flags, mode);

	/* avoid REGPARM breakage on x86: */
	asmlinkage_protect(3, result, filename, flags, mode);

	return result;
}
实际的打开工作由do_sys_open()完成:
/*
 * do_sys_open - common implementation behind the open system call.
 *
 * @dfd:      directory fd handed through to do_filp_open()
 * @filename: user-space pointer to the path name
 * @flags:    open flags
 * @mode:     mode handed through to do_filp_open()
 *
 * Returns the newly installed file descriptor, or the error value
 * produced by getname(), get_unused_fd_flags() or do_filp_open().
 */
long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
{
	struct file *filp;
	char *kname;
	int fd;

	/* Fetch the path name from the calling process' address space. */
	kname = getname(filename);
	fd = PTR_ERR(kname);
	if (IS_ERR(kname))
		return fd;

	/*
	 * Reserve an unused file descriptor.  Per the original annotation,
	 * the descriptor is an index into a per-process array (reached via
	 * task_struct->files) whose entries refer to the struct file that
	 * holds everything needed about an open file.
	 */
	fd = get_unused_fd_flags(flags);
	if (fd < 0)
		goto out_putname;

	/* Descriptor reserved: do_filp_open() performs the actual open. */
	filp = do_filp_open(dfd, kname, flags, mode, 0);
	if (IS_ERR(filp)) {
		/* Open failed: give the descriptor back and report the error. */
		put_unused_fd(fd);
		fd = PTR_ERR(filp);
		goto out_putname;
	}

	fsnotify_open(filp->f_path.dentry);
	fd_install(fd, filp);

out_putname:
	putname(kname);
	return fd;
}
打开文件的主体实现do_filp_open():
/*
 * do_filp_open - resolve a path name and produce the opened struct file.
 *
 * @dfd:       directory fd forwarded to the path lookup helpers
 * @pathname:  path name to open
 * @open_flag: open flags as passed in by the caller
 * @mode:      creation mode; stored in nd.intent.open.create_mode and
 *             passed to __open_namei_create() when the file is created
 * @acc_mode:  access mask; when zero it is derived from the flags
 *
 * Returns whatever nameidata_to_filp() yields (which may itself be an
 * error pointer), or ERR_PTR(error) on the failure paths.
 *
 * Note that the low bits of the passed in "open_flag"
 * are not the same as in the local variable "flag". See
 * open_to_namei_flags() for more details.
 */
struct file *do_filp_open(int dfd, const char *pathname,
		int open_flag, int mode, int acc_mode)
{
	struct file *filp;
	struct nameidata nd;
	int error;
	struct path path;
	struct dentry *dir;
	int count = 0;
	int will_write;
	/*
	 * Convert the caller's flags to the namei-internal form.  The
	 * original annotation says this amounts to "flag + 1"; see
	 * open_to_namei_flags() (not shown here) for the exact rule.
	 */
	int flag = open_to_namei_flags(open_flag);

	/* No access mask supplied: derive it from the flags. */
	if (!acc_mode)
		acc_mode = MAY_OPEN | ACC_MODE(flag);

	/* O_TRUNC implies we need access checks for write permissions */
	if (flag & O_TRUNC)
		acc_mode |= MAY_WRITE;

	/*
	 * Allow the LSM permission hook to distinguish append
	 * access from general write access.
	 * (O_APPEND is reflected in acc_mode as MAY_APPEND.)
	 */
	if (flag & O_APPEND)
		acc_mode |= MAY_APPEND;

	/*
	 * The simplest case - just a plain lookup.
	 * Taken when O_CREAT is not set: path_lookup_open() fills in nd,
	 * and a lookup error is returned to the caller directly.
	 */
	if (!(flag & O_CREAT)) {
		error = path_lookup_open(dfd, pathname, lookup_flags(flag),
					 &nd, flag);
		if (error)
			return ERR_PTR(error);
		goto ok;	/* skip the creation logic below */
	}

	/*
	 * Create - we need to know the parent.
	 * From here on O_CREAT is set.  path_init() prepares the lookup and
	 * path_walk() performs it; both are called with LOOKUP_PARENT.
	 */
	error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
	if (error)
		return ERR_PTR(error);
	error = path_walk(pathname, &nd);
	if (error) {
		if (nd.root.mnt)
			path_put(&nd.root);
		return ERR_PTR(error);
	}
	if (unlikely(!audit_dummy_context()))
		/* pass the dentry found by the walk to the audit code */
		audit_inode(pathname, nd.path.dentry);

	/*
	 * We have the parent and last component. First of all, check
	 * that we are not asked to creat(2) an obvious directory - that
	 * will not do.
	 */
	error = -EISDIR;
	/*
	 * Reject anything whose last component is not a normal name, or
	 * whose name is followed by a non-NUL character.
	 */
	if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len])
		goto exit_parent;

	error = -ENFILE;
	/* Obtain an empty struct file; NULL is reported as -ENFILE. */
	filp = get_empty_filp();
	if (filp == NULL)
		goto exit_parent;
	/* Fill in the open intent and switch nd from "parent" to "create/open". */
	nd.intent.open.file = filp;
	nd.intent.open.flags = flag;
	nd.intent.open.create_mode = mode;
	dir = nd.path.dentry;
	nd.flags &= ~LOOKUP_PARENT;
	nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN;
	if (flag & O_EXCL)
		nd.flags |= LOOKUP_EXCL;
	mutex_lock(&dir->d_inode->i_mutex);
	/*
	 * The walk above resolved only the parent directory.  Look up the
	 * dentry of the target itself from nd (parent dentry plus nd.last)
	 * while holding the parent's i_mutex.
	 */
	path.dentry = lookup_hash(&nd);
	path.mnt = nd.path.mnt;

	/* "path" now describes the target (or holds an error pointer). */
do_last:
	error = PTR_ERR(path.dentry);
	if (IS_ERR(path.dentry)) {
		mutex_unlock(&dir->d_inode->i_mutex);
		goto exit;
	}

	if (IS_ERR(nd.intent.open.file)) {
		error = PTR_ERR(nd.intent.open.file);
		goto exit_mutex_unlock;
	}

	/*
	 * Negative dentry, just create the file
	 * (the dentry has no inode behind it, so the file does not exist).
	 */
	if (!path.dentry->d_inode) {
		/*
		 * This write is needed to ensure that a
		 * ro->rw transition does not occur between
		 * the time when the file is created and when
		 * a permanent write count is taken through
		 * the 'struct file' in nameidata_to_filp().
		 */
		error = mnt_want_write(nd.path.mnt);
		if (error)
			goto exit_mutex_unlock;
		/*
		 * Create the file using the namei-style flags (per the
		 * original annotation this mainly creates the inode).
		 */
		error = __open_namei_create(&nd, &path, flag, mode);
		if (error) {
			mnt_drop_write(nd.path.mnt);
			goto exit;
		}
		/* Turn the nameidata into the resulting struct file. */
		filp = nameidata_to_filp(&nd, open_flag);
		if (IS_ERR(filp))
			ima_counts_put(&nd.path,
				       acc_mode & (MAY_READ | MAY_WRITE |
				       MAY_EXEC));
		/* Drop the mount write access taken by mnt_want_write() above. */
		mnt_drop_write(nd.path.mnt);
		if (nd.root.mnt)
			path_put(&nd.root);
		return filp;
	}

	/*
	 * It already exists.
	 */
	mutex_unlock(&dir->d_inode->i_mutex);
	/* pass the target dentry to the audit code */
	audit_inode(pathname, path.dentry);

	error = -EEXIST;
	if (flag & O_EXCL)
		goto exit_dput;

	/*
	 * Per the original annotation: if a filesystem is mounted on this
	 * path, __follow_mount() descends until it reaches one with nothing
	 * mounted on top and updates path to that filesystem's root.  When
	 * it returns non-zero and O_NOFOLLOW is set, fail with -ELOOP.
	 */
	if (__follow_mount(&path)) {
		error = -ELOOP;
		if (flag & O_NOFOLLOW)
			goto exit_dput;
	}

	error = -ENOENT;
	if (!path.dentry->d_inode)
		goto exit_dput;
	if (path.dentry->d_inode->i_op->follow_link)
		goto do_link;	/* has a follow_link op: resolve the link below */

	/* Copy the resolved path into nd. */
	path_to_nameidata(&path, &nd);
	error = -EISDIR;
	/* The target is a directory: refuse with -EISDIR. */
	if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
		goto exit;
	/* From here on nd describes the final target of the open. */
ok:
	/*
	 * Consider:
	 * 1. may_open() truncates a file
	 * 2. a rw->ro mount transition occurs
	 * 3. nameidata_to_filp() fails due to
	 *    the ro mount.
	 * That would be inconsistent, and should
	 * be avoided. Taking this mnt write here
	 * ensures that (2) can not occur.
	 */
	will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
	if (will_write) {
		error = mnt_want_write(nd.path.mnt);
		if (error)
			goto exit;
	}
	/*
	 * may_open() performs the access checks and, as the scenario above
	 * notes, may truncate the file.
	 */
	error = may_open(&nd.path, acc_mode, flag);
	if (error) {
		if (will_write)
			mnt_drop_write(nd.path.mnt);
		goto exit;
	}
	/* Turn the nameidata into the resulting struct file. */
	filp = nameidata_to_filp(&nd, open_flag);
	if (IS_ERR(filp))
		ima_counts_put(&nd.path,
			       acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
	/*
	 * It is now safe to drop the mnt write
	 * because the filp has had a write taken
	 * on its behalf.
	 */
	if (will_write)
		/* release the mount write access taken above */
		mnt_drop_write(nd.path.mnt);
	if (nd.root.mnt)
		/* drop the reference held on nd.root */
		path_put(&nd.root);
	return filp;

	/*
	 * Error exits.  Each label falls through to the ones below it, so
	 * jumping further up undoes more state.
	 */
exit_mutex_unlock:
	mutex_unlock(&dir->d_inode->i_mutex);
exit_dput:
	path_put_conditional(&path, &nd);
exit:
	if (!IS_ERR(nd.intent.open.file))
		release_open_intent(&nd);
exit_parent:
	if (nd.root.mnt)
		path_put(&nd.root);
	path_put(&nd.path);
	return ERR_PTR(error);

	/* The target is a link: resolve it by hand unless O_NOFOLLOW forbids it. */
do_link:
	error = -ELOOP;
	if (flag & O_NOFOLLOW)
		goto exit_dput;	/* following links is not allowed: fail */
	/*
	 * This is subtle. Instead of calling do_follow_link() we do the
	 * thing by hands. The reason is that this way we have zero link_count
	 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
	 * After that we have the parent and last component, i.e.
	 * we are in the same situation as after the first path_walk().
	 * Well, almost - if the last component is normal we get its copy
	 * stored in nd->last.name and we will have to putname() it when we
	 * are done. Procfs-like symlinks just set LAST_BIND.
	 */
	/* Ask for the parent again while the link target is being walked. */
	nd.flags |= LOOKUP_PARENT;
	/* Security hook: may veto following this link. */
	error = security_inode_follow_link(path.dentry, &nd);
	if (error)
		goto exit_dput;
	/* Follow the link; the result of the walk ends up in nd. */
	error = __do_follow_link(&path, &nd);
	if (error) {
		/* Does someone understand code flow here? Or it is only
		 * me so stupid? Anathema to whoever designed this non-sense
		 * with "intent.open".
		 */
		release_open_intent(&nd);
		if (nd.root.mnt)
			path_put(&nd.root);
		return ERR_PTR(error);
	}
	nd.flags &= ~LOOKUP_PARENT;
	/* Examine what kind of last component the link resolved to. */
	if (nd.last_type == LAST_BIND)
		goto ok;
	error = -EISDIR;
	if (nd.last_type != LAST_NORM)
		goto exit;
	if (nd.last.name[nd.last.len]) {
		__putname(nd.last.name);
		goto exit;
	}
	error = -ELOOP;
	/* Loop guard: give up once 32 links have already been followed here. */
	if (count++==32) {
		__putname(nd.last.name);
		goto exit;
	}
	dir = nd.path.dentry;
	mutex_lock(&dir->d_inode->i_mutex);
	/* Refresh path (dentry and mount) for the new last component. */
	path.dentry = lookup_hash(&nd);
	path.mnt = nd.path.mnt;
	__putname(nd.last.name);
	goto do_last;
}
在内核中要打开一个文件,首先应该找到这个文件,而查找文件的过程在vfs里面是由do_path_lookup或者path_lookup_open函数来完成的,关于文件路径查找在前面已经分析过相关的代码了。这两个函数将用户传进来的字符串表示的文件路径解析为对应的dentry及其inode(结果保存在nameidata结构中);随后do_filp_open()通过nameidata_to_filp()得到相应的file结构,do_sys_open()再用fd_install()把它安装到先前分配的文件描述符上,并将该描述符返回给用户。用户随后通过文件描述符来访问这些数据结构。