在读写文件之前,必须先打开文件。从应用程序的角度来看,这是通过标准库的open函数完成的,该函数返回一个文件描述符。在内核中,这一工作由系统调用sys_open()完成,其定义如下:
/*
 * open(2) system call entry point.
 *
 * ORs O_LARGEFILE into the flags when force_o_largefile() is true, then
 * forwards the request to do_sys_open() with AT_FDCWD as the directory
 * descriptor. The value returned by do_sys_open() is returned unchanged.
 */
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
{
	long fd;

	if (force_o_largefile())
		flags |= O_LARGEFILE;

	fd = do_sys_open(AT_FDCWD, filename, flags, mode);

	/* avoid REGPARM breakage on x86: */
	asmlinkage_protect(3, fd, filename, flags, mode);

	return fd;
}
实际的打开工作由do_sys_open()完成:
# long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
# {
# /*从进程地址空间读取该文件的路径名*/
# char *tmp = getname(filename);
# int fd = PTR_ERR(tmp);
#
# if (!IS_ERR(tmp)) {
# /*在内核中,每个打开的文件由一个文件描述符表示
# 该描述符在特定于进程的数组中充当位置索引(数组是
# task_struct->files->fd_arry),该数组的元素包含了file结构,其中
# 包括每个打开文件的所有必要信息。因此,调用下面
# 函数查找一个未使用的文件描述符,返回的是上面
# 说的数组的下标*/
# fd = get_unused_fd_flags(flags);
# if (fd >= 0) {
# /*fd获取成功则开始打开文件,此函数是主要完成打开功能的函数*/
# struct file *f = do_filp_open(dfd, tmp, flags, mode, 0);
# if (IS_ERR(f)) {
# put_unused_fd(fd);
# fd = PTR_ERR(f);
# } else {
# fsnotify_open(f->f_path.dentry);
# fd_install(fd, f);
# }
# }
# putname(tmp);
# }
# return fd;
# }
打开文件的主体实现在do_filp_open()中:
# /*
#  * do_filp_open - resolve @pathname and return the opened struct file.
#  *
#  * @dfd:       directory descriptor handed to the path lookup helpers
#  * @pathname:  path name to open
#  * @open_flag: open flags as passed in; converted to namei form in 'flag'
#  * @mode:      stored as the open intent's create_mode and passed to
#  *             __open_namei_create()
#  * @acc_mode:  access mode to check; when 0 it is derived from the flags
#  *
#  * Returns the struct file on success, or an ERR_PTR()-encoded error.
#  *
#  * Overview of the paths through the function:
#  *  - without O_CREAT: one path_lookup_open() call, then the ok: label;
#  *  - with O_CREAT: look up the parent, then the last component (do_last:),
#  *    creating the file when its dentry has no inode;
#  *  - if the existing target's inode has a follow_link operation: do_link:
#  *    resolves it by hand and jumps back to do_last: (or to ok:).
#  */
# struct file *do_filp_open(int dfd, const char *pathname,
# int open_flag, int mode, int acc_mode)
# {
# struct file *filp;
# struct nameidata nd;
# int error;
# struct path path;
# struct dentry *dir;
# int count = 0;
# int will_write;
# /* Convert the open(2) flags to the namei form used in the rest of this
# function. NOTE(review): the original annotation says this is done by
# adding 1 to the flag value - confirm in open_to_namei_flags(). */
# int flag = open_to_namei_flags(open_flag);
# /* No access mode supplied by the caller: derive it from the flags */
# if (!acc_mode)
# acc_mode = MAY_OPEN | ACC_MODE(flag);
#
# /* O_TRUNC implies we need access checks for write permissions */
#
# /* O_TRUNC requested: also require MAY_WRITE */
# if (flag & O_TRUNC)
# acc_mode |= MAY_WRITE;
#
# /* Allow the LSM permission hook to distinguish append
# access from general write access. */
# /* O_APPEND requested: add MAY_APPEND to the access mode */
# if (flag & O_APPEND)
# acc_mode |= MAY_APPEND;
#
# /*
# * The simplest case - just a plain lookup.
# */
# /* Not creating a file */
# if (!(flag & O_CREAT)) {
# /* path_lookup_open() resolves the path name into nd. On failure the
# error is returned directly; on success control jumps to the ok: label.
# NOTE(review): per the original annotation, the lookup turns the path
# string into a dentry and sets up the matching inode and file
# structures - confirm against path_lookup_open(). */
# error = path_lookup_open(dfd, pathname, lookup_flags(flag),
# &nd, flag);
# if (error)
# return ERR_PTR(error);
# goto ok;/* skip the creation path below */
# }
#
# /*
# * Create - we need to know the parent.
# */
# /* From here on O_CREAT is set, so the file may have to be created */
# /* path_init() prepares the lookup and path_walk() performs it. With
# LOOKUP_PARENT the pair is used here to reach the parent of the last
# path component (see the comment a few lines below). */
# error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
# if (error)
# return ERR_PTR(error);
# error = path_walk(pathname, &nd);
# if (error) {
# if (nd.root.mnt)
# path_put(&nd.root);
# return ERR_PTR(error);
# }
# if (unlikely(!audit_dummy_context()))
# /* hand the path name and the parent dentry to audit_inode() */
# audit_inode(pathname, nd.path.dentry);
#
# /*
# * We have the parent and last component. First of all, check
# * that we are not asked to creat(2) an obvious directory - that
# * will not do.
# */
# error = -EISDIR;
# /* The last component must be a normal name with nothing following it;
# otherwise fail with -EISDIR */
# if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len])
# goto exit_parent;
#
# error = -ENFILE;
# /* Get an empty struct file; fail with -ENFILE if none is available */
# filp = get_empty_filp();
# if (filp == NULL)
# goto exit_parent;
# /* Fill in the open intent in nd and switch the lookup flags from
# LOOKUP_PARENT to LOOKUP_CREATE | LOOKUP_OPEN (plus LOOKUP_EXCL when
# O_EXCL is set) */
# nd.intent.open.file = filp;
# nd.intent.open.flags = flag;
# nd.intent.open.create_mode = mode;
# dir = nd.path.dentry;
# nd.flags &= ~LOOKUP_PARENT;
# nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN;
# if (flag & O_EXCL)
# nd.flags |= LOOKUP_EXCL;
# mutex_lock(&dir->d_inode->i_mutex);
# /* Look up the dentry of the target file. The walk above stopped at the
# parent directory; lookup_hash() is given nd, which carries that parent
# and the last component. NOTE(review): presumably it combines the two
# in a hash-table lookup - confirm in lookup_hash(). The parent's
# i_mutex is held across the call. */
# path.dentry = lookup_hash(&nd);
# path.mnt = nd.path.mnt;
# /* path now describes the target, or holds an error pointer that is
# checked right below */
# do_last:
# error = PTR_ERR(path.dentry);
# if (IS_ERR(path.dentry)) {
# mutex_unlock(&dir->d_inode->i_mutex);
# goto exit;
# }
#
# if (IS_ERR(nd.intent.open.file)) {
# error = PTR_ERR(nd.intent.open.file);
# goto exit_mutex_unlock;
# }
#
# /* Negative dentry, just create the file */
# /* The dentry has no inode attached, so the file does not exist yet and
# has to be created */
# if (!path.dentry->d_inode) {
# /*
# * This write is needed to ensure that a
# * ro->rw transition does not occur between
# * the time when the file is created and when
# * a permanent write count is taken through
# * the 'struct file' in nameidata_to_filp().
# */
# /* Write access to the mount is required before creating */
# error = mnt_want_write(nd.path.mnt);
# if (error)
# goto exit_mutex_unlock;
# /* Create the file using the namei-format flags. NOTE(review): per the
# original annotation this mainly creates the inode - confirm in
# __open_namei_create(). */
# error = __open_namei_create(&nd, &path, flag, mode);
# if (error) {
# mnt_drop_write(nd.path.mnt);
# goto exit;
# }
# /* Turn the nameidata into the struct file that is returned */
# filp = nameidata_to_filp(&nd, open_flag);
# if (IS_ERR(filp))
# ima_counts_put(&nd.path,
# acc_mode & (MAY_READ | MAY_WRITE |
# MAY_EXEC));
# /* Drop the mount write access taken above */
# mnt_drop_write(nd.path.mnt);
# if (nd.root.mnt)
# path_put(&nd.root);
# return filp;
# }
#
# /*
# * It already exists.
# */
# /* The file to open already exists; the parent's i_mutex is no longer
# needed */
# mutex_unlock(&dir->d_inode->i_mutex);
# /* hand the path name and the target dentry to audit_inode() */
# audit_inode(pathname, path.dentry);
#
# error = -EEXIST;
# if (flag & O_EXCL)
# goto exit_dput;
# /* If __follow_mount() reports that path was changed and O_NOFOLLOW is
# set, fail with -ELOOP. NOTE(review): per the original annotation,
# __follow_mount() descends through file systems mounted on path until it
# reaches one with nothing mounted on it and updates path to that file
# system's root - confirm in __follow_mount(). */
# if (__follow_mount(&path)) {
# error = -ELOOP;
# if (flag & O_NOFOLLOW)
# goto exit_dput;
# }
#
# error = -ENOENT;
# if (!path.dentry->d_inode)
# goto exit_dput;
# if (path.dentry->d_inode->i_op->follow_link)
# goto do_link;/* the inode has a follow_link op: treat it as a symbolic link */
# /* Carry path over into nd (presumably a copy of mnt/dentry - confirm in
# path_to_nameidata()) */
# path_to_nameidata(&path, &nd);
# error = -EISDIR;
# /* The target is a directory: fail with -EISDIR */
# if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
# goto exit;
# /* From here on nd describes the final target file */
# ok:
# /*
# * Consider:
# * 1. may_open() truncates a file
# * 2. a rw->ro mount transition occurs
# * 3. nameidata_to_filp() fails due to
# * the ro mount.
# * That would be inconsistent, and should
# * be avoided. Taking this mnt write here
# * ensures that (2) can not occur.
# */
# will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
# if (will_write) {
# error = mnt_want_write(nd.path.mnt);
# if (error)
# goto exit;
# }
# /* may_open() is called with the access mode and flags; per the comment
# above it may also truncate the file. NOTE(review): the original
# annotation describes it as doing the permission check, the open and the
# truncate - confirm in may_open(). */
# error = may_open(&nd.path, acc_mode, flag);
# if (error) {
# if (will_write)
# mnt_drop_write(nd.path.mnt);
# goto exit;
# }
# /* Convert nd into the struct file that is returned */
# filp = nameidata_to_filp(&nd, open_flag);
# if (IS_ERR(filp))
# ima_counts_put(&nd.path,
# acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
# /*
# * It is now safe to drop the mnt write
# * because the filp has had a write taken
# * on its behalf.
# */
# if (will_write)
# /* drop the mount write access taken at ok: */
# mnt_drop_write(nd.path.mnt);
# if (nd.root.mnt)
# /* drop the reference held on nd.root */
# path_put(&nd.root);
# return filp;
#
# /* Error exits. The labels fall through into each other, so each entry
# point undoes one more step than the label after it. */
# exit_mutex_unlock:
# mutex_unlock(&dir->d_inode->i_mutex);
# exit_dput:
# path_put_conditional(&path, &nd);
# exit:
# if (!IS_ERR(nd.intent.open.file))
# release_open_intent(&nd);
# exit_parent:
# if (nd.root.mnt)
# path_put(&nd.root);
# path_put(&nd.path);
# return ERR_PTR(error);
# /* Symbolic link: if following is allowed, resolve the link target by
# hand */
# do_link:
# error = -ELOOP;
# if (flag & O_NOFOLLOW)
# goto exit_dput;/* following links is not allowed: return the error */
# /*
# * This is subtle. Instead of calling do_follow_link() we do the
# * thing by hands. The reason is that this way we have zero link_count
# * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
# * After that we have the parent and last component, i.e.
# * we are in the same situation as after the first path_walk().
# * Well, almost - if the last component is normal we get its copy
# * stored in nd->last.name and we will have to putname() it when we
# * are done. Procfs-like symlinks just set LAST_BIND.
# */
# /* The code below finds, by hand, the dentry of the file the link
# points to */
# /* Set the LOOKUP_PARENT lookup flag */
# nd.flags |= LOOKUP_PARENT;
# /* Security hook: may this link be followed? */
# error = security_inode_follow_link(path.dentry, &nd);
# if (error)
# goto exit_dput;
# /* Follow the symbolic link, i.e. walk its path; the result ends up in nd */
# error = __do_follow_link(&path, &nd);
# if (error) {
# /* Does someone understand code flow here? Or it is only
# * me so stupid? Anathema to whoever designed this non-sense
# * with "intent.open".
# */
# release_open_intent(&nd);
# if (nd.root.mnt)
# path_put(&nd.root);
# return ERR_PTR(error);
# }
# nd.flags &= ~LOOKUP_PARENT;
# /* Check the type of the last path component: LAST_BIND goes straight to
# ok:, anything other than LAST_NORM fails with -EISDIR */
# if (nd.last_type == LAST_BIND)
# goto ok;
# error = -EISDIR;
# if (nd.last_type != LAST_NORM)
# goto exit;
# if (nd.last.name[nd.last.len]) {
# __putname(nd.last.name);
# goto exit;
# }
# error = -ELOOP;
# /* Too many passes through do_link: fail with -ELOOP once count has
# reached 32 */
# if (count++==32) {
# __putname(nd.last.name);
# goto exit;
# }
# dir = nd.path.dentry;
# mutex_lock(&dir->d_inode->i_mutex);
# /* Update path's dentry and mount for the next pass through do_last: */
# path.dentry = lookup_hash(&nd);
# path.mnt = nd.path.mnt;
# __putname(nd.last.name);
# goto do_last;
# }
在内核中要打开一个文件,首先应该找到这个文件,而查找文件的过程在VFS里面是由do_path_lookup或者path_lookup_open函数来完成的(关于文件路径查找,前面已经分析过相关的代码)。这两个函数将用户传进来的字符串形式的文件路径转换成一个dentry结构,并建立好相应的inode和file结构;随后由do_sys_open()通过fd_install()把file结构与文件描述符关联起来,并将该描述符返回给用户。用户随后通过文件描述符来访问这些数据结构。