Linux设备驱动模型之字符设备open系统调用流程

最新推荐文章于 2024-08-01 23:19:08 发布

转载最新推荐文章于 2024-08-01 23:19:08 发布 · 1.9k 阅读

Linux 专栏收录该内容

103 篇文章

订阅专栏

本文深入剖析Linux系统中open系统调用的工作原理，详细解释了如何从用户态拷贝文件名到内核态，分配文件描述符，完成文件打开操作，以及将打开的文件添加到进程文件表的过程。

转自：http://blog.csdn.net/new_abc/article/details/7607731

从前面的例子可以看到，我们在操作一个设备时都是通过open系统调用先去打开这个设备。不管是设备还是文件，我们要访问它都要先通过open函数打开，这样才能调用其它的函数如read、write来操作它，即通知内核新建一个代表该文件的结构，并且返回该文件的描述符(一个整数)，该描述符在进程内唯一。

在linux系统进程中，分为内核空间和用户空间，当一个任务（进程）执行系统调用而陷入内核代码中执行时，我们就称进程处于内核运行态（内核态）。在内核态下，CPU可执行任何指令。当进程在执行用户自己的代码时，则称其处于用户运行态（用户态）。用户态不能访问内核空间，包括代码和数据。所有进程的内核空间（3G－4G）都是共享的。当我们在用户空间调用open之后，会产生一个软中断，然后通过系统调用进入内核空间。通过系统调用号，我们就可以跳转到该中断例程的入口地址。

这里分析一下open的调用流程.

open()的格式如下：

int open(const char * pathname,int oflag, mode_t mode )
pathname:代表需要打开的文件的文件名；
oflag：表示打开的标识，具体的内核支持如下标记位(include\asm-generic\fcntl.h)：

[cpp]view plaincopy 
   
print?
 /* open(2) flag bits (octal). Each optional flag is only defined here if the
  * architecture has not already provided its own value. */
 #define O_ACCMODE   00000003  // mask covering the access-mode values below
 #define O_RDONLY    00000000  // open read-only
 #define O_WRONLY    00000001  // open write-only
 #define O_RDWR      00000002  // open read-write
 #ifndef O_CREAT
 #define O_CREAT     00000100    // create the file if it does not exist; requires the mode argument
 #endif
 #ifndef O_EXCL
 #define O_EXCL      00000200    // with O_CREAT: fail if the file already exists
 #endif
 #ifndef O_NOCTTY
 #define O_NOCTTY    00000400    // if pathname is a terminal device, do not make it this process's controlling terminal
 #endif
 #ifndef O_TRUNC
 #define O_TRUNC     00001000    // if the file exists and is successfully opened write-only or read-write, truncate it to length 0
 #endif
 #ifndef O_APPEND
 #define O_APPEND    00002000    // every write is appended to the end of the file
 #endif
 #ifndef O_NONBLOCK
 #define O_NONBLOCK  00004000     // if pathname is a FIFO, block special or character special file, this open and later I/O on it are non-blocking
 #endif
 #ifndef O_SYNC
 #define O_SYNC      00010000    // each write waits until the physical I/O has completed
 #endif
 #ifndef FASYNC
 #define FASYNC      00020000    // fcntl flag kept for BSD compatibility
 #endif
 #ifndef O_DIRECT
 #define O_DIRECT    00040000    // direct I/O: reads and writes bypass the kernel cache and go straight to the device
 #endif
 #ifndef O_LARGEFILE
 #define O_LARGEFILE 00100000    // large-file flag
 #endif
 #ifndef O_DIRECTORY
 #define O_DIRECTORY 00200000    // pathname must be a directory
 #endif
 #ifndef O_NOFOLLOW
 #define O_NOFOLLOW  00400000    // do not follow symbolic links
 #endif
 #ifndef O_NOATIME
 #define O_NOATIME   01000000
 #endif
 #ifndef O_CLOEXEC
 #define O_CLOEXEC   02000000    /* set close_on_exec */
 #endif
 #ifndef O_NDELAY
 #define O_NDELAY    O_NONBLOCK  // alias for O_NONBLOCK
 #endif

当新创建一个文件时，需要指定mode参数，以下说明的格式如宏定义名称<实际常数值>:
描述如下(include\linux\stat.h)：

[cpp]view plaincopy 
   
print?
 /* Permission bits for the mode argument (octal); each S_IRWX* value is the
  * OR of the three single-permission bits listed under it. */
 #define S_IRWXU 00700   // owner: read + write + execute
 #define S_IRUSR 00400   // owner: read
 #define S_IWUSR 00200   // owner: write
 #define S_IXUSR 00100   // owner: execute

 #define S_IRWXG 00070   // group: read + write + execute
 #define S_IRGRP 00040   // group: read
 #define S_IWGRP 00020   // group: write
 #define S_IXGRP 00010   // group: execute

 #define S_IRWXO 00007   // others: read + write + execute
 #define S_IROTH 00004   // others: read
 #define S_IWOTH 00002   // others: write
 #define S_IXOTH 00001   // others: execute

系统调用号定义在arch/x86/include/asm/unistd_32.h中：

[cpp]view plaincopy 
   
print?
 /* First entries of the 32-bit x86 system-call number table; open is number 5. */
 #define __NR_restart_syscall      0
 #define __NR_exit         1
 #define __NR_fork         2
 #define __NR_read         3
 #define __NR_write        4
 #define __NR_open         5
 #define __NR_close        6
 #define __NR_waitpid          7
 #define __NR_creat        8
 #define __NR_link         9
 #define __NR_unlink      10
 #define __NR_execve      11
 #define __NR_chdir       12
 #define __NR_time        13
 #define __NR_mknod       14
 #define __NR_chmod       15

当open系统调用产生时，就会进入下面这个函数（fs/open.c）：

[cpp]view plaincopy 
   
print?
 /*
  * open(2) entry point. Optionally forces O_LARGEFILE into the flags, then
  * hands the request to do_sys_open() relative to the current working
  * directory (AT_FDCWD) and returns whatever it returns.
  */
 SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
 {
     long fd;

     /*
      * force_o_largefile() decides whether O_LARGEFILE is set regardless of
      * what user space passed (per the original note: when the native word
      * size is not 32 bits).
      */
     if (force_o_largefile())
         flags |= O_LARGEFILE;

     /* The real work happens here. */
     fd = do_sys_open(AT_FDCWD, filename, flags, mode);

     /* avoid REGPARM breakage on x86: */
     asmlinkage_protect(3, fd, filename, flags, mode);
     return fd;
 }

我们看下SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode) 展开是怎么样的

首先看下宏SYSCALL_DEFINE3

[cpp]view plaincopy 
   
print?
 /* Three-argument syscall definition: prefixes the name with '_' and forwards
  * the (type, name) pairs to SYSCALL_DEFINEx with an argument count of 3. */
 #define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)

再看下SYSCALL_DEFINEx

[cpp]view plaincopy 
   
print?
 /* In this configuration simply forwards all arguments to __SYSCALL_DEFINEx. */
 #define SYSCALL_DEFINEx(x, sname, ...)              \
     __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)

再看下__SYSCALL_DEFINEx

[cpp]view plaincopy 
   
print?
 /* Emits the function header "asmlinkage long sys<name>(...)"; the parameter
  * list is produced by __SC_DECL<x> from the (type, name) pairs. */
 #define __SYSCALL_DEFINEx(x, name, ...)                 \
     asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__))

这里对应的是__SC_DECL3

[cpp]view plaincopy 
   
print?
 /* Turn a flat "type, name, type, name, ..." list into a C parameter list:
  * each level consumes one (type, name) pair and recurses on the rest. */
 #define __SC_DECL1(t1, a1)  t1 a1
 #define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__)
 #define __SC_DECL3(t3, a3, ...) t3 a3, __SC_DECL2(__VA_ARGS__)

我们一步步展开SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)，逐层代入之后可以得到

[cpp]view plaincopy 
   
print?
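 SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
 = SYSCALL_DEFINEx(3, _open, const char __user *, filename, int, flags, int, mode)
 = __SYSCALL_DEFINEx(3, _open, const char __user *, filename, int, flags, int, mode)
 = asmlinkage long sys_open(__SC_DECL3(const char __user *, filename, int, flags, int, mode))
 = asmlinkage long sys_open(const char __user *filename, int flags, int mode)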

这个才是真正的函数原型

在sys_open里面继续调用do_sys_open完成 open操作

[cpp]view plaincopy 
   
print?
 /*
  * Core of open(2).
  *
  *  1. getname()             - fetch the path name passed from user space.
  *  2. get_unused_fd_flags() - reserve a free descriptor number, i.e. a free
  *                             index in the per-process fd array.
  *  3. do_filp_open()        - perform the actual open, yielding a struct file.
  *  4. fd_install()          - store the struct file in the fd array slot.
  *
  * Returns the new descriptor (>= 0) or a negative error code.
  */
 long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
 {
     struct file *filp;
     char *kname = getname(filename);
     int fd = PTR_ERR(kname);

     /* The name could not be fetched: fd already holds the error code. */
     if (IS_ERR(kname))
         return fd;

     fd = get_unused_fd_flags(flags);
     if (fd < 0)
         goto out_putname;

     filp = do_filp_open(dfd, kname, flags, mode, 0);
     if (IS_ERR(filp)) {
         /* Open failed: give the reserved descriptor back. */
         put_unused_fd(fd);
         fd = PTR_ERR(filp);
         goto out_putname;
     }

     /* Report the successful open to fsnotify. */
     fsnotify_open(filp->f_path.dentry);
     /* Publish the file in slot fd so later descriptor lookups find it. */
     fd_install(fd, filp);

 out_putname:
     putname(kname);
     return fd;
 }

该函数主要分为如下几个步骤来完成打开文件的操作：
1.将文件名参数从用户态拷贝至内核，调用函数getname()；
2.从进程的文件表中找到一个空闲的文件描述符，调用了函数get_unused_fd_flags()；
3.完成真正的打开操作，调用函数do_filp_open()；
4.将打开的文件添加到进程的文件表数组中，调用函数fd_install()；

getname函数主要的任务是将文件名filename从用户态拷贝至内核态

[cpp]view plaincopy 
   
print?
 /*
  * Fetch a path name from user space into a kernel buffer.
  *
  * Returns the buffer from __getname() on success, ERR_PTR(-ENOMEM) when no
  * buffer could be obtained, or ERR_PTR(err) when do_getname() fails (the
  * buffer is released again in that case). The result, pointer or error, is
  * always passed to audit_getname() before returning.
  */
 char * getname(const char __user * filename)
 {
     char *kbuf;
     char *result;

     kbuf = __getname();
     if (!kbuf) {
         result = ERR_PTR(-ENOMEM);
     } else {
         /* Copy the name from user space into kbuf. */
         int err = do_getname(filename, kbuf);

         if (err < 0) {
             /* Copy failed: release the buffer obtained above. */
             __putname(kbuf);
             result = ERR_PTR(err);
         } else {
             result = kbuf;
         }
     }

     audit_getname(result);
     return result;
 }

get_unused_fd_flags实际调用的是alloc_fd

[cpp]view plaincopy 
   
print?
 /* Allocate a descriptor with the search starting at 0; flags is passed on to alloc_fd(). */
 #define get_unused_fd_flags(flags) alloc_fd(0, (flags))

[cpp]view plaincopy 
   
print?
 /* 
  * allocate a file descriptor, mark it busy. 
  *
  * start: lowest descriptor number the caller will accept.
  * flags: only O_CLOEXEC is examined here; it selects the close_on_exec bit.
  * Returns the descriptor number, or the negative value from expand_files().
  * The whole search runs under files->file_lock.
  */
 int alloc_fd(unsigned start, unsigned flags)
 {
     struct files_struct *files = current->files;	/* file table of the current process */
     unsigned int fd;
     int error;
     struct fdtable *fdt;

     spin_lock(&files->file_lock);
 repeat:
     fdt = files_fdtable(files);
     fd = start;
     /* Never begin below files->next_fd, the hint left by earlier allocations. */
     if (fd < files->next_fd)
         fd = files->next_fd;

     /* Find the first clear bit in the open_fds bitmap at or after fd. */
     if (fd < fdt->max_fds)
         fd = find_next_zero_bit(fdt->open_fds->fds_bits,
                        fdt->max_fds, fd);
     /* Grow the fd table if fd does not fit. As used below: a negative
      * result is an error, 0 means no expansion was needed, and a positive
      * result means the table was expanded. */
     error = expand_files(files, fd);
     if (error < 0)
         goto out;

     /* 
      * If we needed to expand the fs array we 
      * might have blocked - try again. 
      */
     /* The table was expanded, so redo the search from the top. */
     if (error)
         goto repeat;

     /* Move the search hint past the descriptor just found. */
     if (start <= files->next_fd)
         files->next_fd = fd + 1;

     /* Mark fd busy and set or clear its close-on-exec bit. */
     FD_SET(fd, fdt->open_fds);
     if (flags & O_CLOEXEC)
         FD_SET(fd, fdt->close_on_exec);
     else
         FD_CLR(fd, fdt->close_on_exec);
     error = fd;
 #if 1
     /* Sanity check */
     if (rcu_dereference(fdt->fd[fd]) != NULL) {
         printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
         rcu_assign_pointer(fdt->fd[fd], NULL);
     }
 #endif

 out:
     spin_unlock(&files->file_lock);
     return error;
 }

该函数为需要打开的文件在当前进程内分配一个空闲的文件描述符fd，该fd就是open()系统调用的返回值

do_filp_open函数的一个重要作用就是根据传递进来的权限进行分析，并且分析传递进来的路径名字，根据路径名逐个解析成dentry，并且通过dentry找到inode。inode记录着该文件相关的信息，包括文件的创建时间和文件属性、所有者等等信息，根据这些信息就可以找到对应的文件操作方法。在这个过程当中有一个临时的结构体用于保存查找过程中的相关信息，就是struct nameidata：

[cpp]view plaincopy 
   
print?
 struct nameidata {  
     struct path path;//当前目录的dentry数据结构  
     struct qstr last;//这个结构体也是临时性的，主要用来保存当前目录的名称，杂凑值。  
     unsigned int    flags;  
     int     last_type;  
     unsigned    depth;//连接文件的深度（可能一个连接文件跟到最后还是一个了连接文件）  
     //用来保存连接文件的一些信息，下标表示连接文件的深度  
     char *saved_names[MAX_NESTED_LINKS + 1];  
   
     /* Intent data */  
     union {  
         struct open_intent open;  
     } intent;  
 };  

[cpp]view plaincopy 
   
print?
 /*
  * Resolve pathname (relative to dfd) and open it.
  *
  * Walks to the parent of the last component, obtains an empty struct file,
  * records the open intent in nd, then lets do_last() handle the final
  * component. While do_last() returns NULL (the final component has a
  * follow_link method) the link is followed and do_last() is retried; the
  * loop gives up with -ELOOP once count reaches 32. A result of -ESTALE
  * triggers one retry of the whole lookup with LOOKUP_REVAL set.
  *
  * Returns the opened struct file or an ERR_PTR().
  */
 struct file *do_filp_open(int dfd, const char *pathname,
         int open_flag, int mode, int acc_mode)
 {
     struct file *filp;
     struct nameidata nd;
     int error;
     struct path path;
     int count = 0;
     int flag = open_to_namei_flags(open_flag); /* namei-internal form of the open flags (helper not shown here) */
     int force_reval = 0;

     /* The mode only matters when a file may be created. */
     if (!(open_flag & O_CREAT))
         mode = 0;

     /* 
      * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only 
      * check for O_DSYNC if the need any syncing at all we enforce it's 
      * always set instead of having to deal with possibly weird behaviour 
      * for malicious applications setting only __O_SYNC. 
      */
     if (open_flag & __O_SYNC)
         open_flag |= O_DSYNC;

     /* No access mode supplied by the caller: derive it from the open flags. */
     if (!acc_mode)
         acc_mode = MAY_OPEN | ACC_MODE(open_flag);

     /* O_TRUNC implies we need access checks for write permissions */
     if (open_flag & O_TRUNC)
         acc_mode |= MAY_WRITE;

     /* Allow the LSM permission hook to distinguish append  
        access from general write access. */
     if (open_flag & O_APPEND)
         acc_mode |= MAY_APPEND;

     /* find the parent */
 reval:
     error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);	/* initialise nd */
     if (error)
         return ERR_PTR(error);
     if (force_reval)
         nd.flags |= LOOKUP_REVAL;

     current->total_link_count = 0;
     error = link_path_walk(pathname, &nd);	/* resolve the path name into nd */
     if (error) {
         filp = ERR_PTR(error);
         goto out;
     }
     if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT))
         audit_inode(pathname, nd.path.dentry);

     /* 
      * We have the parent and last component. 
      */

     error = -ENFILE;
     filp = get_empty_filp();	/* obtain an unused struct file; NULL is reported as -ENFILE */
     if (filp == NULL)
         goto exit_parent;
     /* Record the open intent and switch the lookup flags from "parent" to "open". */
     nd.intent.open.file = filp;
     filp->f_flags = open_flag;
     nd.intent.open.flags = flag;
     nd.intent.open.create_mode = mode;
     nd.flags &= ~LOOKUP_PARENT;
     nd.flags |= LOOKUP_OPEN;
     if (open_flag & O_CREAT) {
         nd.flags |= LOOKUP_CREATE;
         if (open_flag & O_EXCL)
             nd.flags |= LOOKUP_EXCL;
     }
     if (open_flag & O_DIRECTORY)
         nd.flags |= LOOKUP_DIRECTORY;
     if (!(open_flag & O_NOFOLLOW))
         nd.flags |= LOOKUP_FOLLOW;
     filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);	/* handle the final component */
     while (unlikely(!filp)) { /* trailing symlink */
         struct path holder;
         struct inode *inode = path.dentry->d_inode;
         void *cookie;
         error = -ELOOP;
         /* S_ISDIR part is a temporary automount kludge */
         if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode))
             goto exit_dput;
         if (count++ == 32)
             goto exit_dput;
         /* 
          * This is subtle. Instead of calling do_follow_link() we do 
          * the thing by hands. The reason is that this way we have zero 
          * link_count and path_walk() (called from ->follow_link) 
          * honoring LOOKUP_PARENT.  After that we have the parent and 
          * last component, i.e. we are in the same situation as after 
          * the first path_walk().  Well, almost - if the last component 
          * is normal we get its copy stored in nd->last.name and we will 
          * have to putname() it when we are done. Procfs-like symlinks 
          * just set LAST_BIND. 
          */
         nd.flags |= LOOKUP_PARENT;
         error = security_inode_follow_link(path.dentry, &nd);
         if (error)
             goto exit_dput;
         error = __do_follow_link(&path, &nd, &cookie);	/* follow the link; nd again ends at parent + last component */
         if (unlikely(error)) {
             /* nd.path had been dropped */
             if (!IS_ERR(cookie) && inode->i_op->put_link)
                 inode->i_op->put_link(path.dentry, &nd, cookie);
             path_put(&path);
             release_open_intent(&nd);
             filp = ERR_PTR(error);
             goto out;
         }
         /* Keep the link's path so it can be released after do_last() overwrites path. */
         holder = path;
         nd.flags &= ~LOOKUP_PARENT;
         filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
         if (inode->i_op->put_link)
             inode->i_op->put_link(holder.dentry, &nd, cookie);
         path_put(&holder);
     }
 out:
     if (nd.root.mnt)
         path_put(&nd.root);
     /* On -ESTALE redo the whole lookup once, this time with LOOKUP_REVAL. */
     if (filp == ERR_PTR(-ESTALE) && !force_reval) {
         force_reval = 1;
         goto reval;
     }
     return filp;	/* the opened file, or an ERR_PTR() */

 exit_dput:
     path_put_conditional(&path, &nd);
     if (!IS_ERR(nd.intent.open.file))
         release_open_intent(&nd);
 exit_parent:
     path_put(&nd.path);
     filp = ERR_PTR(error);
     goto out;
 }

当内核要访问一个文件的时候，第一步要做的是找到这个文件，而查找文件的过程在vfs里面是由link_path_walk函数来完成的,在path_init的时候我们可以看到传进去的参数有一个LOOKUP_PARENT，它的含义是查找最后一个分量名所在的目录。也就是当这个函数返回的时候，我们得到了一个路径名中最后一个分量所在的目录。

接着调用do_last返回最后一个分量对应的file指针，我们关注一下这个函数

[cpp]view plaincopy 
   
print?
 /*
  * Handle the last component of the path being opened.
  *
  * On entry nd->path is the parent directory and nd->last / nd->last_type
  * describe the final component. Returns:
  *   - the opened struct file on success,
  *   - NULL when the final component has a follow_link method, so the caller
  *     must follow the link and call again,
  *   - an ERR_PTR() on failure.
  */
 static struct file *do_last(struct nameidata *nd, struct path *path,
                 int open_flag, int acc_mode,
                 int mode, const char *pathname)
 {
     struct dentry *dir = nd->path.dentry;
     struct file *filp;
     int error = -EISDIR;

     switch (nd->last_type) {	/* special kinds of last component */
     case LAST_DOTDOT:
         follow_dotdot(nd);
         dir = nd->path.dentry;
         /* no break: continues into the LAST_DOT handling */
     case LAST_DOT:
         if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) {
             if (!dir->d_op->d_revalidate(dir, nd)) {
                 error = -ESTALE;
                 goto exit;
             }
         }
         /* fallthrough */
     case LAST_ROOT:
         /* O_CREAT is refused here; error is still -EISDIR unless set above. */
         if (open_flag & O_CREAT)
             goto exit;
         /* fallthrough */
     case LAST_BIND:
         audit_inode(pathname, dir);
         goto ok;
     }

     /* trailing slashes? */
     if (nd->last.name[nd->last.len]) {
         if (open_flag & O_CREAT)
             goto exit;
         nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
     }

     /* just plain open? */
     if (!(open_flag & O_CREAT)) {	/* no O_CREAT: the entry must already exist */
         error = do_lookup(nd, &nd->last, path);	/* look up the last component's dentry */
         if (error)
             goto exit;
         error = -ENOENT;
         if (!path->dentry->d_inode)
             goto exit_dput;
         /* The inode has a follow_link method: let the caller follow the link. */
         if (path->dentry->d_inode->i_op->follow_link)
             return NULL;
         error = -ENOTDIR;
         if (nd->flags & LOOKUP_DIRECTORY) {
             if (!path->dentry->d_inode->i_op->lookup)
                 goto exit_dput;
         }
         path_to_nameidata(path, nd);	/* make the found path the current nd->path */
         audit_inode(pathname, nd->path.dentry);
         goto ok;
     }

     /* OK, it's O_CREAT */
     /* Look up the last component under the parent's i_mutex and create it
      * below if the dentry turns out to be negative. */
     mutex_lock(&dir->d_inode->i_mutex);

     path->dentry = lookup_hash(nd);	/* dentry for the last component */
     path->mnt = nd->path.mnt;

     error = PTR_ERR(path->dentry);
     if (IS_ERR(path->dentry)) {
         mutex_unlock(&dir->d_inode->i_mutex);
         goto exit;
     }

     if (IS_ERR(nd->intent.open.file)) {
         error = PTR_ERR(nd->intent.open.file);
         goto exit_mutex_unlock;
     }

     /* Negative dentry, just create the file */
     if (!path->dentry->d_inode) {	/* no inode is attached to the dentry */
         /* 
          * This write is needed to ensure that a 
          * ro->rw transition does not occur between 
          * the time when the file is created and when 
          * a permanent write count is taken through 
          * the 'struct file' in nameidata_to_filp(). 
          */
         error = mnt_want_write(nd->path.mnt);
         if (error)
             goto exit_mutex_unlock;
         error = __open_namei_create(nd, path, open_flag, mode);	/* create the new file */
         if (error) {
             mnt_drop_write(nd->path.mnt);
             goto exit;
         }
         filp = nameidata_to_filp(nd);	/* turn the nameidata into the struct file */
         mnt_drop_write(nd->path.mnt);
         if (!IS_ERR(filp)) {
             error = ima_file_check(filp, acc_mode);
             if (error) {
                 fput(filp);
                 filp = ERR_PTR(error);
             }
         }
         return filp;
     }

     /* 
      * It already exists. 
      */
     mutex_unlock(&dir->d_inode->i_mutex);
     audit_inode(pathname, path->dentry);

     error = -EEXIST;
     if (open_flag & O_EXCL)
         goto exit_dput;

     if (__follow_mount(path)) {
         error = -ELOOP;
         if (open_flag & O_NOFOLLOW)
             goto exit_dput;
     }

     error = -ENOENT;
     if (!path->dentry->d_inode)
         goto exit_dput;

     /* The inode has a follow_link method: let the caller follow the link. */
     if (path->dentry->d_inode->i_op->follow_link)
         return NULL;

     path_to_nameidata(path, nd);
     error = -EISDIR;
     if (S_ISDIR(path->dentry->d_inode->i_mode))
         goto exit;
 ok:
     filp = finish_open(nd, open_flag, acc_mode);	/* complete the open of the existing object */
     return filp;

 exit_mutex_unlock:
     mutex_unlock(&dir->d_inode->i_mutex);
 exit_dput:
     path_put_conditional(path, nd);
 exit:
     if (!IS_ERR(nd->intent.open.file))
         release_open_intent(nd);
     path_put(&nd->path);
     return ERR_PTR(error);
 }

首先进行一些判断，然后看是否需要创建文件，如果需要创建，则创建文件。如果文件已经存在，则直接调用finish_open完成文件打开。我们这里关注一下打开文件的finish_open函数：

[cpp]view plaincopy 
   
print?
 /*
  * Complete the open of an object that already exists at nd->path.
  *
  * Runs the may_open() check, turns nd into a struct file, runs the IMA check
  * and truncates the file when required. When truncation is needed, write
  * access to the mount is held from before may_open() until just before
  * returning. Returns the struct file or an ERR_PTR().
  */
 static struct file *finish_open(struct nameidata *nd,
                 int open_flag, int acc_mode)
 {
     struct file *filp;
     int will_truncate;
     int error;

     /* Decide whether this open has to truncate the file. */
     will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
     if (will_truncate) {	/* truncation needs write access to the mount */
         error = mnt_want_write(nd->path.mnt);
         if (error)
             goto exit;
     }
     /* Check that the requested access is allowed (may_open() is not shown here). */
     error = may_open(&nd->path, acc_mode, open_flag);
     if (error) {
         if (will_truncate)
             mnt_drop_write(nd->path.mnt);
         goto exit;
     }
     filp = nameidata_to_filp(nd);	/* turn the nameidata into the struct file */
     if (!IS_ERR(filp)) {
         error = ima_file_check(filp, acc_mode);
         if (error) {
             fput(filp);
             filp = ERR_PTR(error);
         }
     }
     if (!IS_ERR(filp)) {
         if (will_truncate) {	/* perform the truncation */
             error = handle_truncate(&nd->path);
             if (error) {
                 fput(filp);
                 filp = ERR_PTR(error);
             }
         }
     }
     /* 
      * It is now safe to drop the mnt write 
      * because the filp has had a write taken 
      * on its behalf. 
      */
     if (will_truncate)
         mnt_drop_write(nd->path.mnt);
     return filp;

 exit:
     if (!IS_ERR(nd->intent.open.file))
         release_open_intent(nd);
     path_put(&nd->path);
     return ERR_PTR(error);
 }

这里主要调用nameidata_to_filp得到相应的file结构

[cpp]view plaincopy 
   
print?
 /*
  * Convert a finished lookup into its struct file.
  *
  * The struct file is taken from the open intent stored in nd. If its
  * f_path.dentry is already set (the filesystem initialised the file for us)
  * the reference on nd->path is dropped and the file is returned unchanged;
  * otherwise __dentry_open() finishes it, with no explicit open callback
  * (NULL).
  */
 struct file *nameidata_to_filp(struct nameidata *nd)
 {
     const struct cred *cred = current_cred();
     /* Pick up the filp from the open intent */
     struct file *intent_file = nd->intent.open.file;

     /* Has the filesystem initialised the file for us? */
     if (intent_file->f_path.dentry != NULL) {
         path_put(&nd->path);
         return intent_file;
     }

     return __dentry_open(nd->path.dentry, nd->path.mnt, intent_file,
                  NULL, cred);
 }

调用__dentry_open

[cpp]view plaincopy 
   
print?
 /*
  * Fill in struct file f for (dentry, mnt) and invoke the open method.
  *
  * open: explicit open callback; when NULL, f->f_op->open is used if f->f_op
  *       exists. nameidata_to_filp() passes NULL.
  * Returns f on success, or an ERR_PTR() after undoing the setup.
  */
 static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
                     struct file *f,
                     int (*open)(struct inode *, struct file *),
                     const struct cred *cred)
 {
     struct inode *inode;
     int error;

     /* Derive f_mode from the open flags and add the seek/pread/pwrite capability bits. */
     f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
                 FMODE_PREAD | FMODE_PWRITE;
     inode = dentry->d_inode;
     if (f->f_mode & FMODE_WRITE) {
         error = __get_file_write_access(inode, mnt);
         if (error)
             goto cleanup_file;
         if (!special_file(inode->i_mode))
             file_take_write(f);
     }

     f->f_mapping = inode->i_mapping;
     f->f_path.dentry = dentry;	/* the dentry this file refers to */
     f->f_path.mnt = mnt;	/* the mount it was reached through */
     f->f_pos = 0;
     f->f_op = fops_get(inode->i_fop);	/* start with the inode's default file operations */
     file_move(f, &inode->i_sb->s_files);	/* put f on the superblock's s_files list */

     error = security_dentry_open(f, cred);
     if (error)
         goto cleanup_all;

     /* No explicit callback supplied: use the open method from f_op. */
     if (!open && f->f_op)
         open = f->f_op->open;
     if (open) {
         error = open(inode, f);
         if (error)
             goto cleanup_all;
     }
     ima_counts_get(f);

     /* These flags only matter while opening; strip them from f_flags. */
     f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

     file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);	/* set up read-ahead state */

     /* NB: we're sure to have correct a_ops only after f_op->open */
     if (f->f_flags & O_DIRECT) {	/* O_DIRECT needs direct_IO or get_xip_mem in a_ops */
         if (!f->f_mapping->a_ops ||
             ((!f->f_mapping->a_ops->direct_IO) &&
             (!f->f_mapping->a_ops->get_xip_mem))) {
             fput(f);
             f = ERR_PTR(-EINVAL);
         }
     }

     return f;

 cleanup_all:
     fops_put(f->f_op);
     if (f->f_mode & FMODE_WRITE) {
         put_write_access(inode);
         if (!special_file(inode->i_mode)) {
             /* 
              * We don't consider this a real 
              * mnt_want/drop_write() pair 
              * because it all happenend right 
              * here, so just reset the state. 
              */
             file_reset_write(f);
             mnt_drop_write(mnt);
         }
     }
     file_kill(f);
     f->f_path.dentry = NULL;
     f->f_path.mnt = NULL;
 cleanup_file:
     put_filp(f);
     dput(dentry);
     mntput(mnt);
     return ERR_PTR(error);
 }

这里主要是进行一些赋值操作

对应于这里，传进来的open指针为NULL，如果相应file_operations结构存在的话就调用它的open函数

每个文件在创建的时候都会被赋予对其进行操作的file_operations结构，这个结构对于一类文件是一样的，例如字符设备对应的是def_chr_fops，其open方法是chrdev_open

[cpp]view plaincopy 
   
print?
 /* Default file operations for character devices: only .open is set here;
  * chrdev_open() then replaces filp->f_op with the driver's own operations. */
 const struct file_operations def_chr_fops = {
     .open = chrdev_open,
 };

但打开之后，我们可以重新获取它们的file_operations结构，这个是在注册设备驱动的时候为该类设备赋予的，也就是我们在驱动里面实现的，而前面的缺省file_operations就是为了完成这个转换的， def_chr_fops只起过渡作用，它的open方法要去寻找硬件驱动的支撑。

[cpp]view plaincopy 
   
print?
 /*
  * Generic open for character devices.
  *
  * Finds the cdev for inode->i_rdev (caching it in inode->i_cdev on first
  * use), replaces filp->f_op with the cdev's operations and calls their open
  * method if there is one. Returns 0, -ENXIO, or the driver open's error.
  */
 static int chrdev_open(struct inode *inode, struct file *filp)
 {
     struct cdev *p;
     struct cdev *new = NULL;
     int ret = 0;

     spin_lock(&cdev_lock);
     p = inode->i_cdev;
     if (!p) { /* no cdev cached on this inode yet */
         struct kobject *kobj;
         int idx;
         spin_unlock(&cdev_lock);
         /* Look up the kobject registered for device number i_rdev; it is
          * embedded in the cdev. idx receives the offset of i_rdev within
          * the matching range (see kobj_lookup()). */
         kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
         if (!kobj)
             return -ENXIO;
         new = container_of(kobj, struct cdev, kobj);	/* the enclosing cdev */
         spin_lock(&cdev_lock);
         /* Check i_cdev again in case somebody beat us to it while 
            we dropped the lock. */
         p = inode->i_cdev;
         if (!p) {
             inode->i_cdev = p = new;	/* cache the cdev in the inode */
             list_add(&inode->i_devices, &p->list);	/* link the inode into the cdev's list */
             new = NULL;
         } else if (!cdev_get(p))
             ret = -ENXIO;
     } else if (!cdev_get(p))
         ret = -ENXIO;
     spin_unlock(&cdev_lock);
     /* new is NULL if it was installed above; otherwise this puts the cdev
      * found by the lookup, which was not installed. */
     cdev_put(new);
     if (ret)
         return ret;

     ret = -ENXIO;
     /* Install the cdev's file operations; later calls made through
      * filp->f_op use them directly. */
     filp->f_op = fops_get(p->ops);
     if (!filp->f_op)	/* the cdev has no file operations */
         goto out_cdev_put;

     if (filp->f_op->open) {	/* open method is optional */
         ret = filp->f_op->open(inode,filp);	/* call the driver's own open */
         if (ret)
             goto out_cdev_put;
     }

     return 0;

  out_cdev_put:
     cdev_put(p);
     return ret;
 }

在这个函数里，我们重新对f_op赋值了，这里的f_op就是我们在写驱动时实现的file_operations结构了。随后还调用了其中的open方法（如果驱动实现了的话）

这里调用 kobj_lookup找到前面我们在注册驱动添加设备时添加的相应的kobj

[cpp]view plaincopy 
   
print?
 /*
  * Find the kobject registered in domain for device number dev.
  *
  * Walks the probe list of bucket MAJOR(dev) % 255, considers entries whose
  * [p->dev, p->dev + p->range - 1] interval contains dev, and calls the
  * entry's get callback with domain->lock released. If that returns NULL the
  * search restarts, now accepting only entries with a smaller range than the
  * one just tried. *index is set to dev's offset from the start of the
  * matching range before the callback runs. Returns the kobject or NULL.
  */
 struct kobject *kobj_lookup(struct kobj_map *domain, dev_t dev, int *index)
 {
     struct kobject *kobj;
     struct probe *p;
     unsigned long best = ~0UL;	/* range - 1 of the last entry tried */

 retry:
     mutex_lock(domain->lock);
     /* The bucket is chosen by major number modulo 255, so majors that give
      * the same remainder share one singly linked list of probes. */
     for (p = domain->probes[MAJOR(dev) % 255]; p; p = p->next) {
         struct kobject *(*probe)(dev_t, int *, void *);
         struct module *owner;
         void *data;
         /* Skip entries whose device-number range does not contain dev. */
         if (p->dev > dev || p->dev + p->range - 1 < dev)
             continue;
         /* Stop once an entry is not narrower than the one already tried. */
         if (p->range - 1 >= best)
             break;
         if (!try_module_get(p->owner))
             continue;
         owner = p->owner;
         data = p->data;	/* for character devices this is the cdev (per the original article) */
         probe = p->get;
         best = p->range - 1;
         *index = dev - p->dev;	/* offset of dev within this entry's range */
         /* Optional lock callback; a negative result skips this entry.
          * (For character devices this is exact_lock(), per the original
          * article - not shown here.) */
         if (p->lock && p->lock(dev, data) < 0) {
             module_put(owner);
             continue;
         }
         mutex_unlock(domain->lock);
         /* Call the get callback without the domain lock held. (For character
          * devices this is exact_match(), per the original article.) */
         kobj = probe(dev, index, data);
         /* Currently ->owner protects _only_ ->probe() itself. */
         module_put(owner);
         if (kobj)
             return kobj;
         goto retry;
     }
     mutex_unlock(domain->lock);
     return NULL;
 }

到这里do_filp_open的流程就基本完成了，即返回了一个file结构。最后回到do_sys_open，由fd_install把这个file结构安装到进程fd数组中下标为fd的位置：

[cpp]view plaincopy 
   
print?
 /*
  * Store file in slot fd of the current process's descriptor table.
  * The slot must be empty (BUG_ON otherwise); the update is made under
  * files->file_lock.
  */
 void fd_install(unsigned int fd, struct file *file)
 {
     struct files_struct *owner_files = current->files;
     struct fdtable *table;

     spin_lock(&owner_files->file_lock);

     /* Fetch the current fd table and publish the file pointer in it. */
     table = files_fdtable(owner_files);
     BUG_ON(table->fd[fd] != NULL);
     rcu_assign_pointer(table->fd[fd], file);

     spin_unlock(&owner_files->file_lock);
 }