open,close系统调用

http://blog.chinaunix.net/space.php?uid=12567959&do=blog&id=161001

open系统调用

open()系统调用的服务例程为sys_open()函数,该函数接收的参数为:要打开的文件的路径名filename、访问模式的一些标志flags,以及如果该文件被创建所需要的许可位掩码mode。如果该系统调用成功,就返回一个文件描述符,也就是指向文件对象的指针数组current->files->fd_array或者current->files->fdtable.fd中新分配给文件的索引;否则,服务例程返回一个负的错误码(用户态的open()封装函数则返回-1)。

 

open()系统调用的所有标志

---------------------------------------------------------------------

include/asm-generic/fcntl.h

/*
 * Flag bits accepted by open(). All values are octal.
 * Flags wrapped in #ifndef may be overridden by an architecture.
 */
#define O_ACCMODE   00000003   /* low two bits: access mode */
#define O_RDONLY    00000000   /* open for reading only */
#define O_WRONLY    00000001   /* open for writing only */
#define O_RDWR      00000002   /* open for reading and writing */

#ifndef O_CREAT /* create the file if it does not exist */
#define O_CREAT     00000100   /* not fcntl */
#endif

#ifndef O_EXCL /* with O_CREAT: fail if the file already exists */
#define O_EXCL      00000200   /* not fcntl */
#endif

#ifndef O_NOCTTY /* never treat the file as a controlling terminal */
#define O_NOCTTY    00000400   /* not fcntl */
#endif

#ifndef O_TRUNC /* truncate the file (drop all existing contents) */
#define O_TRUNC     00001000   /* not fcntl */
#endif

#ifndef O_APPEND /* always write at the end of the file */
#define O_APPEND    00002000
#endif

#ifndef O_NONBLOCK /* non-blocking open */
#define O_NONBLOCK  00004000
#endif

#ifndef O_DSYNC /* synchronous write (block until the physical write completes) */
#define O_DSYNC     00010000   /* used to be O_SYNC, see below */
#endif

#ifndef FASYNC /* I/O event notification via signals */
#define FASYNC      00020000   /* fcntl, for BSD compatibility */
#endif

#ifndef O_DIRECT
#define O_DIRECT    00040000   /* direct disk access hint */
#endif

/* large file (length exceeds what off_t can represent but fits in off64_t) */
#ifndef O_LARGEFILE
#define O_LARGEFILE 00100000
#endif

#ifndef O_DIRECTORY /* fail if the file is not a directory */
#define O_DIRECTORY 00200000   /* must be a directory */
#endif

#ifndef O_NOFOLLOW /* do not resolve a trailing symbolic link in the path */
#define O_NOFOLLOW  00400000   /* don't follow links */
#endif

#ifndef O_NOATIME /* do not update the inode access time */
#define O_NOATIME   01000000
#endif

#ifndef O_CLOEXEC
#define O_CLOEXEC   02000000   /* set close_on_exec */
#endif

---------------------------------------------------------------------

有一些标志的定义是因体系结构而异的。

 

sys_open()定义如下:

---------------------------------------------------------------------

fs/open.c

/*
 * open() system call service routine.
 *
 * Receives the user-space path name, the open flags and the creation mode,
 * optionally forces O_LARGEFILE, and delegates the real work to
 * do_sys_open() with AT_FDCWD as the lookup base. Returns whatever
 * do_sys_open() returns.
 */
SYSCALL_DEFINE3(open,const char __user *, filename, int, flags, int, mode)

{

   long ret;

 

   /* force_o_largefile() decides whether O_LARGEFILE is added implicitly. */
   if (force_o_largefile())

      flags |= O_LARGEFILE;

 

   ret = do_sys_open(AT_FDCWD, filename, flags,mode);

   /* avoid REGPARM breakage on x86: */

   /* Must stay after the call: it marks ret and the three arguments as used here. */
   asmlinkage_protect(3, ret, filename, flags,mode);

   return ret;

}

---------------------------------------------------------------------

这个函数的操作如下:

首先,调用force_o_largefile()来判断是否支持大文件,若是,则设置标志的O_LARGEFILE位。force_o_largefile()其实是一个宏。这个宏也是因体系结构而异的。

其次,调用do_sys_open(AT_FDCWD, filename, flags, mode)来完成实际的打开文件的任务。下面有更详细说明。

最后,调用asmlinkage_protect()以使系统调用正确返回。它也是一个宏,为了防止编译器错误而设。其他平台为空,只有x86平台有定义,为:

---------------------------------------------------------------------

arch/x86/include/asm/linkage.h

/*
 * Make sure the compiler doesn't do anything stupid with the
 * arguments on the stack - they are owned by the *caller*, not
 * the callee. This just fools gcc into not spilling into them,
 * and keeps it from doing tailcall recursion and/or using the
 * stack slots for temporaries, since they are live and "used"
 * all the way to the end of the function.
 *
 * NOTE! On x86-64, all the arguments are in registers, so this
 * only matters on a 32-bit kernel.
 */

/*
 * asmlinkage_protect(n, ret, args...): n is the literal argument count
 * (0..3 are provided below); it is token-pasted to select
 * __asmlinkage_protect<n>.
 */
#define asmlinkage_protect(n, ret, args...) \
   __asmlinkage_protect##n(ret, ##args)

/*
 * Empty asm statement: ret is both output ("=r") and matching input ("0");
 * any further operands are passed through as extra inputs.
 */
#define __asmlinkage_protect_n(ret, args...) \
   __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args)

#define __asmlinkage_protect0(ret) \
   __asmlinkage_protect_n(ret)

#define __asmlinkage_protect1(ret, arg1) \
   __asmlinkage_protect_n(ret, "g" (arg1))

#define __asmlinkage_protect2(ret, arg1, arg2) \
   __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2))

#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \
   __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3))

---------------------------------------------------------------------

 

do_sys_open()函数定义如下:

---------------------------------------------------------------------

fs/open.c

/*
 * do_sys_open - open @filename (looked up relative to @dfd) and return a
 * file descriptor for it.
 *
 * The path name is obtained with getname(); a descriptor is reserved with
 * get_unused_fd_flags() and the file is opened with do_filp_open(). If the
 * open fails the reserved descriptor is given back with put_unused_fd();
 * otherwise the file is installed into the descriptor with fd_install().
 *
 * Returns the new descriptor (>= 0) or a negative error value taken from
 * PTR_ERR() of the failing step.
 */
long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
{
   char *tmp = getname(filename);
   int fd = PTR_ERR(tmp);   /* only meaningful when tmp is an error pointer */

   if (!IS_ERR(tmp)) {
      fd = get_unused_fd_flags(flags);
      if (fd >= 0) {
         struct file *f = do_filp_open(dfd, tmp, flags, mode, 0);
         if (IS_ERR(f)) {
            put_unused_fd(fd);
            fd = PTR_ERR(f);
         } else {
            fsnotify_open(f->f_path.dentry);
            fd_install(fd, f);
         }
      }
      /* the name buffer is released whether or not the open succeeded */
      putname(tmp);
   }
   return fd;
}

---------------------------------------------------------------------

第一个参数是多么的眼熟啊,查找路径名的do_path_lookup()函数的第一个参数也是它,用于说明查找路径名的基目录。关于它,前面路径名查找已有说明了。其余参数则是sys_open()将传进来的参数传给了它。

这个函数执行如下操作:

1、调用getname(filename)从进程地址空间读取文件的路径名,将路径名的地址存放在局部变量tmp中。getname(filename)本质上,首先从名为"names_cache"的路径名slab缓存中分配内存区,然后将路径名从用户空间复制到该内存区中。

 

2、调用get_unused_fd_flags(flags)从当前进程的文件描述符表中找一个空位。其定义为:

---------------------------------------------------------------------

include/linux/file.h

/* Allocate the lowest available descriptor, searching from 0, honouring flags. */
#define get_unused_fd_flags(flags) alloc_fd(0, (flags))

---------------------------------------------------------------------

这是一个宏,仅仅是对alloc_fd()函数特殊参数下调用的别名,对alloc_fd()函数定义如下:

---------------------------------------------------------------------

fs/file.c

/*

 * allocate a file descriptor, mark it busy.

 */

intalloc_fd(unsigned start, unsigned flags)

{

   struct files_struct *files =current->files;

   unsigned int fd;

   int error;

   struct fdtable *fdt;

 

   spin_lock(&files->file_lock);

repeat:

   fdt = files_fdtable(files);

   fd = start;

   if (fd < files->next_fd)                            

      fd = files->next_fd;

 

   if (fd < fdt->max_fds)

      fd = find_next_zero_bit(fdt->open_fds->fds_bits,

                    fdt->max_fds, fd);

 

   error = expand_files(files, fd);

   if (error < 0)

      goto out;

 

   /*

    * Ifwe needed to expand the fs array we

    *might have blocked - try again.

    */

   if (error)

      goto repeat;

 

   if (start <= files->next_fd)

      files->next_fd = fd + 1;

 

   FD_SET(fd, fdt->open_fds);

   if (flags & O_CLOEXEC)

      FD_SET(fd, fdt->close_on_exec);

   else

      FD_CLR(fd, fdt->close_on_exec);

   error = fd;

#if1

   /* Sanity check */

   if (rcu_dereference_raw(fdt->fd[fd]) !=NULL) {

      printk(KERN_WARNING "alloc_fd: slot%d not NULL!\n", fd);

      rcu_assign_pointer(fdt->fd[fd], NULL);

   }

#endif

 

out:

   spin_unlock(&files->file_lock);

   return error;

}

---------------------------------------------------------------------

这个函数执行如下操作:

a.将局部变量fd初始化为start与current->files->next_fd两者中的较大者(get_unused_fd_flags()传入的start为0,因此即为next_fd)。

b.调用find_next_zero_bit(fdt->open_fds->fds_bits,fdt->max_fds, fd)来在文件描述符表中寻找下一个可以分配的文件描述符。这个函数也是因体系结构而异,系统中通用的函数定义为:

---------------------------------------------------------------------

lib/find_next_bit.c

/*
 * find_next_zero_bit - find the first zero bit at or after @offset.
 * @addr:   start of the bitmap (array of unsigned long)
 * @size:   number of bits in the bitmap
 * @offset: bit index at which to start searching
 *
 * Returns the index of the zero bit found, or @size when @offset is
 * already >= @size or no zero bit is left.
 */
unsigned long find_next_zero_bit(const unsigned long *addr,
                                 unsigned long size, unsigned long offset)
{
   const unsigned long *p = addr + BITOP_WORD(offset);
   /* index of bit 0 of the word that contains @offset */
   unsigned long result = offset & ~(BITS_PER_LONG-1);
   unsigned long tmp;

   if (offset >= size)
      return size;
   size -= result;
   offset %= BITS_PER_LONG;
   if (offset) {
      /* partial first word: treat the bits below @offset as set */
      tmp = *(p++);
      tmp |= ~0UL >> (BITS_PER_LONG - offset);
      if (size < BITS_PER_LONG)
         goto found_first;
      if (~tmp)
         goto found_middle;
      size -= BITS_PER_LONG;
      result += BITS_PER_LONG;
   }
   /* whole words */
   while (size & ~(BITS_PER_LONG-1)) {
      if (~(tmp = *(p++)))
         goto found_middle;
      result += BITS_PER_LONG;
      size -= BITS_PER_LONG;
   }
   if (!size)
      return result;
   tmp = *p;

found_first:
   /* partial last word: treat the bits at and above @size as set */
   tmp |= ~0UL << size;
   if (tmp == ~0UL)  /* Are any bits zero? */
      return result + size;    /* Nope. */
found_middle:
   return result + ffz(tmp);
}

---------------------------------------------------------------------

(1)、上面的BITOP_WORD(offset)清一色定义为:

/* Index of the unsigned long word that holds bit number nr. */
#define BITOP_WORD(nr)     ((nr) / BITS_PER_LONG)

首先求得第一个要查找的long的位置。

(2)、offset & ~(BITS_PER_LONG-1)等价于offset除以BITS_PER_LONG再乘以BITS_PER_LONG(BITS_PER_LONG在32位系统上为32,在64位系统上为64),以此来求得所要查找的第一个字第0位在表中的位置,只不过会比除法运算效率高很多。

(3)、如果传递的起始查找位置甚至大于最大可能值,则返回最大可能值。

(4)、查找第一个0位所在的long型值的位置。而局部变量tmp中会保存该long型量的值。

(5)、返回查找到的第一个0位的位置或可能的最大值。

 

c.调用expand_files(files, fd),来扩展文件描述符表。files_struct的fd_array数组成员可以在打开的文件较少时使用,但当打开的文件较多时,就会对文件描述符表进行扩展。

 

d.更新files->next_fd字段,将分配的文件描述符添加进fdt->open_fds,如果设置了O_CLOEXEC则将文件描述符添加进fdt->close_on_exec,若没有,则清除fdt->close_on_exec中的相应位。

 

e.返回文件描述符。

 

3、调用do_filp_open(dfd,tmp, flags, mode, 0)函数,传递给它的参数依次为查找路径名的基目录、文件路径名、访问模式标志以及许可权位掩码、访问模式位。这个函数定义为:

---------------------------------------------------------------------

fs/namei.c

1761/*

1762 * Note that the low bits of the passed in"open_flag"

1763 * are not the same as in the local variable"flag". See

1764 * open_to_namei_flags() for more details.

1765 */

1766struct file *do_filp_open(int dfd, const char *pathname,

1767                int open_flag, int mode, intacc_mode)

1768{

1769        struct file *filp;

1770        struct nameidata nd;

1771        int error;

1772        struct path path;

1773        int count = 0;

1774        int flag =open_to_namei_flags(open_flag);

1775        int force_reval = 0;

1776

1777        if (!(open_flag & O_CREAT))

1778                mode = 0;

1779

1780        /*

1781         * O_SYNC is implemented as__O_SYNC|O_DSYNC.  As many places only

1782         * check for O_DSYNC if the need anysyncing at all we enforce it's

1783         * always set instead of having todeal with possibly weird behaviour

1784         * for malicious applications settingonly __O_SYNC.

1785         */

1786        if (open_flag & __O_SYNC)

1787                open_flag |= O_DSYNC;

1788

1789        if (!acc_mode)

1790                acc_mode = MAY_OPEN |ACC_MODE(open_flag);

1791

1792        /* O_TRUNC implies we need accesschecks for write permissions */

1793        if (open_flag & O_TRUNC)

1794                acc_mode |= MAY_WRITE;

1795

1796        /* Allow the LSM permission hook todistinguish append

1797           access from general write access.*/

1798        if (open_flag & O_APPEND)

1799                acc_mode |= MAY_APPEND;

1800

1801        /* find the parent */

1802reval:

1803        error = path_init(dfd, pathname,LOOKUP_PARENT, &nd);

1804        if (error)

1805                return ERR_PTR(error);

1806        if (force_reval)

1807                nd.flags |= LOOKUP_REVAL;

1808

1809        current->total_link_count = 0;

1810        error = link_path_walk(pathname,&nd);

1811        if (error) {

1812                filp = ERR_PTR(error);

1813                goto out;

1814        }

1815        if (unlikely(!audit_dummy_context())&& (open_flag & O_CREAT))

1816                audit_inode(pathname,nd.path.dentry);

1817

1818        /*

1819         * We have the parent and lastcomponent.

1820         */

1821

1822        error = -ENFILE;

1823        filp = get_empty_filp();

1824        if (filp == NULL)

1825                goto exit_parent;

1826        nd.intent.open.file = filp;

1827        filp->f_flags = open_flag;

1828        nd.intent.open.flags = flag;

1829        nd.intent.open.create_mode = mode;

1830        nd.flags &= ~LOOKUP_PARENT;

1831        nd.flags |= LOOKUP_OPEN;

1832        if (open_flag & O_CREAT) {

1833                nd.flags |= LOOKUP_CREATE;

1834                if (open_flag & O_EXCL)

1835                        nd.flags |=LOOKUP_EXCL;

1836        }

1837        if (open_flag & O_DIRECTORY)

1838                nd.flags |= LOOKUP_DIRECTORY;

1839        if (!(open_flag & O_NOFOLLOW))

1840                nd.flags |= LOOKUP_FOLLOW;

1841        filp = do_last(&nd, &path,open_flag, acc_mode, mode, pathname);

1842        while (unlikely(!filp)) { /* trailingsymlink */

1843                struct path holder;

1844                struct inode *inode =path.dentry->d_inode;

1845                void *cookie;

1846                error = -ELOOP;

1847                /* S_ISDIR part is a temporaryautomount kludge */

1848                if (!(nd.flags &LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode))

1849                        goto exit_dput;

1850                if (count++ == 32)

1851                        goto exit_dput;

1852                /*

1853                 * This is subtle. Instead ofcalling do_follow_link() we do

1854                 * the thing by hands. Thereason is that this way we have zero

1855                 * link_count and path_walk()(called from ->follow_link)

1856                 * honoringLOOKUP_PARENT.  After that we have theparent and

1857                 * last component, i.e. we arein the same situation as after

1858                 * the first path_walk().  Well, almost - if the last component

1859                 * is normal we get its copystored in nd->last.name and we will

1860                 * have to putname() it whenwe are done. Procfs-like symlinks

1861                 * just set LAST_BIND.

1862                 */

1863                nd.flags |= LOOKUP_PARENT;

1864                error =security_inode_follow_link(path.dentry, &nd);

1865                if (error)

1866                        goto exit_dput;

1867                error =__do_follow_link(&path, &nd, &cookie);

1868                if (unlikely(error)) {

1869                        /* nd.path had beendropped */

1870                        if (!IS_ERR(cookie)&& inode->i_op->put_link)

1871                                inode->i_op->put_link(path.dentry, &nd, cookie);

1872                        path_put(&path);

1873                        release_open_intent(&nd);

1874                        filp = ERR_PTR(error);

1875                        goto out;

1876                }

1877                holder = path;

1878                nd.flags &=~LOOKUP_PARENT;

1879                filp = do_last(&nd,&path, open_flag, acc_mode, mode, pathname);

1880                if(inode->i_op->put_link)

1881                        inode->i_op->put_link(holder.dentry, &nd, cookie);

1882                path_put(&holder);

1883        }

1884out:

1885        if (nd.root.mnt)

1886                path_put(&nd.root);

1887        if (filp == ERR_PTR(-ESTALE)&& !force_reval) {

1888                force_reval = 1;

1889                goto reval;

1890        }

1891        return filp;

1892

1893exit_dput:

1894        path_put_conditional(&path,&nd);

1895        if (!IS_ERR(nd.intent.open.file))

1896                release_open_intent(&nd);

1897exit_parent:

1898        path_put(&nd.path);

1899        filp = ERR_PTR(error);

1900        goto out;

1901} ---------------------------------------------------------------------

这个函数依次执行下列步骤:

a.把访问模式拷贝到flag标志中,但是,用特殊的格式对访问模式标志O_RDONLY、O_WRONLY和O_RDWR进行编码。由函数open_to_namei_flags()完成,其定义如下:

---------------------------------------------------------------------

fs/namei.c

/*
 * Note that while the flag value (low two bits) for sys_open means:
 *      00 - read-only
 *      01 - write-only
 *      10 - read-write
 *      11 - special
 * it is changed into
 *      00 - no permissions needed
 *      01 - read-permission
 *      10 - write-permission
 *      11 - read-write
 * for the internal routines (ie open_namei()/follow_link() etc)
 * This is more logical, and also allows the 00 "no perm needed"
 * to be used for symlinks (where the permissions are checked
 * later).
 *
 * Implementation: the value is incremented by one unless its low two
 * bits are already 11, in which case it is returned unchanged.
 */
static inline int open_to_namei_flags(int flag)
{
   if ((flag+1) & O_ACCMODE)
      flag++;
   return flag;
}

---------------------------------------------------------------------

注释中说的已经很清楚了。

根据open_flag原来的值适当更新打开标志open_flag,根据open_flag适当更新访问模式acc_mode。

 

b.调用path_init(dfd, pathname, LOOKUP_PARENT, &nd),将查找的路径名的基路径找到,并赋给nd的path字段。注意在这个函数中设置了nd->flags的LOOKUP_PARENT标志,也就是要查找路径名最后一个分量的父目录。

 

c.设置current->total_link_count为0。

 

d.调用link_path_walk(pathname, &nd)查找路径名最后一个分量的父目录的路径path结构体,保存在nd.path中。因为,路径名的最后一个分量有可能是不存在的而需要创建。

 

e.调用get_empty_filp()函数从file的slab缓存filp_cachep中分配一个file结构,并初始化它的一些字段。如果返回值为NULL,则返回错误码-ENFILE的指针形式。

 

f.设置nd.intent.open.file为上一步分配的file结构的地址filp,设置filp的打开文件时所指定的标志f_flags为传递进来并经过适当修改的打开标志open_flag。设置nd.intent.open.flags为访问模式flag,设置nd.intent.open.create_mode为创建模式mode。清除nd.flags的LOOKUP_PARENT标志,设置其LOOKUP_OPEN标志。如果在打开标志中设置了O_CREAT,则设置nd.flags的LOOKUP_CREATE标志,若同时设置了打开标志的O_EXCL,则同时设置nd.flags的LOOKUP_EXCL。若设置了打开标志的O_DIRECTORY,则设置nd.flags的LOOKUP_DIRECTORY。若没有设置打开标志的O_NOFOLLOW位,则设置nd.flags的LOOKUP_FOLLOW。

即是根据打开标志来设置nd.flags的相应位。这些标志似乎都只与查找的路径名的最后一个分量有关。

 

g.调用do_last()来完成路径名最后一个分量的处理。又是一个非常长的函数,它接受六个参数,nd为前面查找的路径名最后一个分量的父目录的nameidata结构,path为一个path结构的局部变量,打开标志,acc_mode,创建模式(如果需要的话)mode和路径名。该函数定义如下:

---------------------------------------------------------------------

fs/namei.c

1617static struct file *do_last(struct nameidata *nd, struct path *path,

1618                            int open_flag, int acc_mode,

1619                            int mode, constchar *pathname)

1620{

1621        struct dentry *dir =nd->path.dentry;

1622        struct file *filp;

1623        int error = -EISDIR;

1624

1625        switch (nd->last_type) {

1626        case LAST_DOTDOT:

1627                follow_dotdot(nd);

1628                dir = nd->path.dentry;

1629        case LAST_DOT:

1630                if(nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) {

1631                        if(!dir->d_op->d_revalidate(dir, nd)) {

1632                                error =-ESTALE;

1633                                goto exit;

1634                        }

1635                }

1636                /* fallthrough */

1637        case LAST_ROOT:

1638                if (open_flag & O_CREAT)

1639                        goto exit;

1640                /* fallthrough */

1641        case LAST_BIND:

1642                audit_inode(pathname, dir);

1643                goto ok;

1644        }

1645

1646        /* trailing slashes? */

1647        if (nd->last.name[nd->last.len]){

1648                if (open_flag & O_CREAT)

1649                        goto exit;

1650                nd->flags |=LOOKUP_DIRECTORY | LOOKUP_FOLLOW;

1651        }

1652

1653        /* just plain open? */

1654        if (!(open_flag & O_CREAT)) {

1655                error = do_lookup(nd,&nd->last, path);

1656                if (error)

1657                        goto exit;

1658                error = -ENOENT;

1659                if(!path->dentry->d_inode)

1660                        goto exit_dput;

1661                if(path->dentry->d_inode->i_op->follow_link)

1662                        return NULL;

1663                error = -ENOTDIR;

1664                if (nd->flags &LOOKUP_DIRECTORY) {

1665                        if(!path->dentry->d_inode->i_op->lookup)

1666                                gotoexit_dput;

1667                }

1668                path_to_nameidata(path, nd);

1669                audit_inode(pathname,nd->path.dentry);

1670                goto ok;

1671        }

1672

1673        /* OK, it's O_CREAT */

1674        mutex_lock(&dir->d_inode->i_mutex);

1675

1676        path->dentry = lookup_hash(nd);

1677        path->mnt = nd->path.mnt;

1678

1679        error = PTR_ERR(path->dentry);

1680        if (IS_ERR(path->dentry)) {

1681                mutex_unlock(&dir->d_inode->i_mutex);

1682                goto exit;

1683        }

1684

1685        if (IS_ERR(nd->intent.open.file)) {

1686                error =PTR_ERR(nd->intent.open.file);

1687                goto exit_mutex_unlock;

1688        }

1689

1690        /* Negative dentry, just create thefile */

1691        if (!path->dentry->d_inode) {

1692                /*

1693                 * This write is needed toensure that a

1694                 * ro->rw transition doesnot occur between

1695                 * the time when the file iscreated and when

1696                 * a permanent write count istaken through

1697                 * the 'struct file' innameidata_to_filp().

1698                 */

1699                error =mnt_want_write(nd->path.mnt);

1700                if (error)

1701                        goto exit_mutex_unlock;

1702                error =__open_namei_create(nd, path, open_flag, mode);

1703                if (error) {

1704                        mnt_drop_write(nd->path.mnt);

1705                        goto exit;

1706                }

1707                filp = nameidata_to_filp(nd);

1708                mnt_drop_write(nd->path.mnt);

1709                if (!IS_ERR(filp)) {

1710                        error =ima_file_check(filp, acc_mode);

1711                        if (error) {

1712                                fput(filp);

1713                                filp =ERR_PTR(error);

1714                        }

1715                }

1716                return filp;

1717        }

1718

1719        /*

1720         * It already exists.

1721         */

1722        mutex_unlock(&dir->d_inode->i_mutex);

1723        audit_inode(pathname,path->dentry);

1724

1725        error = -EEXIST;

1726        if (open_flag & O_EXCL)

1727                goto exit_dput;

1728

1729        if (__follow_mount(path)) {

1730                error = -ELOOP;

1731                if (open_flag &O_NOFOLLOW)

1732                        goto exit_dput;

1733        }

1734

1735        error = -ENOENT;

1736        if (!path->dentry->d_inode)

1737                goto exit_dput;

1738

1739        if(path->dentry->d_inode->i_op->follow_link)

1740                return NULL;

1741

1742        path_to_nameidata(path, nd);

1743        error = -EISDIR;

1744        if(S_ISDIR(path->dentry->d_inode->i_mode))

1745                goto exit;

1746ok:

1747        filp = finish_open(nd, open_flag,acc_mode);

1748        return filp;

1749

1750exit_mutex_unlock:

1751        mutex_unlock(&dir->d_inode->i_mutex);

1752exit_dput:

1753        path_put_conditional(path, nd);

1754exit:

1755        if (!IS_ERR(nd->intent.open.file))

1756                release_open_intent(nd);

1757        path_put(&nd->path);

1758        return ERR_PTR(error);

1759}

---------------------------------------------------------------------

再对这些参数进行以下说明:nd指向的nameidata的path字段中存放的是路径名最后一个分量的父目录的路径path,其last字段中存放的是路径名最后一个分量的名字的信息(qstr结构)。

(1)这个函数首先根据路径名的最后一个分量的名字信息,来采取一些动作。

如果最后一个分量是“..”,则调用follow_dotdot(nd)返回上一级目录,并设置局部变量dir为nd->path.dentry。

若最后一个分量是“.”,则检查nd->path.mnt->mnt_sb->s_type->fs_flags是否设置了FS_REVAL_DOT标志,若设置了该标志,则调用目录项的dir->d_op->d_revalidate(dir, nd)方法,若该方法失败,则释放先前分配的file结构,减少nd->path的引用计数,并返回错误码-ESTALE。

若最后一个分量为根目录(“.”和“..”的情况在上述处理之后同样会落入此检查),且打开标志设置了O_CREAT,则释放先前分配的file结构,减少nd->path的引用计数,并返回错误码-EISDIR。

若为符号链接(LAST_BIND,即类似procfs的符号链接),则调用finish_open()来完成最后的打开文件操作,并返回file结构指针filp。finish_open()函数稍后解释。

这一步中处理那些最后一个分量的路径已经获得并保存在nd->path中或者最后一个分量为符号链接的情况。同时我们也可以看到,是可以直接使用open来打开目录的,但是不能创建目录。

 

(2)、若路径名的最后一个分量是以“/”结尾的(这根据nd->last.name[nd->last.len]的值来判断,在link_path_walk()函数中求出nd->last值的相关部分可以看出,若以“/”结尾,则nd->last.name[nd->last.len]的值正是字符'/'),则检查打开标志是否设置了O_CREAT,若是则释放先前分配的file结构,减少nd->path的引用计数,并返回错误码-EISDIR;若没有,则设置nd查找标志nd->flags的LOOKUP_DIRECTORY和LOOKUP_FOLLOW位。

 

(3)、若打开标志没有设置O_CREAT,则调用do_lookup(nd, &nd->last, path)来完成路径名中最后一个分量路径的查找。

若返回错误码,则释放先前分配的file结构,减少nd->path的引用计数,并返回该错误码。

若查找的结果path->dentry->d_inode为NULL,则调用path_put_conditional(path, nd)来释放查找到的path->dentry,若最后一个分量表示的是挂载点则还要释放path->mnt。释放先前分配的file结构,减少nd->path的引用计数,并返回错误码-ENOENT。

若最后一个分量表示的是符号链接,则返回NULL

若设置了查找标志的LOOKUP_DIRECTORY位,则还要判断找到的是否为一个目录(通过检查path->dentry->d_inode->i_op->lookup),若不是目录,则调用path_put_conditional(path, nd)来释放查找到的path->dentry,若最后一个分量表示的是挂载点则还要释放path->mnt。释放先前分配的file结构,减少nd->path的引用计数,并返回错误码-ENOTDIR。

调用path_to_nameidata(path, nd)将使得nd->path中保存有路径名最后一个分量的路径。完成审计信息记录。

调用finish_open()来完成最后的打开文件操作,并返回file结构指针filp。finish_open()函数稍后解释。

 

(4)、打开标志设置了O_CREAT,若文件不存在则要创建的情况。首先要对父目录的inode上锁(mutex_lock(&dir->d_inode->i_mutex)),调用lookup_hash(nd)在目录项缓存中查找或者创建路径名最后一个分量的目录项。其定义为:

---------------------------------------------------------------------

fs/namei.c

1122static struct dentry *__lookup_hash(struct qstr *name,

1123                struct dentry *base, structnameidata *nd)

1124{

1125        struct dentry *dentry;

1126        struct inode *inode;

1127        int err;

1128

1129        inode = base->d_inode;

1130

1131        /*

1132         * See if the low-level filesystemmight want

1133         * to use its own hash..

1134         */

1135        if (base->d_op &&base->d_op->d_hash) {

1136                err =base->d_op->d_hash(base, name);

1137                dentry = ERR_PTR(err);

1138                if (err < 0)

1139                        goto out;

1140        }

1141

1142        dentry = __d_lookup(base, name);

1143

1144        /* lockess __d_lookup may fail due toconcurrent d_move()

1145         * in some unrelated directory, so trywith d_lookup

1146         */

1147        if (!dentry)

1148                dentry = d_lookup(base, name);

1149

1150        if (dentry && dentry->d_op&& dentry->d_op->d_revalidate)

1151                dentry = do_revalidate(dentry,nd);

1152

1153        if (!dentry) {

1154                struct dentry *new;

1155

1156                /* Don't create child dentryfor a dead directory. */

1157                dentry = ERR_PTR(-ENOENT);

1158                if (IS_DEADDIR(inode))

1159                        goto out;

1160

1161                new = d_alloc(base, name);

1162                dentry = ERR_PTR(-ENOMEM);

1163                if (!new)

1164                        goto out;

1165                dentry =inode->i_op->lookup(inode, new, nd);

1166                if (!dentry)

1167                        dentry = new;

1168                else

1169                        dput(new);

1170        }

1171out:

1172        return dentry;

1173}

 

1175/*

1176 * Restricted form of lookup. Doesn't followlinks, single-component only,

1177 * needs parent already locked. Doesn't followmounts.

1178 * SMP-safe.

1179 */

1180static struct dentry *lookup_hash(struct nameidata *nd)

1181{

1182        int err;

1183

1184        err =exec_permission(nd->path.dentry->d_inode);

1185        if (err)

1186                return ERR_PTR(err);

1187        return __lookup_hash(&nd->last,nd->path.dentry, nd);

1188}

---------------------------------------------------------------------

lookup_hash(nd)函数当在目录项缓存中没有找到要找的目录项时,会分配目录项,并且会调用父目录的inode->i_op->lookup(inode, new, nd)方法来创建所要查找的文件的inode等信息,并设置目录项的适当字段。但是在目录中没有所要查找的文件时,lookup(inode,new, nd)方法并不返回错误。

lookup_hash(nd)函数返回的结果被赋给path->dentry。path用来存放路径名最后一个分量的path结构。

初始化path->mnt为父目录的vfsmount对象。

 

(5)、检查path->dentry是否是一个错误码的指针形式,若是则对父目录的inode解锁(mutex_unlock(&dir->d_inode->i_mutex)),减少nd->path的引用计数,并返回该错误码。

 

(6)、检查nd->intent.open.file是否包含一个错误码,若是则首先对父目录的inode解锁,接着调用path_put_conditional(path, nd)来释放由lookup_hash(nd)查找到或创建的path->dentry,若最后一个分量表示的是挂载点则还要释放path->mnt。随后减少nd->path的引用计数,并返回nd->intent.open.file中包含的那个错误码(即PTR_ERR(nd->intent.open.file));由于此时nd->intent.open.file本身就是错误码,不会再调用release_open_intent()。

 

(7)、检查path->dentry->d_inode是否为NULL,若是,则说明要创建一个文件。

首先调用mnt_want_write(nd->path.mnt),来确保在创建文件和在nameidata_to_filp()中通过struct file取得固定的写计数之间不会发生ro -> rw的转换。这个函数本质上增加vfsmount对象的写者计数器mnt->mnt_writers。

 

调用__open_namei_create(nd, path, open_flag, mode)函数来创建一个文件,这个函数定义为:

---------------------------------------------------------------------

fs/namei.c

1502/*

1503 * Be careful about ever adding any morecallers of this

1504 * function. Its flags must be in the namei format, not

1505 * what get passed to sys_open().

1506 */

1507static int __open_namei_create(struct nameidata *nd, struct path *path,

1508                                int open_flag,int mode)

1509{

1510        int error;

1511        struct dentry *dir =nd->path.dentry;

1512

1513        if (!IS_POSIXACL(dir->d_inode))

1514                mode &= ~current_umask();

1515        error =security_path_mknod(&nd->path, path->dentry, mode, 0);

1516        if (error)

1517                goto out_unlock;

1518        error = vfs_create(dir->d_inode,path->dentry, mode, nd);

1519out_unlock:

1520        mutex_unlock(&dir->d_inode->i_mutex);

1521        dput(nd->path.dentry);

1522        nd->path.dentry = path->dentry;

1523        if (error)

1524                return error;

1525        /* Don't check for write permission,don't truncate */

1526        return may_open(&nd->path, 0,open_flag & ~O_TRUNC);

1527}

---------------------------------------------------------------------

__open_namei_create()函数在执行了访问权限检查后,就调用vfs_create()(其内部调用父目录inode的create方法dir->i_op->create(dir, dentry, mode, nd))来创建文件。之后,__open_namei_create()解除对于父目录inode的锁定,释放父目录目录项,并将路径名最后一个分量目录项path->dentry赋给nd->path.dentry。然后返回对may_open(&nd->path, 0, open_flag & ~O_TRUNC)调用的返回值。

 

调用nameidata_to_filp(nd)来将一个nameidata转换为一个打开的filp,这个函数本质上主要调用__dentry_open(nd->path.dentry,nd->path.mnt, filp,     NULL, cred)来根据当前进程的状态和获得的目录项来设置nd->intent.open.file所指向的file结构的各字段。

 

调用mnt_drop_write(nd->path.mnt)来减少nd->path.mnt写者计数器的值。

 

返回filp

 

(8)、尽管设置了打开标志的O_CREAT,但是却找到了所需的文件。则首先对父目录inode解锁。检查打开标志是否设置了O_EXCL,若是,则调用path_put_conditional(path, nd)来释放查找到的path->dentry,若最后一个分量表示的是挂载点则还要释放path->mnt。释放先前分配的file结构,减少nd->path的引用计数,并返回错误码-EEXIST

 

调用__follow_mount(path)找到挂载在本路径上的文件系统,即vfsmount对象的地址和目录项对象地址。

 

调用finish_open()来完成打开操作。

 

(9)、返回filp

 

h.若最后一个分量是一个符号链接,则追踪符号链接。

 

i、返回filp

 

4、将文件安装在fd数组中。

 

5、释放路径名所占用的临时内存空间tmp

 

6 、返回文件描述符fd


close系统调用

close()系统调用

@font-face { font-family: "宋体";}@font-face { font-family: "@宋体";}p.MsoNormal, li.MsoNormal, div.MsoNormal { margin: 0cm 0cm 0.0001pt; text-align: justify; font-size: 10.5pt; font-family: "Times New Roman"; }div.Section1 { page: Section1; }

程序关闭打开的文件使用close()系统调用,它接收的参数为要关闭文件的文件描述符fd。sys_close()服务例程定义如下:

---------------------------------------------------------------------

fs/open.c

/*

 * Careful here! We test whether the file pointer is NULL before

 * releasing the fd. This ensures that one clone task can't release

 * an fd while another clone is opening it.

 *
 * close() system call service routine: detaches the file from descriptor
 * fd of the current process and closes it with filp_close().
 * Returns -EBADF if fd is out of range or has no file attached, otherwise
 * the result of filp_close() (with restart codes mapped to -EINTR).

 */

SYSCALL_DEFINE1(close,unsigned int, fd)

{

   struct file * filp;

   struct files_struct *files =current->files;

   struct fdtable *fdt;

   int retval;

 

   /* the descriptor table is only touched while holding file_lock */
   spin_lock(&files->file_lock);

   fdt = files_fdtable(files);

   if (fd >= fdt->max_fds)

      goto out_unlock;

   filp = fdt->fd[fd];

   if (!filp)

      goto out_unlock;

   /* clear the slot, its close-on-exec bit and its open_fds bit */
   rcu_assign_pointer(fdt->fd[fd], NULL);

   FD_CLR(fd, fdt->close_on_exec);

   __put_unused_fd(files, fd);

   spin_unlock(&files->file_lock);

   /* filp_close() is called after the lock has been dropped */
   retval = filp_close(filp, files);

 

   /* can't restart close syscall because file table entry was cleared */

   if (unlikely(retval == -ERESTARTSYS ||

          retval == -ERESTARTNOINTR ||

          retval == -ERESTARTNOHAND ||

          retval == -ERESTART_RESTARTBLOCK))

      retval = -EINTR;

 

   return retval;

 

out_unlock:

   spin_unlock(&files->file_lock);

   return -EBADF;

}

---------------------------------------------------------------------

sys_close()服务例程执行下列操作:

1、获得存放在当前进程current->files->fdt->fd[fd]中的文件对象,如果fd超出了fdt->max_fds的范围或者该文件对象为NULL,则返回-EBADF。

 

2、把current->files->fdt->fd[fd]置为NULL。释放文件描述符fd,这是通过清除current->files->fdt->close_on_exec字段相应的位及调用__put_unused_fd(files, fd)函数来进行的。__put_unused_fd()定义如下:

---------------------------------------------------------------------

fs/open.c

/*
 * Mark descriptor @fd as free in the open_fds bitmap of @files and lower
 * files->next_fd to @fd if @fd is smaller, so that next_fd never points
 * past a free descriptor.
 */
static void __put_unused_fd(struct files_struct *files, unsigned int fd)
{
   struct fdtable *fdt = files_fdtable(files);
   __FD_CLR(fd, fdt->open_fds);
   if (fd < files->next_fd)
      files->next_fd = fd;
}

---------------------------------------------------------------------

这个函数清除current->files->fdt->open_fds字段相应的位。然后对比fd和files->next_fd,如果前者更小,则更新后者为前者。由此可见files_struct结构的next_fd中存放的是文件描述表中可用的最小的文件描述符。这个字段,一来可以为文件描述符的快速分配提供支持,二来则有助于缩小所需搜索的可用的文件描述符的范围。

 

3、调用filp_close(),该函数定义如下:

---------------------------------------------------------------------

fs/open.c

/*
 * "id" is the POSIX thread ID. We use the
 * files pointer for this..
 *
 * Closes one reference to @filp: calls the file's ->flush() method if it
 * has one, then dnotify_flush(), locks_remove_posix() and fput().
 *
 * Returns the value returned by ->flush(), or 0 when there is no flush
 * method or when the file's reference count is already 0 (in which case
 * an error is logged and nothing else is done).
 */
int filp_close(struct file *filp, fl_owner_t id)
{
   int retval = 0;

   if (!file_count(filp)) {
      printk(KERN_ERR "VFS: Close: file count is 0\n");
      return 0;
   }

   if (filp->f_op && filp->f_op->flush)
      retval = filp->f_op->flush(filp, id);

   dnotify_flush(filp, id);
   locks_remove_posix(filp, id);
   fput(filp);
   return retval;
}

---------------------------------------------------------------------

该函数执行下列操作:

a.判断文件的引用计数是否为0,若是,则返回0

b.调用文件操作的flush方法(如果已定义)。

c.释放文件上的任何强制锁。参见后面“文件加锁”部分。

d.调用fput(filp)释放文件对象,该函数定义为:

---------------------------------------------------------------------

fs/file_table.c

/*__fput is called from task context when aio completion releases the last

 * last use of a struct file *. Do not use otherwise.

 */

void__fput(struct file *file)

{

   struct dentry *dentry =file->f_path.dentry;

   struct vfsmount *mnt = file->f_path.mnt;

   struct inode *inode = dentry->d_inode;

 

   might_sleep();

 

   fsnotify_close(file);

   /*

    * Thefunction eventpoll_release() should be the first called

    * inthe file cleanup chain.

    */

   eventpoll_release(file);

   locks_remove_flock(file);

 

   if (unlikely(file->f_flags & FASYNC)){

      if (file->f_op &&file->f_op->fasync)

          file->f_op->fasync(-1, file,0);

   }

   if (file->f_op &&file->f_op->release)

      file->f_op->release(inode, file);

   security_file_free(file);

   ima_file_free(file);

   if (unlikely(S_ISCHR(inode->i_mode)&& inode->i_cdev != NULL))

      cdev_put(inode->i_cdev);

   fops_put(file->f_op);

   put_pid(file->f_owner.pid);

   file_kill(file);

   if (file->f_mode & FMODE_WRITE)

      drop_file_write_access(file);

   file->f_path.dentry = NULL;

   file->f_path.mnt = NULL;

   file_free(file);

   dput(dentry);

   mntput(mnt);

}

 

/*
 * Drop one reference to @file; when the count reaches zero the file is
 * torn down with __fput().
 */
void fput(struct file *file)
{
   if (atomic_long_dec_and_test(&file->f_count))
      __fput(file);
}

---------------------------------------------------------------------

在这个函数中,监测与文件有关的file对象、目录项对象及vfsmount对象的引用计数,若引用计数为0,且条件合适,则将它们归还给相应的slab缓存。

 

4 、返回0,或一个出错码。出错码可由flush方法或文件中的前一个写操作错误产生。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值