函数原型:
struct dirent *readdir(DIR *dirp);
首先纠正一个很多人都理解错误的事实:上面这个readdir不是系统调用,而是glibc提供的封装函数。不过内核中确实也存在一个同名的readdir系统调用,其原型如下:
int readdir(unsigned int fd, struct old_linux_dirent *dirp, unsigned int count);
glibc的readdir所调用的系统调用不是readdir而是getdents系统调用。此处说明一下为什么采用封装getdents而不是readdir系统调用,最重要的一个理由是readdir系统调用每次只会读入一个目录项,而getdents会一下子读入尽可能多的目录项至缓冲。我先分析readdir系统调用的实现,具体的代码如下:
1 SYSCALL_DEFINE3(old_readdir, unsigned int, fd,2 struct old_linux_dirent __user *, dirent, unsigned int, count)3 {4 interror;5 struct fd f =fdget(fd);6 struct readdir_callback buf ={7 .ctx.actor =fillonedir,8 .dirent =dirent9 };10
11 if (!f.file)12 return -EBADF;13
14 error = iterate_dir(f.file, &buf.ctx);15 if(buf.result)16 error =buf.result;17
18 fdput(f);19 returnerror;20 }
6-9行:设置目录项填充函数为fillonedir,fillonedir的具体实现不分析,只需知道每次只填充一个目录项即可(作为替代,将会分析更加复杂的filldir函数)
14行:iterate_dir是vfs的封装函数,该函数调用具体的文件系统的iterate函数填充目录
注: 3.11之前并不使用iterate作为读目录的函数而是使用readdir函数
总结:readdir系统调用忽略了count参数并且每次只读一个目录项
接下来分析glibc的readdir函数实现,这个过程可能比较复杂,有兴趣的可以看看,首先给出readdir的实现:
1 DIRENT_TYPE *
2 __READDIR (DIR *dirp)3 {4 DIRENT_TYPE *dp;5 int saved_errno =errno;6
7 #ifndef NOT_IN_libc8 __libc_lock_lock (dirp->lock);9 #endif
10
11 do
12 {13 size_t reclen;14
15 if (dirp->offset >= dirp->size)16 {17 /*We've emptied out our buffer. Refill it.*/
18
19 size_t maxread;20 ssize_t bytes;21
22 #ifndef _DIRENT_HAVE_D_RECLEN23 /*Fixed-size struct; must read one at a time (see below).*/
24 maxread = sizeof *dp;25 #else
26 maxread = dirp->allocation;27 #endif
28
29 bytes = __GETDENTS (dirp->fd, dirp->data, maxread);30 if (bytes <= 0)31 {32 /*On some systems getdents fails with ENOENT when the33 open directory has been rmdir'd already. POSIX.134 requires that we treat this condition like normal EOF.*/
35 if (bytes < 0 && errno ==ENOENT)36 bytes = 0;37
38 /*Don't modifiy errno when reaching EOF.*/
39 if (bytes == 0)40 __set_errno (saved_errno);41 dp =NULL;42 break;43 }44 dirp->size =(size_t) bytes;45
46 /*Reset the offset into the buffer.*/
47 dirp->offset = 0;48 }49
50 dp = (DIRENT_TYPE *) &dirp->data[dirp->offset];51
52 #ifdef _DIRENT_HAVE_D_RECLEN53 reclen = dp->d_reclen;54 #else
55 /*The only version of `struct dirent*' that lacks `d_reclen'56 is fixed-size.*/
57 assert (sizeof dp->d_name > 1);58 reclen = sizeof *dp;59 /*The name is not terminated if it is the largest possible size.60 Clobber the following byte to ensure proper null termination. We61 read jst one entry at a time above so we know that byte will not62 be used later.*/
63 dp->d_name[sizeof dp->d_name] = '\0';64 #endif
65
66 dirp->offset +=reclen;67
68 #ifdef _DIRENT_HAVE_D_OFF69 dirp->filepos = dp->d_off;70 #else
71 dirp->filepos +=reclen;72 #endif
73
74 /*Skip deleted files.*/
75 } while (dp->d_ino == 0);76 #ifndef NOT_IN_libc77 __libc_lock_unlock (dirp->lock);78 #endif
79
80 returndp;81 }
7-9行:加锁互斥量
11-75行:一个do while循环,该循环用于过滤已经删除的目录项
15-48行:具体的读目录项代码,调用getdents系统调用尽可能多的读入目录项至dirp->data缓冲区
总结:代码并不是特别复杂,自己阅读应该可以理解。readdir函数的逻辑是先分配一个缓冲区,然后每次尽可能多地读取目录项至缓冲区,之后从缓冲区中逐个取出目录项;缓冲区读完了,再继续调用getdents读目录项至缓冲区
接下来分析最重要的getdents系统调用,代码如下:
1 SYSCALL_DEFINE3(getdents, unsigned int, fd,2 struct linux_dirent __user *, dirent, unsigned int, count)3 {4 structfd f;5 struct linux_dirent __user *lastdirent;6 struct getdents_callback buf ={7 .ctx.actor =filldir,8 .count =count,9 .current_dir =dirent10 };11 interror;12
13 if (!access_ok(VERIFY_WRITE, dirent, count))14 return -EFAULT;15
16 f =fdget(fd);17 if (!f.file)18 return -EBADF;19
20 error = iterate_dir(f.file, &buf.ctx);21 if (error >= 0)22 error =buf.error;23 lastdirent =buf.previous;24 if(lastdirent) {25 if (put_user(buf.ctx.pos, &lastdirent->d_off))26 error = -EFAULT;27 else
28 error = count -buf.count;29 }30 fdput(f);31 returnerror;32 }
6-9行:设置填充函数为filldir,等会分析该函数
20行:调用iterate_dir函数,该函数会调用具体的文件系统中的iterate函数,接下来作为例子给出PFS的实现(PFS是本人设计的一个文件系统,对PFS的linux driver有兴趣的可以去 https://sourceforge.net/projects/pfspfs 看看)
总结:在分析了iterate和filldir之后,再回过头来分析getdents系统调用
iterate源码如下:(此处采用pfs的实现)
1 static int
2 pfs_readdir(struct file *file, struct dir_context *ctx)3 {4 int64_t dno;5 unsigned longoff;6 struct buffer_head *bh;7 struct pfs_dir_entry *de;8 struct inode *inode =file_inode(file);9
10 if(ctx->pos == 0)11 ctx->pos = PFS_DIRHASHSIZ * sizeof(int64_t) + sizeof(int64_t);12 for(off = ctx->pos & (PFS_BLOCKSIZ - 1); ctx->pos < inode->i_size; off = ctx->pos & (PFS_BLOCKSIZ - 1)){13 if(!(dno = pfs_get_block_number(inode, pfs_block_number(ctx->pos), 0)))14 gotoskip;15 if(!(bh = sb_bread(inode->i_sb, dno /PFS_STRS_PER_BLOCK))){16 pr_err("pfs: device %s: %s: failed to read block %lld of dir %lld\n",17 inode->i_sb->s_id, "pfs_readdir", pfs_block_number(ctx->pos), PFS_I(inode)->i_ino);18 gotoskip;19 }20 do{21 de = (struct pfs_dir_entry *)((char *)bh->b_data +off);22 if(de->d_ino){23 if(!(dir_emit(ctx, pfs_get_de_name(de), de->d_len, (int32_t)le64_to_cpu(de->d_ino), DT_UNKNOWN))){24 brelse(bh);25 return 0;26 }27 }28 off +=pfs_get_de_size(de);29 ctx->pos +=pfs_get_de_size(de);30 }while(off < PFS_BLOCKSIZ && ctx->pos < inode->i_size);31 brelse(bh);32 continue;33 skip:34 ctx->pos += PFS_BLOCKSIZ -off;35 }36 return 0;37 }
12-30行:代码完整的分析可能需要读者熟悉linux的内核,因此此处不给出代码的具体分析而给出代码的逻辑,pfs_readdir不断的读目录项,然后调用dir_emit填充目录项直到dir_emit调用失败,dir_emit是一个封装函数,实现为filldir() == 0,所以在filldir成功时dir_emit返回1,在失败时返回0
总结:不同的文件系统的目录的iterate都不同,不过大体都是差不多的,都是读目录项,然后调用dir_emit函数填充至用户空间
filldir函数的代码如下:
1 static int filldir(struct dir_context *ctx, const char *name, intnamlen,2 loff_t offset, u64 ino, unsigned intd_type)3 {4 struct linux_dirent __user *dirent;5 struct getdents_callback *buf =
6 container_of(ctx, structgetdents_callback, ctx);7 unsigned longd_ino;8 int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,9 sizeof(long));10
11 buf->error = -EINVAL; /*only used if we fail..*/
12 if (reclen > buf->count)13 return -EINVAL;14 d_ino =ino;15 if (sizeof(d_ino) < sizeof(ino) && d_ino !=ino) {16 buf->error = -EOVERFLOW;17 return -EOVERFLOW;18 }19 dirent = buf->previous;20 if(dirent) {21 if (__put_user(offset, &dirent->d_off))22 gotoefault;23 }24 dirent = buf->current_dir;25 if (__put_user(d_ino, &dirent->d_ino))26 gotoefault;27 if (__put_user(reclen, &dirent->d_reclen))28 gotoefault;29 if (copy_to_user(dirent->d_name, name, namlen))30 gotoefault;31 if (__put_user(0, dirent->d_name +namlen))32 gotoefault;33 if (__put_user(d_type, (char __user *) dirent + reclen - 1))34 gotoefault;35 buf->previous =dirent;36 dirent = (void __user *)dirent +reclen;37 buf->current_dir =dirent;38 buf->count -=reclen;39 return 0;40 efault:41 buf->error = -EFAULT;42 return -EFAULT;43 }
函数解释:filldir先把上一个已填充目录项的d_off设置为当前的偏移,然后填充当前的目录项,再把buf->previous设置为当前的dirent,最后将buf->current_dir指向下一个可用的空间,并从buf->count中减去本次使用的reclen
总结:最后作为一个贯穿整个过程的示例,给出telldir函数的解释。telldir返回dirp->filepos,而dirp->filepos是在glibc的readdir函数中设置的(见上文__READDIR的第69行:dirp->filepos = dp->d_off);正如filldir的20到23行所示,dp->d_off是读下一个目录项时的偏移。