Linux内核源代码情景分析-文件的打开
打开文件的系统调用是open(),在内核中通过sys_open()实现,假设filename是"/usr/local/hello.c",且假设这个文件已经存在,代码如下:
/*
 * sys_open - kernel-side implementation of the open() system call.
 *
 * filename: user-space pointer to the path name
 * flags:    open flags; O_LARGEFILE is forced on when BITS_PER_LONG != 32
 * mode:     mode argument, handed to filp_open() unchanged
 *
 * Returns the new descriptor number on success, otherwise the error value
 * produced by getname(), get_unused_fd() or filp_open().
 */
asmlinkage long sys_open(const char * filename, int flags, int mode)
{
	char *kname;
	struct file *filp;
	int fd;

#if BITS_PER_LONG != 32
	flags |= O_LARGEFILE;
#endif
	/* Copy the path name from user space into kernel space. */
	kname = getname(filename);
	fd = PTR_ERR(kname);
	if (IS_ERR(kname))
		return fd;

	/*
	 * Reserve a free entry in the current process's open-file table;
	 * the index of that entry is the descriptor number.
	 */
	fd = get_unused_fd();
	if (fd >= 0) {
		/* Obtain a struct file associated with the path. */
		filp = filp_open(kname, flags, mode);
		if (IS_ERR(filp)) {
			/* Give the reserved descriptor back and report the error. */
			put_unused_fd(fd);
			fd = PTR_ERR(filp);
		} else {
			/* Publish the struct file in the process's descriptor table. */
			fd_install(fd, filp);
		}
	}

	/* The kernel copy of the name is released on every path past getname(). */
	putname(kname);
	return fd;
}
get_unused_fd,从当前进程的"打开文件表"中找到一个空闲的表项,该表项的下标即为"打开文件号",代码如下:
/*
 * get_unused_fd - reserve the lowest free descriptor number at or above
 * files->next_fd in the current process's open-file table.
 *
 * Returns the reserved descriptor number, -EMFILE when the candidate is
 * at or above the RLIMIT_NOFILE soft limit, or the non-zero value returned
 * by expand_fdset() / expand_fd_array() when growing a table fails.
 */
int get_unused_fd(void)
{
	struct files_struct *files = current->files;
	int fd, ret;

	write_lock(&files->file_lock);
	for (;;) {
		ret = -EMFILE;

		/* Search the open_fds bitmap for a free descriptor number. */
		fd = find_next_zero_bit(files->open_fds,
					files->max_fdset,
					files->next_fd);

		/*
		 * N.B. For clone tasks sharing a files structure, this test
		 * will limit the total number of files that can be opened.
		 */
		if (fd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
			break;

		/* Bitmap too small: grow it and search again. */
		if (fd >= files->max_fdset) {
			ret = expand_fdset(files, fd);
			if (ret)
				break;
			continue;
		}

		/* struct file pointer array too small: grow it and search again. */
		if (fd >= files->max_fds) {
			ret = expand_fd_array(files, fd);
			if (ret)
				break;
			continue;
		}

		/* Mark the number as taken so later searches skip it. */
		FD_SET(fd, files->open_fds);
		FD_CLR(fd, files->close_on_exec);
		/* Start the next search just past this descriptor. */
		files->next_fd = fd + 1;
#if 1
		/* Sanity check */
		if (files->fd[fd] != NULL) {
			printk("get_unused_fd: slot %d not NULL!\n", fd);
			files->fd[fd] = NULL;
		}
#endif
		ret = fd;
		break;
	}
	write_unlock(&files->file_lock);
	return ret;
}

/*
 * files_struct - the open-file table referenced through current->files.
 * The pointer members are annotated in the original text as pointing at
 * the embedded *_init / fd_array members below.
 */
struct files_struct {
	atomic_t count;
	rwlock_t file_lock;
	int max_fds;			/* current capacity of the struct file pointer array */
	int max_fdset;			/* capacity of the bitmaps */
	int next_fd;			/* next descriptor number to try */
	struct file ** fd;		/* points at fd_array */
	fd_set *close_on_exec;		/* points at close_on_exec_init */
	fd_set *open_fds;		/* points at open_fds_init */
	fd_set close_on_exec_init;
	fd_set open_fds_init;
	struct file * fd_array[NR_OPEN_DEFAULT];
};
获得了打开文件号以后,接着通过filp_open()来获得一个file结构。首先列出file结构的定义如下:
struct file {
struct list_headf_list;
struct dentry*f_dentry;//指向文件的dentry结构的指针f_dentry
struct vfsmount *f_vfsmnt;//指向将文件所在设备安装在文件系统中的vfsmnt结构的指针
struct file_operations*f_op;
atomic_tf_count;
unsigned int f_flags;
mode_tf_mode;
loff_tf_pos;//当前的读写位置
unsigned long f_reada, f_ramax, f_raend, f_ralen, f_rawin;
struct fown_structf_owner;
unsigned intf_uid, f_gid;
intf_error;
unsigned longf_version;
/* needed for tty driver, and maybe others */
void*private_data;
}; file_open,代码如下:
/*
 * filp_open - open the file named by a path and return its struct file.
 *
 * filename: path name (sys_open passes the kernel copy made by getname())
 * flags:    open flags from the caller; passed unchanged to dentry_open()
 * mode:     forwarded to open_namei()
 *
 * Returns the result of dentry_open() when open_namei() succeeds, or
 * ERR_PTR(error) when open_namei() fails.
 */
struct file *filp_open(const char * filename, int flags, int mode)
{
int namei_flags, error;
struct nameidata nd;
namei_flags = flags;
/*
 * Derive the flag word handed to open_namei(): the value is bumped by one
 * unless that would leave the O_ACCMODE field zero, and O_TRUNC also sets
 * bit value 2.
 * NOTE(review): presumably this maps the O_RDONLY/O_WRONLY/O_RDWR encoding
 * onto separate read/write bits, with 2 meaning "write" - confirm against
 * the FMODE_* definitions.
 */
if ((namei_flags+1) & O_ACCMODE)
namei_flags++;
if (namei_flags & O_TRUNC)
namei_flags |= 2;
error = open_namei(filename, namei_flags, mode, &nd);// resolve the path; on success nd.dentry / nd.mnt describe the file
if (!error)
return dentry_open(nd.dentry, nd.mnt, flags);// build the struct file from nd.dentry and nd.mnt
return ERR_PTR(error);
}int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
{
/*
 * open_namei - resolve pathname for an open and, when O_CREAT is set and
 * the last component does not exist, create it.
 *
 * pathname: path to resolve
 * flag:     flag word prepared by filp_open() (tested against O_* and
 *           FMODE_WRITE below)
 * mode:     passed to vfs_create() when a new file is created
 * nd:       filled in by path_init()/path_walk(); on success nd->dentry
 *           is the dentry of the object being opened
 *
 * Returns 0 on success or an error code. The "exit" paths release the
 * nameidata with path_release() before returning.
 */
int acc_mode, error = 0;
struct inode *inode;
struct dentry *dentry;
struct dentry *dir;
int count = 0;
acc_mode = ACC_MODE(flag);
/*
 * The simplest case - just a plain lookup.
 */
if (!(flag & O_CREAT)) {// plain lookup when O_CREAT is absent; the walkthrough assumes O_CREAT is set (create the file if it is missing), so this branch is skipped
if (path_init(pathname, lookup_flags(flag), nd))
error = path_walk(pathname, nd);
if (error)
return error;
dentry = nd->dentry;
goto ok;
}
/*
 * Create - we need to know the parent.
 */
if (path_init(pathname, LOOKUP_PARENT, nd))
error = path_walk(pathname, nd);// locate the parent directory
if (error)
return error;
/*
 * We have the parent and last component. First of all, check
 * that we are not asked to creat(2) an obvious directory - that
 * will not do.
 */
error = -EISDIR;
/*
 * At this point nd->dentry is the parent's dentry, nd->last holds the
 * name of the final component and nd->last_type its type. Require the
 * type to be LAST_NORM and the name to be terminated by '\0' right at
 * nd->last.len.
 */
if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
goto exit;
dir = nd->dentry;
down(&dir->d_inode->i_sem);
dentry = lookup_hash(&nd->last, nd->dentry);// look up the dentry of the final component
do_last:
error = PTR_ERR(dentry);
if (IS_ERR(dentry)) {
up(&dir->d_inode->i_sem);
goto exit;
}
/* Negative dentry, just create the file */
if (!dentry->d_inode) {// the walkthrough assumes the final component exists (d_inode is set), so this branch is skipped
error = vfs_create(dir->d_inode, dentry, mode);
up(&dir->d_inode->i_sem);
dput(nd->dentry);
nd->dentry = dentry;
if (error)
goto exit;
/* Don't check for write permission, don't truncate */
acc_mode = 0;
flag &= ~O_TRUNC;
goto ok;
}
/*
 * It already exists.
 */
up(&dir->d_inode->i_sem);
error = -EEXIST;
if (flag & O_EXCL)
goto exit_dput;
if (d_mountpoint(dentry)) {// is it a mount point?
error = -ELOOP;
if (flag & O_NOFOLLOW)
goto exit_dput;
do __follow_down(&nd->mnt,&dentry); while(d_mountpoint(dentry));
}
error = -ENOENT;
if (!dentry->d_inode)
goto exit_dput;
if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
goto do_link;
dput(nd->dentry);
nd->dentry = dentry;// nd->dentry now holds the dentry of the final component
error = -EISDIR;
if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
goto exit;
ok:
error = -ENOENT;// the checks from here on are not covered by the walkthrough
inode = dentry->d_inode;
if (!inode)
goto exit;
error = -ELOOP;
if (S_ISLNK(inode->i_mode))
goto exit;
error = -EISDIR;
if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
goto exit;
error = permission(inode,acc_mode);
if (error)
goto exit;
/*
 * FIFO's, sockets and device files are special: they don't
 * actually live on the filesystem itself, and as such you
 * can write to them even if the filesystem is read-only.
 */
if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
flag &= ~O_TRUNC;
} else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
error = -EACCES;
if (IS_NODEV(inode))
goto exit;
flag &= ~O_TRUNC;
} else {
error = -EROFS;
if (IS_RDONLY(inode) && (flag & 2))
goto exit;
}
/*
 * An append-only file must be opened in append mode for writing.
 */
error = -EPERM;
if (IS_APPEND(inode)) {
if ((flag & FMODE_WRITE) && !(flag & O_APPEND))
goto exit;
if (flag & O_TRUNC)
goto exit;
}
/*
 * Ensure there are no outstanding leases on the file.
 */
error = get_lease(inode, flag);
if (error)
goto exit;
if (flag & O_TRUNC) {
error = get_write_access(inode);
if (error)
goto exit;
/*
 * Refuse to truncate files with mandatory locks held on them.
 */
error = locks_verify_locked(inode);
if (!error) {
DQUOT_INIT(inode);
error = do_truncate(dentry, 0);
}
put_write_access(inode);
if (error)
goto exit;
} else
if (flag & FMODE_WRITE)
DQUOT_INIT(inode);
return 0;
/* Error exits: exit_dput drops the looked-up dentry first, then both release nd. */
exit_dput:
dput(dentry);
exit:
path_release(nd);
return error;
do_link:
error = -ELOOP;
if (flag & O_NOFOLLOW)
goto exit_dput;
/*
 * This is subtle. Instead of calling do_follow_link() we do the
 * thing by hands. The reason is that this way we have zero link_count
 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
 * After that we have the parent and last component, i.e.
 * we are in the same situation as after the first path_walk().
 * Well, almost - if the last component is normal we get its copy
 * stored in nd->last.name and we will have to putname() it when we
 * are done. Procfs-like symlinks just set LAST_BIND.
 */
UPDATE_ATIME(dentry->d_inode);
error = dentry->d_inode->i_op->follow_link(dentry, nd);
dput(dentry);
if (error)
return error;
if (nd->last_type == LAST_BIND) {
dentry = nd->dentry;
goto ok;
}
error = -EISDIR;
if (nd->last_type != LAST_NORM)
goto exit;
if (nd->last.name[nd->last.len]) {
putname(nd->last.name);
goto exit;
}
/* count is incremented on each pass through here; once it has reached 32, stop following and go to "ok" with nd->dentry. */
if (count++==32) {
dentry = nd->dentry;
putname(nd->last.name);
goto ok;
}
/* Same steps as before do_last: lock the parent, look up the last component, then retry. */
dir = nd->dentry;
down(&dir->d_inode->i_sem);
dentry = lookup_hash(&nd->last, nd->dentry);
putname(nd->last.name);
goto do_last;
}
返回filp_open,继续执行dentry_open,来填充file结构,代码如下:
/*
 * dentry_open - build a struct file for an already-resolved dentry.
 *
 * dentry: dentry of the object being opened
 * mnt:    vfsmount recorded in the new struct file
 * flags:  open flags; stored in f_flags and used to derive f_mode
 *
 * Returns the initialised struct file, or ERR_PTR(error) on failure.
 * Every failure path calls dput(dentry) and mntput(mnt).
 */
struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
{
	struct file *filp;
	struct inode *ino;
	int err = -ENFILE;

	/* Grab a free struct file. */
	filp = get_empty_filp();
	if (filp == NULL)
		goto cleanup_dentry;

	filp->f_flags = flags;
	filp->f_mode = (flags+1) & O_ACCMODE;
	ino = dentry->d_inode;
	if (filp->f_mode & FMODE_WRITE) {
		err = get_write_access(ino);
		if (err != 0)
			goto cleanup_file;
	}

	filp->f_dentry = dentry;	/* dentry of this node */
	filp->f_vfsmnt = mnt;		/* vfsmount of this node */
	filp->f_pos = 0;
	filp->f_reada = 0;
	/* f_op is taken from the inode's i_fop. */
	filp->f_op = fops_get(ino->i_fop);

	/*
	 * Unlink the file from the intermediate queue and hang it on the
	 * s_files queue of the super_block of the device holding the file.
	 */
	if (ino->i_sb)
		file_move(filp, &ino->i_sb->s_files);

	if (filp->f_op && filp->f_op->open) {
		err = filp->f_op->open(ino, filp);
		if (err != 0)
			goto cleanup_all;
	}

	/* These flags are cleared from f_flags once the open has succeeded. */
	filp->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
	return filp;

cleanup_all:
	fops_put(filp->f_op);
	if (filp->f_mode & FMODE_WRITE)
		put_write_access(ino);
	filp->f_dentry = NULL;
	filp->f_vfsmnt = NULL;
cleanup_file:
	put_filp(filp);
cleanup_dentry:
	dput(dentry);
	mntput(mnt);
	return ERR_PTR(err);
}
get_empty_filp,分配一个空闲的file数据结构。内核中有一个空闲file结构的队列free_list,需要file结构时就从该队列中摘下一个,并将其暂时挂入一个中间队列anon_list。在确认了对该文件可以进行写操作以后,就对这个空闲file结构进行初始化。然后通过file_move()将其从中间队列脱链而挂入该文件所在设备的super_block结构中的file结构队列s_files。
/*
 * get_empty_filp - hand out a zeroed struct file with f_count set to 1.
 *
 * A structure is taken from free_list when more than NR_RESERVED_FILES are
 * free, or when any are free and current->euid is 0; otherwise a new one is
 * allocated from filp_cachep while nr_files is below max_files. The result
 * is placed on anon_list before the list lock is released.
 *
 * Returns the struct file, or NULL when none could be obtained.
 *
 * Note: the labels used_one and new_one sit inside the first if-body and
 * are entered by goto from the code below it.
 */
struct file * get_empty_filp(void)
{
/* Last max_files value for which the "limit reached" message was printed. */
static int old_max = 0;
struct file * f;
file_list_lock();
if (files_stat.nr_free_files > NR_RESERVED_FILES) {
used_one:
f = list_entry(free_list.next, struct file, f_list);
list_del(&f->f_list);// take one struct file off the free_list queue
files_stat.nr_free_files--;
new_one:
memset(f, 0, sizeof(*f));
atomic_set(&f->f_count,1);
f->f_version = ++event;
f->f_uid = current->fsuid;
f->f_gid = current->fsgid;
list_add(&f->f_list, &anon_list);// park it on the intermediate queue anon_list for now
file_list_unlock();
return f;
}
/*
 * Use a reserved one if we're the superuser
 */
if (files_stat.nr_free_files && !current->euid)
goto used_one;
/*
 * Allocate a new one if we're below the limit.
 */
if (files_stat.nr_files < files_stat.max_files) {
/* The list lock is dropped around the allocation and retaken afterwards. */
file_list_unlock();
f = kmem_cache_alloc(filp_cachep, SLAB_KERNEL);
file_list_lock();
if (f) {
files_stat.nr_files++;
goto new_one;
}
/* Big problems... */
printk("VFS: filp allocation failed\n");
} else if (files_stat.max_files > old_max) {
/* Report the limit only when max_files exceeds the value recorded at the last report. */
printk("VFS: file-max limit %d reached\n", files_stat.max_files);
old_max = files_stat.max_files;
}
file_list_unlock();
return NULL;
} 至此,filp_open分析完成,返回到sys_open,执行fd_install,将新建的file数据结构的指针"安装"到当前进程的file_struct结构中,代码如下:
static inline void fd_install(unsigned int fd, struct file * file)
{
struct files_struct *files = current->files;
write_lock(&files->file_lock);
if (files->fd[fd])
BUG();
files->fd[fd] = file;
write_unlock(&files->file_lock);
}