先看函数原型:asmlinkage long sys_open(const char __user *filename, int flags, int mode),它定义在fs/open.c中。
代码一:
/*
 * Entry point for the open system call.
 *
 * Adds O_LARGEFILE to the caller's flags when force_o_largefile() is true,
 * then delegates to do_sys_open() with AT_FDCWD as the directory fd.
 * Returns whatever do_sys_open() returns.
 */
asmlinkage long sys_open(const char __user *filename, int flags, int mode)
{
	long ret;

	flags |= force_o_largefile() ? O_LARGEFILE : 0;

	/* The value handed back is the fd, i.e. the index used in fdt->fd[fd]. */
	ret = do_sys_open(AT_FDCWD, filename, flags, mode);

	/* avoid REGPARM breakage on x86: */
	prevent_tail_call(ret);
	return ret;
}
这函数的重点是do_sys_open,看ret的赋值类型,就大致能猜到这个函数的作用是返回文件描述符fd。
代码二:
/*
 * Open the file named by the user-space string @filename and return a
 * file descriptor for it.
 *
 * Steps: copy the name in with getname(), reserve a descriptor with
 * get_unused_fd(), obtain a struct file from do_filp_open(), and bind
 * the two together with fd_install().
 *
 * Returns the descriptor on success. On failure returns the error taken
 * from getname(), the negative value from get_unused_fd(), or the error
 * taken from do_filp_open().
 */
long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
{
	char *tmp = getname(filename);
	int fd = PTR_ERR(tmp);
	struct file *f;

	/* Name could not be fetched: nothing to release, report the error. */
	if (IS_ERR(tmp))
		return fd;

	fd = get_unused_fd();
	if (fd < 0)
		goto out;

	/* Look the name up and get the struct file for it. */
	f = do_filp_open(dfd, tmp, flags, mode);
	if (IS_ERR(f)) {
		/* Give the reserved descriptor back before reporting. */
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out;
	}

	fsnotify_open(f->f_path.dentry);
	/* Make fdt->fd[fd] point at f. */
	fd_install(fd, f);
out:
	putname(tmp);
	return fd;
}
上面那个函数我们已经猜到返回的fd是我们需要的,那么这个函数的两个重点是:1、do_filp_open,2、fd_install。
代码三:
static struct file *do_filp_open(int dfd, const char *filename, int flags,
int mode)
{
int namei_flags, error;
struct nameidata nd;
namei_flags = flags;
if ((namei_flags+1) & O_ACCMODE)
namei_flags++;
error = open_namei(dfd, filename, namei_flags, mode, &nd); //根据filename赋值nd的mnt,dentry last
if (!error)
return nameidata_to_filp(&nd, flags); // nd->intent.open.file ,返回一个file
return ERR_PTR(error);
}
从代码二中其实可以看出,do_filp_open的目的是得到一个赋好值的file。那么代码三的重点,即为open_namei和nameidata_to_filp。
代码四:
open_namei原函数有点长,为了方便观察,裁掉一些看上去不影响分析的代码。函数原型在fs/namei.c
/*
 * open_namei - resolve a pathname for open, creating the file if asked to.
 *
 * NOTE: this is an abridged excerpt. The local declarations (error, path,
 * dir, acc_mode) and the body of the do_link case were cut by the article
 * author, so the block does not compile as shown.
 *
 * Without O_CREAT the path is resolved by path_lookup_open() and checked by
 * may_open(). With O_CREAT the parent is resolved by path_lookup_create(),
 * the last component is looked up under the parent's i_mutex, and a negative
 * dentry is handed to open_namei_create().
 *
 * Returns 0 on success or a negative error code. On the exit paths the open
 * intent is released (unless it is an error pointer) and path_release(nd)
 * is called.
 */
int open_namei(int dfd, const char *pathname, int flag,
int mode, struct nameidata *nd)
{
/* No O_CREAT: a plain lookup followed by the permission check at "ok". */
if (!(flag & O_CREAT)) {
error = path_lookup_open(dfd, pathname, lookup_flags(flag),
nd, flag);
if (error)
return error;
goto ok;
}
error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode); // after this, nd->dentry is the parent's dentry
// (author's note) e.g. for /dev/console this is the dentry of "dev", and nd->last holds the hash and name of "console"
if (error)
return error;
/*
 * We have the parent and last component. First of all, check
 * that we are not asked to creat(2) an obvious directory - that
 * will not do.
 */
error = -EISDIR;
if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
goto exit;
dir = nd->dentry;
nd->flags &= ~LOOKUP_PARENT;
mutex_lock(&dir->d_inode->i_mutex);
path.dentry = lookup_hash(nd); // look up the child of nd->dentry using the hash stored in nd->last
// (author's note) if none exists, a new dentry is created and attached under nd->dentry;
// (author's note) it can then be found through the parent's list of sub-directories
path.mnt = nd->mnt;
do_last:
error = PTR_ERR(path.dentry);
if (IS_ERR(path.dentry)) {
mutex_unlock(&dir->d_inode->i_mutex);
goto exit;
}
/* The open intent's file may already carry an error; propagate it. */
if (IS_ERR(nd->intent.open.file)) {
mutex_unlock(&dir->d_inode->i_mutex);
error = PTR_ERR(nd->intent.open.file);
goto exit_dput;
}
/* Negative dentry, just create the file */
if (!path.dentry->d_inode) {
error = open_namei_create(nd, &path, flag, mode);
if (error)
goto exit;
return 0;
}
/*
 * It already exists.
 */
mutex_unlock(&dir->d_inode->i_mutex);
audit_inode_update(path.dentry->d_inode);
/* O_EXCL on an existing file fails with -EEXIST. */
error = -EEXIST;
if (flag & O_EXCL)
goto exit_dput;
if (__follow_mount(&path)) {
error = -ELOOP;
if (flag & O_NOFOLLOW)
goto exit_dput;
}
error = -ENOENT;
if (!path.dentry->d_inode)
goto exit_dput;
/* Inodes that provide follow_link are handled by the do_link case. */
if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
goto do_link;
path_to_nameidata(&path, nd);
/* A directory is rejected on this (O_CREAT) path with -EISDIR. */
error = -EISDIR;
if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
goto exit;
ok:
error = may_open(nd, acc_mode, flag);
if (error)
goto exit;
return 0;
exit_dput:
dput_path(&path, nd);
exit:
if (!IS_ERR(nd->intent.open.file))
release_open_intent(nd);
path_release(nd);
return error;
do_link:
……// the code for the do_link case was left out by the article author
goto do_last;
}
可以观察到,上面这个函数的重点无非是path_lookup_create,而这个函数无非是对nd这个参数进行修改。好几层剥下去后,到了__path_lookup_intent_open这个函数。
代码五
/*
 * Path lookup that carries an "open intent" in @nd.
 *
 * Obtains a struct file from get_empty_filp(), stores it together with
 * @open_flags and @create_mode in nd->intent.open, then runs
 * do_path_lookup() with LOOKUP_OPEN added to @lookup_flags.
 *
 * Returns 0 on success, -ENFILE when no struct file could be obtained,
 * or the lookup error. If the lookup returned 0 but left an error pointer
 * in nd->intent.open.file, that error is returned and path_release(nd)
 * is called. If the lookup failed and the intent file is not an error
 * pointer, the intent is released with release_open_intent().
 */
static int __path_lookup_intent_open(int dfd, const char *name,
		unsigned int lookup_flags, struct nameidata *nd,
		int open_flags, int create_mode)
{
	struct file *filp = get_empty_filp();
	int err;

	if (filp == NULL)
		return -ENFILE;
	nd->intent.open.file = filp;
	nd->intent.open.flags = open_flags;
	nd->intent.open.create_mode = create_mode;
	err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);
	if (IS_ERR(nd->intent.open.file)) {
		/* The intent file was replaced by an error pointer. */
		if (err == 0) {
			err = PTR_ERR(nd->intent.open.file);
			path_release(nd);
		}
	} else if (err != 0)
		release_open_intent(nd);
	return err;
}
这里看到了令人激动的file类型的变量:get_empty_filp,看名字就知道是生成一个全新的file结构指针。然后是do_path_lookup,在前面mknod的扯淡里已经有了一个大致的说明:nd->dentry、nd->mnt对应路径中最后一级父目录,nd->last中保存最后一个分量(文件名)的信息。比如/dev/console,那么nd->dentry->d_name.name="dev",nd->last.name="console",last里的hash也是console的hash。
再返回到代码四中,通过 lookup_hash(nd),nd->dentry其实已经挂了一个dentry->d_name.name="console",dentry->d_parent=nd->dentry。
再返回到代码三中。
代码六
/*
 * Convert a nameidata that carries an open intent into a struct file.
 *
 * If the intent's file already has a dentry set, it is returned as is
 * after path_release(nd). Otherwise the file is completed by
 * __dentry_open() using nd->dentry and nd->mnt, and that result is
 * returned.
 */
struct file *nameidata_to_filp(struct nameidata *nd, int flags)
{
	/* Pick up the filp from the open intent */
	struct file *filp = nd->intent.open.file;

	/* Has the filesystem initialised the file for us? */
	if (filp->f_path.dentry != NULL) {
		path_release(nd);
		return filp;
	}

	return __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL);
}
nameidata_to_filp在file尚未初始化时会调用__dentry_open,下面来看这个函数。
代码七
/*
 * __dentry_open - finish initialising struct file @f for @dentry on @mnt.
 *
 * Sets f_flags and f_mode from @flags, takes write access on the inode when
 * FMODE_WRITE is set, fills in f_mapping, f_path, f_pos and f_op, moves the
 * file onto the superblock's s_files list, and then calls @open (or, when
 * @open is NULL, f->f_op->open if there is one).
 *
 * Returns @f on success. Returns ERR_PTR(-EINVAL) after fput(f) when
 * O_DIRECT is set but the mapping has neither direct_IO nor get_xip_page.
 * On other failures returns ERR_PTR(error) after undoing the setup and
 * calling put_filp(f), dput(dentry) and mntput(mnt).
 */
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
int flags, struct file *f,
int (*open)(struct inode *, struct file *))
{
struct inode *inode;
int error;
f->f_flags = flags;
f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK |
FMODE_PREAD | FMODE_PWRITE;
inode = dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
error = get_write_access(inode);
if (error)
goto cleanup_file;
}
f->f_mapping = inode->i_mapping;
f->f_path.dentry = dentry;
f->f_path.mnt = mnt;
f->f_pos = 0;
f->f_op = fops_get(inode->i_fop); // the file operations are taken from inode->i_fop
file_move(f, &inode->i_sb->s_files);
/* No explicit open callback given: fall back to the one in f_op. */
if (!open && f->f_op)
open = f->f_op->open; // the open function
if (open) {
error = open(inode, f); // open the inode
if (error)
goto cleanup_all;
}
/* These flags are cleared from f_flags once the open call is done. */
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
/* NB: we're sure to have correct a_ops only after f_op->open */
if (f->f_flags & O_DIRECT) {
if (!f->f_mapping->a_ops ||
((!f->f_mapping->a_ops->direct_IO) &&
(!f->f_mapping->a_ops->get_xip_page))) {
fput(f);
f = ERR_PTR(-EINVAL);
}
}
return f;
/* Undo everything done after write access was taken. */
cleanup_all:
fops_put(f->f_op);
if (f->f_mode & FMODE_WRITE)
put_write_access(inode);
file_kill(f);
f->f_path.dentry = NULL;
f->f_path.mnt = NULL;
/* Reached directly when get_write_access() failed. */
cleanup_file:
put_filp(f);
dput(dentry);
mntput(mnt);
return ERR_PTR(error);
}
这个函数就是对这个file进行更加深入的初始化,其中error = open(inode, f);
这一步,如果这个inode有打开操作函数的话,就进行打开操作。
代码三把相对应的file返回到代码二中,而我们最后要得到的是fd,而非file,所以还要再执行fd_install(fd, f)这一步:
/*
 * Bind descriptor @fd to @file in the current task's file table.
 *
 * Under files->file_lock, looks up the fd table with files_fdtable(),
 * asserts with BUG_ON() that slot @fd is still empty, and publishes
 * @file into that slot with rcu_assign_pointer().
 */
void fastcall fd_install(unsigned int fd, struct file * file)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	struct file **slot;

	spin_lock(&files->file_lock);

	fdt = files_fdtable(files);
	slot = &fdt->fd[fd];

	BUG_ON(*slot != NULL);
	/* fdt->fd[fd] now points at file. */
	rcu_assign_pointer(*slot, file);

	spin_unlock(&files->file_lock);
}
这里我们可以看出fd是怎么来的了:先在当前线程的files->fdt->fd[]这张表中查到第一个空闲的元素,其下标就是fd(由get_unused_fd完成);查到后,再由fd_install将fdt->fd[fd]指向刚初始化好的file。
这样就返回了相应的fd了。