Linux内核源代码情景分析-特殊文件系统/proc-对/proc/self/cwd的访问
上篇文章《Linux内核源代码情景分析-特殊文件系统/proc》分析了对/proc/loadavg的访问,本文接着分析对/proc/self/cwd的访问。
/*
 * Resolve the user-space path `name` into `nd`.
 * Returns the error encoded in getname()'s result if that is an error
 * pointer; otherwise path_walk()'s result, or 0 when path_init() returns 0
 * and no walk is performed.
 * In this article `name` is "/proc/self/cwd".
 */
int __user_walk(const char *name, unsigned flags, struct nameidata *nd)
{
	char *tmp;
	int err;
	tmp = getname(name);	// allocate a page in kernel space and copy the file name into it from user space
	err = PTR_ERR(tmp);
	if (!IS_ERR(tmp)) {
		err = 0;
		if (path_init(tmp, flags, nd))	// walk only when path_init() returns non-zero
			err = path_walk(tmp, nd);
		putname(tmp);	// release the buffer obtained from getname()
	}
	return err;
}
name就为/proc/self/cwd,重要分析下path_walk函数,请参考Linux内核源代码情景分析-从路径名到目标节点。
第一次循环:path_walk发现/proc是个安装点,于是通过__follow_down()找到proc文件系统根节点的dentry结构,并让nameidata结构中的指针dentry指向这个数据结构。
第二次循环搜索路径名中的下一个节点self,由于这个节点并不是路径名的最后一个节点,所以执行的代码如下:
dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);//在内存中寻找该节点业已建立的dentry结构
if (!dentry) {//如果没有找到
dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);//那么就要建立该节点的dentry结构
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
break;
} 参考Linux内核源代码情景分析-特殊文件系统/proc,最终也要通过proc_root_lookup()调用proc_lookup(),试图为节点建立起其dentry结构和inode结构。可是由于/proc/self并没有一个固定的proc_dir_entry结构,所以对proc_lookup()的调用必然会失败,因而会进一步调用proc_pid_lookup(),代码如下:
/*
 * Lookup routine for names directly under /proc.
 * Tries proc_lookup() first; if that returns NULL the name was resolved
 * there and NULL is returned. Otherwise the result of proc_pid_lookup()
 * is returned.
 */
static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry)
{
	if (dir->i_ino == PROC_ROOT_INO) { /* check for safety... */
		/* refresh the directory's link count: proc_root.nlink plus nr_threads */
		int nlink = proc_root.nlink;
		nlink += nr_threads;
		dir->i_nlink = nlink;
	}
	if (!proc_lookup(dir, dentry))	// /proc/self has no fixed proc_dir_entry, so this proc_lookup() call is bound to fail
		return NULL;
	return proc_pid_lookup(dir, dentry);	// so this function gets called
}
/*
 * Search the proc_dir_entry children of `dir` for dentry's name.
 * On a match, an inode is requested from proc_get_inode(); if one is
 * obtained it is attached to the dentry with d_add() and NULL is returned.
 * Returns ERR_PTR(-ENOENT) when no entry matches, and ERR_PTR(-EINVAL) when
 * an entry matched but proc_get_inode() produced no inode.
 */
struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry)
{
	struct inode *inode;
	struct proc_dir_entry * de;
	int error;
	error = -ENOENT;
	inode = NULL;
	de = (struct proc_dir_entry *) dir->u.generic_ip;
	if (de) {	// the /proc/self node is not found in this list
		for (de = de->subdir; de ; de = de->next) {
			if (!de || !de->low_ino)
				continue;
			if (de->namelen != dentry->d_name.len)
				continue;
			if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
				int ino = de->low_ino;
				error = -EINVAL;	/* only reported if no inode comes back below */
				inode = proc_get_inode(dir->i_sb, ino, de);
				break;
			}
		}
	}
	if (inode) {
		dentry->d_op = &proc_dentry_operations;
		d_add(dentry, inode);
		return NULL;
	}
	return ERR_PTR(error);	// return the error code
}
/*
 * Resolve a name under /proc that is either the literal "self" or a
 * decimal pid.
 * "self" gets a fresh symlink inode using proc_self_inode_operations.
 * A pid gets a directory inode using proc_base_inode_operations.
 * Returns NULL after d_add() on success, ERR_PTR(-ENOMEM) if the "self"
 * inode cannot be allocated, and ERR_PTR(-ENOENT) for every other failure.
 */
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry)
{
	unsigned int pid, c;
	struct task_struct *task;
	const char *name;
	struct inode *inode;
	int len;
	pid = 0;
	name = dentry->d_name.name;
	len = dentry->d_name.len;
	if (len == 4 && !memcmp(name, "self", 4)) {	// this branch runs: name equals "self"
		inode = new_inode(dir->i_sb);
		if (!inode)
			return ERR_PTR(-ENOMEM);
		inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
		inode->i_ino = fake_ino(0, PROC_PID_INO);
		inode->u.proc_i.file = NULL;
		inode->u.proc_i.task = NULL;
		inode->i_mode = S_IFLNK|S_IRWXUGO;	/* marks the inode as a symbolic link */
		inode->i_uid = inode->i_gid = 0;
		inode->i_size = 64;
		inode->i_op = &proc_self_inode_operations;
		d_add(dentry, inode);
		return NULL;	// returns here
	}
	/* parse the name as a decimal pid, one character per iteration */
	while (len-- > 0) {
		c = *name - '0';
		name++;
		if (c > 9)	/* c is unsigned, so any non-digit character fails this test */
			goto out;
		if (pid >= MAX_MULBY10)	/* NOTE(review): presumably guards the multiplication below against overflow - confirm */
			goto out;
		pid *= 10;
		pid += c;
		if (!pid)	/* value is still 0, i.e. the name starts with '0' */
			goto out;
	}
	read_lock(&tasklist_lock);
	task = find_task_by_pid(pid);
	if (task)
		get_task_struct(task);	/* hold the task while the lock is dropped */
	read_unlock(&tasklist_lock);
	if (!task)
		goto out;
	inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_INO);
	free_task_struct(task);	/* pairs with get_task_struct() above */
	if (!inode)
		goto out;
	inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
	inode->i_op = &proc_base_inode_operations;
	inode->i_fop = &proc_base_operations;
	inode->i_nlink = 3;
	inode->i_flags|=S_IMMUTABLE;
	dentry->d_op = &pid_base_dentry_operations;
	d_add(dentry, inode);
	return NULL;
out:
	return ERR_PTR(-ENOENT);
}
其中proc_self_inode_operations结构定义如下:
/*
 * inode operations installed on the "self" inode by proc_pid_lookup().
 * Only the two symlink methods are provided.
 */
static struct inode_operations proc_self_inode_operations = {
	readlink:proc_self_readlink,
	follow_link:proc_self_follow_link,
};
还是第二轮循环,从proc_root_lookup返回到path_walk中以后,接着要检查和处理两件事,第一件是新找到的节点是否为安装点;第二件就是它是否是一个连接节点。这正是我们在这里所关心的,因为/proc/self就是个连接节点。继续看path_walk,代码如下:
/* Excerpt from path_walk(); the if-block is left open in this excerpt. */
if (inode->i_op->follow_link) {	// is this pointer non-NULL? For /proc/self it was set in proc_pid_lookup() through proc_self_inode_operations (not in ext2_read_inode)
	err = do_follow_link(dentry, nd);
/*
 * Follow the symlink behind `dentry` by calling its inode's follow_link
 * method. current->link_count is raised for the duration of the call.
 * If it has already reached 8, nd is released and -ELOOP is returned
 * without calling the method.
 */
static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)	// dentry is the dentry of the /proc/self node
{
	int err;
	if (current->link_count >= 8)
		goto loop;
	current->link_count++;
	UPDATE_ATIME(dentry->d_inode);
	err = dentry->d_inode->i_op->follow_link(dentry, nd);	// proc_self_follow_link
	current->link_count--;
	return err;
loop:
	path_release(nd);
	return -ELOOP;
}
entry->d_inode->i_op->follow_link指向proc_self_follow_link,代码如下:
/*
 * follow_link method of /proc/self: formats current->pid as a decimal
 * string and follows that string as the link target.
 */
static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	char tmp[30];
	sprintf(tmp, "%d", current->pid);
	return vfs_follow_link(nd,tmp);
}
/* Thin wrapper that forwards to __vfs_follow_link(). */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
	return __vfs_follow_link(nd, link);
}
/*
 * Continue the walk held in `nd` along the link text `link`.
 * Returns the decoded error if `link` is an error pointer or if the
 * name buffer cannot be obtained (nd is released in both cases).
 * Otherwise returns path_walk()'s result, or 0.
 */
static inline int
__vfs_follow_link(struct nameidata *nd, const char *link)
{
	int res = 0;
	char *name;
	if (IS_ERR(link))
		goto fail;
	if (*link == '/') {
		/* absolute target: give up the current position first */
		path_release(nd);
		if (!walk_init_root(link, nd))
			/* weird __emul_prefix() stuff did it */
			goto out;
	}
	res = path_walk(link, nd);
out:
	/* nothing more to do while link_count is non-zero, on error, or when the last component is not LAST_NORM */
	if (current->link_count || res || nd->last_type!=LAST_NORM)
		return res;
	/*
	 * If it is an iterative symlinks resolution in open_namei() we
	 * have to copy the last component. And all that crap because of
	 * bloody create() on broken symlinks. Furrfu...
	 */
	name = __getname();
	if (IS_ERR(name))
		goto fail_name;
	strcpy(name, nd->last.name);
	nd->last.name = name;
	return 0;
fail_name:
	link = name;	/* so that PTR_ERR(link) below reports __getname()'s error */
fail:
	path_release(nd);
	return PTR_ERR(link);
}
在__vfs_follow_link()中会调用path_walk()来寻找连接的目标节点,所以又会调用其父节点/proc的lookup函数,即proc_root_lookup(),不同的只是这次寻找的不是"self",而是当前进程的pid字符串。
/*
 * Second pass through proc_pid_lookup(): the name is now the current
 * process's pid as a decimal string, so the "self" branch is skipped and
 * the numeric path runs.
 * Returns NULL after d_add() on success, ERR_PTR(-ENOENT) on failure.
 */
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry)
{
	unsigned int pid, c;
	struct task_struct *task;
	const char *name;
	struct inode *inode;
	int len;
	pid = 0;
	name = dentry->d_name.name;
	len = dentry->d_name.len;
	if (len == 4 && !memcmp(name, "self", 4)) {	// not taken this time: name is not "self"
		inode = new_inode(dir->i_sb);
		if (!inode)
			return ERR_PTR(-ENOMEM);
		inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
		inode->i_ino = fake_ino(0, PROC_PID_INO);
		inode->u.proc_i.file = NULL;
		inode->u.proc_i.task = NULL;
		inode->i_mode = S_IFLNK|S_IRWXUGO;
		inode->i_uid = inode->i_gid = 0;
		inode->i_size = 64;
		inode->i_op = &proc_self_inode_operations;
		d_add(dentry, inode);
		return NULL;	// returns here (branch not taken on this pass)
	}
	while (len-- > 0) {	// this part runs
		c = *name - '0';
		name++;
		if (c > 9)	/* c is unsigned, so any non-digit character fails this test */
			goto out;
		if (pid >= MAX_MULBY10)
			goto out;
		pid *= 10;
		pid += c;
		if (!pid)	/* value is still 0, i.e. the name starts with '0' */
			goto out;
	}
	read_lock(&tasklist_lock);
	task = find_task_by_pid(pid);
	if (task)
		get_task_struct(task);
	read_unlock(&tasklist_lock);
	if (!task)
		goto out;
	inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_INO);
	free_task_struct(task);	/* pairs with get_task_struct() above */
	if (!inode)
		goto out;
	inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
	inode->i_op = &proc_base_inode_operations;	// note this pointer, it is used shortly
	inode->i_fop = &proc_base_operations;
	inode->i_nlink = 3;
	inode->i_flags|=S_IMMUTABLE;
	dentry->d_op = &pid_base_dentry_operations;
	d_add(dentry, inode);
	return NULL;
out:
	return ERR_PTR(-ENOENT);
}
/*
 * inode operations installed on a per-pid directory inode by
 * proc_pid_lookup(); only lookup is provided.
 */
static struct inode_operations proc_base_inode_operations = {
	lookup:proc_base_lookup,
};
/*
 * Allocate an inode on `sb` and bind it to `task`.
 * Returns the inode, or NULL if new_inode() fails or task->p_pptr is NULL
 * (the inode is then dropped with iput()).
 * The inode number is fake_ino(task->pid, ino). Owner is 0:0, replaced by
 * the task's euid/egid when `ino` is PROC_PID_INO or task->dumpable is set.
 */
proc_pid_make_inode,为进程创建一个inode结构static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino)
{
	struct inode * inode;
	/* We need a new inode */
	inode = new_inode(sb);
	if (!inode)
		goto out;
	/* Common stuff */
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
	inode->i_ino = fake_ino(task->pid, ino);
	inode->u.proc_i.file = NULL;
	/*
	 * grab the reference to task.
	 */
	inode->u.proc_i.task = task;	// the current process's task is stored here
	get_task_struct(task);
	if (!task->p_pptr)
		goto out_unlock;
	inode->i_uid = 0;
	inode->i_gid = 0;
	if (ino == PROC_PID_INO || task->dumpable) {
		inode->i_uid = task->euid;
		inode->i_gid = task->egid;
	}
out:
	return inode;	/* NULL here when new_inode() failed */
out_unlock:
	iput(inode);
	return NULL;
}
从path_walk返回后,nd->dentry已指向代表着当前进程的目录节点的dentry结构,之后层层返回到proc_self_follow_link,最后返回到主path_walk的第二次循环中,开始执行第三次循环。
第三次循环,最后一个节点是"cwd",这一次所搜索的节点已经是路径名中的最后一个节点,所以转到last_component的地方,同样也是在real_lookup()中通过父节点的inode_operations结构中的lookup函数指针执行实际的操作,也就是proc_base_lookup,代码如下:
/*
 * Lookup routine for names inside a per-pid directory.
 * Finds the name in base_stuff[], creates an inode for it with
 * proc_pid_make_inode(), then installs the operations that match the
 * entry's type.
 * Returns NULL after d_add() on success, ERR_PTR(-ENOENT) if the name is
 * not in base_stuff[], ERR_PTR(-EINVAL) if no inode could be made or the
 * type is not handled by the switch.
 */
static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode;
	int error;
	struct task_struct *task = dir->u.proc_i.task;	// fetch the current process's task
	struct pid_entry *p;
	error = -ENOENT;
	inode = NULL;
	for (p = base_stuff; p->name; p++) {	// find "cwd" in base_stuff; base_stuff is shown below
		if (p->len != dentry->d_name.len)
			continue;
		if (!memcmp(dentry->d_name.name, p->name, p->len))
			break;
	}
	if (!p->name)	/* reached the terminating entry: no match */
		goto out;
	error = -EINVAL;
	inode = proc_pid_make_inode(dir->i_sb, task, p->type);	// p->type is 5
	if (!inode)
		goto out;
	inode->i_mode = p->mode;
	/*
	 * Yes, it does not scale. And it should not. Don't add
	 * new entries into /proc/PID/ without very good reasons.
	 */
	switch(p->type) {
		case PROC_PID_FD:
			inode->i_nlink = 2;
			inode->i_op = &proc_fd_inode_operations;
			inode->i_fop = &proc_fd_operations;
			break;
		case PROC_PID_EXE:
			inode->i_op = &proc_pid_link_inode_operations;
			inode->u.proc_i.op.proc_get_link = proc_exe_link;
			break;
		case PROC_PID_CWD:
			inode->i_op = &proc_pid_link_inode_operations;	// both assignments matter
			inode->u.proc_i.op.proc_get_link = proc_cwd_link;	// both assignments matter; inode->u.proc_i is a proc_inode_info, whose layout is shown below
			break;
		case PROC_PID_ROOT:
			inode->i_op = &proc_pid_link_inode_operations;
			inode->u.proc_i.op.proc_get_link = proc_root_link;
			break;
		case PROC_PID_ENVIRON:
			inode->i_fop = &proc_info_file_operations;
			inode->u.proc_i.op.proc_read = proc_pid_environ;
			break;
		case PROC_PID_STATUS:
			inode->i_fop = &proc_info_file_operations;
			inode->u.proc_i.op.proc_read = proc_pid_status;
			break;
		case PROC_PID_STAT:
			inode->i_fop = &proc_info_file_operations;
			inode->u.proc_i.op.proc_read = proc_pid_stat;
			break;
		case PROC_PID_CMDLINE:
			inode->i_fop = &proc_info_file_operations;
			inode->u.proc_i.op.proc_read = proc_pid_cmdline;
			break;
		case PROC_PID_STATM:
			inode->i_fop = &proc_info_file_operations;
			inode->u.proc_i.op.proc_read = proc_pid_statm;
			break;
		case PROC_PID_MAPS:
			inode->i_fop = &proc_maps_operations;
			break;
#ifdef CONFIG_SMP
		case PROC_PID_CPU:
			inode->i_fop = &proc_info_file_operations;
			inode->u.proc_i.op.proc_read = proc_pid_cpu;
			break;
#endif
		case PROC_PID_MEM:
			inode->i_op = &proc_mem_inode_operations;
			inode->i_fop = &proc_mem_operations;
			break;
		default:
			printk("procfs: impossible type (%d)",p->type);
			iput(inode);
			return ERR_PTR(-EINVAL);
	}
	dentry->d_op = &pid_dentry_operations;
	d_add(dentry, inode);
	return NULL;
out:
	return ERR_PTR(error);
}
/* One row of the base_stuff[] table that proc_base_lookup() searches. */
struct pid_entry {
	int type;	/* compared against the PROC_PID_* cases in proc_base_lookup() */
	int len;	/* length of name, compared with the dentry's name length */
	char *name;	/* entry name; NULL marks the end of the table */
	mode_t mode;	/* copied into inode->i_mode */
};
/*
 * Entry types used for per-pid inodes. Values count up from
 * PROC_PID_INO = 2, so PROC_PID_STATUS is 3, PROC_PID_MEM is 4 and
 * PROC_PID_CWD is 5.
 */
enum pid_directory_inos {
	PROC_PID_INO = 2,
	PROC_PID_STATUS,
	PROC_PID_MEM,
	PROC_PID_CWD,
	PROC_PID_ROOT,
	PROC_PID_EXE,
	PROC_PID_FD,
	PROC_PID_ENVIRON,
	PROC_PID_CMDLINE,
	PROC_PID_STAT,
	PROC_PID_STATM,
	PROC_PID_MAPS,
	PROC_PID_CPU,
	PROC_PID_FD_DIR = 0x8000,	/* 0x8000-0xffff */
};
/*
 * E(type, name, mode) builds one pid_entry initializer. The len field is
 * sizeof(name)-1, i.e. the length of the string literal without its NUL.
 */
#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
/*
 * Names that proc_base_lookup() accepts inside a per-pid directory,
 * with the type and mode each one gets. The all-zero last row has a
 * NULL name, which is what ends the search loop.
 */
static struct pid_entry base_stuff[] = {
	E(PROC_PID_FD,"fd",S_IFDIR|S_IRUSR|S_IXUSR),
	E(PROC_PID_ENVIRON,"environ",S_IFREG|S_IRUSR),
	E(PROC_PID_STATUS,"status",S_IFREG|S_IRUGO),
	E(PROC_PID_CMDLINE,"cmdline",S_IFREG|S_IRUGO),
	E(PROC_PID_STAT,"stat",S_IFREG|S_IRUGO),
	E(PROC_PID_STATM,"statm",S_IFREG|S_IRUGO),
#ifdef CONFIG_SMP
	E(PROC_PID_CPU,"cpu",S_IFREG|S_IRUGO),
#endif
	E(PROC_PID_MAPS,"maps",S_IFREG|S_IRUGO),
	E(PROC_PID_MEM,"mem",S_IFREG|S_IRUSR|S_IWUSR),
	E(PROC_PID_CWD,"cwd",S_IFLNK|S_IRWXUGO),
	E(PROC_PID_ROOT,"root",S_IFLNK|S_IRWXUGO),
	E(PROC_PID_EXE,"exe",S_IFLNK|S_IRWXUGO),
	{0,0,NULL,0}
};
/* The helper macro is only meant for the table above. */
#undef E

/*
 * Data kept in inode->u.proc_i for procfs inodes.
 * The op union holds whichever callback the entry type needs:
 * proc_get_link for the link entries, proc_read for the info files.
 */
struct proc_inode_info {
	struct task_struct *task;	/* task the inode belongs to; NULL for "self" */
	int type;
	union {
		int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **);
		int (*proc_read)(struct task_struct *task, char *page);
	} op;
	struct file *file;
};
从proc_base_lookup()经由real_lookup()返回到path_walk时,返回值dentry已经指向了这个特定"cwd"节点dentry结构。但是接着同样要受到对其inode结构中的i_op指针以及相应inode_operations结构的指针follow_link的检验,看path_walk的代码:
/* Excerpt from path_walk(); the if-block is left open in this excerpt. */
inode = dentry->d_inode;
if ((lookup_flags & LOOKUP_FOLLOW)	// unlike the first and second iterations, the LOOKUP_FOLLOW flag must be set here
    && inode && inode->i_op && inode->i_op->follow_link) {
	err = do_follow_link(dentry, nd);
/*
 * Follow the symlink behind `dentry` by calling its inode's follow_link
 * method. current->link_count is raised for the duration of the call.
 * If it has already reached 8, nd is released and -ELOOP is returned
 * without calling the method.
 */
static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	int err;
	if (current->link_count >= 8)
		goto loop;
	current->link_count++;
	UPDATE_ATIME(dentry->d_inode);
	err = dentry->d_inode->i_op->follow_link(dentry, nd);
	current->link_count--;
	return err;
loop:
	path_release(nd);
	return -ELOOP;
}
dentry->d_inode->i_op指向了proc_pid_link_inode_operations结构,是在这里设置的:
case PROC_PID_CWD:
inode->i_op = &proc_pid_link_inode_operations;//两者很重要
inode->u.proc_i.op.proc_get_link = proc_cwd_link;//两者很重要,inode->u.proc_i指向了proc_inode_info结构static struct inode_operations proc_pid_link_inode_operations = {
readlink:proc_pid_readlink,
follow_link:proc_pid_follow_link
}; dentry->d_inode->i_op->follow_link(dentry, nd),proc_pid_follow_link(dentry, nd),也就是代码如下:
/*
 * follow_link method for the per-pid link entries.
 * Releases nd, then requires that current->fsuid equals the inode's uid
 * or that the caller has CAP_DAC_OVERRIDE (else -EACCES), then runs
 * proc_check_root(). If both pass, the inode's proc_get_link callback
 * fills nd->dentry and nd->mnt and nd->last_type is set to LAST_BIND.
 * Returns 0 or the first error met.
 */
static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)	// dentry is the dentry of the "cwd" node
{
	struct inode *inode = dentry->d_inode;
	int error = -EACCES;
	/* We don't need a base pointer in the /proc filesystem */
	path_release(nd);
	if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
		goto out;
	error = proc_check_root(inode);
	if (error)
		goto out;
	error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);	// i.e. proc_cwd_link
	nd->last_type = LAST_BIND;	/* set whether or not proc_get_link succeeded */
out:
	return error;
}
inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt),也就是proc_cwd_link(inode, &nd->dentry, &nd->mnt),代码如下:
/*
 * proc_get_link callback for "cwd": stores the working directory of the
 * inode's task in *dentry and the matching mount in *mnt, taking a
 * reference on each with dget()/mntget().
 * Returns 0 on success, -ENOENT if the task has no fs_struct.
 */
static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
{
	struct fs_struct *fs;
	int result = -ENOENT;
	task_lock(inode->u.proc_i.task);
	fs = inode->u.proc_i.task->fs;	// task points to the process's task_struct, from which its fs_struct is obtained
	if(fs)
		atomic_inc(&fs->count);	/* keep fs alive after the task lock is dropped */
	task_unlock(inode->u.proc_i.task);
	if (fs) {
		read_lock(&fs->lock);
		*mnt = mntget(fs->pwdmnt);	// nd->mnt now points to the vfsmount of the mount that holds this directory
		*dentry = dget(fs->pwd);	// nd->dentry now points to the dentry of the process's "current working directory"
		read_unlock(&fs->lock);
		result = 0;
		put_fs_struct(fs);	/* drop the count taken above */
	}
	return result;
}
当从proc_cwd_link()经由do_follow_link()返回到path_walk()中时,nameidata结构中指针已经指向最终的目标,即当前进程的当前工作目录。
也就是说,最初的__user_walk()到此返回:
/*
 * Resolve the user-space path `name` into `nd`.
 * Returns the error encoded in getname()'s result if that is an error
 * pointer; otherwise path_walk()'s result, or 0 when path_init() returns 0
 * and no walk is performed.
 */
int __user_walk(const char *name, unsigned flags, struct nameidata *nd)
{
	char *tmp;
	int err;
	tmp = getname(name);	// allocate a page in kernel space and copy the file name into it from user space
	err = PTR_ERR(tmp);
	if (!IS_ERR(tmp)) {
		err = 0;
		if (path_init(tmp, flags, nd))	// walk only when path_init() returns non-zero
			err = path_walk(tmp, nd);
		putname(tmp);	// release the buffer obtained from getname()
	}
	return err;
}
此时nd->mnt指向了该目录所在设备安装时的vfsmount结构,nd->dentry指向了该进程的"当前工作目录"的dentry结构。