proc源码解析（七）--查找目录项

最新推荐文章于 2022-12-30 15:52:45 发布

mcgrady_tracy

最新推荐文章于 2022-12-30 15:52:45 发布

阅读量2.5k

点赞数

分类专栏： linux驱动文章标签： struct null reference immutable security buffer

linux驱动专栏收录该内容

31 篇文章 3 订阅

订阅专栏

前边几节讲了proc的实现和管理，还有一个重要的功能没有提到，那就是proc目录项的查找。
在查找的过程中，用户程序把proc看作是普通文件系统里的文件;也就是说当查找目录项时，do_lookup会调用real_lookup函数执行与文件系统相关的查找，real_lookup则会调用proc根节点inode的 proc_root_inode_operations中的proc_root_lookup函数，其定义为：

/*
 * inode operations installed on the /proc root inode: name lookups are
 * routed to proc_root_lookup and attribute queries to proc_root_getattr.
 */
static const struct inode_operations proc_root_inode_operations = {
.lookup = proc_root_lookup,
.getattr = proc_root_getattr,
};

proc_root_lookup函数从/proc开始查找。该函数的实现比较简单，主要由两个函数分别执行不同类型的查找过程，其流程图如下：

proc_lookup:查找proc中的非进程文件目录项
proc_pid_lookup:查找进程相关的文件目录项
其函数定义为：

/*
 * Lookup handler for the /proc root directory.
 *
 * proc_lookup() is tried first; when it returns NULL that NULL is handed
 * straight back to the caller. Any other result (proc_lookup() otherwise
 * returns an error pointer) falls through to proc_pid_lookup(), whose
 * return value is passed on unchanged.
 */
static struct dentry *proc_root_lookup(struct inode * dir,
                        struct dentry * dentry, struct nameidata *nd)
{
    struct dentry *generic = proc_lookup(dir, dentry, nd);

    if (generic == NULL)
        return NULL;

    return proc_pid_lookup(dir, dentry, nd);
}

proc_lookup实现
proc_lookup的实现比较简单，这里就结合代码说明，不再画流程图。

/*
 * Look up a non-process entry below the proc directory inode `dir`.
 *
 * Walks the subdir list of dir's proc_dir_entry looking for an entry whose
 * name matches dentry->d_name. On a match an inode is requested from
 * proc_get_inode() and, if obtained, attached to the dentry.
 *
 * Returns NULL when the dentry was instantiated, ERR_PTR(-ENOENT) when no
 * entry matched, and ERR_PTR(-EINVAL) when an entry matched but no inode
 * was obtained for it.
 */
struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
{
   struct inode *inode = NULL;
   struct proc_dir_entry * de;
   int error = -ENOENT;

   /* Big kernel lock, then the lock protecting the subdir lists. */
   lock_kernel();
   spin_lock(&proc_subdir_lock);
   /* proc_dir_entry that corresponds to the directory inode. */
   de = PDE(dir);
   if (de) {
       /* Scan the children for one whose name equals the dentry name. */
       for (de = de->subdir; de ; de = de->next) {
           /* Different length: cannot match, try the next child. */
           if (de->namelen != dentry->d_name.len)
               continue;
           if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
               unsigned int ino;
               /*
                * If the entry provides a shadow_proc hook, continue with
                * whatever entry it returns for the current task.
                */
               if (de->shadow_proc)
                   de = de->shadow_proc(current, de);
               ino = de->low_ino;
               /* Hold a reference on de while the spinlock is dropped. */
               de_get(de);
               spin_unlock(&proc_subdir_lock);
               /* From here on a failure means "matched, but no inode". */
               error = -EINVAL;
               inode = proc_get_inode(dir->i_sb, ino, de);
               /* Re-take the lock so the unlock below stays balanced. */
               spin_lock(&proc_subdir_lock);
               break;
           }
       }
   }

   spin_unlock(&proc_subdir_lock);
   unlock_kernel();
   if (inode) {
       /* Install the proc dentry operations and hash the dentry. */
       dentry->d_op = &proc_dentry_operations;
       d_add(dentry, inode);
       return NULL;
   }
   /*
    * de is NULL here when nothing matched (or dir had no proc_dir_entry);
    * only the match path took a reference with de_get(), so only drop one
    * when there is an entry to drop it on.
    */
   if (de)
       de_put(de);
   return ERR_PTR(error);

}

proc_pid_lookup实现
不论是设计之初还是现在，proc文件系统的主要任务便是输出系统进程的详细信息。
proc_pid_lookup函数的目标便是产生一个inode以便进行与PID相关的进一步操作，这是因为/proc/pid的inode包含了所有与进程相关的信息的文件。该函数针对两类进程(当前进程和其它进程)进行不同的操作，这也把函数分成截然不同的两个步骤。该函数的执行流程图如下：

函数首先判断进程名是否是self，如果是就执行当前进程的操作，从2.6.20之后，与当前进程相关的操作被集中到一个名为proc_base_lookup的函数中,关于该函数的执行后边会详细分析，现在先搞定proc_pid_lookup函数。
name_to_int：将进程号由字符串转换成整型
find_task_by_pid_ns:查找相应进程的task_struct，该函数会在进程管理一章中详细讲述
proc_pid_instantiate:该函数会调用proc_pid_make_inode创建一个inode，然后设置inode的各个域。也就是说该函数才是proc_pid_lookup的主要处理过程。
proc_pid_lookup的实现如下：

/*
 * Look up a process-related entry in the /proc root.
 *
 * proc_base_lookup() gets the first chance; its result is returned
 * unless it is the error pointer for -ENOENT. Otherwise the dentry name is
 * converted to a number with name_to_int(), the task with that id is
 * searched for in the pid namespace stored in the superblock, and
 * proc_pid_instantiate() builds the dentry for it.
 *
 * Returns whatever proc_base_lookup() or proc_pid_instantiate() produced;
 * when the name is not numeric or no task is found, the -ENOENT error
 * pointer from proc_base_lookup() is returned.
 */
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
{
    struct dentry *result;
    struct pid_namespace *ns;
    struct task_struct *task;
    unsigned tgid;

    /* Entries from the base table are handled first. */
    result = proc_base_lookup(dir, dentry);
    if (!IS_ERR(result))
        return result;
    if (PTR_ERR(result) != -ENOENT)
        return result;

    /* Convert the name into a numeric id; ~0U signals failure. */
    tgid = name_to_int(dentry);
    if (tgid == ~0U)
        return result;

    ns = dentry->d_sb->s_fs_info;

    /* Find the task under RCU and pin it before leaving the section. */
    rcu_read_lock();
    task = find_task_by_pid_ns(tgid, ns);
    if (task)
        get_task_struct(task);
    rcu_read_unlock();

    if (!task)
        return result;

    result = proc_pid_instantiate(dir, dentry, task, NULL);
    put_task_struct(task);
    return result;
}

在proc_pid_lookup函数中最重要的函数便是 proc_pid_instantiate，该函数完成了大部分的功能，现在分析如下：

/*
 * Create the directory inode for `task` and bind it to `dentry`.
 *
 * The inode comes from proc_pid_make_inode() and is set up as a
 * read/search-only directory using the tgid base inode and file
 * operations. `ptr` is not used.
 *
 * Returns NULL on success, or ERR_PTR(-ENOENT) when no inode could be
 * created or pid_revalidate() rejects the freshly added dentry.
 */
static struct dentry *proc_pid_instantiate(struct inode *dir,
                       struct dentry * dentry,
                       struct task_struct *task, const void *ptr)
{
   struct inode *inode = proc_pid_make_inode(dir->i_sb, task);

   if (!inode)
       return ERR_PTR(-ENOENT);

   inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
   /* inode operations for this directory */
   inode->i_op = &proc_tgid_base_inode_operations;
   /* file operations for this directory */
   inode->i_fop = &proc_tgid_base_operations;
   inode->i_flags |= S_IMMUTABLE;
   inode->i_nlink = 5;
#ifdef CONFIG_SECURITY
   inode->i_nlink += 1;
#endif

   /* dentry operations, then hash the dentry with its inode */
   dentry->d_op = &pid_dentry_operations;
   d_add(dentry, inode);

   /* Close the race of the process dying before we return the dentry */
   if (!pid_revalidate(dentry, NULL))
       return ERR_PTR(-ENOENT);

   return NULL;
}

/*
 * Allocate an inode on `sb` and tie it to `task`.
 *
 * The three timestamps are set to CURRENT_TIME, the default proc inode
 * operations are installed, and a pid reference for the task is stored in
 * the proc_inode. Ownership is 0/0 unless task_dumpable() says otherwise,
 * in which case the task's euid/egid are used.
 *
 * Returns the inode, or NULL when new_inode() fails or no pid reference
 * could be obtained (the inode is released with iput() in that case).
 */
static struct inode *proc_pid_make_inode(struct super_block * sb,
                                                   struct task_struct *task)
{
   struct proc_inode *ei;
   struct inode *inode = new_inode(sb);

   if (!inode)
       return NULL;

   ei = PROC_I(inode);
   inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
   inode->i_op = &proc_def_inode_operations;

   /* grab the reference to task. */
   ei->pid = get_task_pid(task, PIDTYPE_PID);
   if (!ei->pid) {
       iput(inode);
       return NULL;
   }

   /* Default owner is 0/0; a dumpable task exposes its own ids. */
   inode->i_uid = 0;
   inode->i_gid = 0;
   if (task_dumpable(task)) {
       inode->i_uid = task->euid;
       inode->i_gid = task->egid;
   }
   security_task_to_inode(task, inode);

   return inode;
}

在上边几个函数中，我们只是看到了inode的创建以及设置过程，也就是说最多只能查到以进程号命名的子目录，再向下的查找就没有了。例如对于进程1，以上的过程只能找到/proc/1，而cwd、environ、cmdline等文件还没有找到。
root@xuhengyang:/proc/1# ls -l
dr-xr-xr-x 2 root root 0 2009-11-21 19:06 attr
-r-------- 1 root root 0 2009-11-21 19:06 auxv
-r--r--r-- 1 root root 0 2009-11-21 19:06 cgroup
--w------- 1 root root 0 2009-11-21 19:06 clear_refs
-r--r--r-- 1 root root 0 2009-11-21 19:06 cmdline
-rw-r--r-- 1 root root 0 2009-11-21 19:06 coredump_filter
lrwxrwxrwx 1 root root 0 2009-11-21 19:06 cwd -> /
。。。

   其实前边在设置inode的操作函数时，有一处我们没有细说。现在先来看看inode的操作函数集，也就是代码中的 inode->i_op = &proc_tgid_base_inode_operations;（原文以红色标出），它的定义如下：
/*
 * inode operations that proc_pid_instantiate() installs on a per-process
 * directory inode; lookups inside that directory go to
 * proc_tgid_base_lookup.
 */
static const struct inode_operations proc_tgid_base_inode_operations = {
    .lookup    = proc_tgid_base_lookup,
    .getattr    = pid_getattr,
    .setattr    = proc_setattr,
};
proc_tgid_base_lookup函数才是真正的查找函数。当需要查找进程目录里的文件时，该函数会接着以上的过程进行查找。该函数的程序流程如下图：

proc_tgid_base_lookup函数的实现很简单，但需要注意它传递的参数

/*
 * Lookup inside a per-process directory: forwards to proc_pident_lookup()
 * with the tgid_base_stuff table and its element count. `nd` is not used.
 */
static struct dentry *proc_tgid_base_lookup(struct inode *dir,
                        struct dentry *dentry, struct nameidata *nd)
{
    const unsigned int count = ARRAY_SIZE(tgid_base_stuff);

    return proc_pident_lookup(dir, dentry, tgid_base_stuff, count);
}
/* One row of a lookup table such as tgid_base_stuff or proc_base_stuff. */
struct pid_entry {
    char *name;                             /* compared against dentry->d_name.name */
    int len;                                /* compared against dentry->d_name.len */
    mode_t mode;                            /* copied into inode->i_mode */
    const struct inode_operations *iop;     /* installed as inode->i_op when non-NULL */
    const struct file_operations *fop;      /* installed as inode->i_fop when non-NULL */
    union proc_op op;                       /* copied into the proc_inode's op field */
};
/*
 * Table of the entries that can be looked up inside a per-process
 * directory; proc_tgid_base_lookup() hands it to proc_pident_lookup().
 * The DIR/REG/INF macros are defined elsewhere. The "..." line marks
 * entries elided from this excerpt.
 */
static const struct pid_entry tgid_base_stuff[] = {
    DIR("task",       S_IRUGO|S_IXUGO, task),
    DIR("fd",         S_IRUSR|S_IXUSR, fd),
    DIR("fdinfo",     S_IRUSR|S_IXUSR, fdinfo),
    REG("environ",    S_IRUSR, environ),
    INF("auxv",       S_IRUSR, pid_auxv),
    INF("status",     S_IRUGO, pid_status),
    INF("limits",    S_IRUSR, pid_limits),
#ifdef CONFIG_SCHED_DEBUG
    REG("sched",      S_IRUGO|S_IWUSR, pid_sched),
#endif
    INF("cmdline",    S_IRUGO, pid_cmdline),
    ...

};

看看这个数组中的task，fd，cmdline等字符串，是不是很熟悉，不错，正是/proc/1目录下的文件名字。DIR，REG，INF等是一些宏，具体实现请参考源代码。

这个函数其实就是一个包装函数，真正干活的是 proc_pident_lookup，所以我们来看看它的实现。阅读它的代码时，要时刻结合 proc_tgid_base_lookup 传给它的参数。

static struct dentry *proc_pident_lookup(struct inode *dir,
                    struct dentry *dentry,
                    const struct pid_entry *ents,
                    unsigned int nents)
{
    struct inode *inode;
    struct dentry *error;
    struct task_struct *task = get_proc_task(dir);
    const struct pid_entry *p, *last;

    error = ERR_PTR(-ENOENT);
    inode = NULL;

    if (!task)
        goto out_no_task;

    /*
    * Yes, it does not scale. And it should not. Don't add
    * new entries into /proc/<tgid>/ without very good reasons.
    */
    last = &ents[nents - 1];
    for (p = ents; p <= last; p++) {
        if (p->len != dentry->d_name.len)
            continue;
        if (!memcmp(dentry->d_name.name, p->name, p->len))
            break;
    }
    if (p > last)
        goto out;

    error = proc_pident_instantiate(dir, dentry, task, p);
out:
    put_task_struct(task);
out_no_task:
    return error;
}

这个函数的主要实现又是调用 proc_pident_instantiate函数

/*
 * Create the inode for one pid_entry and bind it to `dentry`.
 *
 * `ptr` points at the struct pid_entry describing the file; its mode,
 * optional inode/file operations and op value are copied onto an inode
 * obtained from proc_pid_make_inode().
 *
 * Returns NULL on success, or ERR_PTR(-EINVAL) when no inode could be
 * created or pid_revalidate() rejects the freshly added dentry.
 */
static struct dentry *proc_pident_instantiate(struct inode *dir,
    struct dentry *dentry, struct task_struct *task, const void *ptr)
{
    const struct pid_entry *entry = ptr;
    struct proc_inode *ei;
    struct inode *inode;

    inode = proc_pid_make_inode(dir->i_sb, task);
    if (!inode)
        return ERR_PTR(-EINVAL);

    ei = PROC_I(inode);
    inode->i_mode = entry->mode;
    if (S_ISDIR(inode->i_mode))
        inode->i_nlink = 2;    /* Use getattr to fix if necessary */
    if (entry->iop)
        inode->i_op = entry->iop;
    if (entry->fop)
        inode->i_fop = entry->fop;
    ei->op = entry->op;

    dentry->d_op = &pid_dentry_operations;
    d_add(dentry, inode);

    /* Close the race of the process dying before we return the dentry */
    if (!pid_revalidate(dentry, NULL))
        return ERR_PTR(-EINVAL);

    return NULL;
}

这两个函数与前边的 proc_lookup 函数和 proc_pid_instantiate 函数实现类似，这里就不再注释，可以和前边对比着来看。
到此处，非当前进程的查找终于结束了，但是还有当前进程的查找。由于它的实现和前边有所不同，所以要单独列出来看。不废话，来喽^_^。

proc_base_lookup实现
该函数的实现在框架上和前边调用该函数的proc_pid_lookup的其它部分类似，它的特别之处就在于self本身是个软链接，它指向当前进程的目录，所以在分析该函数时，我们要特别注意这个地方。我会用红色标出需要注意的地方。

/*
 * Look up `dentry` in the proc_base_stuff table.
 *
 * The task behind `dir` is fetched with get_proc_task() and released again
 * before returning. The first table entry whose length and bytes equal the
 * dentry name is handed to proc_base_instantiate().
 *
 * Returns that function's result, or ERR_PTR(-ENOENT) when there is no
 * task or no entry matches.
 */
static struct dentry *proc_base_lookup(struct inode *dir,
                                                struct dentry *dentry)
{
    const struct pid_entry *const end =
        proc_base_stuff + ARRAY_SIZE(proc_base_stuff);
    const struct pid_entry *entry;
    struct dentry *result = ERR_PTR(-ENOENT);
    struct task_struct *task = get_proc_task(dir);

    if (!task)
        return result;

    /* Lookup the directory entry */
    for (entry = proc_base_stuff; entry < end; entry++) {
        if (entry->len == dentry->d_name.len &&
            !memcmp(dentry->d_name.name, entry->name, entry->len))
            break;
    }

    if (entry < end)
        result = proc_base_instantiate(dir, dentry, task, entry);

    put_task_struct(task);
    return result;
}

函数中的实现是老一套，先来看看红色部分是什么玩意儿吧：

/*
* proc base
*
* These are the directory entries in the root directory of /proc
* that properly belong to the /proc filesystem, as they
* describe something that is process related.
*
* The only entry here is "self": a symlink (S_IFLNK) whose inode
* operations are proc_self_inode_operations.
*/
static const struct pid_entry proc_base_stuff[] = {
NOD("self", S_IFLNK|S_IRWXUGO,
&proc_self_inode_operations, NULL, {}),
};

S_IFLNK说明self是个链接。
该函数和前边过程的不同之处就在于它的操作函数 proc_self_inode_operations

/*
 * inode operations for the "self" symlink listed in proc_base_stuff:
 * reading the link and following it are both served by the
 * proc_self_* helpers.
 */
static const struct inode_operations proc_self_inode_operations = {
    .readlink    = proc_self_readlink,
    .follow_link    = proc_self_follow_link,
};

/*
 * readlink handler for the "self" symlink.
 *
 * Formats task_tgid_vnr(current) (the pid of the current process) as a
 * decimal string and returns the result of vfs_readlink() called with that
 * string and the caller's buffer.
 */
static int proc_self_readlink(struct dentry *dentry,
                            char __user *buffer,    int buflen)
{
    char pid_text[PROC_NUMBUF];

    sprintf(pid_text, "%d", task_tgid_vnr(current));

    return vfs_readlink(dentry, buffer, buflen, pid_text);
}

/*
 * follow_link handler for the "self" symlink.
 *
 * Formats task_tgid_vnr(current) (the pid of the current process) as a
 * decimal string and asks vfs_follow_link() to continue the path walk with
 * that string as the link target. The integer result is returned wrapped
 * in ERR_PTR().
 */
static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
{
    char pid_text[PROC_NUMBUF];
    int rc;

    sprintf(pid_text, "%d", task_tgid_vnr(current));

    /* Resolve the symlink target through the generic VFS helper. */
    rc = vfs_follow_link(nd, pid_text);

    return ERR_PTR(rc);
}

最后忍了忍还是把 proc_base_instantiate函数的实现贴出来吧，但是不再解释了，和前边没多大区别。

/*
 * Create the inode for one proc_base_stuff entry and bind it to `dentry`.
 *
 * `ptr` points at the struct pid_entry describing the file. A fresh inode
 * is allocated on dir's superblock, stamped with CURRENT_TIME, tied to the
 * task through a pid reference, owned by 0/0, and given the entry's mode,
 * optional inode/file operations and op value.
 *
 * Returns NULL on success, or ERR_PTR(-ENOMEM) when the inode cannot be
 * allocated or no pid reference could be obtained.
 */
static struct dentry *proc_base_instantiate(struct inode *dir,
    struct dentry *dentry, struct task_struct *task, const void *ptr)
{
    const struct pid_entry *entry = ptr;
    struct proc_inode *ei;
    struct inode *inode;

    /* Allocate the inode */
    inode = new_inode(dir->i_sb);
    if (!inode)
        return ERR_PTR(-ENOMEM);

    /* Initialize the inode */
    ei = PROC_I(inode);
    inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;

    /* grab the reference to the task. */
    ei->pid = get_task_pid(task, PIDTYPE_PID);
    if (!ei->pid) {
        iput(inode);
        return ERR_PTR(-ENOMEM);
    }

    inode->i_uid = 0;
    inode->i_gid = 0;
    inode->i_mode = entry->mode;
    if (S_ISDIR(inode->i_mode))
        inode->i_nlink = 2;
    if (S_ISLNK(inode->i_mode))
        inode->i_size = 64;
    if (entry->iop)
        inode->i_op = entry->iop;
    if (entry->fop)
        inode->i_fop = entry->fop;
    ei->op = entry->op;

    dentry->d_op = &proc_base_dentry_operations;
    d_add(dentry, inode);

    return NULL;
}

到此为止，proc的解析已全部结束，本来还有/proc/sys的实现，但是它和proc的实现没有多大关联，最关键的是我自己也没太弄明白。而且最近时间有点紧，以后补上吧。

mcgrady_tracy

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
proc源码解析（七）--查找目录项

前边几节讲了proc的实现和管理，还有一个重要的功能没有提到，那就是proc目录项的查找。在查找的过程中，用户程序把proc看作是普通文件系统里的文件;也就是说当查找目录项时，do_lookup会调用real_lookup函数执行与文件系统相关的查找，real_lookup则会调用proc根节点inode的 proc_root_inode_operations中的proc_root_l
复制链接

扫一扫

专栏目录