设备节点创建过程源代码分析

最新推荐文章于 2021-05-09 07:04:56 发布

gjq_1988

最新推荐文章于 2021-05-09 07:04:56 发布

阅读量2.3k

点赞数 1

分类专栏： linux技术文章标签：文件系统 linux kernel

本文链接：https://blog.csdn.net/gjq_1988/article/details/41483737

版权

linux技术专栏收录该内容

30 篇文章 1 订阅

订阅专栏

阅读本文需要linux文件系统基础知识。以下引用的kernel源代码，都是基于linux kernel源代码版本：3.4。

以下分析基于下面的假设：

根设备使用SD卡设备（块设备），根文件系统使用ext4文件系统。

linux kernel在初始化的最后阶段，会加载“根文件系统”，按照前面的假设，也就是加载一个ext4文件系统作为根文件系统，这个文件系统位于SD卡上。

在加载这个根文件系统前，kernel会先加载一个虚拟的根文件系统，名叫rootfs文件系统，这个rootfs文件系统才是kernel真正意义上mount的第一个文件系统。它是基于内存的文件系统（并没有对应一个非易失性存储设备，而像ext， fat这些文件系统都会对应一个外部存储设备）。

这个rootfs 文件系统会mount到自己的根目录 “/” 上，mount完毕后会将自己的根目录设为进程的根目录。现在假设这个rootfs文件系统已经mount好了（暂不分析rootfs文件系统的mount过程）。

kernel在安装根文件系统前会先安装根设备节点：/dev/root。ext4根文件系统必须存在于一个根设备上，比如硬盘，SD/MMC卡。根设备节点用来描述指定的根设备。

kernel在kernel_init函数中会调用prepare_namespace函数(init/do_mounts.c)，prepare_namespace函数先从启动参数中获得根设备名称，保存在root_device_name中，这里名称就是/dev/mmcblk0p1。

调用mount_root挂载根文件系统。在mount_root函数中就会创建设备节点了。

void __init prepare_namespace(void)
{
	int is_floppy;

	if (root_delay) {
		printk(KERN_INFO "Waiting %dsec before mounting root device...\n",
		       root_delay);
		ssleep(root_delay);
	}

	/*
	 * wait for the known devices to complete their probing
	 *
	 * Note: this is a potential source of long boot delays.
	 * For example, it is not atypical to wait 5 seconds here
	 * for the touchpad of a laptop to initialize.
	 */
	wait_for_device_probe();

	md_run_setup();

	if (saved_root_name[0]) {
		root_device_name = saved_root_name;
		if (!strncmp(root_device_name, "mtd", 3) ||
		    !strncmp(root_device_name, "ubi", 3)) {
			mount_block_root(root_device_name, root_mountflags);
			goto out;
		}
        printk(KERN_INFO "-----ROOT_DEV_NAME----: %s\n", root_device_name);
		ROOT_DEV = name_to_dev_t(root_device_name);                    //拿到设备名称后调用name_to_dev_t转换为设备号；
		if (strncmp(root_device_name, "/dev/", 5) == 0)
			root_device_name += 5;                                 //跳过 ‘/dev/’；
	}

	if (initrd_load())
		goto out;

	/* wait for any asynchronous scanning to complete */
	if ((ROOT_DEV == 0) && root_wait) {
		printk(KERN_INFO "Waiting for root device %s...\n",
			saved_root_name);
		while (driver_probe_done() != 0 ||
			(ROOT_DEV = name_to_dev_t(saved_root_name)) == 0)
			msleep(100);
		async_synchronize_full();
	}

	is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR;

	if (is_floppy && rd_doload && rd_load_disk(0))
		ROOT_DEV = Root_RAM0;

	mount_root();                       //安装根文件系统；
out:
	devtmpfs_mount("dev");
	sys_mount(".", "/", NULL, MS_MOVE, NULL);
	sys_chroot((const char __user __force *)".");
}

mount_root(): (init/do_mounts.c)

void __init mount_root(void)
{
#ifdef CONFIG_ROOT_NFS
	if (ROOT_DEV == Root_NFS) {
		if (mount_nfs_root())
			return;

		printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
		ROOT_DEV = Root_FD0;
	}
#endif
#ifdef CONFIG_BLK_DEV_FD
	if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
		/* rd_doload is 2 for a dual initrd/ramload setup */
		if (rd_doload==2) {
			if (rd_load_disk(1)) {
				ROOT_DEV = Root_RAM1;
				root_device_name = NULL;
			}
		} else
			change_floppy("root floppy");
	}
#endif
#ifdef CONFIG_BLOCK
    
	create_dev("/dev/root", ROOT_DEV);            //创建根设备节点；
 
	mount_block_root("/dev/root", root_mountflags);
#endif
}

这个函数在create_dev函数中创建根设备节点。create_dev函数实际调用了mknod系统调用对应的内核函数sys_mknod。

参数为： name = "/dev/root"， mode = S_IFBLK|0600，dev = xxx；

static inline int create_dev(char *name, dev_t dev)
{
	sys_unlink(name);
	return sys_mknod(name, S_IFBLK|0600, new_encode_dev(dev));
}

SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
{
	return sys_mknodat(AT_FDCWD, filename, mode, dev);
}

最后实际是调用sys_mknodat函数(fs/namei.c)，SYSCALL_DEFINE4是函数定义的宏，展开后就变成sys_mknodat函数。

sys_mknodat函数调用user_path_create得到要创建节点的path，节点父目录的信息保存在path中，然后调用vfs_mknod创建节点。

SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
		unsigned, dev)
{
	struct dentry *dentry;
	struct path path;
	int error;

	if (S_ISDIR(mode))
		return -EPERM;

	dentry = user_path_create(dfd, filename, &path, 0);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	if (!IS_POSIXACL(path.dentry->d_inode))
		mode &= ~current_umask();
	error = may_mknod(mode);
	if (error)
		goto out_dput;
	error = mnt_want_write(path.mnt);
	if (error)
		goto out_dput;
	error = security_path_mknod(&path, dentry, mode, dev);
	if (error)
		goto out_drop_write;
	switch (mode & S_IFMT) {
		case 0: case S_IFREG:
			error = vfs_create(path.dentry->d_inode,dentry,mode,NULL);
			break;
		case S_IFCHR: case S_IFBLK:
			error = vfs_mknod(path.dentry->d_inode,dentry,mode,
					new_decode_dev(dev));
			break;
		case S_IFIFO: case S_IFSOCK:
			error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
			break;
	}
out_drop_write:
	mnt_drop_write(path.mnt);
out_dput:
	dput(dentry);
	mutex_unlock(&path.dentry->d_inode->i_mutex);
	path_put(&path);

	return error;
}

user_path_create: (fs/namei.c)

简单的调用了kern_path_create函数后返回。此时参数分别为dfd = AT_FDCWD，tmp = "/dev/root"，path为输出参数，is_dir = 0；

struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir)
{
	char *tmp = getname(pathname);
	struct dentry *res;
	if (IS_ERR(tmp))
		return ERR_CAST(tmp);
	res = kern_path_create(dfd, tmp, path, is_dir);
	putname(tmp);
	return res;
}

kern_path_create函数先调用do_path_lookup搜索路径，搜索方式是搜索最后一个目录项的父目录。然后再调用lookup_hash搜索目录最后一项，也就是root项。

struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, int is_dir)
{
	struct dentry *dentry = ERR_PTR(-EEXIST);
	struct nameidata nd;
	int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
	if (error)
		return ERR_PTR(error);

	/*
	 * Yucky last component or no last component at all?
	 * (foo/., foo/.., /)
	 */
	if (nd.last_type != LAST_NORM)
		goto out;
	nd.flags &= ~LOOKUP_PARENT;
	nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
	nd.intent.open.flags = O_EXCL;

	/*
	 * Do the final lookup.
	 */
	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
	dentry = lookup_hash(&nd);
	if (IS_ERR(dentry))
		goto fail;

	if (dentry->d_inode)
		goto eexist;
	/*
	 * Special case - lookup gave negative, but... we had foo/bar/
	 * From the vfs_mknod() POV we just have a negative dentry -
	 * all is fine. Let's be bastards - you had / on the end, you've
	 * been asking for (non-existent) directory. -ENOENT for you.
	 */
	if (unlikely(!is_dir && nd.last.name[nd.last.len])) {
		dput(dentry);
		dentry = ERR_PTR(-ENOENT);
		goto fail;
	}
	*path = nd.path;
	return dentry;
eexist:
	dput(dentry);
	dentry = ERR_PTR(-EEXIST);
fail:
	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
out:
	path_put(&nd.path);
	return dentry;
}

do_path_lookup函数如下(fs/namei.c)。

调用path_lookupat进行搜索。加上LOOKUP_RCU参数。vfs文件系统路径搜索主要有两种mode，rcu-walk和 ref-walk。这两种方式在源代码目录 Documentation/filesystems/path-lookup.txt 的文档中有详细说明。

static int do_path_lookup(int dfd, const char *name,
				unsigned int flags, struct nameidata *nd)
{
	int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
	if (unlikely(retval == -ECHILD))
		retval = path_lookupat(dfd, name, flags, nd);
	if (unlikely(retval == -ESTALE))
		retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);

	if (likely(!retval)) {
		if (unlikely(!audit_dummy_context())) {
			if (nd->path.dentry && nd->inode)
				audit_inode(name, nd->path.dentry);
		}
	}
	return retval;
}

path_lookupat函数先初始化路径，可以看作将设置路径的根（搜索的起点）。VFS中目录项是用dentry和inode描述的，这里可以看作设置根路径对应的dentry。

static int path_lookupat(int dfd, const char *name,
				unsigned int flags, struct nameidata *nd)
{
	struct file *base = NULL;
	struct path path;
	int err;

	/*
	 * Path walking is largely split up into 2 different synchronisation
	 * schemes, rcu-walk and ref-walk (explained in
	 * Documentation/filesystems/path-lookup.txt). These share much of the
	 * path walk code, but some things particularly setup, cleanup, and
	 * following mounts are sufficiently divergent that functions are
	 * duplicated. Typically there is a function foo(), and its RCU
	 * analogue, foo_rcu().
	 *
	 * -ECHILD is the error number of choice (just to avoid clashes) that
	 * is returned if some aspect of an rcu-walk fails. Such an error must
	 * be handled by restarting a traditional ref-walk (which will always
	 * be able to complete).
	 */
	err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);      //找到搜索的起点，保存在nd中；

	if (unlikely(err))
		return err;

	current->total_link_count = 0;
	err = link_path_walk(name, nd);                                   //从起点开始路径的搜索，其中nd用来返回搜索结果；

	if (!err && !(flags & LOOKUP_PARENT)) {
		err = lookup_last(nd, &path);
		while (err > 0) {
			void *cookie;
			struct path link = path;
			nd->flags |= LOOKUP_PARENT;
			err = follow_link(&link, nd, &cookie);
			if (!err)
				err = lookup_last(nd, &path);
			put_link(nd, &link, cookie);
		}
	}

	if (!err)
		err = complete_walk(nd);

	if (!err && nd->flags & LOOKUP_DIRECTORY) {
		if (!nd->inode->i_op->lookup) {
			path_put(&nd->path);
			err = -ENOTDIR;
		}
	}

	if (base)
		fput(base);

	if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
		path_put(&nd->root);
		nd->root.mnt = NULL;
	}
	return err;
}

主要调用set_root_rcu设置根路径，将根路径保存在nd->root。

static int path_init(int dfd, const char *name, unsigned int flags,
		     struct nameidata *nd, struct file **fp)
{
	int retval = 0;
	int fput_needed;
	struct file *file;

	nd->last_type = LAST_ROOT; /* if there are only slashes... */
	nd->flags = flags | LOOKUP_JUMPED;
	nd->depth = 0;
	if (flags & LOOKUP_ROOT) {
		struct inode *inode = nd->root.dentry->d_inode;
		if (*name) {
			if (!inode->i_op->lookup)
				return -ENOTDIR;
			retval = inode_permission(inode, MAY_EXEC);
			if (retval)
				return retval;
		}
		nd->path = nd->root;
		nd->inode = inode;
		if (flags & LOOKUP_RCU) {
			br_read_lock(vfsmount_lock);
			rcu_read_lock();
			nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
		} else {
			path_get(&nd->path);
		}
		return 0;
	}

	nd->root.mnt = NULL;

	if (*name=='/') {
		if (flags & LOOKUP_RCU) {
			br_read_lock(vfsmount_lock);
			rcu_read_lock();                     
			set_root_rcu(nd);                       //设置根路径
		} else {
			set_root(nd);
			path_get(&nd->root);
		}
		nd->path = nd->root;                            //搜索的初始路径设为根路径
	} else if (dfd == AT_FDCWD) {
		if (flags & LOOKUP_RCU) {
			struct fs_struct *fs = current->fs;
			unsigned seq;

			br_read_lock(vfsmount_lock);
			rcu_read_lock();

			do {
				seq = read_seqcount_begin(&fs->seq);
				nd->path = fs->pwd;
				nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
			} while (read_seqcount_retry(&fs->seq, seq));
		} else {
			get_fs_pwd(current->fs, &nd->path);
		}
	} else {
		struct dentry *dentry;

		file = fget_raw_light(dfd, &fput_needed);
		retval = -EBADF;
		if (!file)
			goto out_fail;

		dentry = file->f_path.dentry;

		if (*name) {
			retval = -ENOTDIR;
			if (!S_ISDIR(dentry->d_inode->i_mode))
				goto fput_fail;

			retval = inode_permission(dentry->d_inode, MAY_EXEC);
			if (retval)
				goto fput_fail;
		}

		nd->path = file->f_path;
		if (flags & LOOKUP_RCU) {
			if (fput_needed)
				*fp = file;
			nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
			br_read_lock(vfsmount_lock);
			rcu_read_lock();
		} else {
			path_get(&file->f_path);
			fput_light(file, fput_needed);
		}
	}

	nd->inode = nd->path.dentry->d_inode;             //设置根目录的inode
	return 0;

fput_fail:
	fput_light(file, fput_needed);
out_fail:
	return retval;
}

用当前进程的根路径设置nd->root。

static __always_inline void set_root_rcu(struct nameidata *nd)
{
	if (!nd->root.mnt) {
		struct fs_struct *fs = current->fs;
		unsigned seq;

		do {
			seq = read_seqcount_begin(&fs->seq);
			nd->root = fs->root;                                         //设置nd的root为当前进程fs的root；
			nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
		} while (read_seqcount_retry(&fs->seq, seq));
	}
}

path_init返回后，调用link_path_walk正式开始搜索，搜索的结果会保存在struct nameidata结构体nd中。

struct nameidata {
	struct path	path;
	struct qstr	last;                                         //保存最后一个目录项
	struct path	root;
	struct inode	*inode; /* path.dentry.d_inode */
	unsigned int	flags;
	unsigned	seq;
	int		last_type;
	unsigned	depth;
	char *saved_names[MAX_NESTED_LINKS + 1];

	/* Intent data */
	union {
		struct open_intent open;
	} intent;
};

如果路径名以‘/’开头，就把它跳过去，因为在这种情况下nd->path已经指向本进程的根目录了。如果路径名中仅仅包含‘/’字符的话，那么其搜索目标就是根目录，所以任务完成。

调用hash_name，hash_name函数计算第一个目录项的hash值并返回目录项长度，对于本例中的/dev/root，第一个目录项是dev，长度为3。用name和len构造struct qstr结构体。若发现是最后一个目录项，就将struct qstr赋给nd->last，然后直接返回，若不是最后一项，就调用walk_component搜索这个目录项。

static int link_path_walk(const char *name, struct nameidata *nd)
{
	struct path next;
	int err;
	
	while (*name=='/')
		name++;
	if (!*name)
		return 0;

	/* At this point we know we have a real path component. */
	for(;;) {
		struct qstr this;
		long len;
		int type;

		err = may_lookup(nd);
 		if (err)
			break;

		len = hash_name(name, &this.hash);
		this.name = name;
		this.len = len;

		type = LAST_NORM;
		if (name[0] == '.') switch (len) {
			case 2:
				if (name[1] == '.') {
					type = LAST_DOTDOT;
					nd->flags |= LOOKUP_JUMPED;
				}
				break;
			case 1:
				type = LAST_DOT;
		}
		if (likely(type == LAST_NORM)) {
			struct dentry *parent = nd->path.dentry;
			nd->flags &= ~LOOKUP_JUMPED;
			if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
				err = parent->d_op->d_hash(parent, nd->inode,
							   &this);
				if (err < 0)
					break;
			}
		}

		if (!name[len])                                                   //若是最后一项，就直接返回；
			goto last_component;
		/*
		 * If it wasn't NUL, we know it was '/'. Skip that
		 * slash, and continue until no more slashes.
		 */
		do {
			len++;
		} while (unlikely(name[len] == '/'));
		if (!name[len])
			goto last_component;

                name += len;                        //运行到这里，表示当前节点为中间节点；                        

		err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);      //搜索目录项；
		if (err < 0)
			return err;

		if (err) {
			err = nested_symlink(&next, nd);
			if (err)
				return err;
		}
		if (can_lookup(nd->inode))
			continue;
		err = -ENOTDIR; 
		break;
		/* here ends the main loop */

last_component:
		nd->last = this;
		nd->last_type = type;
		return 0;
	}
	terminate_walk(nd);
	return err;
}

权限检查。

static inline int may_lookup(struct nameidata *nd)
{
	if (nd->flags & LOOKUP_RCU) {
		int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
		if (err != -ECHILD)
			return err;
		if (unlazy_walk(nd, NULL))
			return -ECHILD;
	}
	return inode_permission(nd->inode, MAY_EXEC);
}

hash_name返回第一个目录项的长度。

static inline unsigned long hash_name(const char *name, unsigned int *hashp)
{
	unsigned long hash = init_name_hash();               //    #define init_name_hash()        0；
	unsigned long len = 0, c;

	c = (unsigned char)*name;
	do {
		len++;
		hash = partial_name_hash(c, hash);           // 计算并返回哈希值；
		c = (unsigned char)name[len];
	} while (c && c != '/');
	*hashp = end_name_hash(hash);                        // return hash；
	return len;
}

计算hash值；

static inline unsigned long
partial_name_hash(unsigned long c, unsigned long prevhash)
{
	return (prevhash + (c << 4) + (c >> 4)) * 11;
}

walk_component: (fs/namei.c)

调用do_lookup函数搜索目录项的inode。如果找不到inode节点，则终止搜索。如果这个inode节点被mount上另一个文件系统，更新搜索路径。

static inline int walk_component(struct nameidata *nd, struct path *path,
		struct qstr *name, int type, int follow)
{
	struct inode *inode;
	int err;
	/*
	 * "." and ".." are special - ".." especially so because it has
	 * to be able to know about the current root directory and
	 * parent relationships.
	 */
	if (unlikely(type != LAST_NORM))
		return handle_dots(nd, type);
	err = do_lookup(nd, name, path, &inode);
	if (unlikely(err)) {
		terminate_walk(nd);
		return err;
	}
	if (!inode) {
		path_to_nameidata(path, nd);
		terminate_walk(nd);
		return -ENOENT;
	}
	if (should_follow_link(inode, follow)) {
		if (nd->flags & LOOKUP_RCU) {
			if (unlikely(unlazy_walk(nd, path->dentry))) {
				terminate_walk(nd);
				return -ECHILD;
			}
		}
		BUG_ON(inode != path->dentry->d_inode);
		return 1;
	}
	path_to_nameidata(path, nd);                                              //将path的内容转化到nd中；
	nd->inode = inode;
	return 0;
}

do_lookup: (fs/namei.c) 做实际的路径搜索工作。

do_lookup先获取要搜索目录项的父目录parent，根据LOOPUP_RCU标志，调用__d_lookup_rcu或者__d_lookup。假设没有LOOKUP_RCU标志，则进入__d_lookup函数，若找到则__d_lookup返回找到的dentry，用找到的dentry给path->dentry赋值，将dentry对应的dentry->d_inode赋给输出参数inode，inode有可能为NULL。

static int do_lookup(struct nameidata *nd, struct qstr *name,
			struct path *path, struct inode **inode)
{
	struct vfsmount *mnt = nd->path.mnt;
	struct dentry *dentry, *parent = nd->path.dentry;
	int need_reval = 1;
	int status = 1;
	int err;

	/*
	 * Rename seqlock is not required here because in the off chance
	 * of a false negative due to a concurrent rename, we're going to
	 * do the non-racy lookup, below.
	 */
	if (nd->flags & LOOKUP_RCU) {
		unsigned seq;
		*inode = nd->inode;
		dentry = __d_lookup_rcu(parent, name, &seq, inode);
		if (!dentry)
			goto unlazy;

		/* Memory barrier in read_seqcount_begin of child is enough */
		if (__read_seqcount_retry(&parent->d_seq, nd->seq))
			return -ECHILD;
		nd->seq = seq;

		if (unlikely(d_need_lookup(dentry)))
			goto unlazy;
		if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
			status = d_revalidate(dentry, nd);
			if (unlikely(status <= 0)) {
				if (status != -ECHILD)
					need_reval = 0;
				goto unlazy;
			}
		}
		path->mnt = mnt;
		path->dentry = dentry;
		if (unlikely(!__follow_mount_rcu(nd, path, inode)))
			goto unlazy;
		if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
			goto unlazy;
		return 0;
unlazy:
		if (unlazy_walk(nd, dentry))
			return -ECHILD;
	} else {
		dentry = __d_lookup(parent, name);
	}

	if (unlikely(!dentry))
		goto need_lookup;

	if (unlikely(d_need_lookup(dentry))) {
		dput(dentry);
		goto need_lookup;
	}

	if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
		status = d_revalidate(dentry, nd);
	if (unlikely(status <= 0)) {
		if (status < 0) {
			dput(dentry);
			return status;
		}
		if (!d_invalidate(dentry)) {
			dput(dentry);
			goto need_lookup;
		}
	}
done:
	path->mnt = mnt;
	path->dentry = dentry;
	err = follow_managed(path, nd->flags);
	if (unlikely(err < 0)) {
		path_put_conditional(path, nd);
		return err;
	}
	if (err)
		nd->flags |= LOOKUP_JUMPED;
	*inode = path->dentry->d_inode;
	return 0;

need_lookup:
	BUG_ON(nd->inode != parent->d_inode);

	mutex_lock(&parent->d_inode->i_mutex);
	dentry = __lookup_hash(name, parent, nd);
	mutex_unlock(&parent->d_inode->i_mutex);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);
	goto done;
}

__d_lookup: (fs/dcache.c)

在内存中寻找该节点已经建立的dentry结构，内核中有个hash表dentry_hashtable，是一个struct hlist_bl_head结构的指针数组，每一个struct hlist_bl_head结构包含有一个队列头指针struct hlist_bl_node*。

一旦在内存中建立起一个目录节点的dentry结构，就根据其节点名的hash值和父目录的dentry值，调用d_hash获得dentry_hashtable中的一个队列头指针，然后将dentry成员变量dentry->d_hash挂入dentry_hashtable表中的这个队列，需要寻找时会根据hash值查找dentry_hashtable表。

将dentry->d_hash挂入dentry_hashtable的过程在d_rehash函数中完成，后面分析。

这里__d_lookup先调用d_hash从dentry_hashtable表中获得这个队列。遍历这个队列，看看队列中有没有目录项的hash值和要搜索的目录项的hash值相同的，如果有，再比较parent是否是同一个parent，最后调用dentry_cmp进一步比较是否是同一个目录项，如果是，表示找到了，就增加引用计数并将其返回。如果没有找到，返回NULL。

struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct hlist_bl_head *b = d_hash(parent, hash);
	struct hlist_bl_node *node;
	struct dentry *found = NULL;
	struct dentry *dentry;

	/*
	 * Note: There is significant duplication with __d_lookup_rcu which is
	 * required to prevent single threaded performance regressions
	 * especially on architectures where smp_rmb (in seqcounts) are costly.
	 * Keep the two functions in sync.
	 */

	/*
	 * The hash list is protected using RCU.
	 *
	 * Take d_lock when comparing a candidate dentry, to avoid races
	 * with d_move().
	 *
	 * It is possible that concurrent renames can mess up our list
	 * walk here and result in missing our dentry, resulting in the
	 * false-negative result. d_lookup() protects against concurrent
	 * renames using rename_lock seqlock.
	 *
	 * See Documentation/filesystems/path-lookup.txt for more details.
	 */
	rcu_read_lock();
	
	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {                           //遍历散列
		const char *tname;
		int tlen;

		if (dentry->d_name.hash != hash)
			continue;

		spin_lock(&dentry->d_lock);
		if (dentry->d_parent != parent)
			goto next;
		if (d_unhashed(dentry))
			goto next;

		/*
		 * It is safe to compare names since d_move() cannot
		 * change the qstr (protected by d_lock).
		 */
		tlen = dentry->d_name.len;
		tname = dentry->d_name.name;
		if (parent->d_flags & DCACHE_OP_COMPARE) {
			if (parent->d_op->d_compare(parent, parent->d_inode,
						dentry, dentry->d_inode,
						tlen, tname, name))
				goto next;
		} else {
			if (dentry_cmp(tname, tlen, str, len))
				goto next;
		}

		dentry->d_count++;
		found = dentry;
		spin_unlock(&dentry->d_lock);
		break;
next:
		spin_unlock(&dentry->d_lock);
 	}
 	rcu_read_unlock();

 	return found;
}

d_hash为散列函数，它将parent与hash值相结合，映射到dentry_hashtable表中，返回相应的散列链。

static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
					unsigned int hash)
{
	hash += (unsigned long) parent / L1_CACHE_BYTES;
	hash = hash + (hash >> D_HASHBITS);
	return dentry_hashtable + (hash & D_HASHMASK);
}

__d_lookup然后遍历刚刚得到的队列上的每个元素。hlist_bl_for_each_entry_rcu实现遍历。

#define hlist_bl_for_each_entry_rcu(tpos, pos, head, member)		\
	for (pos = hlist_bl_first_rcu(head);				\
		pos &&							\
		({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \
		pos = rcu_dereference_raw(pos->next))

static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h)
{
	return (struct hlist_bl_node *)
		((unsigned long)rcu_dereference(h->first) & ~LIST_BL_LOCKMASK);
}

struct hlist_bl_head {
	struct hlist_bl_node *first;
};

struct hlist_bl_node {
	struct hlist_bl_node *next, **pprev;
};

container_of返回包含d_hash的struct dentry结构体；container_of是linux内核常用的宏，能根据结构体内部成员的指针返回包含这个成员的结构体的指针。

#define hlist_bl_entry(ptr, type, member) container_of(ptr,type,member)

执行到这里，就层层返回到do_lookup，用找到的dentry给path赋值，接着调用follow_managed函数 (fs/namei.c)。

follow_managed处理一些有特殊用途的目录项。比如标记为mount点的目录项，如果目录项是一个mount点，那就切换到另一个文件系统，将path->dentry重新设为新文件系统的根。

最后，do_lookup函数将找到的dentry->d_inode赋值给输出参数inode，返回。

static int follow_managed(struct path *path, unsigned flags)
{
	struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
	unsigned managed;
	bool need_mntput = false;
	int ret = 0;

	/* Given that we're not holding a lock here, we retain the value in a
	 * local variable for each dentry as we look at it so that we don't see
	 * the components of that value change under us */
	while (managed = ACCESS_ONCE(path->dentry->d_flags),
	       managed &= DCACHE_MANAGED_DENTRY,
	       unlikely(managed != 0)) {
		/* Allow the filesystem to manage the transit without i_mutex
		 * being held. */
		if (managed & DCACHE_MANAGE_TRANSIT) {
			BUG_ON(!path->dentry->d_op);
			BUG_ON(!path->dentry->d_op->d_manage);
			ret = path->dentry->d_op->d_manage(path->dentry, false);
			if (ret < 0)
				break;
		}

		/* Transit to a mounted filesystem. */
		if (managed & DCACHE_MOUNTED) {
			struct vfsmount *mounted = lookup_mnt(path);
			if (mounted) {
				dput(path->dentry);
				if (need_mntput)
					mntput(path->mnt);
				path->mnt = mounted;
				path->dentry = dget(mounted->mnt_root);
				need_mntput = true;
				continue;
			}

			/* Something is mounted on this dentry in another
			 * namespace and/or whatever was mounted there in this
			 * namespace got unmounted before we managed to get the
			 * vfsmount_lock */
		}

		/* Handle an automount point */
		if (managed & DCACHE_NEED_AUTOMOUNT) {
			ret = follow_automount(path, flags, &need_mntput);
			if (ret < 0)
				break;
			continue;
		}

		/* We didn't change the current path point */
		break;
	}

	if (need_mntput && path->mnt == mnt)
		mntput(path->mnt);
	if (ret == -EISDIR)
		ret = 0;
	return ret < 0 ? ret : need_mntput;
}

返回到walk_component函数，检查inode是不是NULL，如果是则终止搜索。

调用path_to_nameidata函数，path_to_nameidata函数将path的内容转化到nd里面去。将找到的inode节点赋给nd->inode。

static inline void path_to_nameidata(const struct path *path,
					struct nameidata *nd)
{
	if (!(nd->flags & LOOKUP_RCU)) {
		dput(nd->path.dentry);
		if (nd->path.mnt != path->mnt)
			mntput(nd->path.mnt);
	}
	nd->path.mnt = path->mnt;
	nd->path.dentry = path->dentry;
}

返回到link_path_walk函数，继续循环搜索下一个目录项。若发现是最后一个目录项，就将struct qstr this赋给nd->last，然后直接返回到path_lookupat函数。

如果link_path_walk返回0，调用complete_walk函数完成搜索 (fs/namei.c)。

static int complete_walk(struct nameidata *nd)
{
	struct dentry *dentry = nd->path.dentry;
	int status;

	if (nd->flags & LOOKUP_RCU) {
		nd->flags &= ~LOOKUP_RCU;
		if (!(nd->flags & LOOKUP_ROOT))
			nd->root.mnt = NULL;
		spin_lock(&dentry->d_lock);
		if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
			spin_unlock(&dentry->d_lock);
			rcu_read_unlock();
			br_read_unlock(vfsmount_lock);
			return -ECHILD;
		}
		BUG_ON(nd->inode != dentry->d_inode);
		spin_unlock(&dentry->d_lock);
		mntget(nd->path.mnt);
		rcu_read_unlock();
		br_read_unlock(vfsmount_lock);
	}

	if (likely(!(nd->flags & LOOKUP_JUMPED)))
		return 0;

	if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
		return 0;

	if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
		return 0;

	/* Note: we do not d_invalidate() */
	status = d_revalidate(dentry, nd);
	if (status > 0)
		return 0;

	if (!status)
		status = -ESTALE;

	path_put(&nd->path);
	return status;
}

层层返回到kern_path_create，调用lookup_hash函数 (fs/namei.c)进行最后一个目录项的搜索。

static struct dentry *lookup_hash(struct nameidata *nd)
{
	return __lookup_hash(&nd->last, nd->path.dentry, nd);
}

调用__lookup_hash函数进行搜索，__lookup_hash调用lookup_dcache搜索dcache，若找不到则lookup_real调用对应文件系统的lookup函数去查找。

static struct dentry *__lookup_hash(struct qstr *name,
		struct dentry *base, struct nameidata *nd)
{
	bool need_lookup;
	struct dentry *dentry;

	dentry = lookup_dcache(name, base, nd, &need_lookup);
	if (!need_lookup)
		return dentry;

	return lookup_real(base->d_inode, dentry, nd);
}

调用d_lookup搜索。

static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
				    struct nameidata *nd, bool *need_lookup)
{
	struct dentry *dentry;
	int error;

	*need_lookup = false;
	dentry = d_lookup(dir, name);
	if (dentry) {
		if (d_need_lookup(dentry)) {
			*need_lookup = true;
		} else if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
			error = d_revalidate(dentry, nd);
			if (unlikely(error <= 0)) {
				if (error < 0) {
					dput(dentry);
					return ERR_PTR(error);
				} else if (!d_invalidate(dentry)) {
					dput(dentry);
					dentry = NULL;
				}
			}
		}
	}

	if (!dentry) {
		dentry = d_alloc(dir, name);
		if (unlikely(!dentry))
			return ERR_PTR(-ENOMEM);

		*need_lookup = true;
	}
	return dentry;
}

d_lookup进一步调用__d_lookup。__d_lookup上面已经分析过了，就是在内存的dentry_hashtable表中查找dentry结构。这里/dev下的root还没有创建，所以肯定找不到，返回NULL。层层返回到lookup_dcache，如果d_lookup返回的dentry为空，则调用d_alloc分配一个dentry对象。

struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
{
	struct dentry *dentry;
	unsigned seq;

        do {
                seq = read_seqbegin(&rename_lock);
                dentry = __d_lookup(parent, name);
                if (dentry)
			break;
	} while (read_seqretry(&rename_lock, seq));
	return dentry;
}

d_alloc: (fs/dcache.c)

调用__d_alloc分配一个dcache对象并初始化。设置dentry的父目录，并将新的dentry链入其父目录的d_subdirs链表中。

struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
{
	struct dentry *dentry = __d_alloc(parent->d_sb, name);
	if (!dentry)
		return NULL;

	spin_lock(&parent->d_lock);
	/*
	 * don't need child lock because it is not subject
	 * to concurrency here
	 */
	__dget_dlock(parent);
	dentry->d_parent = parent;
	list_add(&dentry->d_u.d_child, &parent->d_subdirs);
	spin_unlock(&parent->d_lock);

	return dentry;
}

__d_alloc: (fs/dcache.c)

分配并初始化一个dcache对象。

struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
{
	struct dentry *dentry;
	char *dname;

	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
	if (!dentry)
		return NULL;

	if (name->len > DNAME_INLINE_LEN-1) {
		dname = kmalloc(name->len + 1, GFP_KERNEL);
		if (!dname) {
			kmem_cache_free(dentry_cache, dentry); 
			return NULL;
		}
	} else  {
		dname = dentry->d_iname;
	}	
	dentry->d_name.name = dname;

	dentry->d_name.len = name->len;
	dentry->d_name.hash = name->hash;
	memcpy(dname, name->name, name->len);
	dname[name->len] = 0;

	dentry->d_count = 1;
	dentry->d_flags = 0;
	spin_lock_init(&dentry->d_lock);
	seqcount_init(&dentry->d_seq);
	dentry->d_inode = NULL;
	dentry->d_parent = dentry;
	dentry->d_sb = sb;
	dentry->d_op = NULL;
	dentry->d_fsdata = NULL;
	INIT_HLIST_BL_NODE(&dentry->d_hash);
	INIT_LIST_HEAD(&dentry->d_lru);
	INIT_LIST_HEAD(&dentry->d_subdirs);
	INIT_LIST_HEAD(&dentry->d_alias);
	INIT_LIST_HEAD(&dentry->d_u.d_child);
	d_set_d_op(dentry, dentry->d_sb->s_d_op);

	this_cpu_inc(nr_dentry);
    printk("__d_alloc: dentry name: %s\n", dentry->d_name.name);
	return dentry;
}

返回到lookup_dcache中，将need_lookup置为真，向上返回到__lookup_hash，__lookup_hash最后调用lookup_real通过文件系统自己的lookup函数从设备读目录信息。

lookup_real: (fs/namei.c)

在缓存中无法找到指定的目录项，那就创建该目录项。通过底层文件系统的lookup函数从设备读，lookup在读目录项的过程中，也把该目录项对应的inode结构从磁盘读出来，并赋值给dentry的d_inode字段。

static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
				  struct nameidata *nd)
{
	struct dentry *old;

	/* Don't create child dentry for a dead directory. */
	if (unlikely(IS_DEADDIR(dir))) {
		dput(dentry);
		return ERR_PTR(-ENOENT);
	}

	old = dir->i_op->lookup(dir, dentry, nd);
	if (unlikely(old)) {
		dput(dentry);
		dentry = old;
	}
	return dentry;
}

simple_lookup: (fs/libfs.c)

struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
	static const struct dentry_operations simple_dentry_operations = {
		.d_delete = simple_delete_dentry,
	};

	if (dentry->d_name.len > NAME_MAX)
		return ERR_PTR(-ENAMETOOLONG);
	d_set_d_op(dentry, &simple_dentry_operations);
	d_add(dentry, NULL);
	return NULL;
}

d_set_d_op: (fs/dcache.c)

用simple_dentry_operations初始化dentry->d_op。

void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
{
	WARN_ON_ONCE(dentry->d_op);
	WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH	|
				DCACHE_OP_COMPARE	|
				DCACHE_OP_REVALIDATE	|
				DCACHE_OP_DELETE ));
	dentry->d_op = op;
	if (!op)
		return;
	if (op->d_hash)
		dentry->d_flags |= DCACHE_OP_HASH;
	if (op->d_compare)
		dentry->d_flags |= DCACHE_OP_COMPARE;
	if (op->d_revalidate)
		dentry->d_flags |= DCACHE_OP_REVALIDATE;
	if (op->d_delete)
		dentry->d_flags |= DCACHE_OP_DELETE;
	if (op->d_prune)
		dentry->d_flags |= DCACHE_OP_PRUNE;

}

d_add: (include/linux/cache.h)

传入的inode参数为NULL。d_instantiate函数将dentry->d_inode置为NULL。

d_rehash将dentry加入hash表。

static inline void d_add(struct dentry *entry, struct inode *inode)
{
	d_instantiate(entry, inode);
	d_rehash(entry);
}

d_rehash: (fs/dcache.c)

void d_rehash(struct dentry * entry)
{
	spin_lock(&entry->d_lock);
	_d_rehash(entry);
	spin_unlock(&entry->d_lock);
}

_d_rehash: (fs/dcache.c)

static void _d_rehash(struct dentry * entry)
{
	__d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
}

__d_rehash: (fs/dcache.c)

通过hlist_bl_add_head_rcu将dentry链入dentry_hashtable表。

static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
{
	BUG_ON(!d_unhashed(entry));
	hlist_bl_lock(b);
	entry->d_flags |= DCACHE_RCUACCESS;
	hlist_bl_add_head_rcu(&entry->d_hash, b);
	hlist_bl_unlock(b);
}

hlist_bl_add_head_rcu: (include/linux/rculist_bl.h)

static inline void hlist_bl_add_head_rcu(struct hlist_bl_node *n,
					struct hlist_bl_head *h)
{
	struct hlist_bl_node *first;

	/* don't need hlist_bl_first_rcu because we're under lock */
	first = hlist_bl_first(h);

	n->next = first;
	if (first)
		first->pprev = &n->next;
	n->pprev = &h->first;

	/* need _rcu because we can have concurrent lock free readers */
	hlist_bl_set_first_rcu(h, n);
}

lookup_real返回后，向上一直返回到kern_path_create函数，接着kern_path_create函数检查返回的dentry->d_inode是否不空，如果不空表示inode节点已经存在了，不需要创建，本例中/dev下的root节点是不存在的（需要创建），因此将nd.path赋值给path后，直接返回dentry。

返回到sys_mknodat函数，执行mnt_want_write，这个函数告诉文件系统将要执行写操作了，如果不可写，将会返回错误。

如果可写，调用vfs_mknod函数创建inode节点。

vfs_mknod : (fs/namei.c)

这个函数其实就是调用了要创建节点的父目录的inode节点的方法i_op->mknod。

int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
	int error = may_create(dir, dentry);

	if (error)
		return error;

	if ((S_ISCHR(mode) || S_ISBLK(mode)) &&
	    !ns_capable(inode_userns(dir), CAP_MKNOD))
		return -EPERM;

	if (!dir->i_op->mknod)
		return -EPERM;

	error = devcgroup_inode_mknod(mode, dev);
	if (error)
		return error;

	error = security_inode_mknod(dir, dentry, mode, dev);
	if (error)
		return error;

	error = dir->i_op->mknod(dir, dentry, mode, dev);
	if (!error)
		fsnotify_create(dir, dentry);
	return error;
}

inode节点的方法是创建inode节点时初始化的，这里也就是/dev节点创建时初始化的，dev节点是在rootfs虚拟文件系统下创建的（前面说过rootfs虚拟文件系统是linux kernel真正意义上挂载的第一个根文件系统），它的i_op被赋值为ramfs_dir_inode_operations。

(fs/ramfs/inode.c)

static const struct inode_operations ramfs_dir_inode_operations = {
	.create		= ramfs_create,
	.lookup		= simple_lookup,
	.link		= simple_link,
	.unlink		= simple_unlink,
	.symlink	= ramfs_symlink,
	.mkdir		= ramfs_mkdir,
	.rmdir		= simple_rmdir,
	.mknod		= ramfs_mknod,
	.rename		= simple_rename,
};

所以i_op->mknod方法就是调用的ramfs_mknod函数。

ramfs_mknod: (fs/ramfs/inode.c)

ramfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
	struct inode * inode = ramfs_get_inode(dir->i_sb, dir, mode, dev);
	int error = -ENOSPC;

	if (inode) {
		d_instantiate(dentry, inode);
		dget(dentry);	/* Extra count - pin the dentry in core */
		error = 0;
		dir->i_mtime = dir->i_ctime = CURRENT_TIME;
	}
	return error;
}

ramfs_mknod函数调用了ramfs_get_inode函数创建一个新的inode。

ramfs_get_inode: (fs/ramfs/inode.c)

struct inode *ramfs_get_inode(struct super_block *sb,
				const struct inode *dir, umode_t mode, dev_t dev)
{
	struct inode * inode = new_inode(sb);

	if (inode) {
		inode->i_ino = get_next_ino();
		inode_init_owner(inode, dir, mode);
		inode->i_mapping->a_ops = &ramfs_aops;
		inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
		mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
		mapping_set_unevictable(inode->i_mapping);
		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
		switch (mode & S_IFMT) {
		default:
			init_special_inode(inode, mode, dev);
			break;
		case S_IFREG:
			inode->i_op = &ramfs_file_inode_operations;
			inode->i_fop = &ramfs_file_operations;
			break;
		case S_IFDIR:
			inode->i_op = &ramfs_dir_inode_operations;
			inode->i_fop = &simple_dir_operations;

			/* directory inodes start off with i_nlink == 2 (for "." entry) */
			inc_nlink(inode);
			break;
		case S_IFLNK:
			inode->i_op = &page_symlink_inode_operations;
			break;
		}
	}
	return inode;
}

调用new_inode分配一个新inode。

new_inode: (fs/inode.c)

new_inode进一步调用new_inode_pseudo函数新建inode。如果创建成功，则将其加入到该文件系统超级块的inode链表。

struct inode *new_inode(struct super_block *sb)
{
	struct inode *inode;

	spin_lock_prefetch(&inode_sb_list_lock);

	inode = new_inode_pseudo(sb);
	if (inode)
		inode_sb_list_add(inode);
	return inode;
}

new_inode_pseudo: (fs/inode.c)

pseudo是“虚拟”的意思，因为当前是在虚拟根文件系统————rootfs文件系统下创建节点，前面提过rootfs文件系统是基于内存的特殊文件系统，它的inode是只存在于内存中的。而其它像ext， fat等文件系统的inode节点都存储在硬盘等外部设备中。

new_inode_pseudo调用alloc_inode创建inode。

struct inode *new_inode_pseudo(struct super_block *sb)
{
	struct inode *inode = alloc_inode(sb);

	if (inode) {
		spin_lock(&inode->i_lock);
		inode->i_state = 0;
		spin_unlock(&inode->i_lock);
		INIT_LIST_HEAD(&inode->i_sb_list);
	}
	return inode;
}

alloc_inode: (fs/inode.c)

如果对应文件系统超级块的s_op->alloc_inode存在，则调用它，否则调用kmem_cache_alloc函数分配inode。

static struct inode *alloc_inode(struct super_block *sb)
{
	struct inode *inode;

	if (sb->s_op->alloc_inode)
		inode = sb->s_op->alloc_inode(sb);
	else
		inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);

	if (!inode)
		return NULL;

	if (unlikely(inode_init_always(sb, inode))) {
		if (inode->i_sb->s_op->destroy_inode)
			inode->i_sb->s_op->destroy_inode(inode);
		else
			kmem_cache_free(inode_cachep, inode);
		return NULL;
	}

	return inode;
}

在挂载rootfs虚拟根文件系统的时候，对应超级块的s_op已经被设置为ramfs_ops了。可以看到，alloc_inode没有设置，所以调用kmem_cache_alloc函数来分配inode。

static const struct super_operations ramfs_ops = {
	.statfs		= simple_statfs,
	.drop_inode	= generic_delete_inode,
	.show_options	= generic_show_options,
};

inode分配好后就将它返回new_inode_pseudo，再返回到new_inode，接着new_inode函数调用inode_sb_list_add将新的inode加入到文件系统超级块的inode链表中。

返回到ramfs_get_inode。
从new_inode返回后，进行一些初始化工作（包括inode的节点号），然后进入init_special_inode函数。

init_special_inode: (fs/inode.c)

前面create_dev的时候传进来的参数mode是块设备，所以这里将inode节点操作函数设为def_blk_fops，设备号设为rdev。

void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	inode->i_mode = mode;
    	if (S_ISCHR(mode)) {
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = rdev;
	} else if (S_ISBLK(mode)) {
       		inode->i_fop = &def_blk_fops;
		inode->i_rdev = rdev;
	} else if (S_ISFIFO(mode))
		inode->i_fop = &def_fifo_fops;
	else if (S_ISSOCK(mode))
		inode->i_fop = &bad_sock_fops;
	else
		printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
				  " inode %s:%lu\n", mode, inode->i_sb->s_id,
				  inode->i_ino);
}

至此，inode节点就创建好了，向上层层返回到ramfs_get_inode，ramfs_get_inode就将创建好的inode节点直接返回到ramfs_mknod。

ramfs_mknod进一步调用d_instantiate。

d_instantiate: (fs/dcache.c)

d_instantiate函数封装了__d_instantiate。

void d_instantiate(struct dentry *entry, struct inode * inode)
{
	BUG_ON(!list_empty(&entry->d_alias));
	if (inode)
		spin_lock(&inode->i_lock);
	__d_instantiate(entry, inode);
	if (inode)
		spin_unlock(&inode->i_lock);
	security_d_instantiate(entry, inode);
}

__d_instantiate: (fs/dcache.c)

__d_instantiate将inode与dentry关联起来。

static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
	spin_lock(&dentry->d_lock);
	if (inode) {
		if (unlikely(IS_AUTOMOUNT(inode)))
			dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
		list_add(&dentry->d_alias, &inode->i_dentry);
	}
	dentry->d_inode = inode;
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	fsnotify_d_instantiate(dentry, inode);
}

层层返回，至此，/dev/root设备就创建完毕了。

gjq_1988

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
设备节点创建过程源代码分析

阅读本文需要linux文件系统基础知识。以下分析基于下面的假设：根设备使用SD卡设备（块设备），根文件系统使用ext4文件系统。linux kernel在初始化的最后阶段，会加载“根文件系统”，按照前面的假设，也就是加载一个ext4文件系统作为根文件系统，这个文件系统位于SD卡上。在加载这个根文件系统前，kernel会先加载一个虚拟的根文件系统，名叫rootfs
复制链接

扫一扫