linux文件系统初始化流程

博主能力有限,若博文有错误,请各位大侠不吝赐教。
本文从代码层面梳理linux文件系统的初始化流程,代码基于linux 4.4.198。
在start_kernel()函数中,分两个地方进行文件系统相关的初始化动作。第一个地方位于函数vfs_caches_init(),第二个地方位于rest_init(),前者为后者做必要的初始化,后者执行真正的文件系统挂载动作。下面分两部分进行文件系统初始化讲解。
第一步:vfs_caches_init()函数。
这个函数主要做一些必要的初始化操作,为initrd的挂载做准备。

/*
 * Set up the caches and tables used by the VFS layer during early boot.
 * Every helper below is called exactly once, in the order listed.
 */
void __init vfs_caches_init(void)
{
	/* Slab cache named "names_cache" whose objects are PATH_MAX bytes. */
	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	dcache_init(); /* dentry cache setup (dentry_hashtable) */
	inode_init(); /* inode cache setup (inode_hashtable) */
	files_init();
	files_maxfiles_init();
	mnt_init(); /* registers sysfs, then registers and mounts rootfs */
	bdev_cache_init();
	chrdev_init();
}

重点工作位于mnt_init函数中,这个函数完成sysfs的初始化与注册,以及rootfs文件系统的注册和挂载。我们来看下这个函数。

/*
 * Early-boot setup of the mount infrastructure: creates the mount slab
 * cache and the two mount hash tables, initializes kernfs and sysfs,
 * creates the "fs" kobject, then registers rootfs and builds the initial
 * mount tree.
 *
 * Panics if either hash table cannot be allocated. A sysfs_init() or
 * kobject creation failure is only logged as a warning.
 */
void __init mnt_init(void)
{
	unsigned u;
	int err;

	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	mount_hashtable = alloc_large_system_hash("Mount-cache",
				sizeof(struct hlist_head),
				mhash_entries, 19,
				0,
				&m_hash_shift, &m_hash_mask, 0, 0); /* allocate mount_hashtable */
	mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
				sizeof(struct hlist_head),
				mphash_entries, 19,
				0,
				&mp_hash_shift, &mp_hash_mask, 0, 0); /* allocate mountpoint_hashtable */

	if (!mount_hashtable || !mountpoint_hashtable)
		panic("Failed to allocate mount hash table\n");

	/* Initialize every bucket; the masks are inclusive upper indices. */
	for (u = 0; u <= m_hash_mask; u++)
		INIT_HLIST_HEAD(&mount_hashtable[u]);
	for (u = 0; u <= mp_hash_mask; u++)
		INIT_HLIST_HEAD(&mountpoint_hashtable[u]);

	kernfs_init(); /* sysfs is built on kernfs, so kernfs is prepared first */

	err = sysfs_init(); /* initialize and register sysfs; note this happens before rootfs */
	if (err)
		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
			__func__, err);
	fs_kobj = kobject_create_and_add("fs", NULL); /* the "fs" kobject (the fs directory in sysfs) */
	if (!fs_kobj)
		printk(KERN_WARNING "%s: kobj create error\n", __func__);
	init_rootfs(); /* register the rootfs file system type */
	init_mount_tree(); /* mount rootfs and install it as the initial root */
}

怎么mnt_init里就挂载了rootfs了呢?
这里的rootfs和我们说的initrd不是一个概念,rootfs是一个文件系统类型,相关定义如下:

/*
 * File system type descriptor for "rootfs". init_mount_tree() looks it up
 * by this name via get_fs_type("rootfs"). Mounting goes through
 * rootfs_mount(); superblock teardown uses kill_litter_super().
 */
static struct file_system_type rootfs_fs_type = {
	.name		= "rootfs",
	.mount		= rootfs_mount,
	.kill_sb	= kill_litter_super,
};

这里的挂载rootfs指的是将rootfs这种文件系统类型挂载到一个linux内核认为的根目录,也就是从这里开始创建了根目录。下面来具体看看init_mount_tree这个函数就明白了。

/*
 * Create the initial mount tree: mount an instance of "rootfs", wrap it in
 * a new mount namespace, attach that namespace to init_task, and make the
 * rootfs root both the working directory and the root directory of the
 * current task.
 *
 * Panics if the rootfs type is missing, the mount fails, or the namespace
 * cannot be created.
 */
static void __init init_mount_tree(void)
{
	struct vfsmount *mnt;
	struct mnt_namespace *ns;
	struct path root;
	struct file_system_type *type;

	type = get_fs_type("rootfs");
	if (!type)
		panic("Can't find rootfs type");
	mnt = vfs_kern_mount(type, 0, "rootfs", NULL); /* mount rootfs; this becomes the root of the tree */
	put_filesystem(type); /* drop the reference taken by get_fs_type() */
	if (IS_ERR(mnt))
		panic("Can't create rootfs");

	ns = create_mnt_ns(mnt); /* create a mount namespace around the new mount */
	if (IS_ERR(ns))
		panic("Can't allocate initial namespace");

	init_task.nsproxy->mnt_ns = ns; /* install it as init_task's mount namespace */
	get_mnt_ns(ns);

	root.mnt = mnt;
	root.dentry = mnt->mnt_root;
	mnt->mnt_flags |= MNT_LOCKED;

	set_fs_pwd(current->fs, &root); /* working directory of the current task */
	set_fs_root(current->fs, &root); /* root directory of the current task */
}

到这里,第一步的准备工作便完成了,下面可以继续第二步的工作了。

第二步:rest_init()函数。
在这里,将进行initrd文件的解析,或者直接把根文件系统从mtd等硬件上读取出来。
具体流程为:rest_init()->kernel_init()->kernel_init_freeable()
我们来看这个函数。

/*
 * Late part of kernel initialization, run once kthreadd is ready: brings
 * up SMP, runs do_basic_setup(), opens the initial console, and then
 * decides whether an early-userspace init exists in rootfs or whether
 * prepare_namespace() must mount a root file system.
 */
static noinline void __init kernel_init_freeable(void)
{
	/*
	 * Wait until kthreadd is all set-up.
	 */
	wait_for_completion(&kthreadd_done);

	/* Now the scheduler is fully set up and can do blocking allocations */
	gfp_allowed_mask = __GFP_BITS_MASK;

	/*
	 * init can allocate pages on any node
	 */
	set_mems_allowed(node_states[N_MEMORY]);
	/*
	 * init can run on any cpu.
	 */
	set_cpus_allowed_ptr(current, cpu_all_mask);

	cad_pid = task_pid(current);

	smp_prepare_cpus(setup_max_cpus);

	do_pre_smp_initcalls();
	lockup_detector_init();

	smp_init();
	sched_init_smp();

	page_alloc_init_late();

	do_basic_setup(); /* runs the initcalls; populate_rootfs() (a rootfs_initcall) unpacks the initrd here */

	/* Open the /dev/console on the rootfs, this should never fail */
	if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
		pr_err("Warning: unable to open an initial console.\n");

	(void) sys_dup(0); /* duplicate fd 0 twice; presumably yields fds 1 and 2 (stdout/stderr) next to stdin */
	(void) sys_dup(0);
	/*
	 * check if there is an early userspace init.  If yes, let it do all
	 * the work
	 */

	if (!ramdisk_execute_command)
		ramdisk_execute_command = "/init";

	if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {
		ramdisk_execute_command = NULL;
		prepare_namespace(); /* no accessible early init in rootfs: go mount a root file system instead */
	}

	/*
	 * Ok, we have completed the initial bootup, and
	 * we're essentially up and running. Get rid of the
	 * initmem segments and start the user-mode stuff..
	 *
	 * rootfs is available now, try loading the public keys
	 * and default modules
	 */

	integrity_load_keys();
	load_default_modules();
}

总的来说,第二步又分为3小步,首先是解析initrd,若这步没问题,则第三小步不用执行;然后,创建标准输入、输出、错误;最后,若initrd解析失败,则应挂载物理硬件上的文件系统,这时就要根据启动参数去对应硬件挂载根文件系统。下面我们一步步来看。
首先是解析initrd。do_basic_setup()函数会调用do_initcalls(),这个函数会调到populate_rootfs(),在populate_rootfs()里完成从内存中读取并解析initrd的动作。

/*
 * Fill rootfs during boot. The internal initramfs image
 * (__initramfs_start/__initramfs_size) is always unpacked first. If an
 * initrd region is present (initrd_start is non-zero), it is unpacked as
 * well.
 *
 * With CONFIG_BLK_DEV_RAM, an initrd region that does not unpack as an
 * initramfs is written out to /initrd.image instead, after rootfs has been
 * cleaned and the internal image unpacked again.
 *
 * Always returns 0. Panics if the internal initramfs cannot be unpacked.
 */
static int __init populate_rootfs(void)
{
	char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size); /* the internal initramfs image */
	if (err)
		panic("%s", err); /* Failed to decompress INTERNAL initramfs */
	if (initrd_start) { /* presumably non-zero only when an initrd was passed at boot */
#ifdef CONFIG_BLK_DEV_RAM
		int fd;
		printk(KERN_INFO "Trying to unpack rootfs image as initramfs...\n");
		err = unpack_to_rootfs((char *)initrd_start,
			initrd_end - initrd_start); /* try to unpack the initrd region as an initramfs */
		if (!err) {
			free_initrd();
			goto done;
		} else {
			/* Undo the partial unpack and restore the internal image. */
			clean_rootfs();
			unpack_to_rootfs(__initramfs_start, __initramfs_size);
		}
		printk(KERN_INFO "rootfs image is not initramfs (%s)"
				"; looks like an initrd\n", err);
		fd = sys_open("/initrd.image",
			      O_WRONLY|O_CREAT, 0700); /* save the raw image; initrd_load() later reads /initrd.image */
		if (fd >= 0) {
			ssize_t written = xwrite(fd, (char *)initrd_start,
						initrd_end - initrd_start);

			if (written != initrd_end - initrd_start)
				pr_err("/initrd.image: incomplete write (%zd != %ld)\n",
				       written, initrd_end - initrd_start);

			sys_close(fd);
			free_initrd();
		}
	done:
#else
		printk(KERN_INFO "Unpacking initramfs...\n");
		err = unpack_to_rootfs((char *)initrd_start,
			initrd_end - initrd_start);
		if (err)
			printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
		free_initrd();
#endif
		/*
		 * Try loading default modules from initramfs.  This gives
		 * us a chance to load before device_initcalls.
		 */
		load_default_modules();
	}
	return 0;
}
/* Register populate_rootfs() to run at the rootfs initcall level. */
rootfs_initcall(populate_rootfs);

真正解析initrd的函数是unpack_to_rootfs,我们来看下这个函数。

/*
 * Unpack an archive image of @len bytes starting at @buf into rootfs.
 *
 * The image is consumed piece by piece: data beginning with ASCII '0' at a
 * 4-byte aligned header offset is fed straight to the parser state machine,
 * zero bytes are skipped, and anything else is handed to a decompressor
 * chosen by decompress_method().
 *
 * Returns NULL on success, or the first error message recorded in the
 * file-level `message` variable. Panics if the work buffers cannot be
 * allocated.
 */
static char * __init unpack_to_rootfs(char *buf, unsigned long len)
{
	long written;
	decompress_fn decompress;
	const char *compress_name;
	static __initdata char msg_buf[64];
	/* Allocate the scratch buffers used while unpacking. */
	header_buf = kmalloc(110, GFP_KERNEL);
	symlink_buf = kmalloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1, GFP_KERNEL);
	name_buf = kmalloc(N_ALIGN(PATH_MAX), GFP_KERNEL);

	if (!header_buf || !symlink_buf || !name_buf)
		panic("can't allocate buffers");

	state = Start; /* the cpio parser is a state machine; start in its initial state */
	this_header = 0;
	message = NULL;
	while (!message && len) {
		loff_t saved_offset = this_header;
		if (*buf == '0' && !(this_header & 3)) { /* an uncompressed .cpio image takes this path and skips the decompressor */
			state = Start;
			written = write_buffer(buf, len);
			buf += written;
			len -= written;
			continue;
		}
		if (!*buf) {
			/* Skip a zero byte between archive pieces. */
			buf++;
			len--;
			this_header++;
			continue;
		}
		this_header = 0;
		decompress = decompress_method(buf, len, &compress_name); /* pick the decompressor for this data */
		pr_debug("Detected %s compressed data\n", compress_name);
		if (decompress) {
			int res = decompress(buf, len, NULL, flush_buffer, NULL,
				   &my_inptr, error); /* decompress; output goes through flush_buffer(), which adds the entries to rootfs */
			if (res)
				error("decompressor failed");
		} else if (compress_name) {
			/* Format recognized by name, but no decompressor is available for it. */
			if (!message) {
				snprintf(msg_buf, sizeof msg_buf,
					 "compression method %s not configured",
					 compress_name);
				message = msg_buf;
			}
		} else
			error("junk in compressed archive");
		if (state != Reset)
			error("junk in compressed archive");
		/* Advance past the input the decompressor reported as consumed. */
		this_header = saved_offset + my_inptr;
		buf += my_inptr;
		len -= my_inptr;
	}
	dir_utime();
	kfree(name_buf);
	kfree(symlink_buf);
	kfree(header_buf);
	return message;
}

解压过程中传入钩子flush_buffer,flush_buffer又会调用到write_buffer

/*
 * Output callback handed to the decompressor. Pushes one chunk of
 * decompressed data through write_buffer() until the whole chunk has been
 * consumed or an error message has been recorded.
 *
 * Returns -1 if an error message is already pending on entry, otherwise
 * the full length that was passed in.
 */
static long __init flush_buffer(void *bufv, unsigned long len)
{
	const long total = len;
	char *cursor = (char *) bufv;
	long consumed;
	char next;

	if (message)
		return -1;

	for (;;) {
		consumed = write_buffer(cursor, len);
		if (!(consumed < len) || message)
			break;

		/* The parser stopped early; the next byte decides how to resume. */
		next = cursor[consumed];
		if (next != '0' && next != 0) {
			error("junk in compressed archive");
			continue;
		}

		cursor += consumed;
		len -= consumed;
		/* '0' restarts the parser at Start; a zero byte moves it to Reset. */
		state = (next == '0') ? Start : Reset;
	}
	return total;
}

最终会调用到状态机中的相关状态处理函数,这些函数会完成真正的“解析根文件系统”的操作

/*
 * Dispatch table of the archive parser, indexed by the current `state`.
 * write_buffer() keeps calling actions[state]() until a handler returns
 * non-zero.
 */
static __initdata int (*actions[])(void) = {
	[Start]		= do_start,
	[Collect]	= do_collect,
	[GotHeader]	= do_header,
	[SkipIt]	= do_skip,
	[GotName]	= do_name,
	[CopyFile]	= do_copy,
	[GotSymlink]	= do_symlink,
	[Reset]		= do_reset,
};

/*
 * Feed @len bytes at @buf to the parser state machine.
 *
 * The input is published through the file-level `victim` and `byte_count`
 * variables, then the handler for the current state is run repeatedly
 * until one of them returns non-zero.
 *
 * Returns @len minus whatever is left in byte_count afterwards (presumably
 * the number of bytes the handlers consumed).
 */
static long __init write_buffer(char *buf, unsigned long len)
{
	byte_count = len;
	victim = buf;

	for (;;) {
		/* A non-zero return from the state handler ends this run. */
		if (actions[state]())
			break;
	}

	return len - byte_count;
}

如果在rootfs中访问不到ramdisk_execute_command指定的程序(默认为/init,例如没有提供initrd/initramfs,或者解包后其中没有/init),则会走到第三小步,即prepare_namespace()函数

/*
 * Prepare the namespace - decide what/where to mount, load ramdisks, etc.
 *
 * Flow: optional root_delay sleep, wait for device probing, then either
 * mount an "mtd"/"ubi" root directly by name, let initrd_load() handle an
 * initrd, or resolve ROOT_DEV and call mount_root(). Every path ends at
 * `out`, where the mounted tree is moved onto "/" and becomes the root.
 */
void __init prepare_namespace(void)
{
	int is_floppy;

	if (root_delay) {
		printk(KERN_INFO "Waiting %d sec before mounting root device...\n",
		       root_delay);
		ssleep(root_delay);
	}

	/*
	 * wait for the known devices to complete their probing
	 *
	 * Note: this is a potential source of long boot delays.
	 * For example, it is not atypical to wait 5 seconds here
	 * for the touchpad of a laptop to initialize.
	 */
	wait_for_device_probe();

	md_run_setup();

	if (saved_root_name[0]) {
		root_device_name = saved_root_name;
		if (!strncmp(root_device_name, "mtd", 3) ||
		    !strncmp(root_device_name, "ubi", 3)) {
			/*
			 * Root names that start with "mtd" or "ubi" are mounted
			 * directly by name. Only the first three characters are
			 * compared, so a "/dev/..." style name does not match.
			 */
			mount_block_root(root_device_name, root_mountflags);
			goto out;
		}
		ROOT_DEV = name_to_dev_t(root_device_name);
		if (strncmp(root_device_name, "/dev/", 5) == 0)
			root_device_name += 5;
	}

	/*
	 * initrd_load() returns non-zero only after it has loaded
	 * /initrd.image and run handle_initrd(); in that case the normal
	 * root mount below is skipped.
	 */
	if (initrd_load())
		goto out;

	/* wait for any asynchronous scanning to complete */
	if ((ROOT_DEV == 0) && root_wait) {
		printk(KERN_INFO "Waiting for root device %s...\n",
			saved_root_name);
		while (driver_probe_done() != 0 ||
			(ROOT_DEV = name_to_dev_t(saved_root_name)) == 0)
			msleep(100);
		async_synchronize_full();
	}

	is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR;

	if (is_floppy && rd_doload && rd_load_disk(0))
		ROOT_DEV = Root_RAM0;

	mount_root();
out:
	devtmpfs_mount("dev");
	/*
	 * Move the mount at the current directory onto "/". Per the original
	 * note, the current directory was set by mount_root() or
	 * mount_block_root().
	 */
	sys_mount(".", "/", NULL, MS_MOVE, NULL);
	sys_chroot("."); /* make it the root directory */
}
/*
 * Try to bring up an initrd from /initrd.image.
 *
 * Returns 1 when the image was loaded and passed to handle_initrd(),
 * 0 otherwise. /initrd.image is unlinked on every path.
 */
int __init initrd_load(void)
{
	if (!mount_initrd)
		goto no_initrd;

	create_dev("/dev/ram", Root_RAM0);
	/*
	 * The initrd data is loaded into /dev/ram0 first. It is run as an
	 * initrd only when /dev/ram0 is not meant to be the real root
	 * device; otherwise the ram disk is merely set up here and gets
	 * mounted through the normal path.
	 */
	if (!rd_load_image("/initrd.image") || ROOT_DEV == Root_RAM0)
		goto no_initrd;

	/* Handles the root file system image saved earlier as /initrd.image. */
	sys_unlink("/initrd.image");
	handle_initrd();
	return 1;

no_initrd:
	sys_unlink("/initrd.image");
	return 0;
}

终于写完了。

  • 2
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值