【学习分享】linux ext4默认ordered模式加载源码分析

Jason Yan

已于 2023-11-21 10:40:52 修改

阅读量295

点赞数

分类专栏： Linux高阶学习专栏文章标签：学习 linux

于 2023-11-21 10:35:16 首次发布

本文链接：https://blog.csdn.net/Jason_Yansir/article/details/134524231

版权

Linux高阶学习专栏专栏收录该内容

3 篇文章 0 订阅

订阅专栏

文章目录

概要

从Log中看到ext4的mount是ordered模式，那么为什么是这个模式，而不是journal或者writeback模式呢？

[/init]: RootDevice is "/dev/mmcblk0p7"
[/init]: Try to load EMMC ...
e2fsck 1.41.14 (22-Dec-2010)
/dev/mmcblk0p7: recovering journal
/dev/mmcblk0p7: clean, 8845/192768 files, 140292/770048 blocks
[    4.875543] EXT4-fs (mmcblk0p7): barriers disabled
[    4.882724] EXT4-fs (mmcblk0p7): mounted filesystem with ordered data mode. Opts: noauto_da_al

源码分析

从源代码手册中可以查询到默认是ordered模式

data=journal		All data are committed into the journal prior to being
			written into the main file system.  Enabling
			this mode will disable delayed allocation and
			O_DIRECT support.

data=ordered	(*)	All data are forced directly out to the main file
			system prior to its metadata being committed to the
			journal.

data=writeback		Data ordering is not preserved, data may be written
			into the main file system after its metadata has been
			committed to the journal.

先从Log定位到源码位置
fs/ext4/super.c/ext4_fill_super(struct super_block *sb, void *data, int silent)

	ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
		 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
		 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);

打印是由descr而来

	if (EXT4_SB(sb)->s_journal) {
		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
			descr = " journalled data mode";
		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
			descr = " ordered data mode";
		else
			descr = " writeback data mode";
	} else
		descr = "out journal";

这段乍一看比较难理解，先来看相关宏的定义。

#define EXT4_MOUNT_DATA_FLAGS		0x00C00	/* Mode for data writes: */
#define EXT4_MOUNT_JOURNAL_DATA		0x00400	/* Write data to journal */
#define EXT4_MOUNT_ORDERED_DATA		0x00800	/* Flush data before commit */
#define EXT4_MOUNT_WRITEBACK_DATA	0x00C00	/* No data ordering */

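EXT4_MOUNT_DATA_FLAGS是0x00C00，和EXT4_MOUNT_WRITEBACK_DATA的值相同，所以大多数人会认为走的是EXT4_MOUNT_WRITEBACK_DATA，那不就是writeback模式吗？其实EXT4_MOUNT_DATA_FLAGS只是覆盖这两个模式位的掩码，本身并不代表某一种模式；真正的模式要看s_mount_opt与该掩码按位与之后的结果。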

#define EXT4_MOUNT_DATA_FLAGS		0x00C00	/* Mode for data writes: */

带着疑问还是要继续看代码，重点是这句。

test_opt(sb, DATA_FLAGS)

查看test_opt定义，还有一个参数影响到了判断条件，那就是sb->s_mount_opt。

#define test_opt(sb, opt)		(EXT4_SB(sb)->s_mount_opt & \
					 EXT4_MOUNT_##opt)

这是ext4_sb_info超级块信息中的一个参数。同一个位置还有这样一个定义，这不就是s_mount_opt的赋值吗？

#define set_opt(sb, opt)		EXT4_SB(sb)->s_mount_opt |= \
						EXT4_MOUNT_##opt

从set_opt我们又找到了判断的地方

	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
		set_opt(sb, JOURNAL_DATA);
	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
		set_opt(sb, ORDERED_DATA);
	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
		set_opt(sb, WRITEBACK_DATA);

也就是说，设置哪种模式取决于def_mount_opts与掩码EXT4_DEFM_JMODE按位与之后的结果。

/*
 * Default mount options
 */
#define EXT4_DEFM_DEBUG		0x0001
#define EXT4_DEFM_BSDGROUPS	0x0002
#define EXT4_DEFM_XATTR_USER	0x0004
#define EXT4_DEFM_ACL		0x0008
#define EXT4_DEFM_UID16		0x0010
#define EXT4_DEFM_JMODE		0x0060
#define EXT4_DEFM_JMODE_DATA	0x0020
#define EXT4_DEFM_JMODE_ORDERED	0x0040
#define EXT4_DEFM_JMODE_WBACK	0x0060
#define EXT4_DEFM_NOBARRIER	0x0100
#define EXT4_DEFM_BLOCK_VALIDITY 0x0200
#define EXT4_DEFM_DISCARD	0x0400
#define EXT4_DEFM_NODELALLOC	0x0800

又震惊了一把，乍一看默认选项又是writeback模式。不过EXT4_DEFM_JMODE（0x0060）同样只是一个掩码，用来从def_mount_opts中取出表示日志模式的两个bit，它的值恰好与EXT4_DEFM_JMODE_WBACK相同而已。

#define EXT4_DEFM_JMODE		0x0060

不慌，再找找看def_mount_opts的赋值在哪里

	/* Set defaults before we parse the mount options */
	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);

这个参数定位下s_default_mount_opts，在庞大的超级块结构中，我们终于找到了。

/*
 * Structure of the super block
 */
struct ext4_super_block {
/*00*/	__le32	s_inodes_count;		/* Inodes count */
	__le32	s_blocks_count_lo;	/* Blocks count */
	__le32	s_r_blocks_count_lo;	/* Reserved blocks count */
	__le32	s_free_blocks_count_lo;	/* Free blocks count */
/*10*/	__le32	s_free_inodes_count;	/* Free inodes count */
	__le32	s_first_data_block;	/* First Data Block */
	__le32	s_log_block_size;	/* Block size */
	__le32	s_log_cluster_size;	/* Allocation cluster size */
/*20*/	__le32	s_blocks_per_group;	/* # Blocks per group */
	__le32	s_clusters_per_group;	/* # Clusters per group */
	__le32	s_inodes_per_group;	/* # Inodes per group */
	__le32	s_mtime;		/* Mount time */
/*30*/	__le32	s_wtime;		/* Write time */
	__le16	s_mnt_count;		/* Mount count */
	__le16	s_max_mnt_count;	/* Maximal mount count */
	__le16	s_magic;		/* Magic signature */
	__le16	s_state;		/* File system state */
	__le16	s_errors;		/* Behaviour when detecting errors */
	__le16	s_minor_rev_level;	/* minor revision level */
/*40*/	__le32	s_lastcheck;		/* time of last check */
	__le32	s_checkinterval;	/* max. time between checks */
	__le32	s_creator_os;		/* OS */
	__le32	s_rev_level;		/* Revision level */
/*50*/	__le16	s_def_resuid;		/* Default uid for reserved blocks */
	__le16	s_def_resgid;		/* Default gid for reserved blocks */
	/*
	 * These fields are for EXT4_DYNAMIC_REV superblocks only.
	 *
	 * Note: the difference between the compatible feature set and
	 * the incompatible feature set is that if there is a bit set
	 * in the incompatible feature set that the kernel doesn't
	 * know about, it should refuse to mount the filesystem.
	 *
	 * e2fsck's requirements are more strict; if it doesn't know
	 * about a feature in either the compatible or incompatible
	 * feature set, it must abort and not try to meddle with
	 * things it doesn't understand...
	 */
	__le32	s_first_ino;		/* First non-reserved inode */
	__le16  s_inode_size;		/* size of inode structure */
	__le16	s_block_group_nr;	/* block group # of this superblock */
	__le32	s_feature_compat;	/* compatible feature set */
/*60*/	__le32	s_feature_incompat;	/* incompatible feature set */
	__le32	s_feature_ro_compat;	/* readonly-compatible feature set */
/*68*/	__u8	s_uuid[16];		/* 128-bit uuid for volume */
/*78*/	char	s_volume_name[16];	/* volume name */
/*88*/	char	s_last_mounted[64];	/* directory where last mounted */
/*C8*/	__le32	s_algorithm_usage_bitmap; /* For compression */
	/*
	 * Performance hints.  Directory preallocation should only
	 * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
	 */
	__u8	s_prealloc_blocks;	/* Nr of blocks to try to preallocate*/
	__u8	s_prealloc_dir_blocks;	/* Nr to preallocate for dirs */
	__le16	s_reserved_gdt_blocks;	/* Per group desc for online growth */
	/*
	 * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set.
	 */
/*D0*/	__u8	s_journal_uuid[16];	/* uuid of journal superblock */
/*E0*/	__le32	s_journal_inum;		/* inode number of journal file */
	__le32	s_journal_dev;		/* device number of journal file */
	__le32	s_last_orphan;		/* start of list of inodes to delete */
	__le32	s_hash_seed[4];		/* HTREE hash seed */
	__u8	s_def_hash_version;	/* Default hash version to use */
	__u8	s_jnl_backup_type;
	__le16  s_desc_size;		/* size of group descriptor */
/*100*/	__le32	s_default_mount_opts;
	__le32	s_first_meta_bg;	/* First metablock block group */
	__le32	s_mkfs_time;		/* When the filesystem was created */
	__le32	s_jnl_blocks[17];	/* Backup of the journal inode */
	/* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
/*150*/	__le32	s_blocks_count_hi;	/* Blocks count */
	__le32	s_r_blocks_count_hi;	/* Reserved blocks count */
	__le32	s_free_blocks_count_hi;	/* Free blocks count */
	__le16	s_min_extra_isize;	/* All inodes have at least # bytes */
	__le16	s_want_extra_isize; 	/* New inodes should reserve # bytes */
	__le32	s_flags;		/* Miscellaneous flags */
	__le16  s_raid_stride;		/* RAID stride */
	__le16  s_mmp_update_interval;  /* # seconds to wait in MMP checking */
	__le64  s_mmp_block;            /* Block for multi-mount protection */
	__le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
	__u8	s_log_groups_per_flex;  /* FLEX_BG group size */
	__u8	s_checksum_type;	/* metadata checksum algorithm used */
	__le16  s_reserved_pad;
	__le64	s_kbytes_written;	/* nr of lifetime kilobytes written */
	__le32	s_snapshot_inum;	/* Inode number of active snapshot */
	__le32	s_snapshot_id;		/* sequential ID of active snapshot */
	__le64	s_snapshot_r_blocks_count; /* reserved blocks for active
					      snapshot's future use */
	__le32	s_snapshot_list;	/* inode number of the head of the
					   on-disk snapshot list */
#define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count)
	__le32	s_error_count;		/* number of fs errors */
	__le32	s_first_error_time;	/* first time an error happened */
	__le32	s_first_error_ino;	/* inode involved in first error */
	__le64	s_first_error_block;	/* block involved of first error */
	__u8	s_first_error_func[32];	/* function where the error happened */
	__le32	s_first_error_line;	/* line number where error happened */
	__le32	s_last_error_time;	/* most recent time of an error */
	__le32	s_last_error_ino;	/* inode involved in last error */
	__le32	s_last_error_line;	/* line number where error happened */
	__le64	s_last_error_block;	/* block involved of last error */
	__u8	s_last_error_func[32];	/* function where the error happened */
#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
	__u8	s_mount_opts[64];
	__le32	s_usr_quota_inum;	/* inode for tracking user quota */
	__le32	s_grp_quota_inum;	/* inode for tracking group quota */
	__le32	s_overhead_clusters;	/* overhead blocks/clusters in fs */
	__le32	s_reserved[108];	/* Padding to the end of the block */
	__le32	s_checksum;		/* crc32c(superblock) */
};

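所以定义的值呢？注意左边注释里的100并不是这个字段的值，而是s_default_mount_opts在超级块结构体中的字节偏移（十六进制0x100）；字段的实际值保存在磁盘上的超级块里，挂载时由上面的le32_to_cpu读出，可以用tune2fs -l或dumpe2fs -h查看（Default mount options一项）。

/*100*/	__le32	s_default_mount_opts;

如果磁盘上该字段的日志模式位是0x0040（按bit看就是 10 & 11 = 10），那么命中的就是EXT4_DEFM_JMODE_ORDERED；如果这两位都是0（mkfs创建文件系统时通常不会设置这两位），上面三个分支都不会命中，此时ext4_fill_super在加载journal之后发现test_opt(sb, DATA_FLAGS)为0，会根据journal的能力选择默认模式：journal支持revoke特性时设置为ORDERED_DATA，否则设置为JOURNAL_DATA。所以最终还是ordered模式。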

#define EXT4_DEFM_JMODE_ORDERED	0x0040

那么这个参数怎么修改呢？根据下文提示
Ext4文件系统挂载默认选项
其实就是tune2fs工具展示的Default mount options，这个值是在磁盘上永久保存的，一般都是当mkfs创建文件系统的时候写入，也可以通过tune2fs工具来修改。

那也就是说是默认已经烧录进去的，一般也不需要去修改。毕竟ordered也基本上可以满足需求了吧。

小结

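通过以上分析，终于知道为什么ext4是按照ordered模式加载的，如果需要修改成其他模式的话可以借助下tune2fs工具（例如 tune2fs -o journal_data_writeback /dev/mmcblk0p7 修改磁盘上的默认挂载选项），或者在挂载时用 -o data=writeback 强制指定。在终端直接修改试了下没成功，一个可能的原因是ext4不允许通过remount切换data模式，已挂载的根文件系统需要通过内核启动参数rootflags=data=writeback指定，或者卸载后重新挂载；有了解的彦祖可以提供下思路。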
在这里插入图片描述

Jason Yan

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
打赏
0
评论
【学习分享】linux ext4默认ordered模式加载源码分析

通过以上分析，终于知道为什么ext4是按照ordered模式加载的，如果需要修改成其他模式的话可以借助下tune2fs工具，或者强制修改试下。其实就是tune2fs工具展示的Default mount options，这个值是在磁盘上永久保存的，一般都是当mkfs创建文件系统的时候写入，也可以通过tune2fs工具来修改。EXT4_MOUNT_DATA_FLAGS是0x00C00,所以大多数人会认为走的是EXT4_MOUNT_WRITEBACK_DATA，那不就是writeback模式吗？
复制链接

扫一扫