f2fs mkfs 格式化过程系列 0

f2fs开源的代码分为f2fs-tools与f2fs kernel代码,f2fs-tools的代码主要是格式化make_f2fs工具,fsck工具等,可以在开源网站

https://git.kernel.org/pub/scm/linux/kernel/git/上面下载。

格式化的源码位于f2fs-tools的f2fs-format.c中,由f2fs_format_device()执行,此函数中依次填充sb, sit, nat, root_dir, cp等。系列1分析了格式化过程往cp block中写了哪些数据,本次分析一下源码往super block里面写了些什么。个人以为,只有了解了磁盘格式,才能真正理解一个文件系统。

本次看一下f2fs_prepare_super_block()函数。


/*
 * f2fs_prepare_super_block - compute the volume layout and fill the superblock.
 *
 * Reads the global configuration `c` and populates `sb` (through set_sb() and
 * a few direct field writes) with: magic/version, the log2 geometry values,
 * the start block and segment count of each metadata area (checkpoint, SIT,
 * NAT, SSA), the main area start and size, cp_payload, uuid, volume name,
 * reserved inode numbers, quota inode numbers, version strings and features.
 *
 * Side effects on `c` visible in this function: per-device total_segments /
 * start_blkaddr / end_blkaddr, total_segments, overprovision (when it was 0),
 * reserved_segments, chksum_seed, next_free_nid, lpf_ino and cur_seg[].
 *
 * Returns 0 on success and -1 when: zoned mode is on and the start sector or
 * segment0 is misaligned, a host-managed zoned device has too few random
 * write zones, the device is too small, or there are 6 or fewer main zones.
 */
static int f2fs_prepare_super_block(void)
{
	u_int32_t blk_size_bytes;
	u_int32_t log_sectorsize, log_sectors_per_block;
	u_int32_t log_blocksize, log_blks_per_seg;
	u_int32_t segment_size_bytes, zone_size_bytes;
	u_int32_t sit_segments, nat_segments;
	u_int32_t blocks_for_sit, blocks_for_nat, blocks_for_ssa;
	u_int32_t total_valid_blks_available;
	u_int64_t zone_align_start_offset, diff;
	u_int64_t total_meta_zones, total_meta_segments;
	u_int32_t sit_bitmap_size, max_sit_bitmap_size;
	u_int32_t max_nat_bitmap_size, max_nat_segments;
	u_int32_t total_zones;
	enum quota_type qtype;
	int i;

	/* superblock magic number and format version */
	set_sb(magic, F2FS_SUPER_MAGIC);
	set_sb(major_ver, F2FS_MAJOR_VERSION);
	set_sb(minor_ver, F2FS_MINOR_VERSION);

    
	log_sectorsize = log_base_2(c.sector_size);//log2 of the sector size in bytes
	log_sectors_per_block = log_base_2(c.sectors_per_blk);//log2 of sectors per block
	log_blocksize = log_sectorsize + log_sectors_per_block;//log2 of the block size in bytes
	log_blks_per_seg = log_base_2(c.blks_per_seg);//log2 of blocks per segment

	set_sb(log_sectorsize, log_sectorsize);
	set_sb(log_sectors_per_block, log_sectors_per_block);

	set_sb(log_blocksize, log_blocksize);
	set_sb(log_blocks_per_seg, log_blks_per_seg);

	set_sb(segs_per_sec, c.segs_per_sec);
	set_sb(secs_per_zone, c.secs_per_zone);

	blk_size_bytes = 1 << log_blocksize;//block size in bytes
	segment_size_bytes = blk_size_bytes * c.blks_per_seg;//segment size in bytes
	/*
	 * zone size in bytes =
	 * block size * sections per zone * segments per section * blocks per segment
	 */
	zone_size_bytes =
		blk_size_bytes * c.secs_per_zone *
		c.segs_per_sec * c.blks_per_seg;

	set_sb(checksum_offset, 0);

	set_sb(block_count, c.total_sectors >> log_sectors_per_block);//total blocks: sector count converted to blocks

	/*
	 * Zone alignment: round (device start in bytes + 2 * F2FS_BLKSIZE) up to
	 * a zone_size_bytes boundary, then subtract the device start again so the
	 * result is a byte offset relative to the start sector.
	 * NOTE(review): the 2 * F2FS_BLKSIZE presumably leaves room for the
	 * superblock copies in front of segment 0 — confirm.
	 */
	zone_align_start_offset =
		(c.start_sector * c.sector_size +
		2 * F2FS_BLKSIZE + zone_size_bytes - 1) /
		zone_size_bytes * zone_size_bytes -
		c.start_sector * c.sector_size;

	/* a start sector that is not block aligned is fatal only in zoned mode */
	if (c.start_sector % c.sectors_per_blk) {
		MSG(1, "\t%s: Align start sector number to the page unit\n",
				c.zoned_mode ? "FAIL" : "WARN");
		MSG(1, "\ti.e., start sector: %d, ofs:%d (sects/page: %d)\n",
				c.start_sector,
				c.start_sector % c.sectors_per_blk,
				c.sectors_per_blk);
		if (c.zoned_mode)
			return -1;
	}
	/* segment0_blkaddr is the aligned start offset expressed in blocks */
	set_sb(segment0_blkaddr, zone_align_start_offset / blk_size_bytes);
	sb->cp_blkaddr = sb->segment0_blkaddr;//the checkpoint area starts at segment 0

	MSG(0, "Info: zone aligned segment0 blkaddr: %u\n",
					get_sb(segment0_blkaddr));

	/* in zoned mode segment 0 must land on a zone boundary of the device */
	if (c.zoned_mode && (get_sb(segment0_blkaddr) + c.start_sector /
					c.sectors_per_blk) % c.zone_blocks) {
		MSG(1, "\tError: Unaligned segment0 block address %u\n",
				get_sb(segment0_blkaddr));
		return -1;
	}

	/*
	 * Per-device segment counts and block address ranges. Device 0 loses the
	 * space in front of segment 0; every further device starts right after
	 * the previous device's last block.
	 */
	for (i = 0; i < c.ndevs; i++) {
		if (i == 0) {
			c.devices[i].total_segments =
				(c.devices[i].total_sectors *
				c.sector_size - zone_align_start_offset) /
				segment_size_bytes;
			c.devices[i].start_blkaddr = 0;
			c.devices[i].end_blkaddr = c.devices[i].total_segments *
						c.blks_per_seg - 1 +
						sb->segment0_blkaddr;
		} else {
			c.devices[i].total_segments =
				c.devices[i].total_sectors /
				(c.sectors_per_blk * c.blks_per_seg);
			c.devices[i].start_blkaddr =
					c.devices[i - 1].end_blkaddr + 1;
			c.devices[i].end_blkaddr = c.devices[i].start_blkaddr +
					c.devices[i].total_segments *
					c.blks_per_seg - 1;
		}
		/* the device table is recorded in the superblock only for multi-device volumes */
		if (c.ndevs > 1) {
			memcpy(sb->devs[i].path, c.devices[i].path, MAX_PATH_LEN);
			sb->devs[i].total_segments =
					cpu_to_le32(c.devices[i].total_segments);
		}

		c.total_segments += c.devices[i].total_segments;
	}
	set_sb(segment_count, (c.total_segments / c.segs_per_zone *
						c.segs_per_zone));//total segments, rounded down to whole zones
	set_sb(segment_count_ckpt, F2FS_NUMBER_OF_CHECKPOINT_PACK);//segments used by the checkpoint area

	set_sb(sit_blkaddr, get_sb(segment0_blkaddr) +
			get_sb(segment_count_ckpt) * c.blks_per_seg);//SIT area starts right after the checkpoint segments

	blocks_for_sit = SIZE_ALIGN(get_sb(segment_count), SIT_ENTRY_PER_BLOCK);

	sit_segments = SEG_ALIGN(blocks_for_sit);

	set_sb(segment_count_sit, sit_segments * 2);//stored as twice sit_segments (two SIT areas)

	set_sb(nat_blkaddr, get_sb(sit_blkaddr) + get_sb(segment_count_sit) *
			c.blks_per_seg);//NAT area follows the SIT area

	total_valid_blks_available = (get_sb(segment_count) -
			(get_sb(segment_count_ckpt) +
			get_sb(segment_count_sit))) * c.blks_per_seg;

	blocks_for_nat = SIZE_ALIGN(total_valid_blks_available,
			NAT_ENTRY_PER_BLOCK);//number of blocks the NAT area needs

	if (c.large_nat_bitmap) {
		nat_segments = SEG_ALIGN(blocks_for_nat) *
						DEFAULT_NAT_ENTRY_RATIO / 100;
		set_sb(segment_count_nat, nat_segments ? nat_segments : 1);
		max_nat_bitmap_size = (get_sb(segment_count_nat) <<
						log_blks_per_seg) / 8;
		set_sb(segment_count_nat, get_sb(segment_count_nat) * 2);
	} else {//large_nat_bitmap not set: provisional count, clamped and doubled further below
		set_sb(segment_count_nat, SEG_ALIGN(blocks_for_nat));
		max_nat_bitmap_size = 0;
	}

	/*
	 * The number of node segments should not be exceeded a "Threshold".
	 * This number resizes NAT bitmap area in a CP page.
	 * So the threshold is determined not to overflow one CP page
	 */
	sit_bitmap_size = ((get_sb(segment_count_sit) / 2) <<
				log_blks_per_seg) / 8;//bitmap bytes for one SIT area: one bit per block of half the SIT segments

	if (sit_bitmap_size > MAX_SIT_BITMAP_SIZE)
		max_sit_bitmap_size = MAX_SIT_BITMAP_SIZE;
	else
		max_sit_bitmap_size = sit_bitmap_size;

	if (c.large_nat_bitmap) {
		/* use cp_payload if free space of f2fs_checkpoint is not enough */
		if (max_sit_bitmap_size + max_nat_bitmap_size >
						MAX_BITMAP_SIZE_IN_CKPT) {
			/* NOTE(review): this local shadows the function-level u_int64_t diff */
			u_int32_t diff =  max_sit_bitmap_size +
						max_nat_bitmap_size -
						MAX_BITMAP_SIZE_IN_CKPT;
			set_sb(cp_payload, F2FS_BLK_ALIGN(diff));
		} else {
			set_sb(cp_payload, 0);
		}
	} else {
		/*
		 * It should be reserved minimum 1 segment for nat.
		 * When sit is too large, we should expand cp area.
		 * It requires more pages for cp.
		 */

		/*
		 * Worked example from the original article, assuming 2 MiB segments
		 * of 4 KiB blocks (TODO confirm these defaults): 512 blocks per
		 * segment, 512 / 8 = 64, so one segment needs 64 bytes of bitmap.
		 * MAX_SIT_BITMAP_SIZE_IN_CKPT is the most SIT bitmap space that
		 * may live inside the checkpoint block itself.
		 */
		if (max_sit_bitmap_size > MAX_SIT_BITMAP_SIZE_IN_CKPT) {
			/*
			 * The SIT bitmap exceeds what the checkpoint block may hold:
			 * the whole in-checkpoint bitmap space goes to the NAT bitmap.
			 */
			max_nat_bitmap_size = CHECKSUM_OFFSET -
					sizeof(struct f2fs_checkpoint) + 1;
			/* extra checkpoint blocks (cp_payload) then carry the SIT bitmap */
			set_sb(cp_payload, F2FS_BLK_ALIGN(max_sit_bitmap_size));
	        } else {
			/* otherwise the NAT bitmap gets whatever the SIT bitmap leaves free */
			max_nat_bitmap_size =
				CHECKSUM_OFFSET - sizeof(struct f2fs_checkpoint) + 1
				- max_sit_bitmap_size;
			set_sb(cp_payload, 0);//no extra checkpoint blocks needed for the SIT bitmap
		}
		/* largest number of NAT segments the NAT bitmap can describe */
		max_nat_segments = (max_nat_bitmap_size * 8) >> log_blks_per_seg;

		/* clamp the NAT segment count to what the bitmap can describe */
		if (get_sb(segment_count_nat) > max_nat_segments)
			set_sb(segment_count_nat, max_nat_segments);
		/* doubled, as for SIT (two NAT areas) */
		set_sb(segment_count_nat, get_sb(segment_count_nat) * 2);
	}
	/* the SSA area follows the NAT area */
	set_sb(ssa_blkaddr, get_sb(nat_blkaddr) + get_sb(segment_count_nat) *
			c.blks_per_seg);
	/* blocks remaining once the checkpoint, SIT and NAT segments are taken out */
	total_valid_blks_available = (get_sb(segment_count) -
			(get_sb(segment_count_ckpt) +
			get_sb(segment_count_sit) +
			get_sb(segment_count_nat))) *
			c.blks_per_seg;
	/* SSA blocks: one per remaining segment, plus one */
	blocks_for_ssa = total_valid_blks_available /
				c.blks_per_seg + 1;

	set_sb(segment_count_ssa, SEG_ALIGN(blocks_for_ssa));
	/* total segments taken by checkpoint + SIT + NAT + SSA */
	total_meta_segments = get_sb(segment_count_ckpt) +
		get_sb(segment_count_sit) +
		get_sb(segment_count_nat) +
		get_sb(segment_count_ssa);
	/* pad the SSA so the metadata ends on a zone boundary */
	diff = total_meta_segments % (c.segs_per_zone);
	if (diff)
		set_sb(segment_count_ssa, get_sb(segment_count_ssa) +
			(c.segs_per_zone - diff));
	/*
	 * Metadata size in zones. total_meta_segments is not refreshed after the
	 * padding above; presumably ZONE_ALIGN rounds up to whole zones, which
	 * would give the same result — TODO confirm against the macro.
	 */
	total_meta_zones = ZONE_ALIGN(total_meta_segments *
						c.blks_per_seg);
	/* the main area starts right after the metadata zones */
	set_sb(main_blkaddr, get_sb(segment0_blkaddr) + total_meta_zones *
				c.segs_per_zone * c.blks_per_seg);

	if (c.zoned_mode) {
		/*
		 * Make sure there is enough randomly writeable
		 * space at the beginning of the disk.
		 */
		unsigned long main_blkzone = get_sb(main_blkaddr) / c.zone_blocks;

		if (c.devices[0].zoned_model == F2FS_ZONED_HM &&
				c.devices[0].nr_rnd_zones < main_blkzone) {
			MSG(0, "\tError: Device does not have enough random "
					"write zones for F2FS volume (%lu needed)\n",
					main_blkzone);
			return -1;
		}
	}
	/* zones left for the main area: all zones minus the metadata zones */
	total_zones = get_sb(segment_count) / (c.segs_per_zone) -
							total_meta_zones;
	/* sections in the main area = main zones * sections per zone */
	set_sb(section_count, total_zones * c.secs_per_zone);
	/* segments in the main area = sections * segments per section */
	set_sb(segment_count_main, get_sb(section_count) * c.segs_per_sec);

	/* Let's determine the best reserved and overprovisioned space */
	if (c.overprovision == 0)
		c.overprovision = get_best_overprovision(sb);

	/*
	 * NOTE(review): c.reserved_segments is read in the last condition below
	 * before this function assigns it (the assignment follows this check),
	 * so the comparison uses whatever value it held on entry — confirm that
	 * this ordering is intended.
	 */
	if (c.overprovision == 0 || c.total_segments < F2FS_MIN_SEGMENTS ||
		(c.devices[0].total_sectors *
			c.sector_size < zone_align_start_offset) ||
		(get_sb(segment_count_main) - 2) < c.reserved_segments) {
		MSG(0, "\tError: Device size is not sufficient for F2FS volume\n");
		return -1;
	}
	/*
	 * Reserved segment count derived from the overprovision ratio
	 * (the original article notes these are kept for GC and similar use).
	 */
	c.reserved_segments =
			(2 * (100 / c.overprovision + 1) + 6)
			* c.segs_per_sec;

	uuid_generate(sb->uuid);

	/* precompute checksum seed for metadata */
	if (c.feature & cpu_to_le32(F2FS_FEATURE_INODE_CHKSUM))
		c.chksum_seed = f2fs_cal_crc32(~0, sb->uuid, sizeof(sb->uuid));

	utf8_to_utf16(sb->volume_name, (const char *)c.vol_label,
				MAX_VOLUME_NAME, strlen(c.vol_label));
	set_sb(node_ino, 1);//node inode number is 1
	set_sb(meta_ino, 2);//meta inode number is 2
	set_sb(root_ino, 3);//root inode number is 3
	c.next_free_nid = 4;//next free node id is 4

	/* quota: choose which quota types get an inode (user and group, optionally project) */
	if (c.feature & cpu_to_le32(F2FS_FEATURE_QUOTA_INO)) {
		quotatype_bits = QUOTA_USR_BIT | QUOTA_GRP_BIT;
		if (c.feature & cpu_to_le32(F2FS_FEATURE_PRJQUOTA))
			quotatype_bits |= QUOTA_PRJ_BIT;
	}

	/* hand out the next free node id to every selected quota type */
	for (qtype = 0; qtype < F2FS_MAX_QUOTAS; qtype++) {
		if (!((1 << qtype) & quotatype_bits))
			continue;
		sb->qf_ino[qtype] = cpu_to_le32(c.next_free_nid++);
		MSG(0, "Info: add quota type = %u => %u\n",
					qtype, c.next_free_nid - 1);
	}
	/* with the LOST_FOUND feature, take one more node id and keep it in c.lpf_ino */
	if (c.feature & cpu_to_le32(F2FS_FEATURE_LOST_FOUND))
		c.lpf_ino = c.next_free_nid++;

	if (total_zones <= 6) {
		MSG(1, "\tError: %d zones: Need more zones "
			"by shrinking zone size\n", total_zones);
		return -1;
	}
	/*
	 * Initial placement of the six current segments. With c.heap set, the
	 * hot node log starts from the last zone and cold data from 0; otherwise
	 * the hot node log starts at 0 and the others are derived from it.
	 */
	if (c.heap) {
		c.cur_seg[CURSEG_HOT_NODE] =
				last_section(last_zone(total_zones));
		c.cur_seg[CURSEG_WARM_NODE] = prev_zone(CURSEG_HOT_NODE);
		c.cur_seg[CURSEG_COLD_NODE] = prev_zone(CURSEG_WARM_NODE);
		c.cur_seg[CURSEG_HOT_DATA] = prev_zone(CURSEG_COLD_NODE);
		c.cur_seg[CURSEG_COLD_DATA] = 0;
		c.cur_seg[CURSEG_WARM_DATA] = next_zone(CURSEG_COLD_DATA);
	} else {
		c.cur_seg[CURSEG_HOT_NODE] = 0;
		c.cur_seg[CURSEG_WARM_NODE] = next_zone(CURSEG_HOT_NODE);
		c.cur_seg[CURSEG_COLD_NODE] = next_zone(CURSEG_WARM_NODE);
		c.cur_seg[CURSEG_HOT_DATA] = next_zone(CURSEG_COLD_NODE);
		c.cur_seg[CURSEG_COLD_DATA] =
				max(last_zone((total_zones >> 2)),
					next_zone(CURSEG_COLD_NODE));
		c.cur_seg[CURSEG_WARM_DATA] =
				max(last_zone((total_zones >> 1)),
					next_zone(CURSEG_COLD_DATA));
	}

	/* if there is redundancy, reassign it */
	verify_cur_segs();

	cure_extension_list();

	/* get kernel version */
	if (c.kd >= 0) {
		dev_read_version(c.version, 0, VERSION_LEN);
		get_kernel_version(c.version);
		MSG(0, "Info: format version with\n  \"%s\"\n", c.version);
	} else {
		get_kernel_uname_version(c.version);
	}

	/* the same version string is stored as both current and initial version */
	memcpy(sb->version, c.version, VERSION_LEN);
	memcpy(sb->init_version, c.version, VERSION_LEN);

	sb->feature = c.feature;

	return 0;
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值