本文以f2fs-tools 1.9为例,描述一下f2fs格式化过程做了哪些事,都往f2fs metadata里面写了什么东西。
f2fs metadata包含sb、cp(2 segments)、sit、nat、ssa。
先从cp开始,看一下格式化过程往这两个check point里面写了哪些数据。
/*
 * f2fs_write_check_point_pack() - build the initial checkpoint and write
 * both checkpoint packs to the device.
 *
 * Checkpoint pack 1, starting at the superblock's segment0_blkaddr, is
 * written in this order:
 *   cp page 1, cp payload blocks (get_sb(cp_payload) of them), one compact
 *   summary block (NAT journal + SIT journal + hot data summary), three
 *   node summary blocks (hot/warm/cold), cp page 2, and - only when
 *   CP_NAT_BITS_FLAG was set - the NAT bits blocks placed at the end of the
 *   first (1 << log_blocks_per_seg) blocks.
 *
 * Checkpoint pack 2 starts c.blks_per_seg blocks after segment0_blkaddr.
 * Only its cp page 1, cp payload blocks and cp page 2 are written here,
 * with checkpoint_ver forced to 0 and the CRC recomputed.
 *
 * `cp`, `sb` and `c` are not declared in this function; they are expected
 * to be provided by the enclosing file scope (not visible here).
 *
 * Return: 0 on success; -1 when an allocation fails, when the superblock
 * block size differs from F2FS_BLKSIZE, or when a device write fails.
 * Every buffer allocated here (including `cp`) is freed before returning.
 */
static int f2fs_write_check_point_pack(void)
{
struct f2fs_summary_block *sum = NULL;
struct f2fs_journal *journal;
u_int32_t blk_size_bytes;
u_int32_t nat_bits_bytes, nat_bits_blocks;
unsigned char *nat_bits = NULL, *empty_nat_bits;
u_int64_t cp_seg_blk = 0;
u_int32_t crc = 0, flags;
unsigned int i;
char *cp_payload = NULL;
char *sum_compact, *sum_compact_p;
struct f2fs_summary *sum_entry;
int ret = -1;
// One zeroed block for the checkpoint structure itself.
cp = calloc(F2FS_BLKSIZE, 1);
if (cp == NULL) {
MSG(1, "\tError: Calloc Failed for f2fs_checkpoint!!!\n");
return ret;
}
// Scratch summary block: used to stage the journals and, later, written
// out as the node summary blocks.
sum = calloc(F2FS_BLKSIZE, 1);
if (sum == NULL) {
MSG(1, "\tError: Calloc Failed for summay_node!!!\n");
goto free_cp;
}
// Zeroed block that receives the compact summary (journals + data summary).
sum_compact = calloc(F2FS_BLKSIZE, 1);
if (sum_compact == NULL) {
MSG(1, "\tError: Calloc Failed for summay buffer!!!\n");
goto free_sum;
}
// Write cursor into sum_compact.
sum_compact_p = sum_compact;
// Size in bytes of one NAT bitmap (one bit per NAT block).
// Why "<< 5" (i.e. segment_count_nat * 32)?
// segment_count_nat counts two copies of the NAT, so one copy spans
// segment_count_nat / 2 segments. Assuming 512 blocks per segment (TODO
// confirm; not shown here), that is segment_count_nat * 512 / 2 blocks,
// and one bit per block means dividing by 8 to get bytes:
// segment_count_nat * 512 / 2 / 8 == segment_count_nat << 5.
nat_bits_bytes = get_sb(segment_count_nat) << 5;
// Same quantity expressed in blocks: two bitmaps of nat_bits_bytes plus an
// 8-byte header, with F2FS_BLKSIZE - 1 added so the conversion rounds up
// (presumably F2FS_BYTES_TO_BLK truncates - confirm).
nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
F2FS_BLKSIZE - 1);
nat_bits = calloc(F2FS_BLKSIZE, nat_bits_blocks);
if (nat_bits == NULL) {
MSG(1, "\tError: Calloc Failed for nat bits buffer!!!\n");
goto free_sum_compact;
}
// Zeroed block used as the content of every cp payload block.
cp_payload = calloc(F2FS_BLKSIZE, 1);
if (cp_payload == NULL) {
MSG(1, "\tError: Calloc Failed for cp_payload!!!\n");
goto free_nat_bits;
}
/* 1. cp page 1 of checkpoint pack 1 */
// Record the current node segments and current data segments.
// c.cur_seg[] is populated elsewhere; the original author's note says this
// happens in prepare_super_block (not visible here - confirm).
// Random checkpoint version with the low bit forced to 1, so it is never 0.
cp->checkpoint_ver = cpu_to_le64(rand() | 0x1);
set_cp(cur_node_segno[0], c.cur_seg[CURSEG_HOT_NODE]);
set_cp(cur_node_segno[1], c.cur_seg[CURSEG_WARM_NODE]);
set_cp(cur_node_segno[2], c.cur_seg[CURSEG_COLD_NODE]);
set_cp(cur_data_segno[0], c.cur_seg[CURSEG_HOT_DATA]);
set_cp(cur_data_segno[1], c.cur_seg[CURSEG_WARM_DATA]);
set_cp(cur_data_segno[2], c.cur_seg[CURSEG_COLD_DATA]);
// Slots 3..MAX_ACTIVE_NODE_LOGS-1 of both arrays are filled with
// 0xffffffff. The same bound is used for the data array.
for (i = 3; i < MAX_ACTIVE_NODE_LOGS; i++) {
set_cp(cur_node_segno[i], 0xffffffff);
set_cp(cur_data_segno[i], 0xffffffff);
}
// Next free block offset is 1 in the hot node segment and in the hot data
// segment: presumably block 0 of each is already taken by the root inode
// and the root dentry block respectively (confirm against the caller).
set_cp(cur_node_blkoff[0], 1);
set_cp(cur_data_blkoff[0], 1);
// Two valid blocks in total: the root inode and the root dentry (data) block.
set_cp(valid_block_count, 2);
// Reserved segments (kept aside for GC, per the message printed below).
set_cp(rsvd_segment_count, c.reserved_segments);
// Overprovision = (main segments - reserved) * ratio / 100 ...
set_cp(overprov_segment_count, (get_sb(segment_count_main) -
get_cp(rsvd_segment_count)) *
c.overprovision / 100);
// ... plus the reserved segments themselves.
set_cp(overprov_segment_count, get_cp(overprov_segment_count) +
get_cp(rsvd_segment_count));
MSG(0, "Info: Overprovision ratio = %.3lf%%\n", c.overprovision);
MSG(0, "Info: Overprovision segments = %u (GC reserved = %u)\n",
get_cp(overprov_segment_count),
c.reserved_segments);
/* main segments - reserved segments - (node + data segments) */
// NOTE(review): the 6 presumably stands for the three node plus three data
// current segments assigned above - confirm. Note that the expression does
// not subtract reserved segments, despite the comment above.
set_cp(free_segment_count, get_sb(segment_count_main) - 6);
// The 6 is added back, so user blocks = (main - overprovision) segments
// times blocks per segment.
set_cp(user_block_count, ((get_cp(free_segment_count) + 6 -
get_cp(overprov_segment_count)) * c.blks_per_seg));
/* cp page (2), data summaries (1), node summaries (3) */
// Two cp pages, one data summary block and three node summary blocks,
// plus any cp payload blocks.
set_cp(cp_pack_total_block_count, 6 + get_sb(cp_payload));
flags = CP_UMOUNT_FLAG | CP_COMPACT_SUM_FLAG;
// NAT bits are enabled only when the pack and the NAT bits blocks both fit
// in (1 << log_blocks_per_seg) blocks.
if (get_cp(cp_pack_total_block_count) <=
(1 << get_sb(log_blocks_per_seg)) - nat_bits_blocks)
flags |= CP_NAT_BITS_FLAG;
if (c.trimmed)
flags |= CP_TRIMMED_FLAG;
set_cp(ckpt_flags, flags);
// Summary blocks start right after cp page 1 and the cp payload blocks.
set_cp(cp_pack_start_sum, 1 + get_sb(cp_payload));
set_cp(valid_node_count, 1);
set_cp(valid_inode_count, 1);
set_cp(next_free_nid, get_sb(root_ino) + 1);
// Bitmap sizes in bytes: one bit per block of half the SIT/NAT segments.
set_cp(sit_ver_bitmap_bytesize, ((get_sb(segment_count_sit) / 2) <<
get_sb(log_blocks_per_seg)) / 8);
set_cp(nat_ver_bitmap_bytesize, ((get_sb(segment_count_nat) / 2) <<
get_sb(log_blocks_per_seg)) / 8);
set_cp(checksum_offset, CHECKSUM_OFFSET);
// CRC over the first CHECKSUM_OFFSET bytes, stored at CHECKSUM_OFFSET.
crc = f2fs_cal_crc32(F2FS_SUPER_MAGIC, cp, CHECKSUM_OFFSET);
*((__le32 *)((unsigned char *)cp + CHECKSUM_OFFSET)) =
cpu_to_le32(crc);
// Refuse to continue if the superblock block size is not F2FS_BLKSIZE.
blk_size_bytes = 1 << get_sb(log_blocksize);
if (blk_size_bytes != F2FS_BLKSIZE) {
MSG(1, "\tError: Wrong block size %d / %d!!!\n",
blk_size_bytes, F2FS_BLKSIZE);
goto free_cp_payload;
}
// Block address where checkpoint pack 1 begins.
cp_seg_blk = get_sb(segment0_blkaddr);
DBG(1, "\tWriting main segments, cp at offset 0x%08"PRIx64"\n",
cp_seg_blk);
if (dev_write_block(cp, cp_seg_blk)) {
MSG(1, "\tError: While writing the cp to disk!!!\n");
goto free_cp_payload;
}
// When one cp page is sufficient, cp_payload is 0 and this loop does
// nothing (original author's note). Otherwise each payload block is
// written from the zeroed cp_payload buffer.
// NOTE(review): the message text mentions the sit bitmap area although
// this loop targets the cp payload blocks.
for (i = 0; i < get_sb(cp_payload); i++) {
cp_seg_blk++;
if (dev_fill_block(cp_payload, cp_seg_blk)) {
MSG(1, "\tError: While zeroing out the sit bitmap area "
"on disk!!!\n");
goto free_cp_payload;
}
}
/* Prepare and write Segment summary for HOT/WARM/COLD DATA
*
* The structure of compact summary
* +-------------------+
* | nat_journal |
* +-------------------+
* | sit_journal |
* +-------------------+
* | hot data summary |
* +-------------------+
* | warm data summary |
* +-------------------+
* | cold data summary |
* +-------------------+
*/
// `sum` is only scratch space in this part: the journals are staged in it
// and copied into sum_compact, which is the block actually written.
memset(sum, 0, sizeof(struct f2fs_summary_block));
SET_SUM_TYPE((&sum->footer), SUM_TYPE_DATA);
// Stage the NAT journal; its single entry describes the root inode.
journal = &sum->journal;
journal->n_nats = cpu_to_le16(1);// exactly one entry: the root inode
journal->nat_j.entries[0].nid = sb->root_ino;// node id
journal->nat_j.entries[0].ne.version = 0;
journal->nat_j.entries[0].ne.ino = sb->root_ino;
// Root inode block address: first block of the hot node segment.
journal->nat_j.entries[0].ne.block_addr = cpu_to_le32(
get_sb(main_blkaddr) +
get_cp(cur_node_segno[0]) * c.blks_per_seg);
// Copy the NAT journal (SUM_JOURNAL_SIZE bytes starting at n_nats) into
// the compact summary and advance the cursor.
memcpy(sum_compact_p, &journal->n_nats, SUM_JOURNAL_SIZE);
sum_compact_p += SUM_JOURNAL_SIZE;
// Clear the scratch block before staging the SIT journal in the same place.
memset(sum, 0, sizeof(struct f2fs_summary_block));
/* inode sit for root */
journal->n_sits = cpu_to_le16(6);// six entries: hot/warm/cold node + hot/warm/cold data
journal->sit_j.entries[0].segno = cp->cur_node_segno[0];// hot node segment number
// vblocks packs the segment type shifted left by 10 with the valid block
// count in the low 10 bits; here one valid block (the inode block).
journal->sit_j.entries[0].se.vblocks =
cpu_to_le16((CURSEG_HOT_NODE << 10) | 1);
// Mark bit 0 of valid_map.
f2fs_set_bit(0, (char *)journal->sit_j.entries[0].se.valid_map);
// Warm node segment: type only, zero valid blocks.
journal->sit_j.entries[1].segno = cp->cur_node_segno[1];// warm node segment number
journal->sit_j.entries[1].se.vblocks =
cpu_to_le16((CURSEG_WARM_NODE << 10));// no valid blocks
journal->sit_j.entries[2].segno = cp->cur_node_segno[2];// cold node segment number
journal->sit_j.entries[2].se.vblocks =
cpu_to_le16((CURSEG_COLD_NODE << 10));// no valid blocks
/* data sit for root */
journal->sit_j.entries[3].segno = cp->cur_data_segno[0];// hot data segment number
// Hot data segment holds one valid block (the root dentry block, per the
// original author's note).
journal->sit_j.entries[3].se.vblocks =
cpu_to_le16((CURSEG_HOT_DATA << 10) | 1);
f2fs_set_bit(0, (char *)journal->sit_j.entries[3].se.valid_map);
journal->sit_j.entries[4].segno = cp->cur_data_segno[1];
journal->sit_j.entries[4].se.vblocks =
cpu_to_le16((CURSEG_WARM_DATA << 10));
journal->sit_j.entries[5].segno = cp->cur_data_segno[2];
journal->sit_j.entries[5].se.vblocks =
cpu_to_le16((CURSEG_COLD_DATA << 10));
// Copy the SIT journal right after the NAT journal in the compact summary.
memcpy(sum_compact_p, &journal->n_sits, SUM_JOURNAL_SIZE);
sum_compact_p += SUM_JOURNAL_SIZE;
/* hot data summary */
// First summary entry follows the two journals; the rest of sum_compact
// stays zero from calloc.
sum_entry = (struct f2fs_summary *)sum_compact_p;
sum_entry->nid = sb->root_ino;
sum_entry->ofs_in_node = 0;
/* warm data summary, nothing to do */
/* cold data summary, nothing to do */
// Write the compact summary block; warm/cold data need no further setup.
cp_seg_blk++;
DBG(1, "\tWriting Segment summary for HOT/WARM/COLD_DATA, at offset 0x%08"PRIx64"\n",
cp_seg_blk);
if (dev_write_block(sum_compact, cp_seg_blk)) {
MSG(1, "\tError: While writing the sum_blk to disk!!!\n");
goto free_cp_payload;
}
/* Prepare and write Segment summary for HOT_NODE */
memset(sum, 0, sizeof(struct f2fs_summary_block));
SET_SUM_TYPE((&sum->footer), SUM_TYPE_NODE);
sum->entries[0].nid = sb->root_ino;
sum->entries[0].ofs_in_node = 0;
cp_seg_blk++;
DBG(1, "\tWriting Segment summary for HOT_NODE, at offset 0x%08"PRIx64"\n",
cp_seg_blk);
if (dev_write_block(sum, cp_seg_blk)) {
MSG(1, "\tError: While writing the sum_blk to disk!!!\n");
goto free_cp_payload;
}
/* Fill segment summary for WARM_NODE to zero. */
memset(sum, 0, sizeof(struct f2fs_summary_block));
SET_SUM_TYPE((&sum->footer), SUM_TYPE_NODE);
cp_seg_blk++;
DBG(1, "\tWriting Segment summary for WARM_NODE, at offset 0x%08"PRIx64"\n",
cp_seg_blk);
if (dev_write_block(sum, cp_seg_blk)) {
MSG(1, "\tError: While writing the sum_blk to disk!!!\n");
goto free_cp_payload;
}
/* Fill segment summary for COLD_NODE to zero. */
memset(sum, 0, sizeof(struct f2fs_summary_block));
SET_SUM_TYPE((&sum->footer), SUM_TYPE_NODE);
cp_seg_blk++;
DBG(1, "\tWriting Segment summary for COLD_NODE, at offset 0x%08"PRIx64"\n",
cp_seg_blk);
if (dev_write_block(sum, cp_seg_blk)) {
MSG(1, "\tError: While writing the sum_blk to disk!!!\n");
goto free_cp_payload;
}
// The hot/warm/cold node summary blocks have now been written.
/* cp page2 */// checkpoint pack 1, cp page 2: the same cp block is written again to close the pack
cp_seg_blk++;
DBG(1, "\tWriting cp page2, at offset 0x%08"PRIx64"\n", cp_seg_blk);
if (dev_write_block(cp, cp_seg_blk)) {
MSG(1, "\tError: While writing the cp to disk!!!\n");
goto free_cp_payload;
}
/* write NAT bits, if possible */
if (flags & CP_NAT_BITS_FLAG) {
// NOTE(review): this `i` shadows the function-scope `i`.
uint32_t i;
// Buffer layout: 8-byte value from get_cp_crc(cp), then the first bitmap
// (left zeroed), then the "empty" bitmap of nat_bits_bytes bytes.
*(__le64 *)nat_bits = get_cp_crc(cp);
empty_nat_bits = nat_bits + 8 + nat_bits_bytes;
// Set every bit of the empty bitmap, then clear bit 0.
memset(empty_nat_bits, 0xff, nat_bits_bytes);
test_and_clear_bit_le(0, empty_nat_bits);
/* write the last blocks in cp pack */
cp_seg_blk = get_sb(segment0_blkaddr) + (1 <<
get_sb(log_blocks_per_seg)) - nat_bits_blocks;
DBG(1, "\tWriting NAT bits pages, at offset 0x%08"PRIx64"\n",
cp_seg_blk);
for (i = 0; i < nat_bits_blocks; i++) {
if (dev_write_block(nat_bits + i *
F2FS_BLKSIZE, cp_seg_blk + i)) {
MSG(1, "\tError: write NAT bits to disk!!!\n");
goto free_cp_payload;
}
}
}
/* cp page 1 of check point pack 2
* Initiatialize other checkpoint pack with version zero
*/
// Reuse the same cp block with version 0 and a freshly computed CRC.
cp->checkpoint_ver = 0;
crc = f2fs_cal_crc32(F2FS_SUPER_MAGIC, cp, CHECKSUM_OFFSET);
*((__le32 *)((unsigned char *)cp + CHECKSUM_OFFSET)) =
cpu_to_le32(crc);
// Pack 2 starts c.blks_per_seg blocks after segment0_blkaddr.
cp_seg_blk = get_sb(segment0_blkaddr) + c.blks_per_seg;
DBG(1, "\tWriting cp page 1 of checkpoint pack 2, at offset 0x%08"PRIx64"\n",
cp_seg_blk);
if (dev_write_block(cp, cp_seg_blk)) {
MSG(1, "\tError: While writing the cp to disk!!!\n");
goto free_cp_payload;
}
// Same payload handling as in pack 1.
for (i = 0; i < get_sb(cp_payload); i++) {
cp_seg_blk++;
if (dev_fill_block(cp_payload, cp_seg_blk)) {
MSG(1, "\tError: While zeroing out the sit bitmap area "
"on disk!!!\n");
goto free_cp_payload;
}
}
/* cp page 2 of check point pack 2 */
// Jump to the last block of pack 2 (start + total - 1). No summary blocks
// are written for pack 2 in this function.
cp_seg_blk += (le32_to_cpu(cp->cp_pack_total_block_count) -
get_sb(cp_payload) - 1);
DBG(1, "\tWriting cp page 2 of checkpoint pack 2, at offset 0x%08"PRIx64"\n",
cp_seg_blk);
if (dev_write_block(cp, cp_seg_blk)) {
MSG(1, "\tError: While writing the cp to disk!!!\n");
goto free_cp_payload;
}
ret = 0;
// Cleanup labels release buffers in reverse order of allocation; the
// success path falls through them as well.
free_cp_payload:
free(cp_payload);
free_nat_bits:
free(nat_bits);
free_sum_compact:
free(sum_compact);
free_sum:
free(sum);
free_cp:
free(cp);
return ret;
}