f2fs gc garbage collect 过程分析

最新推荐文章于 2024-04-23 20:56:03 发布

东大坡居士

最新推荐文章于 2024-04-23 20:56:03 发布

阅读量1.3k

点赞数 1

分类专栏： linux 文件系统 f2fs 文章标签： linux filesystem f2fs gc

本文链接：https://blog.csdn.net/tianweishuiguo/article/details/102512212

版权

linux 文件系统 f2fs 专栏收录该内容

9 篇文章 4 订阅

订阅专栏

f2fs mount时，会启动garbage collect 线程，garbage collect线程通过wait_event_interruptible_timeout函数，每隔一段时间，或者等待的condition为true时，判断是否需要执行garbage collect操作。

static int gc_thread_func(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
	unsigned int wait_ms;

	wait_ms = gc_th->min_sleep_time;

	set_freezable();
	do {
		wait_event_interruptible_timeout(*wq,
				kthread_should_stop() || freezing(current) ||
				gc_th->gc_wake,
				msecs_to_jiffies(wait_ms));

		/* give it a try one time */
		if (gc_th->gc_wake)
			gc_th->gc_wake = 0;

		if (try_to_freeze())
			continue;
		if (kthread_should_stop())
			break;

		if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
			increase_sleep_time(gc_th, &wait_ms);
			continue;
		}

gc线程被唤醒（超时或者等待的condition为true）后，先进行一些判断：如果条件满足，则继续走gc流程；如果不满足，则继续等待或者退出。

1) try_to_freeze(): 判断系统是否正在进行休眠相关操作，如果是，则继续等待

2) kthread_should_stop: 判断gc线程是否需要结束

3) sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE：如果文件系统的写操作已经被冻结（frozen级别达到或超过SB_FREEZE_WRITE），则会增加wait time，继续等待

		if (!sb_start_write_trylock(sbi->sb))
			continue;

		/*
		 * [GC triggering condition]
		 * 0. GC is not conducted currently.
		 * 1. There are enough dirty segments.
		 * 2. IO subsystem is idle by checking the # of writeback pages.
		 * 3. IO subsystem is idle by checking the # of requests in
		 *    bdev's request list.
		 *
		 * Note) We have to avoid triggering GCs frequently.
		 * Because it is possible that some segments can be
		 * invalidated soon after by user update or deletion.
		 * So, I'd like to wait some time to collect dirty segments.
		 */
		if (!mutex_trylock(&sbi->gc_mutex))
			goto next;

接下来这段注释，对需要执行gc的条件做了说明：

1) 当前没有进行GC

2) dirty segments足够多

3) 检查writeback pages, 确定IO subsystem是idle状态

4) 检查bdev's request list, 确定IO subsystem是idle状态

注意：应当尽量避免频繁的进行GC操作，因为一些segments，被更新或者删除后，可能很快的变成无效的，可以再等一些时间，来收集这些dirty segments.

		if (gc_th->gc_urgent) {
			wait_ms = gc_th->urgent_sleep_time;
			goto do_gc;
		}

		if (!is_idle(sbi)) {
			increase_sleep_time(gc_th, &wait_ms);
			mutex_unlock(&sbi->gc_mutex);
			goto next;
		}

		if (has_enough_invalid_blocks(sbi))
			decrease_sleep_time(gc_th, &wait_ms);
		else
			increase_sleep_time(gc_th, &wait_ms);

接下来，判断gc_urgent是否设置，如果设置了，则将gc thread等待时间设置为urgent_sleep_time，并立即执行gc。tips: 可以通过/sys下面的结点设置gc_urgent为true。

接下来，判断IO subsystem是否idle状态，如果有读写操作正在进行，则增加gc thread sleep time，继续等待。看一下is_idle是怎么判断的：

/*
 * Decide whether the block device behind @sbi currently looks idle.
 *
 * The device is treated as busy (returns 0) as soon as the root request
 * list of its request queue holds any sync or async request. Otherwise
 * the answer is whatever f2fs_time_over(sbi, REQ_TIME) reports.
 */
static inline bool is_idle(struct f2fs_sb_info *sbi)
{
	struct request_queue *queue = bdev_get_queue(sbi->sb->s_bdev);
	struct request_list *pending = &queue->root_rl;
	int busy = pending->count[BLK_RW_SYNC] || pending->count[BLK_RW_ASYNC];

	/* Only consult the REQ_TIME check when no request is queued. */
	return busy ? 0 : f2fs_time_over(sbi, REQ_TIME);
}

其实就是先判断block device request list里面是否有block rw request（sync或者async），如果有则认为不是idle；如果没有，再返回f2fs_time_over(sbi, REQ_TIME)的结果。

然后has_enough_invalid_blocks(sbi)判断是否有足够的dirty blocks和足够少的free blocks，如果满足这两个条件，则会减少gc thread的等待时间，否则会增加等待时间。

do_gc:
		stat_inc_bggc_count(sbi);

		/* if return value is not zero, no victim was selected */
		if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO))
			wait_ms = gc_th->no_gc_sleep_time;

		trace_f2fs_background_gc(sbi->sb, wait_ms,
				prefree_segments(sbi), free_segments(sbi));

		/* balancing f2fs's metadata periodically */
		f2fs_balance_fs_bg(sbi);
next:
		sb_end_write(sbi->sb);

	} while (!kthread_should_stop());
	return 0;

接下来，f2fs_gc执行真正的garbage collect操作。

int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
			bool background, unsigned int segno)
{
	int gc_type = sync ? FG_GC : BG_GC;
	int sec_freed = 0, seg_freed = 0, total_freed = 0;
	int ret = 0;
	struct cp_control cpc;
	unsigned int init_segno = segno;
	struct gc_inode_list gc_list = {
		.ilist = LIST_HEAD_INIT(gc_list.ilist),
		.iroot = RADIX_TREE_INIT(GFP_NOFS),
	};

	trace_f2fs_gc_begin(sbi->sb, sync, background,
				get_pages(sbi, F2FS_DIRTY_NODES),
				get_pages(sbi, F2FS_DIRTY_DENTS),
				get_pages(sbi, F2FS_DIRTY_IMETA),
				free_sections(sbi),
				free_segments(sbi),
				reserved_segments(sbi),
				prefree_segments(sbi));

	cpc.reason = __get_cp_reason(sbi);
gc_more:
	if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) {
		ret = -EINVAL;
		goto stop;
	}
	if (unlikely(f2fs_cp_error(sbi))) {
		ret = -EIO;
		goto stop;
	}

	if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) {
		/*
		 * For example, if there are many prefree_segments below given
		 * threshold, we can make them free by checkpoint. Then, we
		 * secure free segments which doesn't need fggc any more.
		 */
		if (prefree_segments(sbi)) {
			ret = write_checkpoint(sbi, &cpc);
			if (ret)
				goto stop;
		}
		if (has_not_enough_free_secs(sbi, 0, 0))
			gc_type = FG_GC;
	}

	/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
	if (gc_type == BG_GC && !background) {
		ret = -EINVAL;
		goto stop;
	}

f2fs gc有两种类型，background gc和foreground gc，默认执行bg gc，紧急情况会转为fg gc执行。f2fs_gc开始会判断，如果是bg gc，并且free sections不足，则先检查是否存在prefree segments，如果存在就调用write_checkpoint，通过checkpoint把这些prefree segments释放为free segments；如果checkpoint之后free sections还是不足，则转为fg gc。

	if (!__get_victim(sbi, &segno, gc_type)) {
		ret = -ENODATA;
		goto stop;
	}

接下来调用__get_victim，来获得执行garbage collect的segment。


/*
 * This function is called from two paths.
 * One is garbage collection and the other is SSR segment selection.
 * When it is called during GC, it just gets a victim segment
 * and it does not remove it from dirty seglist.
 * When it is called from SSR segment selection, it finds a segment
 * which has minimum valid blocks and removes it from dirty seglist.
 */
static int get_victim_by_default(struct f2fs_sb_info *sbi,
		unsigned int *result, int gc_type, int type, char alloc_mode)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct sit_info *sm = SIT_I(sbi);
	struct victim_sel_policy p;
	unsigned int secno, last_victim;
	unsigned int last_segment = MAIN_SEGS(sbi);
	unsigned int nsearched = 0;

	mutex_lock(&dirty_i->seglist_lock);

	p.alloc_mode = alloc_mode;
	select_policy(sbi, gc_type, type, &p);

	p.min_segno = NULL_SEGNO;
	p.min_cost = get_max_cost(sbi, &p);

	if (*result != NULL_SEGNO) {
		if (IS_DATASEG(get_seg_entry(sbi, *result)->type) &&
			get_valid_blocks(sbi, *result, false) &&
			!sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
			p.min_segno = *result;
		goto out;
	}

	if (p.max_search == 0)
		goto out;

	last_victim = sm->last_victim[p.gc_mode];
	if (p.alloc_mode == LFS && gc_type == FG_GC) {
		p.min_segno = check_bg_victims(sbi);
		if (p.min_segno != NULL_SEGNO)
			goto got_it;
	}

get_victim_by_default()函数传入5个参数，

1) struct f2fs_sb_info *sbi: f2fs super block info

2) unsigned int *result : 得到的victim segment number

3) int gc_type: background gc or fg gc

4) int type: NO_CHECK_TYPE，即包含所有的node 与 data

5) char alloc_mode: LFS

首先设置victim_sel_policy结构，初始化victim select 的一些条件，比如：

1) p.alloc_mode = LFS

2) p.min_segno = NULL_SEGNO; //初始值，表示还没有选中任何victim segment

3) p->max_search = dirty_i->nr_dirty[DIRTY]; //最多查找的个数为dirty segment

4) p->ofs_unit = sbi->segs_per_sec; //默认每个section包含一个segment

5) 如果查找hot data或者node segment则offset设置为0，即从0开始查找，其它情况设置为last_victim[gc_mode]，即从上一次找到的victim开始:

/* let's select beginning hot/small space first */
   if (type == CURSEG_HOT_DATA || IS_NODESEG(type))
       p->offset = 0;
   else
       p->offset = SIT_I(sbi)->last_victim[p->gc_mode];

如果fg gc, 则先从victim_secmap中选择一个segment, 这里面的segment都是之前做bg gc时选择过的segment, 这些segment中的valid block较少。如果从victim_secmap中选择了一个，则直接返回，否则继续查找。

查找主要是通过下面这个大的while循环实现：

	while (1) {
		unsigned long cost;
		unsigned int segno;

		segno = find_next_bit(p.dirty_segmap, last_segment, p.offset);//从dirty_segmap中得到一个segment
		if (segno >= last_segment) {//判断得到的segment no是否大于等于last_segment，last_segment是segment的总数，
		                            //如果>=last_segment，说明已经找到最后了，需要从头开始继续找
			if (sm->last_victim[p.gc_mode]) {
				/*设置下次查找，p.offset从0开始，最终找到last_segment,即从0找到last_victim*/
				last_segment =
					sm->last_victim[p.gc_mode];
				sm->last_victim[p.gc_mode] = 0;
				p.offset = 0;
				continue;
			}
			break;
		}

		p.offset = segno + p.ofs_unit;//每个section包含一个segment，即p.offset += 1
		if (p.ofs_unit > 1) {
			p.offset -= segno % p.ofs_unit;
			nsearched += count_bits(p.dirty_segmap,
						p.offset - p.ofs_unit,
						p.ofs_unit);
		} else {
			nsearched++;//已查找的个数
		}

		secno = GET_SEC_FROM_SEG(sbi, segno);//由segment number得到它所在的section number

		if (sec_usage_check(sbi, secno))
			goto next;
		if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
			goto next;
		if (gc_type == FG_GC && p.alloc_mode == LFS &&
					no_fggc_candidate(sbi, secno))
			goto next;

        //得到这个victim segment 的cost，即gc此segment花费的cost
		cost = get_gc_cost(sbi, segno, &p);

        //p.min_segno保存cost最小的segno, p.min_cost保存最小的cost
		if (p.min_cost > cost) {
			p.min_segno = segno;
			p.min_cost = cost;
		}
next:
		if (nsearched >= p.max_search) {//如果已经把所有的dirty segment搜索完成
		    //设置last_victim值
			if (!sm->last_victim[p.gc_mode] && segno <= last_victim)
				sm->last_victim[p.gc_mode] = last_victim + 1;
			else
				sm->last_victim[p.gc_mode] = segno + 1;
			sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi);
			break;
		}
	}

这段查找，主要内容就是在dirty segment中找到一个cost最小的segment，最后返回。

后面再看找到victim后怎样进行garbage collect。