f2fs在mount时,会启动garbage collect(GC)线程。该线程通过wait_event_interruptible_timeout函数等待:每当超时时间到达,或者等待的condition变为true时,线程被唤醒,然后判断是否需要执行garbage collect操作。
static int gc_thread_func(void *data)
{
struct f2fs_sb_info *sbi = data;
struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
unsigned int wait_ms;
wait_ms = gc_th->min_sleep_time;
set_freezable();
do {
wait_event_interruptible_timeout(*wq,
kthread_should_stop() || freezing(current) ||
gc_th->gc_wake,
msecs_to_jiffies(wait_ms));
/* give it a try one time */
if (gc_th->gc_wake)
gc_th->gc_wake = 0;
if (try_to_freeze())
continue;
if (kthread_should_stop())
break;
if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
increase_sleep_time(gc_th, &wait_ms);
continue;
}
gc线程被唤醒后,先进行以下几项判断:如果都通过,则继续走gc流程;否则继续等待或者退出。
1) try_to_freeze(): 判断系统是否正在进行休眠(freeze)相关操作,如果是,则继续等待
2) kthread_should_stop(): 判断gc线程是否需要结束,如果是,则退出循环
3) sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE: 如果文件系统的写操作已经被冻结(frozen),则增加wait time,继续等待
if (!sb_start_write_trylock(sbi->sb))
continue;
/*
* [GC triggering condition]
* 0. GC is not conducted currently.
* 1. There are enough dirty segments.
* 2. IO subsystem is idle by checking the # of writeback pages.
* 3. IO subsystem is idle by checking the # of requests in
* bdev's request list.
*
* Note) We have to avoid triggering GCs frequently.
* Because it is possible that some segments can be
* invalidated soon after by user update or deletion.
* So, I'd like to wait some time to collect dirty segments.
*/
if (!mutex_trylock(&sbi->gc_mutex))
goto next;
上面这段注释,对需要执行gc的条件做了说明(对应注释中的第0~3条):
1) 当前没有正在进行GC
2) dirty segments足够多
3) 通过检查writeback pages的数量, 确定IO subsystem是idle状态
4) 通过检查bdev's request list中的request数量, 确定IO subsystem是idle状态
注意:应当尽量避免频繁地进行GC操作,因为一些segments可能很快就会因为用户的更新或者删除而变成无效的,所以可以再等一段时间,来收集更多的dirty segments。
if (gc_th->gc_urgent) {
wait_ms = gc_th->urgent_sleep_time;
goto do_gc;
}
if (!is_idle(sbi)) {
increase_sleep_time(gc_th, &wait_ms);
mutex_unlock(&sbi->gc_mutex);
goto next;
}
if (has_enough_invalid_blocks(sbi))
decrease_sleep_time(gc_th, &wait_ms);
else
increase_sleep_time(gc_th, &wait_ms);
接下来,判断gc_urgent是否设置,如果设置了,则将gc thread等待时间设置为urgent_sleep_time,并立即执行gc。tips:可以通过/sys下面的节点将gc_urgent设置为true。
接下来,判断IO subsystem是否idle状态,如果有读写操作正在进行,则增加gc thread sleep time,继续等待。看一下is_idle是怎么判断的:
/*
 * Tell whether the block device behind this filesystem currently looks idle.
 *
 * Returns false as soon as the root request list of the device's request
 * queue has a non-zero count of sync or async requests. Otherwise the
 * result is whatever f2fs_time_over(sbi, REQ_TIME) reports.
 */
static inline bool is_idle(struct f2fs_sb_info *sbi)
{
	struct request_list *root_list =
			&bdev_get_queue(sbi->sb->s_bdev)->root_rl;

	/* any request counted in either direction means the device is busy */
	if (root_list->count[BLK_RW_SYNC] != 0 ||
	    root_list->count[BLK_RW_ASYNC] != 0)
		return false;

	return f2fs_time_over(sbi, REQ_TIME);
}
其实就是判断block device request list里面是否有block rw request(同步或异步):如果有,则返回0,表示IO subsystem不是idle状态;如果没有,则返回f2fs_time_over(sbi, REQ_TIME)的结果。
然后has_enough_invalid_blocks(sbi)判断是否有足够的dirty blocks和足够少的free blocks,如果满足这两个条件,则会减少gc thread的等待时间,否则会增加等待时间。
do_gc:
stat_inc_bggc_count(sbi);
/* if return value is not zero, no victim was selected */
if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO))
wait_ms = gc_th->no_gc_sleep_time;
trace_f2fs_background_gc(sbi->sb, wait_ms,
prefree_segments(sbi), free_segments(sbi));
/* balancing f2fs's metadata periodically */
f2fs_balance_fs_bg(sbi);
next:
sb_end_write(sbi->sb);
} while (!kthread_should_stop());
return 0;
接下来,f2fs_gc执行真正的garbage collect操作。
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
bool background, unsigned int segno)
{
int gc_type = sync ? FG_GC : BG_GC;
int sec_freed = 0, seg_freed = 0, total_freed = 0;
int ret = 0;
struct cp_control cpc;
unsigned int init_segno = segno;
struct gc_inode_list gc_list = {
.ilist = LIST_HEAD_INIT(gc_list.ilist),
.iroot = RADIX_TREE_INIT(GFP_NOFS),
};
trace_f2fs_gc_begin(sbi->sb, sync, background,
get_pages(sbi, F2FS_DIRTY_NODES),
get_pages(sbi, F2FS_DIRTY_DENTS),
get_pages(sbi, F2FS_DIRTY_IMETA),
free_sections(sbi),
free_segments(sbi),
reserved_segments(sbi),
prefree_segments(sbi));
cpc.reason = __get_cp_reason(sbi);
gc_more:
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) {
ret = -EINVAL;
goto stop;
}
if (unlikely(f2fs_cp_error(sbi))) {
ret = -EIO;
goto stop;
}
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) {
/*
* For example, if there are many prefree_segments below given
* threshold, we can make them free by checkpoint. Then, we
* secure free segments which doesn't need fggc any more.
*/
if (prefree_segments(sbi)) {
ret = write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
}
if (has_not_enough_free_secs(sbi, 0, 0))
gc_type = FG_GC;
}
/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
if (gc_type == BG_GC && !background) {
ret = -EINVAL;
goto stop;
}
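f2fs gc有两种类型:background gc(BG_GC)和foreground gc(FG_GC),默认执行bg gc,紧急情况会转为fg gc执行。f2fs_gc开始时会判断:如果是bg gc,并且free sections不足,那么在存在prefree segments的情况下,先调用write_checkpoint,通过checkpoint把这些prefree segments释放为free segments;如果checkpoint之后free sections还是不足,则转为fg gc。另外,如果gc类型仍是bg gc而background参数为false,则直接返回-EINVAL。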
if (!__get_victim(sbi, &segno, gc_type)) {
ret = -ENODATA;
goto stop;
}
接下来调用__get_victim,来获得执行garbage collect的segment。
/*
* This function is called from two paths.
* One is garbage collection and the other is SSR segment selection.
* When it is called during GC, it just gets a victim segment
* and it does not remove it from dirty seglist.
* When it is called from SSR segment selection, it finds a segment
* which has minimum valid blocks and removes it from dirty seglist.
*/
static int get_victim_by_default(struct f2fs_sb_info *sbi,
unsigned int *result, int gc_type, int type, char alloc_mode)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct sit_info *sm = SIT_I(sbi);
struct victim_sel_policy p;
unsigned int secno, last_victim;
unsigned int last_segment = MAIN_SEGS(sbi);
unsigned int nsearched = 0;
mutex_lock(&dirty_i->seglist_lock);
p.alloc_mode = alloc_mode;
select_policy(sbi, gc_type, type, &p);
p.min_segno = NULL_SEGNO;
p.min_cost = get_max_cost(sbi, &p);
if (*result != NULL_SEGNO) {
if (IS_DATASEG(get_seg_entry(sbi, *result)->type) &&
get_valid_blocks(sbi, *result, false) &&
!sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
p.min_segno = *result;
goto out;
}
if (p.max_search == 0)
goto out;
last_victim = sm->last_victim[p.gc_mode];
if (p.alloc_mode == LFS && gc_type == FG_GC) {
p.min_segno = check_bg_victims(sbi);
if (p.min_segno != NULL_SEGNO)
goto got_it;
}
get_victim_by_default()函数传入5个参数,
1) struct f2fs_sb_info *sbi: f2fs super block info
2) unsigned int *result : 得到的victim segment number
3) int gc_type: background gc or fg gc
4) int type: NO_CHECK_TYPE,即包含所有的node 与 data
5) char alloc_mode: LFS
首先设置victim_sel_policy结构,初始化victim select的一些条件,比如:
1) p.alloc_mode = LFS
2) p.min_segno = NULL_SEGNO; //初始值,表示还没有选中任何victim segment
3) p->max_search = dirty_i->nr_dirty[DIRTY]; //最多查找的个数为dirty segment的数量
4) p->ofs_unit = sbi->segs_per_sec; //默认每个section包含一个segment
5) 如果查找hot data或者node segment,则offset设置为0,即从0开始查找;其它情况设置为last_victim[gc_mode],即从上一次找到的victim开始:
/* let's select beginning hot/small space first */
if (type == CURSEG_HOT_DATA || IS_NODESEG(type))
p->offset = 0;
else
p->offset = SIT_I(sbi)->last_victim[p->gc_mode];
如果是fg gc(并且alloc_mode为LFS),则先通过check_bg_victims从victim_secmap中选择一个segment,这里面的segment都是之前做bg gc时选择过的segment,这些segment中的valid block较少。如果从victim_secmap中选择到了一个(返回值不是NULL_SEGNO),则直接跳转到got_it返回,否则继续查找。
查找主要是通过下面这个大的while循环实现:
/*
 * Walk the dirty-segment bitmap and remember the candidate with the lowest
 * GC cost in p.min_segno / p.min_cost. The walk starts at p.offset and ends
 * either when the bitmap is exhausted or when p.max_search dirty segments
 * have been counted in nsearched.
 */
while (1) {
unsigned long cost;
unsigned int segno;
segno = find_next_bit(p.dirty_segmap, last_segment, p.offset);// next dirty segment at or after p.offset, searching below last_segment
if (segno >= last_segment) {// nothing found below last_segment (initially the total number of main segments)
// the end was reached; if last_victim is non-zero, wrap around and search again from the start
if (sm->last_victim[p.gc_mode]) {
/* the next pass starts at offset 0 and is bounded by the old last_victim, i.e. it covers 0..last_victim */
last_segment =
sm->last_victim[p.gc_mode];
sm->last_victim[p.gc_mode] = 0;
p.offset = 0;
continue;
}
break;
}
p.offset = segno + p.ofs_unit;// advance the search offset by p.ofs_unit segments (by 1 when p.ofs_unit is 1)
if (p.ofs_unit > 1) {
/*
 * Pull the offset back to a multiple of p.ofs_unit, then add the
 * result of count_bits() over the unit that was just passed
 * (presumably the number of dirty segments in it - confirm
 * against count_bits()).
 */
p.offset -= segno % p.ofs_unit;
nsearched += count_bits(p.dirty_segmap,
p.offset - p.ofs_unit,
p.ofs_unit);
} else {
nsearched++;// number of candidates examined so far
}
secno = GET_SEC_FROM_SEG(sbi, segno);// section number that segno belongs to
/* skip the candidate when sec_usage_check() rejects its section */
if (sec_usage_check(sbi, secno))
goto next;
/* background GC skips sections whose bit is already set in victim_secmap */
if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
goto next;
/* foreground GC in LFS mode skips sections rejected by no_fggc_candidate() */
if (gc_type == FG_GC && p.alloc_mode == LFS &&
no_fggc_candidate(sbi, secno))
goto next;
// cost of garbage-collecting this candidate segment
cost = get_gc_cost(sbi, segno, &p);
// p.min_segno keeps the segno with the lowest cost so far, p.min_cost keeps that cost
if (p.min_cost > cost) {
p.min_segno = segno;
p.min_cost = cost;
}
next:
if (nsearched >= p.max_search) {// the search budget p.max_search has been used up
// record in last_victim where the next search should resume
if (!sm->last_victim[p.gc_mode] && segno <= last_victim)
sm->last_victim[p.gc_mode] = last_victim + 1;
else
sm->last_victim[p.gc_mode] = segno + 1;
sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi);
break;
}
}
这段查找,主要内容就是在dirty segment中找到一个cost最小的segment,最后返回。
后面再看找到victim后怎样进行garbage collect。