Scheduler 学习之三：stop_sched_class

最新推荐文章于 2024-06-14 22:33:31 发布

sucjhwaxp

最新推荐文章于 2024-06-14 22:33:31 发布

阅读量1.3k

点赞数

本文链接：https://blog.csdn.net/sucjhwaxp/article/details/106182027

版权

Context
在《Scheduler 学习之二：主调度器_schedule函数》一文中，提到目前kernel主要有5个调度类(sched class)，class的优先顺序为:stop_sched_class > dl_sched_class > rt_sched_class > fair_sched_class > idle_sched_class。接下来按照优先顺序逐个学习。今天主要学习优先级最高的stop_sched_class。
Summary
1. stop class的task不会被其他task抢占(check_preempt_curr_stop是空函数),也不允许yield(yield_task_stop直接BUG());stop task一旦开始run,就会一直run到它执行完工作、自己睡眠让出cpu为止
2. 每个cpu(rq)同一时间只会有一个stop task(rq->stop);设定新的stop task时,旧的stop task的sched class会被改回rt sched class
3.目前的kernel中只有migration task来使用,且不会给Userspace使用.

回调函数设定

/*
 * Simple, special scheduling class for the per-CPU stop tasks:
 *
 * Every hook is wired to a *_stop helper, except .set_cpus_allowed,
 * which reuses set_cpus_allowed_common().
 */
const struct sched_class stop_sched_class = {
	/* dl_sched_class comes right after this class. */
	.next			= &dl_sched_class,

	/* enqueue/dequeue only adjust rq->nr_running; yield BUG()s. */
	.enqueue_task		= enqueue_task_stop,
	.dequeue_task		= dequeue_task_stop,
	.yield_task		= yield_task_stop,

	/* Empty hook: nothing is ever done on a preemption check. */
	.check_preempt_curr	= check_preempt_curr_stop,

	.pick_next_task		= pick_next_task_stop,
	.put_prev_task		= put_prev_task_stop,
	.set_next_task          = set_next_task_stop,

#ifdef CONFIG_SMP
	/* These three hooks are only set on SMP builds. */
	.balance		= balance_stop,
	.select_task_rq		= select_task_rq_stop,
	.set_cpus_allowed	= set_cpus_allowed_common,
#endif

	/* Empty hook. */
	.task_tick		= task_tick_stop,

	/* Always returns 0. */
	.get_rr_interval	= get_rr_interval_stop,

	/* prio_changed and switched_to BUG(); update_curr is empty. */
	.prio_changed		= prio_changed_stop,
	.switched_to		= switched_to_stop,
	.update_curr		= update_curr_stop,
};

3.1 enqueue_task_stop

/*
 * enqueue_task hook of the stop class.
 *
 * The only effect is to raise the runqueue's nr_running count by one;
 * @p and @flags are not looked at, and no queue or list is touched.
 */
static void enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
	add_nr_running(rq, 1);
}
/*
 * Add @count to rq->nr_running.
 *
 * On SMP builds, when this addition takes the count from below 2 to 2 or
 * more, rq->rd->overload is set to 1 unless it already reads non-zero.
 * sched_update_tick_dependency() is called last in every case.
 */
static inline void add_nr_running(struct rq *rq, unsigned count)
{
	unsigned before = rq->nr_running;

	rq->nr_running = before + count;

#ifdef CONFIG_SMP
	/* Only the transition "fewer than two" -> "two or more" matters. */
	if (before < 2 && rq->nr_running >= 2 &&
	    !READ_ONCE(rq->rd->overload))
		WRITE_ONCE(rq->rd->overload, 1);
#endif

	/* Tick handling while tasks are running is studied separately. */
	sched_update_tick_dependency(rq);
}

/* Excerpt of struct root_domain: only the overload flag is shown. */
struct root_domain {
	/* ... other members omitted ... */

	/*
	 * Indicate pullable load on at least one CPU, e.g:
	 * - More than one runnable task
	 * - Running task is misfit
	 */
	int			overload;

	/*
	 * Reading the comment above, there appear to be two triggers:
	 * 1. Two or more runnable tasks on a CPU (see add_nr_running()).
	 * 2. The running task does not fit the CPU it is currently on.
	 *    A first look at the code suggests this case is handled in
	 *    update_sd_lb_stats(), under the comment "update overload
	 *    indicator if we are at root domain":
	 *        WRITE_ONCE(rd->overload, sg_status & SG_OVERLOAD);
	 *    TODO: confirm once that part of the source has been studied.
	 */
};

如上面的code所示，enqueue的时候只是增加了nr_running计数，并没有把task放入任何queue。奇怪，那task保存在哪里呢?
如后面要提到的pick_next_task_stop所示，每个rq同一时刻只有一个stop task，直接保存在rq->stop指针中，因此并不需要用queue或者list进行保存。
那么什么时候给rq->stop赋值呢？
如下面的function所示，sched_set_stop_task会为rq的stop赋值:

/* Excerpt of struct rq: only the task pointers discussed here are shown. */
struct rq {
	/* Read by put_prev_task_stop() for its runtime accounting. */
	struct task_struct	*curr;
	struct task_struct	*idle;
	/* Assigned by sched_set_stop_task(). */
	struct task_struct	*stop;
};

void sched_set_stop_task(int cpu, struct task_struct *stop)
{
	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
	struct task_struct *old_stop = cpu_rq(cpu)->stop;

	if (stop) {
		/*
		 * Make it appear like a SCHED_FIFO task, its something
		 * userspace knows about and won't get confused about.
		 *
		 * Also, it will make PI more or less work without too
		 * much confusion -- but then, stop work should not
		 * rely on PI working anyway.
		 */
		sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);

		stop->sched_class = &stop_sched_class;
	}

	cpu_rq(cpu)->stop = stop;

	if (old_stop) {
		/*
		 * Reset it back to a normal scheduling class so that
		 * it can die in pieces.
		 */
		old_stop->sched_class = &rt_sched_class;
	}
}

如上面的code所示,设定新的stop task时,会将旧的stop task的sched class改回rt class.
这里有一个问题:并没有看到对这个旧的task做rt class的enqueue动作.
看上去这个函数的主要目的只是设定新的stop task;而旧的task下一次从sleep变成running的时候,会走enqueue的流程,那时才会进入rt的runqueue当中.

3.2 dequeue_task_stop

/*
 * dequeue_task hook of the stop class.
 *
 * The only effect is to lower the runqueue's nr_running count by one;
 * @p and @flags are not looked at.
 */
static void dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
	sub_nr_running(rq, 1);
}
/*
 * Subtract @count from rq->nr_running, then let
 * sched_update_tick_dependency() re-evaluate the runqueue.
 *
 * Unlike add_nr_running(), nothing here touches rq->rd->overload: the
 * flag is not cleared when the count drops again.
 */
static inline void sub_nr_running(struct rq *rq, unsigned count)
{
	rq->nr_running = rq->nr_running - count;

	/* Check if we still need preemption */
	sched_update_tick_dependency(rq);
}

如上面的code所示，dequeue的时候只将计数器减1，并没有将overload标志清0。应该是在后面load balance的时候(例如前面提到的update_sd_lb_stats)，再重新判断并更新这个标志。

3.3 yield_task_stop

/* yield_task hook of the stop class: reaching it at all is a bug. */
static void yield_task_stop(struct rq *rq)
{
	/* The stop task should never yield; it would be pointless. */
	BUG();
}

3.4 check_preempt_curr_stop

/*
 * check_preempt_curr hook of the stop class.
 *
 * Deliberately empty: the stop task is never preempted, so none of the
 * arguments are examined.
 */
static void check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)
{
}

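如注释所示，stop class的task从来不会被抢占。如何做到这一点呢？一方面这个回调是空函数，不会给当前task设置resched标记；另一方面stop_sched_class是优先级最高的class，只要stop task处于runnable状态，pick_next_task时它总是最先被选中(见下面的pick_next_task_stop)。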
3.5 .pick_next_task = pick_next_task_stop

/*
 * pick_next_task hook of the stop class.
 *
 * Both @prev and @rf are expected to be NULL here; a one-time warning is
 * raised if either is not. Returns rq->stop when the stop task is
 * installed and queued, after set_next_task_stop() has recorded its
 * start-of-execution time; returns NULL otherwise.
 */
static struct task_struct *
pick_next_task_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
	WARN_ON_ONCE(prev || rf);

	if (sched_stop_runnable(rq)) {
		set_next_task_stop(rq, rq->stop, true);
		return rq->stop;
	}

	/* No stop task installed, or it is not queued. */
	return NULL;
}

/*
 * True when @rq has a stop task installed (rq->stop is non-NULL) and
 * task_on_rq_queued() reports that task as queued.
 */
static inline bool sched_stop_runnable(struct rq *rq)
{
	if (!rq->stop)
		return false;

	return task_on_rq_queued(rq->stop);
}

/*
 * set_next_task hook of the stop class: store the current
 * rq_clock_task() value of @rq in @stop's se.exec_start.
 * @first is not used.
 */
static void
set_next_task_stop(struct rq *rq, struct task_struct *stop, bool first)
{
	stop->se.exec_start = rq_clock_task(rq);
}

3.6 put_prev_task_stop

static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
{
	struct task_struct *curr = rq->curr;
	u64 delta_exec;

	delta_exec = rq_clock_task(rq) - curr->se.exec_start;
	if (unlikely((s64)delta_exec < 0))
		delta_exec = 0;

	schedstat_set(curr->se.statistics.exec_max,
			max(curr->se.statistics.exec_max, delta_exec));

	curr->se.sum_exec_runtime += delta_exec;//统计当前task的执行时间。
	account_group_exec_runtime(curr, delta_exec);// 更新group 的exec time

	curr->se.exec_start = rq_clock_task(rq);//重新开始计时
	cgroup_account_cputime(curr, delta_exec);//待研究到cgroup部分时候，再研究。
}

如上代码所述，这个function主要实现更新task及其group的执行时间并重新开始计时.

3.7 set_next_task_stop

/*
 * set_next_task hook of the stop class.
 *
 * Does nothing beyond recording when @stop starts executing: its
 * se.exec_start is set to rq_clock_task(@rq). @first is ignored.
 */
static void
set_next_task_stop(struct rq *rq, struct task_struct *stop, bool first)
{
	stop->se.exec_start = rq_clock_task(rq);
}

只是设置了task开始执行的时间。

3.8 balance = balance_stop

/*
 * balance hook of the stop class: returns the result of
 * sched_stop_runnable(@rq), i.e. non-zero exactly when a stop task is
 * installed and queued. @prev and @rf are not used.
 */
static int balance_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
	return sched_stop_runnable(rq);
}

/*
 * Report whether the stop task of @rq can be picked: it must exist
 * (rq->stop non-NULL) and task_on_rq_queued() must hold for it.
 */
static inline bool sched_stop_runnable(struct rq *rq)
{
	if (!rq->stop)
		return false;

	return task_on_rq_queued(rq->stop);
}

/* Returns 1 when @p's on_rq field equals TASK_ON_RQ_QUEUED, 0 otherwise. */
static inline int task_on_rq_queued(struct task_struct *p)
{
	return (p->on_rq == TASK_ON_RQ_QUEUED) ? 1 : 0;
}

如果stop不为空，并且stop处于queue状态，则返回true.这个地方返回true有什么用呢？
如在pick_next_task函数中：

	/*
	 * We must do the balancing pass before put_next_task(), such
	 * that when we release the rq->lock the task is in the same
	 * state as before we took rq->lock.
	 *
	 * We can terminate the balance pass as soon as we know there is
	 * a runnable task of @class priority or higher.
	 *
	 * NOTE(review): "put_next_task()" above looks like a slip for
	 * put_prev_task() -- sched_class has a put_prev_task hook but no
	 * put_next_task one; confirm against the kernel tree.
	 */
	for_class_range(class, prev->sched_class, &idle_sched_class) {
		/* Stop at the first class whose ->balance() returns non-zero. */
		if (class->balance(rq, prev, rf))
			break;
	}

所以balance回调函数的目的在于判断当前sched class中是否有runnable的task，如果有的话，可以终止后面优先级较低的sched_class 做balance了。因为，已经有可以pick的task了。

3.9 select_task_rq = select_task_rq_stop

/*
 * select_task_rq hook of the stop class: always answers with
 * task_cpu(@p). @cpu, @sd_flag and @flags are not used.
 */
static int select_task_rq_stop(struct task_struct *p, int cpu, int sd_flag, int flags)
{
	/* Stop tasks never migrate. */
	return task_cpu(p);
}

如上面的注释所述，stop tasks 从来不做migrate，所以这里不需要选核操作，task原来在哪个核上面run,就还在哪个核上面run.
3.10 set_cpus_allowed = set_cpus_allowed_common

/*
 * sched_class::set_cpus_allowed must do the below, but is not required to
 * actually call this function.
 *
 * "The below" is: copy @new_mask into p->cpus_mask and store
 * cpumask_weight(@new_mask) in p->nr_cpus_allowed.
 */
void
set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
{
	/* The mask of CPUs the task may run on ... */
	cpumask_copy(&p->cpus_mask, new_mask);
	/* ... and the weight of that mask, kept alongside it. */
	p->nr_cpus_allowed = cpumask_weight(new_mask);
}

/*
 * Excerpt of struct task_struct: only the two members written by
 * set_cpus_allowed_common() are shown.
 */
struct task_struct {
	/* ... other members omitted ... */
	cpumask_t			cpus_mask;
	int				nr_cpus_allowed;
	/* ... other members omitted ... */
};

采用common的方法。

3.11 .task_tick = task_tick_stop,

/*
 * Scheduler tick hitting a task of the stop class.
 *
 * NOTE: This function can be called remotely by the tick offload that
 * goes along full dynticks. Therefore no local assumption can be made
 * and everything must be accessed through the @rq and @curr passed in
 * parameters.
 *
 * The body is empty: the stop class does no work on a tick.
 */
static void
task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
{
}

3.12 .get_rr_interval = get_rr_interval_stop

/*
 * get_rr_interval hook of the stop class: always returns 0, whatever
 * @rq and @task are.
 */
static unsigned int get_rr_interval_stop(struct rq *rq, struct task_struct *task)
{
	return 0;
}

3.13 .prio_changed = prio_changed_stop

/* prio_changed hook of the stop class: reaching it at all is a bug. */
static void prio_changed_stop(struct rq *rq, struct task_struct *p, int oldprio)
{
	/* How!? What priority? */
	BUG();
}

3.14 .switched_to = switched_to_stop

/* switched_to hook of the stop class: reaching it at all is a bug. */
static void
switched_to_stop(struct rq *rq, struct task_struct *p)
{
	/* It is impossible to change to this class. */
	BUG();
}

3.15 .update_curr = update_curr_stop

/* update_curr hook of the stop class: intentionally does nothing. */
static void
update_curr_stop(struct rq *rq)
{
}

使用stop class的地方
如下面的代码所示,目前主要是在cpu_stop_create函数中通过sched_set_stop_task来设定stop task

/*
 * .create callback of cpu_stop_threads: hand @cpu's cpu_stopper thread
 * to sched_set_stop_task() so it becomes that CPU's stop task.
 */
static void cpu_stop_create(unsigned int cpu)
{
	struct task_struct *stopper_thread = per_cpu(cpu_stopper.thread, cpu);

	sched_set_stop_task(cpu, stopper_thread);
}

/*
 * The actual stopper, one per every possible cpu, enabled on online cpus.
 *
 * @lock and @works are initialised for every possible CPU in
 * cpu_stop_init(); @thread is what cpu_stop_create() installs as the
 * CPU's stop task.
 */
struct cpu_stopper {
	struct task_struct	*thread;

	raw_spinlock_t		lock;
	bool			enabled;	/* is this stopper enabled? */
	struct list_head	works;		/* list of pending works */

	struct cpu_stop_work	stop_work;	/* for stop_cpus */
};

/* Per-CPU instance of struct cpu_stopper, accessed through per_cpu(). */
static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);

/*
 * Per-CPU thread description registered by cpu_stop_init() through
 * smpboot_register_percpu_thread().
 */
static struct smp_hotplug_thread cpu_stop_threads = {
	/* The per-CPU cpu_stopper.thread pointer. */
	.store			= &cpu_stopper.thread,
	.thread_should_run	= cpu_stop_should_run,
	.thread_fn		= cpu_stopper_thread,
	/* Name pattern; presumably %u is the CPU number -- TODO confirm. */
	.thread_comm		= "migration/%u",
	/* cpu_stop_create() makes the thread the CPU's stop task. */
	.create			= cpu_stop_create,
	.park			= cpu_stop_park,
	.selfparking		= true,
};

/*
 * Set up the per-CPU stoppers.
 *
 * Initialises the lock and the pending-work list of every possible CPU's
 * cpu_stopper, registers cpu_stop_threads as per-CPU threads (BUG if the
 * registration reports failure), calls stop_machine_unpark() with the
 * current processor id, and finally sets stop_machine_initialized.
 *
 * Always returns 0.
 */
static int __init cpu_stop_init(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		struct cpu_stopper *st = &per_cpu(cpu_stopper, cpu);

		/* The two initialisations are independent of each other. */
		INIT_LIST_HEAD(&st->works);
		raw_spin_lock_init(&st->lock);
	}

	BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
	stop_machine_unpark(raw_smp_processor_id());
	stop_machine_initialized = true;

	return 0;
}

如上面的代码所示,migration task属于stop类型class的task.

sucjhwaxp

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
Scheduler 学习之三：stop_sched_class

Context 在Scheduler 学习之二：主调度器_schedule函数文章中，提到目前kernel主要有5个调度器，class的优先顺序为:stop_sched_class >dl_sched_class >rt_sched_class >fair_sched_class >idle_sched_class。接下来按照优先顺序学习。今天主要学习优先级最高的stop_sched_class。 Summary 1. stop class不能被抢占,不能被切换,不会主...
复制链接

扫一扫