schedutil调频流程分析

        CPU调频策略需要满足这样的要求,在系统负载低的时候降低CPU频率以节省功耗减小发热,在系统负载高的时候提高CPU频率以保障用户交互的流畅。内核提供了多种调频策略,例如ondemand、userspace、powersave、performance、schedutil等。手机在开机完成后,用的是schedutil调频策略。理想的调频策略,希望能够做到系统需要多少性能,就给多少性能,这样既满足了性能又不至于造成功耗浪费。但是没有接口能够知道,系统接下来的一段时间需要多少性能,只能用过去的负载预测当前的需求。《WALT负载计算》中提到过,不同频率对应不同的运算能力,schedutil根据当前CPU负载,选择一个合适的频率,该频率对应的运算能力大于当前负载的1.25倍,这样既留给了性能余量又不至于浪费功耗。

1.schedutil初始化

        初始化时先调用cpufreq_governor的init接口,再调用start接口,schedutil的init接口对应的是sugov_init()。sugov_init()的主要任务是创建sugov_policy,sugov_policy是schedutil策略的抽象,成员struct sugov_tunables *tunables是向用户程序提供的调参集合,可调的参数包括:min_rate_limit_ns(两次调频之间的最小时间间隔)、up_rate_delay_ns(连续两次升频之间的最小时间间隔)、down_rate_delay_ns(连续两次降频之间的最小时间间隔)。

/*
 * Governor descriptor for "schedutil": ties the governor name to the
 * callbacks implemented in this file.
 */
static struct cpufreq_governor schedutil_gov = {
	.owner	= THIS_MODULE,
	.name	= "schedutil",

	/* Per-policy setup and teardown. */
	.init	= sugov_init,
	.exit	= sugov_exit,

	/* Installing and removing the per-CPU update hooks. */
	.start	= sugov_start,
	.stop	= sugov_stop,

	.limits	= sugov_limits,
};
/*
 * Per-policy state of the schedutil governor.
 *
 * NOTE: this listing is abridged; the dotted line near the end stands for
 * members that are not shown here.
 */
struct sugov_policy {
	/* cpufreq policy this governor state was allocated for. */
	struct cpufreq_policy *policy;

	/* User-adjustable parameters; assigned in sugov_init(). */
	struct sugov_tunables *tunables;
	/* NOTE(review): presumably links this policy to its tunables set - confirm. */
	struct list_head tunables_hook;

	raw_spinlock_t update_lock;  /* For shared policies */
	/* NOTE(review): presumably the timestamp of the last frequency change - confirm. */
	u64 last_freq_update_time;
	/* Minimum interval between two frequency changes, in ns. */
	s64 min_rate_limit_ns;
	/* Minimum interval between two consecutive frequency increases, in ns. */
	s64 up_rate_delay_ns;
	/* Minimum interval between two consecutive frequency decreases, in ns. */
	s64 down_rate_delay_ns;
	/*
	 * Frequency returned by get_next_freq() when the raw request is
	 * unchanged; UINT_MAX means there is no reusable value.
	 */
	unsigned int next_freq;
	/* Last raw (unresolved) frequency computed by get_next_freq(). */
	unsigned int cached_raw_freq;
    ......................................................
};
/*
 * sugov_init - governor ->init callback for schedutil.
 * @policy: cpufreq policy the governor is being attached to.
 *
 * Allocates the sugov_policy and its tunables, picks default up/down rate
 * limits and publishes the result through policy->governor_data.
 *
 * NOTE: this listing is abridged; the dotted lines stand for omitted code,
 * including the error labels targeted by the goto statements and the final
 * return.
 */
static int sugov_init(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;
	struct sugov_tunables *tunables;
	int ret = 0;
    ......................................................
	cpufreq_enable_fast_switch(policy);
    /* Allocate the per-policy governor state. */
	sg_policy = sugov_policy_alloc(policy);
	if (!sg_policy) {
		ret = -ENOMEM;
		goto disable_fast_switch;
	}

	ret = sugov_kthread_create(sg_policy);
	if (ret)
		goto free_sg_policy;
    ....................................................
	tunables = sugov_tunables_alloc(sg_policy);
	if (!tunables) {
		ret = -ENOMEM;
		goto stop_kthread;
	}

	/*
	 * Use the policy-provided transition delays when both are set.
	 * Otherwise start from LATENCY_MULTIPLIER and scale it by the
	 * transition latency (converted from ns to us) when that is non-zero.
	 */
	if (policy->up_transition_delay_us && policy->down_transition_delay_us) {
		tunables->up_rate_limit_us = policy->up_transition_delay_us;
		tunables->down_rate_limit_us = policy->down_transition_delay_us;
	} else {
		unsigned int lat;

                tunables->up_rate_limit_us = LATENCY_MULTIPLIER;
                tunables->down_rate_limit_us = LATENCY_MULTIPLIER;
		lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
		if (lat) {
                        tunables->up_rate_limit_us *= lat;
                        tunables->down_rate_limit_us *= lat;
                }
	}
    /* The new sugov_policy is stored in the cpufreq_policy's governor_data. */
	policy->governor_data = sg_policy;
	sg_policy->tunables = tunables;

	/* Register the tunables kobject under the governor's parent kobject. */
	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
				   get_governor_parent_kobj(policy), "%s",
				   schedutil_gov.name);
    ........................................................
}

        sugov_start()主要任务是初始化per_cpu变量sugov_cpu,sugov_cpu的成员sg_policy指向sugov_init()中创建的sugov_policy,成员update_util对应的函数指针在policy被多个CPU共享时赋值为sugov_update_shared(),否则赋值为sugov_update_single(),update_util被存到per_cpu变量cpufreq_update_util_data中。

/*
 * sugov_start - governor ->start callback for schedutil.
 * @policy: cpufreq policy whose CPUs should start receiving utilization updates.
 *
 * Resets the per-CPU sugov_cpu state of every CPU in policy->cpus and
 * installs the utilization update hook for it.
 *
 * NOTE: this listing is abridged; the dotted line stands for omitted code.
 *
 * Return: 0 in the visible code path.
 */
static int sugov_start(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;
    ...........................................................
	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		/* Start from a clean per-CPU state. */
		memset(sg_cpu, 0, sizeof(*sg_cpu));
		sg_cpu->sg_policy = sg_policy;
		/* Initial flags value; sugov_update_shared() overwrites it on each update. */
		sg_cpu->flags = SCHED_CPUFREQ_DL;
		sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
		/* Shared policies use the shared handler, others the single-CPU one. */
		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
					     policy_is_shared(policy) ?
							sugov_update_shared :
							sugov_update_single);
	}
	return 0;
}
/*
 * cpufreq_add_update_util_hook - install a utilization update hook for a CPU.
 * @cpu: CPU whose cpufreq_update_util_data slot is to be filled.
 * @data: hook descriptor to publish.
 * @func: callback stored in @data->func.
 *
 * Warns and does nothing when @data or @func is NULL, or when the slot of
 * @cpu already holds a hook.
 */
void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
			void (*func)(struct update_util_data *data, u64 time,
				     unsigned int flags))
{
	if (WARN_ON(!data || !func))
		return;

	/* Only publish when no hook is currently registered for this CPU. */
	if (!WARN_ON(per_cpu(cpufreq_update_util_data, cpu))) {
		/* Fill in the callback before the pointer is published. */
		data->func = func;
		rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
	}
}

        schedutil初始化后的结构体关系如下,每一个CPU都对应一个结构体sugov_cpu,其成员sg_policy指向结构体sugov_policy,sugov_policy包含了众多限制条件,sugov_tunables为限制条件提供sysfs接口。

2.触发调频

        schedutil与其它策略的一个不同点就是,schedutil不采用定期采样方式,而是在适当的地方埋点来触发调频。cpufreq_update_util()用以触发cpufreq更新频率,参数flags标志触发的原因,例如进程退出iowait会触发更新频率,进程迁移后CPU的负载发生变化会触发更新频率等。

/*
 * cpufreq_update_util - invoke the current CPU's utilization update hook.
 * @rq: runqueue whose clock is passed to the hook as the timestamp.
 * @flags: flags forwarded unchanged to the hook.
 *
 * Does nothing when no hook is installed for this CPU.
 */
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
{
	struct update_util_data *hook =
		rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));

	if (!hook)
		return;

	hook->func(hook, rq_clock(rq), flags);
}

3.更新当前CPU负载

        sugov_update_shared()首先调用sugov_get_util()获取当前CPU负载(见《WALT负载计算》),然后判断两次调频的时间间隔是否大于最小时间间隔,如果大于就计算频率然后设置新频率。

/*
 * sugov_update_shared - utilization update hook for policies shared by CPUs.
 * @hook: update_util member embedded in the calling CPU's sugov_cpu.
 * @time: timestamp of this update.
 * @flags: SCHED_CPUFREQ_* flags describing why the update was triggered.
 *
 * Records the CPU's current utilization and, if sugov_should_update_freq()
 * allows it, computes and commits a new frequency for the policy.
 *
 * NOTE: this listing is abridged; the dotted line stands for omitted code.
 */
static void sugov_update_shared(struct update_util_data *hook, u64 time,
				unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned long util, max;
	unsigned int next_f;
	/* NOTE(review): not used in the visible part; may be used in the omitted code. */
	int cid;
    /* Fetch the current CPU load. */
	sugov_get_util(&util, &max, time);

	/* Per-CPU samples of a shared policy are updated under update_lock. */
	raw_spin_lock(&sg_policy->update_lock);

	sg_cpu->util = util;
	sg_cpu->max = max;
	sg_cpu->flags = flags;

	sugov_set_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;
    /* Check the time elapsed against the rate limit. */
	if (sugov_should_update_freq(sg_policy, time)) {
		/* Compute the frequency: DL updates request the maximum frequency. */
		if (flags & SCHED_CPUFREQ_DL)
			next_f = sg_policy->policy->cpuinfo.max_freq;
		else
			next_f = sugov_next_freq_shared(sg_cpu, time);
        ...................................................
		/* Apply the frequency. */
		sugov_update_commit(sg_policy, time, next_f);
	}

	raw_spin_unlock(&sg_policy->update_lock);
}

4.计算目标频率

        同一个cluster中有多个CPU,这些CPU有不同的负载,以其中最高的负载(util / max比值最大者)计算目标频率。目标频率的计算公式为next_freq = 1.25 * freq * util / max,其中freq在arch_scale_freq_invariant()为真时取policy->cpuinfo.max_freq,否则取当前频率policy->cur;在后一种情况下,当util / max大于0.8时将提频。

/*
 * sugov_next_freq_shared - pick the next frequency for a shared policy.
 * @sg_cpu: per-CPU governor state of the CPU that triggered the update.
 * @time: timestamp of the update, compared against each CPU's last_update.
 *
 * Walks every CPU of the policy, keeps the largest util/max ratio among the
 * CPUs updated within the last tick, and converts it to a frequency.
 *
 * Return: policy->cpuinfo.max_freq as soon as a recently updated CPU has
 * SCHED_CPUFREQ_DL set, otherwise the result of get_next_freq().
 */
static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
{
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned long best_util = 0;
	unsigned long best_max = 1;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *peer = &per_cpu(sugov_cpu, cpu);
		s64 age_ns = time - peer->last_update;
		unsigned long peer_util;
		unsigned long peer_max;

		/* A sample older than one tick is stale: drop its boost and skip it. */
		if (age_ns > TICK_NSEC) {
			peer->iowait_boost = 0;
			continue;
		}

		if (peer->flags & SCHED_CPUFREQ_DL)
			return policy->cpuinfo.max_freq;

		peer_util = peer->util;
		peer_max = peer->max;

		/* Cross-multiplied form of peer_util/peer_max > best_util/best_max. */
		if (peer_util * best_max > peer_max * best_util) {
			best_util = peer_util;
			best_max = peer_max;
		}

		sugov_iowait_boost(peer, &best_util, &best_max);
	}

	/* Map the selected load to a frequency. */
	return get_next_freq(sg_policy, best_util, best_max);
}
/*
 * get_next_freq - translate a utilization ratio into a target frequency.
 * @sg_policy: governor state holding the policy and the cached results.
 * @util: utilization value.
 * @max: value that @util is measured against.
 *
 * The raw request is 1.25 * base * util / max, where base is
 * cpuinfo.max_freq when arch_scale_freq_invariant() is true and policy->cur
 * otherwise.
 *
 * Return: the cached sg_policy->next_freq when the raw request matches
 * cached_raw_freq and next_freq is not UINT_MAX; otherwise the raw request
 * is cached and resolved through cpufreq_driver_resolve_freq().
 */
static unsigned int get_next_freq(struct sugov_policy *sg_policy,
				  unsigned long util, unsigned long max)
{
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int freq;

	if (arch_scale_freq_invariant())
		freq = policy->cpuinfo.max_freq;
	else
		freq = policy->cur;

	/* freq + freq/4 provides the 1.25 factor. */
	freq = (freq + (freq >> 2)) * util / max;

	if (freq != sg_policy->cached_raw_freq || sg_policy->next_freq == UINT_MAX) {
		sg_policy->cached_raw_freq = freq;
		return cpufreq_driver_resolve_freq(policy, freq);
	}

	/* Same raw request as last time: reuse the cached frequency. */
	return sg_policy->next_freq;
}

参考资料

奔跑吧Linux内核

  • 0
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值