CPU调频策略需要满足这样的要求:在系统负载低的时候降低CPU频率以节省功耗、减小发热,在系统负载高的时候提高CPU频率以保障用户交互的流畅。内核提供了多种调频策略,例如ondemand、userspace、powersave、performance、schedutil等。手机在开机完成后,用的是schedutil调频策略。理想的调频策略,希望能够做到系统需要多少性能,就给多少性能,这样既满足了性能又不至于造成功耗浪费。但是没有接口能够知道系统接下来的一段时间需要多少性能,只能用过去的负载预测当前的需求。《WALT负载计算》中提到过,不同频率对应不同的运算能力,schedutil根据当前CPU负载,选择一个合适的频率,该频率对应的运算能力大于当前负载的1.25倍,这样既留出了性能余量又不至于浪费功耗。
1.schedutil初始化
初始化时先调用cpufreq_governor的init接口,再调用start接口,schedutil的init接口对应的是sugov_init()。sugov_init()的主要任务是创建sugov_policy,sugov_policy是schedutil策略的抽象,成员struct sugov_tunables *tunables是向用户程序提供的调参集合,可调的参数包括up_rate_limit_us和down_rate_limit_us。sugov_policy自身保存的限制条件包括:min_rate_limit_ns两次调频之间的最小时间间隔,up_rate_delay_ns连续两次升频之间的最小时间间隔,down_rate_delay_ns连续两次降频之间的最小时间间隔。
/*
 * Governor descriptor for the "schedutil" cpufreq governor. It names the
 * governor and wires up the callbacks through which it is driven.
 */
static struct cpufreq_governor schedutil_gov = {
.name = "schedutil",
.owner = THIS_MODULE,
/* Called first at initialization; creates the sugov_policy. */
.init = sugov_init,
.exit = sugov_exit,
/* Called after init; installs the per-CPU utilization-update hooks. */
.start = sugov_start,
.stop = sugov_stop,
.limits = sugov_limits,
};
/*
 * Per-policy state of the schedutil governor. One instance is created by
 * sugov_init() and stored in cpufreq_policy->governor_data.
 * (The member list is abridged in this excerpt.)
 */
struct sugov_policy {
/* cpufreq policy read back when computing frequencies (cpus, cpuinfo, cur) */
struct cpufreq_policy *policy;
/* Tuning parameters exposed to user space */
struct sugov_tunables *tunables;
struct list_head tunables_hook;
raw_spinlock_t update_lock; /* For shared policies */
u64 last_freq_update_time;
/* Minimum interval between two frequency changes */
s64 min_rate_limit_ns;
/* Minimum interval between two consecutive frequency increases */
s64 up_rate_delay_ns;
/* Interval between two consecutive frequency decreases */
s64 down_rate_delay_ns;
/* Returned by get_next_freq() while the raw frequency is unchanged */
unsigned int next_freq;
/* Last raw (unresolved) frequency computed by get_next_freq() */
unsigned int cached_raw_freq;
......................................................
};
/*
 * sugov_init - governor ->init callback: build the per-policy schedutil state.
 * @policy: cpufreq policy the governor is being attached to.
 *
 * Allocates a sugov_policy and its tunables, seeds the up/down rate limits,
 * stores the sugov_policy in policy->governor_data and registers the
 * tunables kobject under the governor's name.
 *
 * The visible failure paths set ret to -ENOMEM or to the error returned by
 * sugov_kthread_create() and jump to cleanup labels; the labels and the final
 * return are in the abridged part of this excerpt.
 */
static int sugov_init(struct cpufreq_policy *policy)
{
struct sugov_policy *sg_policy;
struct sugov_tunables *tunables;
int ret = 0;
......................................................
cpufreq_enable_fast_switch(policy);
// Allocate the sugov_policy for this cpufreq policy
sg_policy = sugov_policy_alloc(policy);
if (!sg_policy) {
ret = -ENOMEM;
goto disable_fast_switch;
}
// Set up the governor kthread; a non-zero return is treated as an error
ret = sugov_kthread_create(sg_policy);
if (ret)
goto free_sg_policy;
....................................................
tunables = sugov_tunables_alloc(sg_policy);
if (!tunables) {
ret = -ENOMEM;
goto stop_kthread;
}
// Prefer the policy's own up/down transition delays when both are non-zero
if (policy->up_transition_delay_us && policy->down_transition_delay_us) {
tunables->up_rate_limit_us = policy->up_transition_delay_us;
tunables->down_rate_limit_us = policy->down_transition_delay_us;
} else {
unsigned int lat;
// Otherwise start from LATENCY_MULTIPLIER and scale it by the
// transition latency (converted from ns to us) when that is non-zero
tunables->up_rate_limit_us = LATENCY_MULTIPLIER;
tunables->down_rate_limit_us = LATENCY_MULTIPLIER;
lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
if (lat) {
tunables->up_rate_limit_us *= lat;
tunables->down_rate_limit_us *= lat;
}
}
// The new sugov_policy is stored in the cpufreq_policy's governor_data
policy->governor_data = sg_policy;
sg_policy->tunables = tunables;
// Register the tunables kobject, named after the governor
ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
get_governor_parent_kobj(policy), "%s",
schedutil_gov.name);
........................................................
}
sugov_start()主要任务是初始化per_cpu变量sugov_cpu,sugov_cpu的成员sg_policy指向sugov_init()中创建的sugov_policy,成员update_util对应的函数指针根据policy_is_shared(policy)的结果赋值:policy被多个CPU共享时为sugov_update_shared(),否则为sugov_update_single()。update_util被存到per_cpu变量cpufreq_update_util_data中。
/*
 * sugov_start - governor ->start callback: arm the per-CPU update hooks.
 * @policy: cpufreq policy whose governor_data holds the sugov_policy.
 *
 * For every CPU in policy->cpus, resets that CPU's sugov_cpu, links it to the
 * sugov_policy and registers its utilization-update callback.
 *
 * Returns 0 on the path shown (part of the body is abridged in this excerpt).
 */
static int sugov_start(struct cpufreq_policy *policy)
{
struct sugov_policy *sg_policy = policy->governor_data;
unsigned int cpu;
...........................................................
for_each_cpu(cpu, policy->cpus) {
struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
// Start from a clean per-CPU state
memset(sg_cpu, 0, sizeof(*sg_cpu));
sg_cpu->sg_policy = sg_policy;
// Initial flags are SCHED_CPUFREQ_DL, which the frequency selection
// code maps to cpuinfo.max_freq
sg_cpu->flags = SCHED_CPUFREQ_DL;
sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
// Pick the shared or single-CPU update callback for this policy
cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
policy_is_shared(policy) ?
sugov_update_shared :
sugov_update_single);
}
return 0;
}
/*
 * cpufreq_add_update_util_hook - install a utilization-update callback.
 * @cpu: CPU whose cpufreq_update_util_data slot receives the hook.
 * @data: hook container; its ->func member is set to @func.
 * @func: callback later invoked as func(data, time, flags).
 *
 * Warns and does nothing if @data or @func is NULL, or if the slot for @cpu
 * is already occupied. Otherwise the hook is published with
 * rcu_assign_pointer().
 */
void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
			void (*func)(struct update_util_data *data, u64 time,
				     unsigned int flags))
{
	/* Both the container and the callback are mandatory. */
	if (WARN_ON(!(data && func)))
		return;

	/* Never overwrite a hook that is still installed on this CPU. */
	if (WARN_ON(per_cpu(cpufreq_update_util_data, cpu)))
		return;

	/* Fill in the callback before the pointer becomes visible. */
	data->func = func;
	rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
}
schedutil初始化后的结构体关系如下:每一个CPU都对应一个结构体sugov_cpu,其成员sg_policy指向结构体sugov_policy。sugov_policy包含了众多限制条件,sugov_tunables为限制条件提供sysfs接口。
2.触发调频
schedutil与其它策略的一个不同点是,schedutil不采用定期采样方式,而是在适当的地方埋点来触发调频。cpufreq_update_util()用以触发cpufreq更新频率,参数flags标志触发的原因,例如进程退出iowait会触发更新频率,进程迁移后CPU的负载发生变化会触发更新频率等。
/*
 * cpufreq_update_util - run the utilization-update hook of the current CPU.
 * @rq: runqueue whose clock (rq_clock()) is passed to the hook as the time.
 * @flags: reason flags forwarded unchanged to the hook.
 *
 * Does nothing when no hook is installed for this CPU.
 */
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
{
	struct update_util_data *hook =
		rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));

	if (!hook)
		return;

	hook->func(hook, rq_clock(rq), flags);
}
3.更新当前CPU负载
sugov_update_shared()首先调用sugov_get_util()获取当前CPU负载(见《WALT负载计算》),然后判断两次调频的时间间隔是否大于最小时间间隔,如果大于就计算频率,然后设置新频率。
/*
 * sugov_update_shared - utilization-update hook installed by sugov_start()
 * when policy_is_shared() is true.
 * @hook: update_util member embedded in this CPU's sugov_cpu.
 * @time: timestamp supplied by the caller.
 * @flags: reason flags (e.g. SCHED_CPUFREQ_DL).
 *
 * Records the current utilization in the per-CPU state under the policy's
 * update_lock and, when sugov_should_update_freq() allows it, computes and
 * commits a new frequency. (Part of the body is abridged in this excerpt.)
 */
static void sugov_update_shared(struct update_util_data *hook, u64 time,
unsigned int flags)
{
// Recover the per-CPU state that embeds this hook
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
unsigned long util, max;
unsigned int next_f;
int cid;
// Get the current CPU load
sugov_get_util(&util, &max, time);
raw_spin_lock(&sg_policy->update_lock);
// Publish this CPU's latest sample; sugov_next_freq_shared() reads these
sg_cpu->util = util;
sg_cpu->max = max;
sg_cpu->flags = flags;
sugov_set_iowait_boost(sg_cpu, time, flags);
sg_cpu->last_update = time;
// Check whether enough time has passed since the last frequency change
if (sugov_should_update_freq(sg_policy, time)) {
// Compute the frequency: SCHED_CPUFREQ_DL requests go straight to max_freq
if (flags & SCHED_CPUFREQ_DL)
next_f = sg_policy->policy->cpuinfo.max_freq;
else
next_f = sugov_next_freq_shared(sg_cpu, time);
...................................................
// Set the frequency
sugov_update_commit(sg_policy, time, next_f);
}
raw_spin_unlock(&sg_policy->update_lock);
}
4.计算目标频率
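同一个cluster中有多个CPU,这些CPU有不同的负载,以其中最高的负载计算目标频率。目标频率的计算公式为next_freq = 1.25 * freq * util / max,其中freq在arch_scale_freq_invariant()为真时取policy->cpuinfo.max_freq,否则取当前频率policy->cur。以当前频率为基准时,next_freq = 1.25 * curr_freq * util / max,由此可见当util / max大于0.8时将提频。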
/*
 * sugov_next_freq_shared - choose the next frequency for a shared policy.
 * @sg_cpu: per-CPU governor state of the CPU that triggered the update.
 * @time: timestamp of the update, compared against each CPU's last_update.
 *
 * Scans every CPU in the policy and keeps the sample with the largest
 * util/max ratio. A CPU whose last update is older than TICK_NSEC is skipped
 * and has its iowait boost cleared. A CPU flagged SCHED_CPUFREQ_DL makes the
 * function return cpuinfo.max_freq immediately.
 *
 * Returns the frequency get_next_freq() derives from the selected sample.
 */
static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
{
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned long best_util = 0;
	unsigned long best_max = 1;
	unsigned int cpu;

	/* Walk all CPUs of the cluster looking for the highest load. */
	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *peer = &per_cpu(sugov_cpu, cpu);
		unsigned long peer_util, peer_max;
		s64 age_ns = time - peer->last_update;

		/* Stale sample: ignore it and drop its iowait boost. */
		if (age_ns > TICK_NSEC) {
			peer->iowait_boost = 0;
			continue;
		}

		if (peer->flags & SCHED_CPUFREQ_DL)
			return policy->cpuinfo.max_freq;

		peer_util = peer->util;
		peer_max = peer->max;

		/* Cross-multiplied comparison of peer_util/peer_max vs. best. */
		if (peer_util * best_max > peer_max * best_util) {
			best_util = peer_util;
			best_max = peer_max;
		}

		sugov_iowait_boost(peer, &best_util, &best_max);
	}

	/* Map the selected load to a frequency. */
	return get_next_freq(sg_policy, best_util, best_max);
}
/*
 * get_next_freq - translate a utilization sample into a target frequency.
 * @sg_policy: governor state; cached_raw_freq is updated on a cache miss.
 * @util: utilization of the selected CPU.
 * @max: capacity that @util is measured against.
 *
 * Computes raw = 1.25 * base * util / max, where base is cpuinfo.max_freq
 * when arch_scale_freq_invariant() is true and policy->cur otherwise.
 *
 * If the raw value equals the cached one and next_freq is not UINT_MAX, the
 * previously chosen next_freq is returned. Otherwise the raw value is cached
 * and the result of cpufreq_driver_resolve_freq() is returned.
 */
static unsigned int get_next_freq(struct sugov_policy *sg_policy,
				  unsigned long util, unsigned long max)
{
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int freq;

	if (arch_scale_freq_invariant())
		freq = policy->cpuinfo.max_freq;
	else
		freq = policy->cur;

	/* freq = 1.25 * freq * util / max */
	freq += freq >> 2;
	freq = freq * util / max;

	if (freq != sg_policy->cached_raw_freq ||
	    sg_policy->next_freq == UINT_MAX) {
		sg_policy->cached_raw_freq = freq;
		return cpufreq_driver_resolve_freq(policy, freq);
	}

	/* Same raw request as last time: reuse the resolved frequency. */
	return sg_policy->next_freq;
}
参考资料
《奔跑吧Linux内核》