cpufreq 代码分析
基础知识:
cpufreq 代码分析
从 drivers/cpufreq/Makefile 开始(注:我使用的内核版本是 linux-4.14.63):
# SPDX-License-Identifier: GPL-2.0
# Excerpt of the cpufreq Makefile: each object is built only when its CONFIG_* option is enabled.
# CPUfreq core
obj-$(CONFIG_CPU_FREQ) += cpufreq.o freq_table.o # cpufreq core code
# CPUfreq stats
obj-$(CONFIG_CPU_FREQ_STAT) += cpufreq_stats.o # cpufreq statistics
# CPUfreq governors
obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o # the five cpufreq modes (performance, powersave, userspace, ondemand, conservative)
obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o
obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o
obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o
obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o
obj-$(CONFIG_CPU_FREQ_GOV_COMMON) += cpufreq_governor.o
obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET) += cpufreq_governor_attr_set.o
obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o # the cpufreq_driver; "dt" means the device-tree variant
obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o
//....
驱动入口:
cpufreq-dt.c:
/*
 * Platform driver named "cpufreq-dt". Its probe and remove callbacks are
 * dt_cpufreq_probe and dt_cpufreq_remove.
 */
static struct platform_driver dt_cpufreq_platdrv = {
.driver = {
.name = "cpufreq-dt",
},
.probe = dt_cpufreq_probe,
.remove = dt_cpufreq_remove,
};
/* Declares dt_cpufreq_platdrv as this module's platform driver. */
module_platform_driver(dt_cpufreq_platdrv);
可以看出这是个平台驱动,与之对应的平台设备的添加在cpufreq-dt-platdev.c:
/*
 * Abridged excerpt: the code that obtains np and data and jumps to
 * create_pdev is omitted from this listing.
 */
static int __init cpufreq_dt_platdev_init(void)
{
create_pdev:
of_node_put(np);
/*
 * Register a platform device named "cpufreq-dt" (id -1) carrying `data`;
 * the name is the same as dt_cpufreq_platdrv's driver name.
 * PTR_ERR_OR_ZERO turns the returned pointer into the int return value.
 */
return PTR_ERR_OR_ZERO(platform_device_register_data(NULL, "cpufreq-dt",
-1, data,
sizeof(struct cpufreq_dt_platform_data)));
}
/* Registers cpufreq_dt_platdev_init as a device-level initcall. */
device_initcall(cpufreq_dt_platdev_init);
device_initcall(cpufreq_dt_platdev_init); 表示该函数在内核启动过程中会被调用。
继续看 probe 函数,dt_cpufreq_probe:
/* Excerpt from dt_cpufreq_probe(): hands dt_cpufreq_driver to cpufreq_register_driver(). */
ret = cpufreq_register_driver(&dt_cpufreq_driver);
dt_cpufreq_driver 的定义
/*
 * The cpufreq_driver instance that dt_cpufreq_probe() passes to
 * cpufreq_register_driver(). The cpufreq core invokes these callbacks;
 * in particular ->init (cpufreq_init) and ->ready (cpufreq_ready) are
 * called from cpufreq_online().
 */
static struct cpufreq_driver dt_cpufreq_driver = {
	.flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK,
	.verify = cpufreq_generic_frequency_table_verify,
	.target_index = set_target,
	.get = cpufreq_generic_get,
	.init = cpufreq_init,
	.exit = cpufreq_exit,
	.ready = cpufreq_ready,
	.name = "cpufreq-dt",
	.attr = cpufreq_dt_attr,
	.suspend = cpufreq_generic_suspend,
};
继续跟踪 cpufreq_register_driver
/*
 * Abridged excerpt: only the two steps relevant to the walkthrough are shown;
 * declarations, error handling and the return are omitted.
 */
int cpufreq_register_driver(struct cpufreq_driver *driver_data)
{
cpufreq_driver = driver_data; // 1. store the driver in the cpufreq_driver variable
ret = subsys_interface_register(&cpufreq_interface); // 2. register cpufreq_interface
// ...
}
cpufreq_interface 的定义
/*
 * Subsystem interface attached to cpu_subsys. Its add_dev and remove_dev
 * callbacks are cpufreq_add_dev and cpufreq_remove_dev.
 */
static struct subsys_interface cpufreq_interface = {
.name = "cpufreq",
.subsys = &cpu_subsys,
.add_dev = cpufreq_add_dev,
.remove_dev = cpufreq_remove_dev,
};
subsys_interface_register 函数会调用 sif->add_dev 函数 ,即 cpufreq_add_dev
/*
 * Abridged excerpt: shows only that registration invokes the interface's
 * add_dev callback when one is set. How `dev` is obtained is omitted here.
 */
int subsys_interface_register(struct subsys_interface *sif)
{
if (sif->add_dev) {
sif->add_dev(dev, sif);
}
}
cpufreq_add_dev:
/*
 * Abridged excerpt: when the CPU is online, bring up cpufreq for it via
 * cpufreq_online(). How `cpu` is derived from `dev` is omitted here.
 */
static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
{
if (cpu_online(cpu)) {
ret = cpufreq_online(cpu);
}
}
cpufreq_online:
/*
 * Abridged excerpt: policy allocation, the new_policy/flags declarations and
 * the out_exit_policy error path are omitted from this listing.
 */
static int cpufreq_online(unsigned int cpu)
{
/* call driver. From then on the cpufreq must be able
 * to accept all calls to ->verify and ->setpolicy for this CPU
 */
ret = cpufreq_driver->init(policy); // 1. call cpufreq_driver->init to initialise the policy
down_write(&policy->rwsem);
if (new_policy) {
ret = cpufreq_add_dev_interface(policy); // create sysfs nodes such as cpufreq/scaling_cur_freq
if (ret)
goto out_exit_policy;
cpufreq_stats_create_table(policy); // create the cpufreq stats sysfs nodes
write_lock_irqsave(&cpufreq_driver_lock, flags);
list_add(&policy->policy_list, &cpufreq_policy_list); // add the policy to cpufreq_policy_list
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
}
ret = cpufreq_init_policy(policy); // initialise the policy's governor
up_write(&policy->rwsem);
kobject_uevent(&policy->kobj, KOBJ_ADD);
/* Callback for handling stuff after policy is ready */
if (cpufreq_driver->ready)
cpufreq_driver->ready(policy); // cpufreq_driver->ready: creates the cpufreq_cooling_dev,
// which ties cpufreq to the thermal subsystem (author's note; not shown in this listing)
pr_debug("initialization complete\n");
return 0;
}
cpufreq_driver->init -----------------> cpufreq_init ,主要是填充policy结构体中的 freq_table
/*
 * Abridged excerpt of the driver's ->init callback; its main job here is to
 * fill in the policy's frequency table. Local declarations, the lookup of
 * cpu_dev/cpu_clk/name/opp_table, and the out_free_* error labels are omitted.
 */
static int cpufreq_init(struct cpufreq_policy *policy)
{
dev_pm_opp_of_cpumask_add_table(policy->cpus); // read the OPP cpufreq table from the device tree
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv) {
ret = -ENOMEM;
goto out_free_opp;
}
priv->reg_name = name;
priv->opp_table = opp_table;
ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); // obtain freq_table
if (ret) {
dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret);
goto out_free_priv;
}
priv->cpu_dev = cpu_dev;
policy->driver_data = priv;
policy->clk = cpu_clk;
policy->suspend_freq = dev_pm_opp_get_suspend_opp_freq(cpu_dev) / 1000;
ret = cpufreq_table_validate_and_show(policy, freq_table); // store freq_table in the policy
transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev);
/* Fall back to CPUFREQ_ETERNAL when no transition latency is reported. */
if (!transition_latency)
transition_latency = CPUFREQ_ETERNAL;
policy->cpuinfo.transition_latency = transition_latency;
policy->dvfs_possible_from_any_cpu = true;
return 0;
}
小结:cpufreq_driver 注册时序图:
接上面:cpufreq_online -> cpufreq_init_policy,继续跟踪
cpufreq_init_policy:
/*
 * Abridged excerpt: selection of the governor `gov` and the setup of
 * new_policy are omitted. The chosen governor is applied via cpufreq_set_policy().
 */
static int cpufreq_init_policy(struct cpufreq_policy *policy)
{
new_policy.governor = gov;
/* set default policy */
return cpufreq_set_policy(policy, &new_policy); // continues in cpufreq_set_policy
}
cpufreq_set_policy:
/*
 * Abridged excerpt: only the tail that installs and starts the new governor
 * is shown. Returns the result of governor init, or of governor start when
 * init succeeded.
 */
static int cpufreq_set_policy(struct cpufreq_policy *policy,
struct cpufreq_policy *new_policy)
{
/* start new governor */
policy->governor = new_policy->governor;
ret = cpufreq_init_governor(policy);
if (!ret) {
ret = cpufreq_start_governor(policy); // start the governor
}
return ret;
}
cpufreq_start_governor:
/*
 * Start the governor attached to @policy.
 *
 * Returns 0 without doing anything while cpufreq_suspended is set, -EINVAL
 * when the policy has no governor, the governor's ->start error code if
 * ->start fails, and 0 otherwise.
 */
static int cpufreq_start_governor(struct cpufreq_policy *policy)
{
int ret;
if (cpufreq_suspended)
return 0;
if (!policy->governor)
return -EINVAL;
pr_debug("%s: for CPU %u\n", __func__, policy->cpu);
/* Only for drivers that provide ->get and do not provide ->setpolicy. */
if (cpufreq_driver->get && !cpufreq_driver->setpolicy)
cpufreq_update_current_freq(policy);
if (policy->governor->start) {
ret = policy->governor->start(policy); // governor->start
if (ret)
return ret;
}
/* ->limits is optional and is called after a successful (or absent) ->start. */
if (policy->governor->limits)
policy->governor->limits(policy); // governor->limits
return 0;
}
governor->start , 以 ondemand 为例
/*
 * The ondemand governor description. The embedded .gov member is filled by
 * CPUFREQ_DBS_GOVERNOR_INITIALIZER("ondemand"); the remaining members are the
 * ondemand-specific od_* callbacks.
 */
static struct dbs_governor od_dbs_gov = {
.gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("ondemand"),
.kobj_type = { .default_attrs = od_attributes },
.gov_dbs_update = od_dbs_update,
.alloc = od_alloc,
.free = od_free,
.init = od_init,
.exit = od_exit,
.start = od_start,
};
/*
 * Initializer for the governor embedded in a dbs_governor: sets the name to
 * _name_ and points init/exit/start/stop/limits at the shared
 * cpufreq_dbs_governor_* functions.
 */
#define CPUFREQ_DBS_GOVERNOR_INITIALIZER(_name_) \
{ \
.name = _name_, \
.dynamic_switching = true, \
.owner = THIS_MODULE, \
.init = cpufreq_dbs_governor_init, \
.exit = cpufreq_dbs_governor_exit, \
.start = cpufreq_dbs_governor_start, \
.stop = cpufreq_dbs_governor_stop, \
.limits = cpufreq_dbs_governor_limits, \
}
governor->start 就对应于cpufreq_dbs_governor_start
/*
 * Abridged excerpt: the lookup of gov, policy_dbs and sampling_rate is
 * omitted. Calls the dbs_governor's own start hook, then installs the
 * utilisation-update hooks via gov_set_update_util().
 */
int cpufreq_dbs_governor_start(struct cpufreq_policy *policy)
{
gov->start(policy); // gov->start is od_start for the ondemand governor
gov_set_update_util(policy_dbs, sampling_rate);
return 0;
}
/*
 * Abridged excerpt: for every CPU in policy->cpus, register
 * dbs_update_util_handler as that CPU's update-util hook. How `policy` is
 * obtained and how delay_us is used are omitted from this listing.
 */
static void gov_set_update_util(struct policy_dbs_info *policy_dbs,
unsigned int delay_us)
{
for_each_cpu(cpu, policy->cpus) {
struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu);
cpufreq_add_update_util_hook(cpu, &cdbs->update_util,
dbs_update_util_handler); // callback: dbs_update_util_handler
}
}
dbs_update_util_handler
/*
 * Abridged excerpt: only the final step is shown, which queues the policy's
 * irq_work. The lookup of policy_dbs and any checks before queuing are omitted.
 */
static void dbs_update_util_handler(struct update_util_data *data, u64 time,
unsigned int flags)
{
irq_work_queue(&policy_dbs->irq_work); // queue the irq_work
}
/*
 * Excerpt (enclosing function not shown): bind policy_dbs->irq_work to
 * dbs_irq_work and policy_dbs->work to dbs_work_handler.
 */
init_irq_work(&policy_dbs->irq_work, dbs_irq_work);
INIT_WORK(&policy_dbs->work, dbs_work_handler);
/*
 * irq_work callback: recover the policy_dbs_info that embeds @irq_work and
 * schedule its work item on the current CPU.
 */
static void dbs_irq_work(struct irq_work *irq_work)
{
struct policy_dbs_info *policy_dbs;
policy_dbs = container_of(irq_work, struct policy_dbs_info, irq_work);
schedule_work_on(smp_processor_id(), &policy_dbs->work); // the work item runs dbs_work_handler
}
/*
 * Abridged excerpt: the lookup of policy_dbs, gov and policy is omitted.
 * Calls the governor's gov_dbs_update hook and passes its return value to
 * gov_update_sample_delay().
 */
static void dbs_work_handler(struct work_struct *work)
{
gov_update_sample_delay(policy_dbs, gov->gov_dbs_update(policy)); // gov_dbs_update is od_dbs_update for ondemand
}
/*
 * Abridged excerpt: the lookup of dbs_data and policy_dbs is omitted.
 * Runs od_update() and returns sampling_rate scaled by rate_mult.
 */
static unsigned int od_dbs_update(struct cpufreq_policy *policy)
{
od_update(policy); // the actual frequency decision is made in od_update
return dbs_data->sampling_rate * policy_dbs->rate_mult;
}
od_update
/*
 * Every sampling_rate, we check, if current idle time is less than 20%
 * (default), then we try to increase frequency. Else, we adjust the frequency
 * proportional to load.
 *
 * Abridged excerpt: the computation of `load` and the lookup of dbs_data,
 * policy_dbs and od_tuners are omitted from this listing.
 */
static void od_update(struct cpufreq_policy *policy)
{
/* Check for frequency increase */
if (load > dbs_data->up_threshold) { // adjust the CPU frequency dynamically according to load
/* If switching to max speed, apply sampling_down_factor */
if (policy->cur < policy->max)
policy_dbs->rate_mult = dbs_data->sampling_down_factor;
dbs_freq_increase(policy, policy->max);
} else {
/* Calculate the next frequency proportional to load */
unsigned int freq_next, min_f, max_f;
min_f = policy->cpuinfo.min_freq;
max_f = policy->cpuinfo.max_freq;
/* Linear interpolation between min_f and max_f, with load as a percentage. */
freq_next = min_f + load * (max_f - min_f) / 100;
/* No longer fully busy, reset rate_mult */
policy_dbs->rate_mult = 1;
if (od_tuners->powersave_bias)
freq_next = od_ops.powersave_bias_target(policy,
freq_next,
CPUFREQ_RELATION_L);
__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_C);
}
}
小结:cpu_policy 时序图