(init/main.c) kernel_init --> kernel_init_freeable -->
( arm64/kernel/smp.c) --> smp_prepare_cpus --> init_cpu_topology-->
( arm64/kernel/topology.c) -->parse_dt_topology -->parse_cluster-->parse_core-->get_cpu_for_node-->topology_parse_cpu_capacity(cpu_node, cpu)
cpu_capacity=dts"capacity-dmips-mhz"=544
capacity_scale = max(cpu_capacity, capacity_scale)=1024 取四个cpu cpu_capacity的最大值,cpu3的cpu_capacity=1024
raw_capacity[cpu0~2] = cpu_capacity=544; raw_capacity[cpu3] = cpu_capacity=1024
smp_init-->idle_threads_init 初始化除boot cpu(cpu0)外其它cpu的idle线程
-->cpuhp_threads_init 初始化hotplug线程
-->cpu_up 逐个启动其它尚未online的cpu
sched_init_smp-->sched_init_domains-->build_sched_domains-->
-->build_sched_domain-->sd_init
-->build_sched_groups
-->init_sched_groups_energy-->init_sched_groups_capacity-->update_group_capacity--> update_cpu_capacity
在kernel4.14/kernel/sched/fair.c accumulate_sum()中
2923 scale_freq = arch_scale_freq_capacity(NULL, cpu); //1024*curr_freq/max_freq
2924 scale_cpu = arch_scale_cpu_capacity(NULL, cpu); //cpu[0~2] =488 cpu[3]=1024
默认定义(本平台未生效,因为arm64在asm/topology.h中用#define覆盖了这两个函数,见下文):kernel4.14/kernel/sched/fair.c
39 #include "sched.h" //kernel4.14/kernel/sched/sched.h
1737 #ifndef arch_scale_freq_capacity
1738 static __always_inline
1739 unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
1740 {
1741 return SCHED_CAPACITY_SCALE; //1024
1742 }
1743 #endif
1753 #ifndef arch_scale_cpu_capacity
1754 static __always_inline
1755 unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
1756 {
1757 if (sd && (sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1))
//cat /proc/sys/kernel/sched_domain/cpu0/domain0/flags ==>33343=0x823F, SD_SHARE_CPUCAPACITY(0x0080)位为0,所以不会走1758行的返回
1758 return sd->smt_gain / sd->span_weight;
1759
1760 return SCHED_CAPACITY_SCALE; //1024
1761 }
1762 #endif
实际生效的定义(通过如下include链引入):kernel4.14/kernel/sched/fair.c
25 #include <linux/sched/topology.h> //kernel4.14/include/linux/sched/topology.h
5 #include <linux/topology.h> //kernel4.14/include/linux/topology.h
35 #include <asm/topology.h> //kernel4.14/arch/arm64/include/asm/topology.h
在kernel4.14/arch/arm64/include/asm/topology.h中
38 /* Replace task scheduler's default frequency-invariant accounting */
39 #define arch_scale_freq_capacity topology_get_freq_scale
45 #define arch_scale_cpu_capacity topology_get_cpu_scale
在kernel4.14/include/linux/arch_topology.h中
34 static inline
35 unsigned long topology_get_freq_scale(struct sched_domain *sd, int cpu)
36 {
37 return per_cpu(freq_scale, cpu);
38 }
25 unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu)
26 {
27 return per_cpu(cpu_scale, cpu);
28 }
cpu_scale/freq_scale在哪被定义呢?kernel4.14/drivers/base/arch_topology.c
freq_scale:
kernel4.14/drivers/cpufreq/cpufreq.c
1948 int __cpufreq_driver_target()
1984 return __target_index(policy, index);
1988 retval = cpufreq_driver->target_index(policy, index);
-------------------------->call sprd_cpufreq_set_target_index(),即下文sprd_cpufreq_driver结构体2104行的.target_index回调
kernel4.4/drivers/cpufreq/sprd-cpufreqhw.c
|1400 static int sprd_hardware_cpufreq_set_target_index()
| kernel4.14/drivers/base/arch_topology.c
| 27 DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE; //初始值是1024
| 28 DEFINE_PER_CPU(unsigned long, max_cpu_freq);
| 29 DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE;
| 30
|--->31 void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
32 unsigned long max_freq) //对比kernel4.4做了修改,在sprd cpufreq driver里实现
33 {
34 unsigned long scale;
35 int i;
36
37 scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
//SCHED_CAPACITY_SHIFT=10,当前cpu频率和最高频率的比值,按1024为最大值进行scale。例cpu0最大频率=2G,当前频率=1.5G,scale=1024*1.5/2=768
38
39 for_each_cpu(i, cpus) {
40 per_cpu(freq_scale, i) = scale;
41 per_cpu(max_cpu_freq, i) = max_freq;
42 }
cpu_scale:
kernel4.14/arch/arm64/kernel/topology.c
|__init parse_dt_topology()
|---->parse_cluster-->parse_core-->get_cpu_for_node-->topology_parse_cpu_capacity(cpu_node, cpu)
ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
&cpu_capacity); //cpu0~2 544, cpu3 1024
capacity_scale = max(cpu_capacity, capacity_scale); //cpu0~3的最大值,1024
raw_capacity[cpu] = cpu_capacity cpu0~2 544, cpu3 1024
|---->|359 void topology_normalize_cpu_scale(void)
360 {
368 mutex_lock(&cpu_scale_mutex);
369 for_each_possible_cpu(cpu) {
370 capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
371 / capacity_scale;
372 topology_set_cpu_scale(cpu, capacity); //此时capacity_scale=1024,cpu_scale: cpu0~2=544*1024/1024=544 cpu3=1024
|
|---->|65 DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; //1024
67 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
68 {
69 per_cpu(cpu_scale, cpu) = capacity;
70 }
373 pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu raw_capacity=%u\n",
374 cpu, topology_get_cpu_scale(NULL, cpu),
375 raw_capacity[cpu]);
376 }
377 mutex_unlock(&cpu_scale_mutex);
378 }
kernel4.14/drivers/base/arch_topology.c
|init_cpu_capacity_callback() //在哪被调?cpufreq_set_policy()通过notifier方式
441 for_each_cpu(cpu, policy->related_cpus) {
442 raw_capacity[cpu] = topology_get_cpu_scale(NULL, cpu) *
443 policy->cpuinfo.max_freq / 1000UL;
raw_capacity[cpu0~2]=544*1820000/1000=990080;
raw_capacity[cpu3]=1024*2028000/1000=2076672
444 capacity_scale = max(raw_capacity[cpu], capacity_scale); //capacity_scale =2076672
445 }
446
447 if (cpumask_empty(cpus_to_visit)) {
448 topology_normalize_cpu_scale();
|---->|356 void topology_normalize_cpu_scale(void)
366 for_each_possible_cpu(cpu) {
367 capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
368 / capacity_scale;
369 topology_set_cpu_scale(cpu, capacity); //cpu_scale[0~2]=488; cpu_scale[3]=1024
370 pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu raw_capacity=%u\n",
371 cpu, topology_get_cpu_scale(NULL, cpu),
372 raw_capacity[cpu]);
373 }
374 mutex_unlock(&cpu_scale_mutex);
449 init_sched_energy_costs();
450 if (topology_detect_flags())
451 schedule_work(&update_topology_flags_work);
452 free_raw_capacity();
453 pr_debug("cpu_capacity: parsing done\n");
454 schedule_work(&parsing_done_work);
455 }
456
457 return 0;
总结:cpu_scale是每个cpu的capacity(capacity-dmips-mhz*该cpu最大频率)除以所有cpu中最大的capacity(这里是cpu3),再乘以1024得到的归一化值。例:cpu0~2=990080*1024/2076672=488,cpu3=1024。
2096 static struct cpufreq_driver sprd_cpufreq_driver = {
2097 .name = "sprd-cpufreq",
2098 .flags = CPUFREQ_STICKY
2099 | CPUFREQ_NEED_INITIAL_FREQ_CHECK
2100 | CPUFREQ_HAVE_GOVERNOR_PER_POLICY,
2101 .init = sprd_cpufreq_init,
2102 .exit = sprd_cpufreq_exit,
2103 .verify = sprd_cpufreq_table_verify,
2104 .target_index = sprd_cpufreq_set_target_index,
2105 .get = sprd_cpufreq_get,
2106 .suspend = sprd_cpufreq_suspend,
2107 .resume = sprd_cpufreq_resume,
2108 .attr = cpufreq_generic_attr,
2109 /* platform specific boost support code */
2110 .boost_supported = true,
2111 .boost_enabled = true,
2112 .set_boost = sprd_cpufreq_set_boost,
2113 };
所以经过scale后的频率是当前cpu频率扩大1024倍,再除以该cpu的最大频率
scale_freq=scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq
64 static DEFINE_MUTEX(cpu_scale_mutex);
65 DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
67 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
68 {
69 per_cpu(cpu_scale, cpu) = capacity;
70 }
经过scale后的cpu capacity计算流程如下:
kernel4.14/arch/arm64/kernel/topology.c
354 void __init init_cpu_topology(void)
362 if (of_have_populated_dt() && parse_dt_topology())
171 static int __init parse_dt_topology(void)
195 topology_normalize_cpu_scale();
366 capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
367 / capacity_scale;
368 topology_set_cpu_scale(cpu, capacity);
在topology_parse_cpu_capacity()中初始化capacity_scale和raw_capacity,cpu_capacity从board的dtsi文件的"capacity-dmips-mhz"属性读取(单位DMIPS/MHz,即每1MHz频率下每秒可执行的百万条Dhrystone整数指令数)
385 ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz", //sharkl5 cpu0~2 544; cpu3 1024
386 &cpu_capacity);
398 capacity_scale = max(cpu_capacity, capacity_scale);
399 raw_capacity[cpu] = cpu_capacity;
2892 #define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
2893
2894 /*
2895 * Accumulate the three separate parts of the sum; d1 the remainder
2896 * of the last (incomplete) period, d2 the span of full periods and d3
2897 * the remainder of the (incomplete) current period.
2898 *
2899 * d1 d2 d3
2900 * ^ ^ ^
2901 * | | |
2902 * |<->|<----------------->|<--->|
2903 * ... |---x---|------| ... |------|-----x (now)
2904 *
2905 * p-1
2906 * u' = (u + d1) y^p + 1024 \Sum y^n + d3 y^0
2907 * n=1
2908 *
2909 * = u y^p + (Step 1)
2910 *
2911 * p-1
2912 * d1 y^p + 1024 \Sum y^n + d3 y^0 (Step 2)
2913 * n=1
2914 */
2915 static __always_inline u32
2916 accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
2917 unsigned long weight, int running, struct cfs_rq *cfs_rq)
2918 {
2919 unsigned long scale_freq, scale_cpu;
2920 u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
2921 u64 periods;
2922
2923 scale_freq = arch_scale_freq_capacity(NULL, cpu);
2924 scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
2925
2926 delta += sa->period_contrib;
2927 periods = delta / 1024; /* A period is 1024us (~1ms) */
2928
2929 /*
2930 * Step 1: decay old *_sum if we crossed period boundaries.
2931 */
2932 if (periods) {
2933 sa->load_sum = decay_load(sa->load_sum, periods);
2934 if (cfs_rq) {
2935 cfs_rq->runnable_load_sum =
2936 decay_load(cfs_rq->runnable_load_sum, periods);
2937 }
2938 sa->util_sum = decay_load((u64)(sa->util_sum), periods);
2939
2940 /*
2941 * Step 2
2942 */
2943 delta %= 1024;
2944 contrib = __accumulate_pelt_segments(periods,
2945 1024 - sa->period_contrib, delta);
2946 }
2947 sa->period_contrib = delta;
2948
2949 contrib = cap_scale(contrib, scale_freq);
2950 if (weight) {
2951 sa->load_sum += weight * contrib;
2952 if (cfs_rq)
2953 cfs_rq->runnable_load_sum += weight * contrib;
2954 }
2955 if (running)
2956 sa->util_sum += contrib * scale_cpu;
2957
2958 return periods;
2959 }
max_freq_scale
7902 static void update_cpu_capacity(struct sched_domain *sd, int cpu)
7903 {
7913 capacity *= arch_scale_max_freq_capacity(sd, cpu);
7945 }
/*arch/arm64/include/asm/topology.h
*/
22 /* Replace task scheduler's default max-frequency-invariant accounting */
23 #define arch_scale_max_freq_capacity topology_get_max_freq_scale
/*include/linux/arch_topology.h
*/
38 static inline
39 unsigned long topology_get_max_freq_scale(struct sched_domain *sd, int cpu)
40 {
41 return per_cpu(max_freq_scale, cpu);
42 }
max_freq_scale在哪定义赋值?
1. 在drivers/base/arch_topology.c 中初始化:
26 DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE;
2. 在drivers/base/arch_topology.c 中设置:
42 void arch_set_max_freq_scale(struct cpumask *cpus,
43 unsigned long policy_max_freq)
44 {
45 unsigned long scale, max_freq;
46 int cpu = cpumask_first(cpus);
47
48 if (cpu > nr_cpu_ids)
49 return;
50
/*max_freq实际是cpuinfo.max_freq,cpu的实际最大频率
*/
51 max_freq = per_cpu(max_cpu_freq, cpu);
52 if (!max_freq)
53 return;
/*per_cpu(max_cpu_freq)设置: sprd_hardware_cpufreq_set_target_index()->arch_set_freq_scale()
*变频时在dvfs driver中,设置max_cpu_freq=policy->cpuinfo.max_freq,即cpu实际的最大频率,不受限频影响。
*/
/*drivers/cpufreq/sprd-cpufreqhw.c
*/
117 static
118 int sprd_hardware_cpufreq_set_target_index(struct cpufreq_policy *policy,
119 unsigned int idx)
120 {
169 if (!ret)
170 arch_set_freq_scale(policy->related_cpus,
171 freq, policy->cpuinfo.max_freq);
174 }
/*drivers/base/arch_topology.c
*/
24 DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
25 DEFINE_PER_CPU(unsigned long, max_cpu_freq);
26 DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE;
27
28 void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
29 unsigned long max_freq)
30 {
31 unsigned long scale;
32 int i;
33
34 scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
35
36 for_each_cpu(i, cpus) {
37 per_cpu(freq_scale, i) = scale;
38 per_cpu(max_cpu_freq, i) = max_freq;
39 }
40 }
54
55 scale = (policy_max_freq << SCHED_CAPACITY_SHIFT) / max_freq;
56
57 for_each_cpu(cpu, cpus)
58 per_cpu(max_freq_scale, cpu) = scale;
59 }
policy_max_freq:arch_set_max_freq_scale()第二个参数,值在哪设置?
cpufreq_set_policy()->arch_set_max_freq_scale(policy->cpus, policy->max)
从如下函数中可见,policy_max_freq(即policy->max)是通过qos限频后的cpu最大频率,
所以max_freq_scale是qos限频后的最大频率相对cpu实际最高频率(cpuinfo.max_freq)的比值,按1024归一化:max_freq_scale = policy->max * 1024 / cpuinfo.max_freq。
2391 static int cpufreq_set_policy(struct cpufreq_policy *policy,
2392 struct cpufreq_governor *new_gov,
2393 unsigned int new_pol)
2394 {
2395 struct cpufreq_policy_data new_data;
2396 struct cpufreq_governor *old_gov;
2397 int ret;
2398
2399 memcpy(&new_data.cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo));
2400 new_data.freq_table = policy->freq_table;
2401 new_data.cpu = policy->cpu;
2402 /*
2403 * PM QoS framework collects all the requests from users and provide us
2404 * the final aggregated value here.
2405 */
2406 new_data.min = freq_qos_read_value(&policy->constraints, FREQ_QOS_MIN);
2407 new_data.max = freq_qos_read_value(&policy->constraints, FREQ_QOS_MAX);
2408
2409 pr_debug("setting new policy for CPU %u: %u - %u kHz\n",
2410 new_data.cpu, new_data.min, new_data.max);
2411
2412 /* verify the cpu speed can be set within this limit */
2413 ret = cpufreq_driver->verify(&new_data);
2414 if (ret)
2415 return ret;
2416
2417 policy->min = new_data.min;
2418 policy->max = new_data.max;
2419 trace_cpu_frequency_limits(policy);
2420
2421 arch_set_max_freq_scale(policy->cpus, policy->max);