cpu_scale and freq_scale

(init/main.c)  kernel_init --> kernel_init_freeable -->

( arm64/kernel/smp.c) --> smp_prepare_cpus --> init_cpu_topology-->

( arm64/kernel/topology.c) -->parse_dt_topology -->parse_cluster-->parse_core-->get_cpu_for_node-->topology_parse_cpu_capacity(cpu_node, cpu)

cpu_capacity=dts"capacity-dmips-mhz"=544

capacity_scale = max(cpu_capacity, capacity_scale)=1024 取四个cpu cpu_capacity的最大值,cpu3的cpu_capacity=1024

raw_capacity[cpu0~2] = cpu_capacity=544; raw_capacity[cpu3] = cpu_capacity=1024

smp_init-->idle_threads_init 初始化除boot cpu0外其它cpu的idle线程
        -->cpuhp_threads_init 初始化hotplug线程
        -->cpu_up
sched_init_smp-->sched_init_domains-->build_sched_domains-->
-->build_sched_domain-->sd_init
-->build_sched_groups
-->init_sched_groups_energy-->init_sched_groups_capacity-->update_group_capacity--> update_cpu_capacity

kernel4.14/kernel/sched/fair.c accumulate_sum()

2923 scale_freq = arch_scale_freq_capacity(NULL, cpu);      //1024*curr_freq/max_freq
2924 scale_cpu = arch_scale_cpu_capacity(NULL, cpu);     //cpu[0~2] =488 cpu[3]=1024

 

默认定义无效kernel4.14/kernel/sched/fair.c

39 #include "sched.h"     //kernel4.14/kernel/sched/sched.h

1737 #ifndef arch_scale_freq_capacity
1738 static __always_inline
1739 unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
1740 {
1741 return SCHED_CAPACITY_SCALE;            //1024
1742 }
1743 #endif

1753 #ifndef arch_scale_cpu_capacity
1754 static __always_inline 
1755 unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
1756 { 
1757 if (sd && (sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1))

//cat /proc/sys/kernel/sched_domain/cpu0/domain0/flags ==> 33343 = 0x823F,其中 SD_SHARE_CPUCAPACITY 位为 0,所以不会走下面 smt_gain 的分支
1758 return sd->smt_gain / sd->span_weight;
1759 
1760 return SCHED_CAPACITY_SCALE;      //1024
1761 } 
1762 #endif

 

修改定义kernel4.14/kernel/sched/fair.c

25 #include <linux/sched/topology.h>             //kernel4.14/include/linux/sched/topology.h

5 #include <linux/topology.h>             //kernel4.14/include/linux/topology.h

35 #include <asm/topology.h>      //kernel4.14/arch/arm64/include/asm/topology.h

在kernel4.14/arch/arm64/include/asm/topology.h中

38 /* Replace task scheduler's default frequency-invariant accounting */

39 #define arch_scale_freq_capacity topology_get_freq_scale

45 #define arch_scale_cpu_capacity topology_get_cpu_scale

在kernel4.14/include/linux/arch_topology.h中

34 static inline
35 unsigned long topology_get_freq_scale(struct sched_domain *sd, int cpu)
36 {
37 return per_cpu(freq_scale, cpu);
38 }

25 unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu)
26 {
27 return per_cpu(cpu_scale, cpu);
28 }

 

cpu_scale/freq_scale在哪被定义呢?kernel4.14/drivers/base/arch_topology.c

freq_scale

kernel4.14/drivers/cpufreq/cpufreq.c

1948 int __cpufreq_driver_target()

1984     return __target_index(policy, index);

1988     retval = cpufreq_driver->target_index(policy, index); 

-------------------------->call sprd_cpufreq_set_target_index(),如下2104行开始

kernel4.4/drivers/cpufreq/sprd-cpufreqhw.c

|1400 static int sprd_hardware_cpufreq_set_target_index()

|    kernel4.14/drivers/base/arch_topology.c

|    27 DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;     //初始值是1024
|    28 DEFINE_PER_CPU(unsigned long, max_cpu_freq);

|    29 DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE;

|    30

|--->31 void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
     32 unsigned long max_freq)     //对比kernel4.4做了修改,在sprd cpufreq driver里实现

     33 { 
     34 unsigned long scale;
     35 int i;
     36 
     37 scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;     

     //SCHED_CAPACITY_SHIFT=10,当前cpu频率和最高频率的比值,按1024为最大值进行scale。例cpu0最大频率=2G,当前频率=1.5G,scale=1024*1.5/2=768
     38 
     39 for_each_cpu(i, cpus) {
     40     per_cpu(freq_scale, i) = scale;
     41     per_cpu(max_cpu_freq, i) = max_freq;
     42 }

 

cpu_scale:

kernel4.14/arch/arm64/kernel/topology.c

|__init parse_dt_topology()

|---->parse_cluster-->parse_core-->get_cpu_for_node-->topology_parse_cpu_capacity(cpu_node, cpu)

ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
                            &cpu_capacity);          //cpu0~2 544, cpu3 1024

capacity_scale = max(cpu_capacity, capacity_scale);     //cpu0~3的最大值,1024
raw_capacity[cpu] = cpu_capacity     cpu0~2 544, cpu3 1024

|---->|359 void topology_normalize_cpu_scale(void) 

       360 {

       368     mutex_lock(&cpu_scale_mutex); 
       369     for_each_possible_cpu(cpu) { 
       370         capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
       371             / capacity_scale;
       372         topology_set_cpu_scale(cpu, capacity);     cpu_scale: cpu0~2=544 cpu3=1024

       |

       |---->|65 DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;     //1024

              67 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)

              68 {

              69     per_cpu(cpu_scale, cpu) = capacity;

              70 }

       373         pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu raw_capacity=%u\n",
       374             cpu, topology_get_cpu_scale(NULL, cpu),
       375             raw_capacity[cpu]); 
       376 }
       377 mutex_unlock(&cpu_scale_mutex);
       378 }

 

kernel4.14/drivers/base/arch_topology.c

|init_cpu_capacity_callback()     //在哪被调?cpufreq_set_policy()通过notifier方式

441 for_each_cpu(cpu, policy->related_cpus) {
442     raw_capacity[cpu] = topology_get_cpu_scale(NULL, cpu) *
443                 policy->cpuinfo.max_freq / 1000UL;   

raw_capacity[cpu0~2]=544*1820000/1000=990080;

raw_capacity[cpu3]=1024*2028000/1000=2076672
444     capacity_scale = max(raw_capacity[cpu], capacity_scale);     //capacity_scale =2076672
445 }
446
447 if (cpumask_empty(cpus_to_visit)) {
448     topology_normalize_cpu_scale();

|---->|356void topology_normalize_cpu_scale(void)

       366 for_each_possible_cpu(cpu) {
       367     capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
       368         / capacity_scale;
       369     topology_set_cpu_scale(cpu, capacity);     //cpu_scale[0~2]=488; cpu_scale[3]=1024
       370     pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu raw_capacity=%u\n",
       371         cpu, topology_get_cpu_scale(NULL, cpu),
       372         raw_capacity[cpu]);
       373 }
       374 mutex_unlock(&cpu_scale_mutex);

 

449     init_sched_energy_costs();
450     if (topology_detect_flags())
451         schedule_work(&update_topology_flags_work);
452     free_raw_capacity();
453     pr_debug("cpu_capacity: parsing done\n");
454     schedule_work(&parsing_done_work);
455 }
456
457 return 0;

总结:cpu_scale 是每个 cpu 的 capacity(capacity-dmips-mhz × 该 cpu 的最大频率)除以所有 cpu 中最大的 capacity(这里是 cpu3),再按 1024 归一化后的值。例:cpu0~2 = 990080 × 1024 / 2076672 = 488,cpu3 = 1024。

 

 

 

 

2096 static struct cpufreq_driver sprd_cpufreq_driver = {
2097 .name = "sprd-cpufreq", 
2098 .flags = CPUFREQ_STICKY 
2099 | CPUFREQ_NEED_INITIAL_FREQ_CHECK 
2100 | CPUFREQ_HAVE_GOVERNOR_PER_POLICY,
2101 .init = sprd_cpufreq_init,
2102 .exit = sprd_cpufreq_exit,
2103 .verify = sprd_cpufreq_table_verify,
2104 .target_index = sprd_cpufreq_set_target_index,
2105 .get = sprd_cpufreq_get, 
2106 .suspend = sprd_cpufreq_suspend, 
2107 .resume = sprd_cpufreq_resume,
2108 .attr = cpufreq_generic_attr,
2109 /* platform specific boost support code */
2110 .boost_supported = true, 
2111 .boost_enabled = true, 
2112 .set_boost = sprd_cpufreq_set_boost,
2113 };

 

所以经过scale后的频率是当前cpu频率乘以1024(即左移SCHED_CAPACITY_SHIFT位),再除以该cpu的最大频率

scale_freq=scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq

 

64 static DEFINE_MUTEX(cpu_scale_mutex);
65 DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
67 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
68 { 
69     per_cpu(cpu_scale, cpu) = capacity;
70 }

 

经过scale后的cpu capacity计算流程如下:

kernel4.14/arch/arm64/kernel/topology.c

354 void __init init_cpu_topology(void)

     362     if (of_have_populated_dt() && parse_dt_topology())

         171 static int __init parse_dt_topology(void)

              195     topology_normalize_cpu_scale(); 

                   366     capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
                   367         / capacity_scale;

                   368     topology_set_cpu_scale(cpu, capacity);

 

在 topology_parse_cpu_capacity() 中初始化 capacity_scale,其值从 board 的 dtsi 文件中的 "capacity-dmips-mhz" 属性读取(含义:在 1MHz 频率下,每秒可执行的百万条整数运算指令数,即 DMIPS/MHz)

385     ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",        //sharkl5 cpu0~2 544; cpu3 1024

386                    &cpu_capacity);

398         capacity_scale = max(cpu_capacity, capacity_scale);

399         raw_capacity[cpu] = cpu_capacity;

 

2892 #define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
2893 
2894 /*
2895 * Accumulate the three separate parts of the sum; d1 the remainder
2896 * of the last (incomplete) period, d2 the span of full periods and d3
2897 * the remainder of the (incomplete) current period.
2898 *
2899 * d1 d2 d3
2900 * ^ ^ ^
2901 * | | |
2902 * |<->|<----------------->|<--->|
2903 * ... |---x---|------| ... |------|-----x (now)
2904 *
2905 * p-1
2906 * u' = (u + d1) y^p + 1024 \Sum y^n + d3 y^0
2907 * n=1
2908 *
2909 * = u y^p + (Step 1)
2910 *
2911 * p-1
2912 * d1 y^p + 1024 \Sum y^n + d3 y^0 (Step 2)
2913 * n=1
2914 */
2915 static __always_inline u32
2916 accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
2917 unsigned long weight, int running, struct cfs_rq *cfs_rq)
2918 {
2919 unsigned long scale_freq, scale_cpu;
2920 u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
2921 u64 periods;
2922 
2923 scale_freq = arch_scale_freq_capacity(NULL, cpu);
2924 scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
2925 
2926 delta += sa->period_contrib;
2927 periods = delta / 1024; /* A period is 1024us (~1ms) */
2928 
2929 /*
2930 * Step 1: decay old *_sum if we crossed period boundaries.
2931 */
2932 if (periods) {

2933 sa->load_sum = decay_load(sa->load_sum, periods);
2934 if (cfs_rq) {
2935 cfs_rq->runnable_load_sum =
2936 decay_load(cfs_rq->runnable_load_sum, periods);
2937 }
2938 sa->util_sum = decay_load((u64)(sa->util_sum), periods);
2939 
2940 /*
2941 * Step 2
2942 */
2943 delta %= 1024;
2944 contrib = __accumulate_pelt_segments(periods,
2945 1024 - sa->period_contrib, delta);
2946 }
2947 sa->period_contrib = delta;
2948 
2949 contrib = cap_scale(contrib, scale_freq);
2950 if (weight) {
2951 sa->load_sum += weight * contrib;
2952 if (cfs_rq)
2953 cfs_rq->runnable_load_sum += weight * contrib;
2954 }
2955 if (running)
2956 sa->util_sum += contrib * scale_cpu;
2957 
2958 return periods;
2959 }


max_freq_scale

 7902 static void update_cpu_capacity(struct sched_domain *sd, int cpu)
 7903 {
 7913     capacity *= arch_scale_max_freq_capacity(sd, cpu);
 7945 }

/*arch/arm64/include/asm/topology.h
 */
 22 /* Replace task scheduler's default max-frequency-invariant accounting */
 23 #define arch_scale_max_freq_capacity topology_get_max_freq_scale

/*include/linux/arch_topology.h
 */
 38 static inline
 39 unsigned long topology_get_max_freq_scale(struct sched_domain *sd, int cpu)
 40 {
 41     return per_cpu(max_freq_scale, cpu);
 42 }

max_freq_scale在哪定义赋值?

1. 在drivers/base/arch_topology.c 中初始化:
26 DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE;

2. 在drivers/base/arch_topology.c 中设置:
 42 void arch_set_max_freq_scale(struct cpumask *cpus,
 43                  unsigned long policy_max_freq)
 44 {
 45     unsigned long scale, max_freq;
 46     int cpu = cpumask_first(cpus);
 47     
 48     if (cpu > nr_cpu_ids)
 49         return;
 50

/*max_freq实际是cpuinfo.max_freq,cpu的实际最大频率
 */
 51     max_freq = per_cpu(max_cpu_freq, cpu);
 52     if (!max_freq)
 53         return;

/*per_cpu(max_cpu_freq)设置: sprd_hardware_cpufreq_set_target_index()->arch_set_freq_scale()
 *变频时在dvfs driver中,设置max_cpu_freq=policy->cpuinfo.max_freq,即cpu实际的最大频率,不受限频影响。
 */

/*drivers/cpufreq/sprd-cpufreqhw.c
 */
117 static
118 int sprd_hardware_cpufreq_set_target_index(struct cpufreq_policy *policy,
119                        unsigned int idx)
120 {
169     if (!ret)
170         arch_set_freq_scale(policy->related_cpus,
171                     freq, policy->cpuinfo.max_freq);
174 }
/*drivers/base/arch_topology.c 
 */
 24 DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
 25 DEFINE_PER_CPU(unsigned long, max_cpu_freq);
 26 DEFINE_PER_CPU(unsigned long, max_freq_scale) = SCHED_CAPACITY_SCALE;
 27 
 28 void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
 29              unsigned long max_freq)
 30 {
 31     unsigned long scale;
 32     int i;
 33 
 34     scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
 35 
 36     for_each_cpu(i, cpus) {
 37         per_cpu(freq_scale, i) = scale;
 38         per_cpu(max_cpu_freq, i) = max_freq;
 39     }
 40 }

 54 
 55     scale = (policy_max_freq << SCHED_CAPACITY_SHIFT) / max_freq; 
 56 
 57     for_each_cpu(cpu, cpus)
 58         per_cpu(max_freq_scale, cpu) = scale;
 59 }

policy_max_freq:arch_set_max_freq_scale()第二个参数,值在哪设置?
cpufreq_set_policy()->arch_set_max_freq_scale(policy->cpus, policy->max)
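从如下函数中可见,policy_max_freq(即 policy->max)是经过 qos 限频后的 cpu 最大频率,

所以 max_freq_scale 是 qos 限频后的最大频率相对于 cpu 实际最高频率(cpuinfo.max_freq)的比值,按 1024 归一化:max_freq_scale = policy->max × 1024 / cpuinfo.max_freq。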

2391 static int cpufreq_set_policy(struct cpufreq_policy *policy,
2392                   struct cpufreq_governor *new_gov,
2393                   unsigned int new_pol)          
2394 { 
2395     struct cpufreq_policy_data new_data;
2396     struct cpufreq_governor *old_gov;
2397     int ret;
2398   
2399     memcpy(&new_data.cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo));
2400     new_data.freq_table = policy->freq_table;
2401     new_data.cpu = policy->cpu;    
2402     /*
2403      * PM QoS framework collects all the requests from users and provide us
2404      * the final aggregated value here.
2405      */
2406     new_data.min = freq_qos_read_value(&policy->constraints, FREQ_QOS_MIN);
2407     new_data.max = freq_qos_read_value(&policy->constraints, FREQ_QOS_MAX);
2408   
2409     pr_debug("setting new policy for CPU %u: %u - %u kHz\n",
2410          new_data.cpu, new_data.min, new_data.max);
2411   
2412     /* verify the cpu speed can be set within this limit */
2413     ret = cpufreq_driver->verify(&new_data);
2414     if (ret)
2415         return ret;
2416   
2417     policy->min = new_data.min;    
2418     policy->max = new_data.max;    
2419     trace_cpu_frequency_limits(policy);
2420   
2421     arch_set_max_freq_scale(policy->cpus, policy->max);

 

  • 2
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
void SystemClock_Config(void) { RCC_OscInitTypeDef RCC_OscInitStruct = {0}; RCC_ClkInitTypeDef RCC_ClkInitStruct = {0}; /** Configure the main internal regulator output voltage */ HAL_PWREx_ControlVoltageScaling(PWR_REGULATOR_VOLTAGE_SCALE1); /** Initializes the RCC Oscillators according to the specified parameters * in the RCC_OscInitTypeDef structure. */ RCC_OscInitStruct.OscillatorType = RCC_OSCILLATORTYPE_HSI; RCC_OscInitStruct.HSIState = RCC_HSI_ON; RCC_OscInitStruct.HSIDiv = RCC_HSI_DIV1; RCC_OscInitStruct.HSICalibrationValue = RCC_HSICALIBRATION_DEFAULT; RCC_OscInitStruct.PLL.PLLState = RCC_PLL_ON; RCC_OscInitStruct.PLL.PLLSource = RCC_PLLSOURCE_HSI; RCC_OscInitStruct.PLL.PLLM = RCC_PLLM_DIV1; RCC_OscInitStruct.PLL.PLLN = 8; RCC_OscInitStruct.PLL.PLLP = RCC_PLLP_DIV2; RCC_OscInitStruct.PLL.PLLR = RCC_PLLR_DIV2; if (HAL_RCC_OscConfig(&RCC_OscInitStruct) != HAL_OK) { Error_Handler(); } /** Initializes the CPU, AHB and APB buses clocks */ RCC_ClkInitStruct.ClockType = RCC_CLOCKTYPE_HCLK|RCC_CLOCKTYPE_SYSCLK |RCC_CLOCKTYPE_PCLK1; RCC_ClkInitStruct.SYSCLKSource = RCC_SYSCLKSOURCE_PLLCLK; RCC_ClkInitStruct.AHBCLKDivider = RCC_SYSCLK_DIV1; RCC_ClkInitStruct.APB1CLKDivider = RCC_HCLK_DIV1; if (HAL_RCC_ClockConfig(&RCC_ClkInitStruct, FLASH_LATENCY_2) != HAL_OK) { Error_Handler(); } } /** * @brief I2C1 Initialization Function * @param None * @retval None */ static void MX_I2C1_Init(void) { /* USER CODE BEGIN I2C1_Init 0 */ /* USER CODE END I2C1_Init 0 */ /* USER CODE BEGIN I2C1_Init 1 */ /* USER CODE END I2C1_Init 1 */ hi2c1.Instance = I2C1; hi2c1.Init.Timing = 0x10707DBC; hi2c1.Init.OwnAddress1 = 0; hi2c1.Init.AddressingMode = I2C_ADDRESSINGMODE_7BIT; hi2c1.Init.DualAddressMode = I2C_DUALADDRESS_DISABLE; hi2c1.Init.OwnAddress2 = 0; hi2c1.Init.OwnAddress2Masks = I2C_OA2_NOMASK; hi2c1.Init.GeneralCallMode = I2C_GENERALCALL_DISABLE; hi2c1.Init.NoStretchMode = I2C_NOSTRETCH_DISABLE; if (HAL_I2C_Init(&hi2c1) != HAL_OK) { Error_Handler(); } /** Configure Analogue filter */ 
if (HAL_I2CEx_ConfigAnalogFilter(&hi2c1, I2C_ANALOGFILTER_ENABLE) != HAL_OK) { Error_Handler(); } /** Configure Digital filter */ if (HAL_I2CEx_ConfigDigitalFilter(&hi2c1, 0) != HAL_OK) { Error_Handler(); } GPIO_InitTypeDef GPIO_InitStruct; __HAL_RCC_GPIOB_CLK_ENABLE(); // 配置 I2C1_SCL 引脚 GPIO_InitStruct.Pin = GPIO_PIN_3; GPIO_InitStruct.Mode = GPIO_MODE_AF_OD; GPIO_InitStruct.Pull = GPIO_PULLUP; GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_HIGH; GPIO_InitStruct.Alternate = GPIO_AF6_I2C1; HAL_GPIO_Init(GPIOB, &GPIO_InitStruct); GPIO_InitStruct.Pin = GPIO_PIN_7; GPIO_InitStruct.Mode = GPIO_MODE_AF_OD; GPIO_InitStruct.Pull = GPIO_PULLUP; GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_HIGH; GPIO_InitStruct.Alternate = GPIO_AF6_I2C1; HAL_GPIO_Init(GPIOB, &GPIO_InitStruct); /* USER CODE BEGIN I2C1_Init 2 */ /* USER CODE END I2C1_Init 2 */ }这个是什么意思需要修改添加代码吗
07-15
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值