eas k5.4 (5): v4.20 - Runtime scheduler domain flag detection

The AOSP 4.14 kernel already has this feature; kernel 4.20 also supports it, following the AOSP kernel:

e1799a8 sched/topology, arch/arm: Rebuild sched_domain hierarchy when CPU capacity changes
3ba09df sched/topology, arch/arm64: Rebuild the sched_domain hierarchy when the CPU capacity changes
bb1fbdd sched/topology, drivers/base/arch_topology: Rebuild the sched_domain hierarchy when capacities change
05484e0 sched/topology: Add SD_ASYM_CPUCAPACITY flag detection
 

  • Data structures

  • Building the sched_domain

  1. Function call relationships

    
    |rebuild_sched_domains()  /*kernel/cgroup/cpuset.c*/                    (1)
    |update_cpumasks_hier()
    |update_relax_domain_level()
    |update_flag()
    |update_prstate() /*(kernel5.4)*/
      \-|rebuild_sched_domains_locked(void)
        | 
        |\-|partition_sched_domains_locked() /*kernel4.14/kernel/sched/topology.c*/
        |    \-|build_sched_domains()
        |
         \-|generate_sched_domains(&doms, &attr) /*kernel5.4/kernel/cgroup/cpuset.c*/
           |partition_and_rebuild_sched_domains() 
           |
            \-|partition_sched_domains_locked() /*kernel5.4/kernel/sched/topology.c*/
              | \-|build_sched_domains()
              |
              |rebuild_root_domains()
    
    |cpuhp_hp_states[CPUHP_AP_ACTIVE] /*kernel/cpu.c*/                        (2)
    | 
     \-|sched_cpu_activate()  /*kernel/sched/core.c*/
       |sched_cpu_deactivate()
         \-|cpuset_cpu_active(1, NULL, NULL)
           |cpuset_cpu_inactive(1, NULL, NULL)
           | 
            \-|partition_sched_domains() /*kernel/sched/topology.c*/
              |
                \-|partition_sched_domains_locked()
                 \-|build_sched_domains()
    
    |__init sched_init_smp(void) /*kernel/sched/core.c*/                        (3)
    |
     \-|sched_init_domains(cpu_active_mask) /*kernel/sched/topology.c*/
        \-|build_sched_domains()

    (3) At system initialization, a sched domain hierarchy is built for all cpu_active_mask CPUs (excluding the housekeeping HK_FLAG_DOMAIN CPUs), as sketched below.
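
    A condensed sketch of this path based on kernel 5.4 (kernel/sched/topology.c); allocation of the temporary masks, the fallback_doms handling and sysctl registration are omitted:

    int sched_init_domains(const struct cpumask *cpu_map)
    {
        arch_update_cpu_topology();
        ndoms_cur = 1;
        doms_cur = alloc_sched_domains(ndoms_cur);

        /* only CPUs that take part in domain housekeeping get a sched domain */
        cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN));

        return build_sched_domains(doms_cur[0], NULL);
    }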

  2. Key functions

    build_sched_domains()
        |__visit_domain_allocation_hell()
        |    |__sdt_alloc()
        |        |alloc_rootdomain()
        |            |init_rootdomain()
        |                 |init_irq_work()
        |                     |init_dl_bw()
        |                     |cpudl_init()  
        |                     |cpupri_init()  
        |                     |init_max_cpu_capacity()
        |
        |build_sched_domain()
        |    |sd_init()  
        |
        |build_sched_groups()
        |    |get_group()
        |
        |init_sched_groups_capacity()
    
    

1427  * Topology list, bottom-up.
1428  */
1429 static struct sched_domain_topology_level default_topology[] = {
1430 #ifdef CONFIG_SCHED_SMT  
1431     { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
1432 #endif
1433 #ifdef CONFIG_SCHED_MC
1434     { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
1435 #endif
1436     { cpu_cpu_mask, SD_INIT_NAME(DIE) },
1437     { NULL, },
1438 };

1978 static int
1979 build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
1980 {
1981     enum s_alloc alloc_state = sa_none;
1982     struct sched_domain *sd;
1983     struct s_data d;
1984     int i, ret = -ENOMEM;
1985     struct sched_domain_topology_level *tl_asym;
1986     bool has_asym = false;
1990 
1991     alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
1994 
1995     tl_asym = asym_cpu_capacity_level(cpu_map);        /*commit:05484e09*/    (1)
1996 
1997     /* Set up domains for CPUs specified by the cpu_map: */
1998         for_each_cpu(i, cpu_map) {            /* loop over each cpu */        (2)
1999         struct sched_domain_topology_level *tl;
2000 
2001         sd = NULL;
2002         for_each_sd_topology(tl) {        /* loop over each topology level */        (3)
2003             int dflags = 0;
2004 
2005             if (tl == tl_asym) {
2006                 dflags |= SD_ASYM_CPUCAPACITY;    /* set the flag on the asymmetric topology level's sched domain */    (4)
2007                 has_asym = true;
2008             }
2009 
2010             if (WARN_ON(!topology_span_sane(tl, cpu_map, i)))
2011                 goto error;
2012 
/* initialize the sched domain of cpu i at this tl (MC or DIE) */        (5)
2013             sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, i);
2014 
2015             if (tl == sched_domain_topology)
/* initialize s_data->sd as each cpu's lowest-level sched domain */        (6)
2016                 *per_cpu_ptr(d.sd, i) = sd;
2017             if (tl->flags & SDTL_OVERLAP)
2018                 sd->flags |= SD_OVERLAP;
2019             if (cpumask_equal(cpu_map, sched_domain_span(sd)))
2020                 break;
2021         }
2022     }
2023 
2024     /* Build the groups for the domains */        (7)
2025     for_each_cpu(i, cpu_map) {
2026         for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
2027             sd->span_weight = cpumask_weight(sched_domain_span(sd));
2028             if (sd->flags & SD_OVERLAP) {
2029                 if (build_overlap_sched_groups(sd, i))
2030                     goto error;
2031             } else {
2032                 if (build_sched_groups(sd, i))
2033                     goto error;
2034             }
2035         }
2036     }
2037 
2038     /* Calculate CPU capacity for physical packages and nodes */        (8)
2039     for (i = nr_cpumask_bits-1; i >= 0; i--) {
2040         if (!cpumask_test_cpu(i, cpu_map))
2041             continue;
2042 
2043         for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
2044             claim_allocations(i, sd);
2045             init_sched_groups_capacity(i, sd);
2046         }
2047     }
2048 
2049     /* Attach the domains */        (9)
2050     rcu_read_lock();
2051     for_each_cpu(i, cpu_map) {
2052         sd = *per_cpu_ptr(d.sd, i);
2053         cpu_attach_domain(sd, d.rd, i);
2054     }
2055     rcu_read_unlock();
2056 
2057     if (has_asym)
2058         static_branch_inc_cpuslocked(&sched_asym_cpucapacity);
2059 
2060     ret = 0;
2061 error:
2062     __free_domain_allocs(&d, alloc_state, cpu_map);
2063 
2064     return ret;
2065 }

(1) Find the lowest topology level at which CPU capacities are asymmetric. On a 4+4 big.LITTLE system without SMT there are only the MC and DIE domains, so the DIE level is returned (see the sketch after this list).
(2)(3) Loop over cpu0->MC, cpu0->DIE, cpu1->MC, cpu1->DIE, and so on.
(4) The DIE domain spans both cluster0 and cluster1 CPU types, so SD_ASYM_CPUCAPACITY is set for it; each MC domain contains only homogeneous CPUs, so SD_ASYM_CPUCAPACITY is not set there.
(5) The key function that actually builds the sched domain.
(6) Initialize s_data->sd to the MC domain of each of cpu0~cpu7.
(7) Loop over s_data->sd of cpu0~7 (all MC domains) and their parents (the DIE domains), i.e. cpu0->MC, cpu0->DIE, cpu1->MC, cpu1->DIE, ..., and build the sched groups for each domain visited.
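
A simplified sketch of the idea behind asym_cpu_capacity_level() (commit 05484e09, kernel/sched/topology.c in kernel 5.4). The real function additionally tracks the level index (asym_level) so that, scanning all CPUs, it settles on the lowest level at which the full capacity range is visible; the skeleton below keeps only the core logic:

/*
 * Sketch only: first check whether any capacity asymmetry exists, then, from
 * each CPU's point of view, walk the topology levels bottom-up and remember
 * the level at which a higher-capacity CPU becomes visible. On a 4+4 system
 * the MC span never shows a bigger CPU, so the DIE level is returned.
 */
static struct sched_domain_topology_level *
asym_cpu_capacity_level_sketch(const struct cpumask *cpu_map)
{
	struct sched_domain_topology_level *tl, *asym_tl = NULL;
	unsigned long cap = arch_scale_cpu_capacity(cpumask_first(cpu_map));
	bool asym = false;
	int i, j;

	/* is there any capacity asymmetry at all? */
	for_each_cpu(i, cpu_map) {
		if (arch_scale_cpu_capacity(i) != cap) {
			asym = true;
			break;
		}
	}
	if (!asym)
		return NULL;	/* symmetric system: no level gets the flag */

	for_each_cpu(i, cpu_map) {
		unsigned long max_capacity = arch_scale_cpu_capacity(i);

		for_each_sd_topology(tl) {
			for_each_cpu_and(j, tl->mask(i), cpu_map) {
				unsigned long capacity = arch_scale_cpu_capacity(j);

				if (capacity > max_capacity) {
					max_capacity = capacity;
					asym_tl = tl;
				}
			}
		}
	}

	return asym_tl;
}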

1113  * build_sched_groups will build a circular linked list of the groups
1114  * covered by the given span, will set each group's ->cpumask correctly,
1115  * and will initialize their ->sgc.
1116  *
1117  * Assumes the sched_domain tree is fully constructed
1118  */

/* Given cpu i and a sched domain, build the sched groups.
Called for cpu0->MC, cpu0->DIE, cpu1->MC, cpu1->DIE, ... */

1119 static int
1120 build_sched_groups(struct sched_domain *sd, int cpu)
1121 { 
1122     struct sched_group *first = NULL, *last = NULL;
1123     struct sd_data *sdd = sd->private;
1124     const struct cpumask *span = sched_domain_span(sd); 
1125     struct cpumask *covered;
1126     int i;
1127   
1128     lockdep_assert_held(&sched_domains_mutex);
1129     covered = sched_domains_tmpmask;
1130   
1131     cpumask_clear(covered);
1132
   
/* starting i from cpu, loop within the span mask */

1133     for_each_cpu_wrap(i, span, cpu) {        
1134         struct sched_group *sg;        
1135   
/*
 * covered records the CPUs already covered by a group (the union of the
 * sg->cpumask already handled). For MC, each sg contains a single cpu. For
 * DIE there are two sgs: sg0 spans cpu0~3 and is cpu0's DIE sched_group,
 * sg1 spans cpu4~7 and is cpu4's DIE sched_group; the DIE sched_groups of
 * the other CPUs are not used.
 *
 * Since sgs are visited per cpu and each DIE sg spans several cpus, only one
 * cpu per sg->cpumask needs to be processed; the rest are skipped here.
 */
1136         if (cpumask_test_cpu(i, covered)) 
1137             continue;    
1138
/*
 * get_group() initializes cpu i's MC or DIE sched_group (for DIE, several
 * cpus share one sched_group):
 *   sg->cpumask
 *   sg->sgc->cpumask
 *   sg->sgc->capacity
 *   sg->sgc->min_capacity
 *   sg->sgc->max_capacity
 *
 * Each call also takes a reference:
 *   sg->ref      + 1
 *   sg->sgc->ref + 1
 */
1139         sg = get_group(i, sdd);        
1140   
1141         cpumask_or(covered, covered, sched_group_span(sg));
1142   
1143         if (!first)      
1144             first = sg;  
1145         if (last)
1146             last->next = sg;
1147         last = sg;
1148     }
1149     last->next = first;
1150     sd->groups = first;  
1151   
1152     return 0;
1153 } 
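
As the header comment above says, the groups end up on a circular singly linked list hanging off sd->groups. A generic sketch (not a specific kernel function) of how such a list is walked, e.g. by the load balancer:

static void visit_groups(struct sched_domain *sd)
{
	struct sched_group *sg = sd->groups;

	/* the list is circular, so stop once it wraps back to sd->groups */
	do {
		/* e.g. look at sched_group_span(sg) or sg->sgc->capacity */
		sg = sg->next;
	} while (sg != sd->groups);
}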

1075 static struct sched_group *get_group(int cpu, struct sd_data *sdd)
1076 {
1077     struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
1078     struct sched_domain *child = sd->child;
1079     struct sched_group *sg;
1080     bool already_visited;
1081 
1082     if (child)        
/*
 * If this domain (the DIE domain) has a child, take the first cpu number in
 * the cpu mask of this cpu's child domain; so cpu0~3 map to cpu=0 and
 * cpu4~7 map to cpu=4.
 */
1083         cpu = cpumask_first(sched_domain_span(child));
1084 
1085     sg = *per_cpu_ptr(sdd->sg, cpu);
/*
 * Link sg and sgc within sd_data. At the DIE topology level there are only
 * two groups, for cpu0 (cpu0~3) and cpu4 (cpu4~7); at the MC topology level
 * there are eight, one group per cpu's MC domain.
 */
1086     sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
1087 
1088     /* Increase refcounts for claim_allocations: */
1089     already_visited = atomic_inc_return(&sg->ref) > 1;
1090     /* sgc visits should follow a similar trend as sg */
1091     WARN_ON(already_visited != (atomic_inc_return(&sg->sgc->ref) > 1));
1092 
1093     /* If we have already visited that group, it's already initialized. */
1094     if (already_visited)
1095         return sg;
1096 
/*
 * For a DIE domain with a child, initialize sg->cpumask to the child
 * domain's span (0x0F or 0xF0) and sg->sgc->cpumask to sg->cpumask.
 */
1097     if (child) {
1098         cpumask_copy(sched_group_span(sg), sched_domain_span(child));
1099         cpumask_copy(group_balance_mask(sg), sched_group_span(sg));
1100     } else {  /* for an MC domain, add the current cpu to sg->cpumask and sg->sgc->cpumask respectively */
1101         cpumask_set_cpu(cpu, sched_group_span(sg));
1102         cpumask_set_cpu(cpu, group_balance_mask(sg));
1103     }
1104
/*
 * Initialize the sgc capacity from the group weight: each MC-level group
 * contains a single cpu, weight = 1; the two DIE-level groups contain
 * cpu0~3 and cpu4~7 respectively, weight = 4.
 */
1105     sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg));
1106     sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
1107     sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
1108 
1109     return sg;
1110 }
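
With SCHED_CAPACITY_SCALE = 1024, the placeholder capacities on the 4+4 example work out to:

    MC  group: cpumask_weight = 1  ->  sgc->capacity = 1 * 1024 = 1024
    DIE group: cpumask_weight = 4  ->  sgc->capacity = 4 * 1024 = 4096

These are placeholders only; init_sched_groups_capacity(), called later in build_sched_domains() (step (8)), recomputes the real group capacities from the actual per-CPU capacities.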
