EAS k5.4 (六): v4.20 - Misfit task

757ffdd sched/fair: Set rq->rd->overload when misfit
e90c8fe sched/fair: Wrap rq->rd->overload accesses with READ/WRITE_ONCE()
575638d sched/core: Change root_domain->overload type to int
dbbad71 sched/fair: Change 'prefer_sibling' type to bool
5fbdfae sched/fair: Kick nohz balance if rq->misfit_task_load
cad68e5 sched/fair: Consider misfit tasks when load-balancing
e3d6d0c sched/fair: Add sched_group per-CPU max capacity
3b1baa6 sched/fair: Add 'group_misfit_task' load-balance type
 

1. 创建SCHED_SOFTIRQ
 *start_kernel()
   |--sched_init(void)
      |--init_sched_fair_class()  /*sched/fair.c*/
         |--open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
|<--------|
|
| *scheduler_tick(void)
|   |--trigger_load_balance(struct rq *rq)    /*kernel/sched/fair.c */
|       |--raise_softirq(SCHED_SOFTIRQ)
|<-------|
|
|     *scheduler_ipi(void)  /*kernel/sched/core.c*/
|       |--raise_softirq_irqoff(SCHED_SOFTIRQ)
|------->|
           |--run_rebalance_domains(struct softirq_action *h)
|--if (nohz_idle_balance(this_rq, idle)) return;
|<----------|       |
| |-----------------|
| |  *balance_fair
| |  *pick_next_task_fair()
| |   |--newidle_balance()
| |------|--nohz_newidle_balance()
|             |-- _nohz_idle_balance()  
|------------>|--rebalance_domains(this_rq, idle)

/*
 * rebalance_domains() - run periodic load balancing for @rq's CPU.
 *
 * Walks every sched_domain containing this CPU, from the lowest level
 * upward, and calls load_balance() on each domain whose balance interval
 * has expired. Also decays the per-domain newidle balance cost (~1% per
 * second) and records the earliest time a rebalance is needed again in
 * rq->next_balance.
 *
 * @rq:   the runqueue of the CPU doing the balancing
 * @idle: whether this CPU was idle when the softirq fired
 *        (CPU_IDLE / CPU_NOT_IDLE); load_balance() behaves more or
 *        less aggressively depending on it.
 *
 * Called from the SCHED_SOFTIRQ handler (run_rebalance_domains), see
 * the call-flow diagram above.
 */
9455 static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
 9456 {
 9457     int continue_balancing = 1;    
 9458     int cpu = rq->cpu;  
 9459     unsigned long interval;
 9460     struct sched_domain *sd;
 9461     /* Earliest time when we have to do rebalance again */
 9462     unsigned long next_balance = jiffies + 60*HZ;
 9463     int update_next_balance = 0;   
 9464     int need_serialize, need_decay = 0;
 9465     u64 max_cost = 0;   
 9466  
          /* The domain hierarchy is protected by RCU. */
 9467     rcu_read_lock();    
 9468     for_each_domain(cpu, sd) {
 9469         /*
 9470          * Decay the newidle max times here because this is a regular
 9471          * visit to all the domains. Decay ~1% per second.
 9472          */
 9473         if (time_after(jiffies, sd->next_decay_max_lb_cost)) { 
 9474             sd->max_newidle_lb_cost =      
 9475                 (sd->max_newidle_lb_cost * 253) / 256;
 9476             sd->next_decay_max_lb_cost = jiffies + HZ;
 9477             need_decay = 1;
 9478         }
              /* Accumulate per-domain cost into the rq-wide value below. */
 9479         max_cost += sd->max_newidle_lb_cost;
 9480  
          /* Domains with balancing disabled are skipped, but still decayed. */
 9481         if (!(sd->flags & SD_LOAD_BALANCE))
 9482             continue;   
 9483  
 9484         /*
 9485          * Stop the load balance at this level. There is another
 9486          * CPU in our sched group which is doing load balancing more
 9487          * actively.    
 9488          */
 9489         if (!continue_balancing) {     
              /* Keep iterating (continue) only to finish the decay pass. */
 9490             if (need_decay)
 9491                 continue;
 9492             break;
 9493         }
 9494  
 9495         interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
 9496  
          /*
           * SD_SERIALIZE domains (e.g. NUMA levels) allow only one CPU
           * system-wide to balance them at a time; skip on contention
           * rather than spin.
           */
 9497         need_serialize = sd->flags & SD_SERIALIZE;
 9498         if (need_serialize) {
 9499             if (!spin_trylock(&balancing)) 
 9500                 goto out;
 9501         }
 9502  
          /* Balance this domain only when its interval has elapsed. */
 9503         if (time_after_eq(jiffies, sd->last_balance + interval)) {
 9504             if (load_balance(cpu, rq, sd, idle, &continue_balancing)) {
 9505                 /*
 9506                  * The LBF_DST_PINNED logic could have changed
 9507                  * env->dst_cpu, so we can't know our idle
 9508                  * state even if we migrated tasks. Update it.
 9509                  */
 9510                 idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
 9511             }
 9512             sd->last_balance = jiffies;
              /* Re-read: idle state may have changed just above. */
 9513             interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
 9514         }
 9515         if (need_serialize)
 9516             spin_unlock(&balancing);
 9517 out:
          /* Track the earliest deadline across all visited domains. */
 9518         if (time_after(next_balance, sd->last_balance + interval)) {
 9519             next_balance = sd->last_balance + interval;
 9520             update_next_balance = 1;
 9521         }
 9522     }
 9523     if (need_decay) {
 9524         /*
 9525          * Ensure the rq-wide value also decays but keep it at a
 9526          * reasonable floor to avoid funnies with rq->avg_idle.
 9527          */
 9528         rq->max_idle_balance_cost =
 9529             max((u64)sysctl_sched_migration_cost, max_cost);
 9530     }
 9531     rcu_read_unlock();
 9532 
 9533     /*
 9534      * next_balance will be updated only when there is a need.
 9535      * When the cpu is attached to null domain for ex, it will not be
 9536      * updated.
 9537      */
 9538     if (likely(update_next_balance)) {
 9539         rq->next_balance = next_balance;
 9540 
 9541 #ifdef CONFIG_NO_HZ_COMMON
 9542         /*
 9543          * If this CPU has been elected to perform the nohz idle
 9544          * balance. Other idle CPUs have already rebalanced with
 9545          * nohz_idle_balance() and nohz.next_balance has been
 9546          * updated accordingly. This CPU is now running the idle load
 9547          * balance for itself and we need to update the
 9548          * nohz.next_balance accordingly.
 9549          */
 9550         if ((idle == CPU_IDLE) && time_after(nohz.next_balance, rq->next_balance))
 9551             nohz.next_balance = rq->next_balance;
 9552 #endif
 9553     }
 9554 }

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值