static int load_balance(int this_cpu, struct rq *this_rq,
                        struct sched_domain *sd, enum cpu_idle_type idle,
                        int *balance)
{
int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
struct sched_group *group;
unsigned long imbalance;
struct rq *busiest;
unsigned long flags;
/* load_balance_tmpmask is a per-cpu variable */
struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
/* cpu_active_mask is a global variable defined in cpumask.h */
cpumask_copy(cpus, cpu_active_mask);
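/*
 * For reference: load_balance_tmpmask is declared elsewhere in kernel/sched.c,
 * roughly as below (a sketch; the exact declaration and its allocation in
 * sched_init() vary between kernel versions). Copying cpu_active_mask gives
 * this invocation a private, writable set of candidate CPUs; CPUs whose tasks
 * turn out to be pinned are cleared from it before retrying at "redo" below.
 */
static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);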
/*
 * When power savings policy is enabled for the parent domain, idle
 * sibling can pick up load irrespective of busy siblings. In this case,
 * let the state of idle sibling percolate up as CPU_IDLE, instead of
 * portraying it as CPU_NOT_IDLE.
 */
if (idle != CPU_NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
        sd_idle = 1;
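/*
 * For reference, test_sd_parent() is a small helper defined elsewhere in
 * kernel/sched.c; a sketch of its logic (the exact form may differ between
 * kernel versions): it reports whether the parent domain has the given flag.
 */
static inline int test_sd_parent(struct sched_domain *sd, int flag)
{
        if (sd->parent && (sd->parent->flags & flag))
                return 1;
        return 0;
}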
/* ++sd->lb_count[idle] */ schedstat_inc(sd, lb_count[idle]);
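/*
 * For reference: schedstat_inc()/schedstat_add(), used throughout this
 * function, are bookkeeping macros from kernel/sched_stats.h. Roughly (a
 * sketch; they compile away when CONFIG_SCHEDSTATS is not set):
 */
#ifdef CONFIG_SCHEDSTATS
# define schedstat_inc(rq, field)       do { (rq)->field++; } while (0)
# define schedstat_add(rq, field, amt)  do { (rq)->field += (amt); } while (0)
#else
# define schedstat_inc(rq, field)       do { } while (0)
# define schedstat_add(rq, field, amt)  do { } while (0)
#endif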
redo:
/* recompute group shares over this domain's CPUs and refresh sd->last_update */
update_shares(sd);
/**
 * find_busiest_group - Returns the busiest group within the sched_domain
 * if there is an imbalance. If there isn't an imbalance, and
 * the user has opted for power-savings, it returns a group whose
 * CPUs can be put to idle by rebalancing those tasks elsewhere, if
 * such a group exists.
 *
 * Also calculates the amount of weighted load which should be moved
 * to restore balance.
 *
 * @sd: The sched_domain whose busiest group is to be returned.
 * @this_cpu: The cpu for which load balancing is currently being performed.
 * @imbalance: Variable which stores amount of weighted load which should
 *      be moved to restore balance/put a group to idle.
 * @idle: The idle status of this_cpu.
 * @sd_idle: The idleness of sd
 * @cpus: The set of CPUs under consideration for load-balancing.
 * @balance: Pointer to a variable indicating if this_cpu
 *      is the appropriate cpu to perform load balancing at this_level.
 *
 * Returns:     - the busiest group if imbalance exists.
 *              - If no imbalance and user has opted for power-savings balance,
 *                return the least loaded group whose CPUs can be
 *                put to idle by rebalancing its tasks onto our group.
 */
group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
                           cpus, balance);
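/*
 * A zero *balance means this_cpu is not the CPU that should balance at this
 * level: roughly, only the first idle CPU in the group (or the first CPU when
 * none are idle) performs the balancing, so everyone else bails out here.
 */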
if (*balance == 0) goto out_balanced;
if (!group) {
        schedstat_inc(sd, lb_nobusyg[idle]);
        goto out_balanced;
}
/*
 * find_busiest_queue - find the busiest runqueue among the cpus in group.
 */
busiest = find_busiest_queue(group, idle, imbalance, cpus);
if (!busiest) {
        schedstat_inc(sd, lb_nobusyq[idle]);
        goto out_balanced;
}
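/*
 * Heavily simplified sketch of what find_busiest_queue() does (the real
 * function also scales load by cpu power and skips queues whose single task
 * already exceeds the imbalance; the _sketch name is ours, not the kernel's):
 * walk the CPUs of the group, skip those cleared from *cpus, and return the
 * runqueue carrying the largest weighted load.
 */
static struct rq *find_busiest_queue_sketch(struct sched_group *group,
                                            const struct cpumask *cpus)
{
        struct rq *busiest = NULL;
        unsigned long max_load = 0;
        int i;

        for_each_cpu(i, sched_group_cpus(group)) {
                if (!cpumask_test_cpu(i, cpus))
                        continue;
                if (weighted_cpuload(i) > max_load) {
                        max_load = weighted_cpuload(i);
                        busiest = cpu_rq(i);
                }
        }
        return busiest;
}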
BUG_ON(busiest == this_rq);
schedstat_add(sd, lb_imbalance[idle], imbalance);
ld_moved = 0;
if (busiest->nr_running > 1) {
        /*
         * Attempt to move tasks. If find_busiest_group has found
         * an imbalance but busiest->nr_running <= 1, the group is
         * still unbalanced. ld_moved simply stays zero, so it is
         * correctly treated as an imbalance.
         */
        local_irq_save(flags);
        double_rq_lock(this_rq, busiest);
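/*
 * For reference, double_rq_lock() (a sketch, details differ slightly between
 * kernel versions): the two runqueue locks are always taken in address order,
 * so two CPUs balancing against each other cannot deadlock.
 */
static void double_rq_lock(struct rq *rq1, struct rq *rq2)
{
        BUG_ON(!irqs_disabled());
        if (rq1 == rq2) {
                raw_spin_lock(&rq1->lock);
        } else if (rq1 < rq2) {
                raw_spin_lock(&rq1->lock);
                raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
        } else {
                raw_spin_lock(&rq2->lock);
                raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
        }
        update_rq_clock(rq1);
        update_rq_clock(rq2);
}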
        /*
         * move_tasks tries to move up to max_load_move weighted load from
         * busiest to this_rq, as part of a balancing operation within
         * domain "sd". Returns 1 if successful and 0 otherwise.
         *
         * Called with both runqueues locked.
         */
        ld_moved = move_tasks(this_rq, this_cpu, busiest, imbalance,
                              sd, idle, &all_pinned);
        double_rq_unlock(this_rq, busiest);
        local_irq_restore(flags);
        /*
         * some other cpu did the load balance for us.
         */
        if (ld_moved && this_cpu != smp_processor_id())
                resched_cpu(this_cpu);
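        /*
         * Note: during nohz idle balancing one CPU runs rebalance_domains()
         * on behalf of other idle CPUs, so this_cpu is not necessarily the
         * CPU executing this code; if we pulled tasks for a remote idle CPU,
         * kick it with resched_cpu() so it notices the new work.
         */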
        /* All tasks on this runqueue were pinned by CPU affinity */
        if (unlikely(all_pinned)) {
                cpumask_clear_cpu(cpu_of(busiest), cpus);
                if (!cpumask_empty(cpus))
                        goto redo;
                goto out_balanced;
        }
}
/* move_tasks() moved nothing (or busiest had only one task): record the failure */
if (!ld_moved) {
        schedstat_inc(sd, lb_failed[idle]);
        sd->nr_balance_failed++;
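/*
 * Sketch of need_active_balance() (heavily simplified; the real helper also
 * has power-savings special cases for newly-idle balancing): active balancing
 * is only worth kicking off once regular balancing has failed repeatedly.
 */
static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle)
{
        return unlikely(sd->nr_balance_failed > sd->cache_nice_tries + 2);
}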
        if (need_active_balance(sd, sd_idle, idle)) {
                raw_spin_lock_irqsave(&busiest->lock, flags);
                /*
                 * don't kick the migration_thread, if the curr
                 * task on busiest cpu can't be moved to this_cpu
                 */
                if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) {
                        raw_spin_unlock_irqrestore(&busiest->lock, flags);
                        all_pinned = 1;
                        goto out_one_pinned;
                }
                /*
                 * move_tasks() pulls tasks towards this_cpu; active balancing
                 * works the other way round. Set active_balance on the busiest
                 * runqueue and record this_cpu as push_cpu, so that the busiest
                 * CPU itself pushes a task over to us.
                 */
                if (!busiest->active_balance) {
                        busiest->active_balance = 1;
                        busiest->push_cpu = this_cpu;
                        active_balance = 1;
                }
                raw_spin_unlock_irqrestore(&busiest->lock, flags);
                /*
                 * Each runqueue has a dedicated migration thread that handles
                 * such migration requests; wake up the one on the busiest CPU.
                 */
                if (active_balance)
                        wake_up_process(busiest->migration_thread);
                /*
                 * We've kicked active balancing, reset the failure
                 * counter.
                 */
                sd->nr_balance_failed = sd->cache_nice_tries+1;
        }
} else
        sd->nr_balance_failed = 0;
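/*
 * For reference: the migration thread woken above runs active_load_balance()
 * on the busiest CPU. A sketch (simplified; schedstat accounting and sanity
 * checks omitted): because the migration thread preempts the busiest CPU's
 * current task, that task becomes movable, and one task is pushed to
 * busiest->push_cpu, i.e. the CPU that kicked the active balance.
 */
static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
{
        int target_cpu = busiest_rq->push_cpu;
        struct rq *target_rq = cpu_rq(target_cpu);
        struct sched_domain *sd;

        double_lock_balance(busiest_rq, target_rq);

        /* find a domain that spans both the busiest CPU and the target */
        for_each_domain(target_cpu, sd) {
                if ((sd->flags & SD_LOAD_BALANCE) &&
                    cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
                        break;
        }

        if (likely(sd))
                move_one_task(target_rq, target_cpu, busiest_rq, sd, CPU_IDLE);

        double_unlock_balance(busiest_rq, target_rq);
}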
if (likely(!active_balance)) {
        /* We were unbalanced, so reset the balancing interval */
        sd->balance_interval = sd->min_interval;
} else {
        /*
         * If we've begun active balancing, start to back off. This
         * case may not be covered by the all_pinned logic if there
         * is only 1 task on the busy runqueue (because we don't call
         * move_tasks).
         */
        if (sd->balance_interval < sd->max_interval)
                sd->balance_interval *= 2;
}
if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
        ld_moved = -1;
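/*
 * Note: any non-zero return value (including the -1 set here and on the
 * out_one_pinned path below) makes the caller, rebalance_domains(), treat
 * this CPU as CPU_NOT_IDLE for the remaining, higher-level domains; -1 is
 * used on the power-savings paths where no load was actually moved.
 */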
goto out;
out_balanced:
schedstat_inc(sd, lb_balanced[idle]);
sd->nr_balance_failed = 0;
out_one_pinned:
/* tune up the balancing interval */
if ((all_pinned && sd->balance_interval < MAX_PINNED_INTERVAL) ||
    (sd->balance_interval < sd->max_interval))
        sd->balance_interval *= 2;
if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
        ld_moved = -1;
else
        ld_moved = 0;

out:
if (ld_moved)
        update_shares(sd);

return ld_moved;
}