在kernel中会注册一个软中断(softirq)来做负载均衡
/*
 * Boot-time setup for the fair scheduling class.
 *
 * On CONFIG_SMP builds this installs run_rebalance_domains() as the handler
 * for SCHED_SOFTIRQ; on other builds the body is empty.
 */
__init void init_sched_fair_class(void)
{
#ifdef CONFIG_SMP
	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
#endif /* SMP */
}
这个软中断SCHED_SOFTIRQ 对应的回调函数是run_rebalance_domains
/*
 * Softirq callback that triggers load balancing for the current runqueue.
 *
 * @h: softirq action descriptor; not used by this function.
 */
static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
{
	struct rq *this_rq = this_rq();
	/* Decide whether the current rq counts as idle, from its idle_balance flag. */
	enum cpu_idle_type idle = this_rq->idle_balance ?
						CPU_IDLE : CPU_NOT_IDLE;

	/*
	 * Start load balancing for this rq. As the callee's name suggests,
	 * the work is organised per scheduling domain.
	 */
	rebalance_domains(this_rq, idle);
}
static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
{
#负载均衡的周期:这里会根据当前rq是否idle对应不同的时间,也就是说
#负载均衡的周期有两个值
interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
#从当前cpu所在的最底层domain开始,沿parent逐级向上遍历该cpu的每一层sched domain(注意:并不是遍历domain中的每个cpu)
for_each_domain(cpu, sd) {
if (time_after_eq(jiffies, sd->last_balance + interval)) {
#周期到后开始做负载均衡
if (load_balance(cpu, rq, sd, idle, &continue_balancing)) {
/*
* The LBF_DST_PINNED logic could have changed
* env->dst_cpu, so we can't know our idle
* state even if we migrated tasks. Update it.
*/
idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
}
sd->last_balance = jiffies;
interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
}
}
这里首先看看负载均衡的两个周期是如何计算的
/*
 * Compute the load-balancing period for a scheduling domain, in jiffies.
 *
 * @sd:       domain whose balance_interval (milliseconds) is the base period
 * @cpu_busy: non-zero to multiply the base period by sd->busy_factor
 *
 * Returns the period clamped to [1, max_load_balance_interval].
 */
static inline unsigned long
get_sd_balance_interval(struct sched_domain *sd, int cpu_busy)
{
	unsigned long interval = sd->balance_interval;

	/* Key step: when the CPU is busy the period is stretched, so it balances less often. */
	if (cpu_busy)
		interval *= sd->busy_factor;

	/* scale ms to jiffies */
	interval = msecs_to_jiffies(interval);
	/* Bound the period: at least 1 jiffy, at most max_load_balance_interval. */
	interval = clamp(interval, 1UL, max_load_balance_interval);

	return interval;
}
搞清楚负载均衡的周期后,开始看看处理负载均衡的函数load_balance
static int load_balance(int this_cpu, struct rq *this_rq,
struct sched_domain *sd, enum cpu_idle_type idle,
int *continue_balancing)
{
#找到domain中最忙的group
group = find_busiest_group(&env);
if (!group) {
schedstat_inc(sd->lb_nobusyg[idle]);
goto out_balanced;
}
#找到最忙group中最忙的rq
busiest = find_busiest_queue(&env, group);
if (!busiest) {
schedstat_inc(sd->lb_nobusyq[idle]);
goto out_balanced;
}
BUG_ON(busiest == env.dst_rq);
schedstat_add(sd->lb_imbalance[idle], env.imbalance);
env.src_cpu = busiest->cpu;
env.src_rq = busiest;
ld_moved = 0;
#只有当最忙rq上可运行的task多于1个时,才有task可以迁移出去
if (busiest->nr_running > 1) {
#将task从最忙cpu中的rq迁移出来
cur_ld_moved = detach_tasks(&env);
rq_unlock(busiest, &rf);
if (cur_ld_moved) {
#将前面从最忙cpu rq中迁出的task 加入到目标cpu上
attach_tasks(&env);
ld_moved += cur_ld_moved;
}
}
smp的负载均衡
最新推荐文章于 2024-03-17 18:25:14 发布