big.LITTLE Migration: Idle-pull
When a CPU, during scheduling, finds no runnable task left on its runqueue, it enters the idle state (note: nr_running does not count the idle task). Once a CPU is about to go idle, it naturally checks whether other CPUs are overloaded, and if so it performs load balancing.
static void __sched __schedule(void)
{
	...
	if (unlikely(!rq->nr_running))
		idle_balance(cpu, rq);
	...
}
idle_balance tries to migrate some tasks onto the CPU that has just become idle; this_cpu is the current CPU, and this_rq is its runqueue.
/*
 * idle_balance is called by schedule() if this_cpu is about to become
 * idle. Attempts to pull tasks from other CPUs.
 */
void idle_balance(int this_cpu, struct rq *this_rq)
{
	struct sched_domain *sd;
	int pulled_task = 0;
	unsigned long next_balance = jiffies + HZ;

	this_rq->idle_stamp = this_rq->clock;

	if (this_rq->avg_idle < sysctl_sched_migration_cost)
		return;

	/*
	 * Drop the rq->lock, but keep IRQ/preempt disabled.
	 */
	raw_spin_unlock(&this_rq->lock);

	update_blocked_averages(this_cpu);
	rcu_read_lock();
	/* 1 */
	for_each_domain(this_cpu, sd) {
		unsigned long interval;
		int balance = 1;

		/* 1a */
		if (!(sd->flags & SD_LOAD_BALANCE))
			continue;

		/* 1b */
		if (sd->flags & SD_BALANCE_NEWIDLE) {
			/* If we've pulled tasks over stop searching: */
			/* 1c */
			pulled_task = load_balance(this_cpu, this_rq,
						   sd, CPU_NEWLY_IDLE, &balance);
		}

		interval = msecs_to_jiffies(sd->balance_interval);
		if (time_after(next_balance, sd->last_balance + interval))
			next_balance = sd->last_balance + interval;
		if (pulled_task) {
			this_rq->idle_stamp = 0;
			break;
		}
	}
	rcu_read_unlock();
#ifdef CONFIG_SCHED_HMP
	/* 2 */
	if (!pulled_task)
		pulled_task = hmp_idle_pull(this_cpu);
#endif
	raw_spin_lock(&this_rq->lock);

	if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
		/*
		 * We are going idle. next_balance may be set based on
		 * a busy processor. So reset next_balance.
		 */
		this_rq->next_balance = next_balance;
	}
}
This function has two steps.
1. The first step is a loop, starting from the lowest-level domain containing the current CPU and working upward. In each iteration:
1a. Check whether the current domain allows load balancing at all. Suppose the system is a 4 little + 4 big architecture; then there are three domains: domain a containing the 4 little cores, domain b containing the 4 big cores, and the root domain above them. In both a and b this flag (SD_LOAD_BALANCE) is set, meaning tasks may migrate among the 4 little cores, or among the 4 big cores. On the root domain the flag is not set, meaning tasks in domain a cannot migrate into domain b.
1b. Check whether the current domain allows balancing when a CPU goes idle (SD_BALANCE_NEWIDLE). This flag is set on the same domains as the previous one.
1c. If both conditions hold, call load_balance to try to pull some load over from the busiest CPU in the busiest scheduling group of this domain. We will not look into load_balance here; just note that after it runs, some tasks may have been migrated onto the current CPU.
2. If the previous step already migrated some tasks onto the current CPU, the CPU is no longer idle, so there is no need to do hmp_idle_pull.
hmp_idle_pull exists only for the case where a big core goes idle: it pulls a task up from the little cores. Why do it this way? Remember that the goal of the big.LITTLE architecture is to save power while preserving performance. Most tasks therefore run on the little cores, and each big CPU is expected to run only a single task. So when a little core goes idle, pulling a few tasks over from the other little cores is enough; there are few tasks on the big cores anyway, so there is no need to pull from them, and once a task's load on a big core drops, it will get its chance to migrate down to a little core. For a big core that has gone idle, however, it is worth checking whether some little core is running a heavily loaded task, because the earlier wakeup and forced-migration paths will not migrate a task, however heavy its load has become, when no big core is idle.
/*
 * hmp_idle_pull looks at little domain runqueues to see
 * if a task should be pulled.
 *
 * Reuses hmp_force_migration spinlock.
 *
 */
static unsigned int hmp_idle_pull(int this_cpu)
{
	int cpu;
	struct sched_entity *curr, *orig;
	struct hmp_domain *hmp_domain = NULL;
	struct rq *target = NULL, *rq;
	unsigned long flags, ratio = 0;
	unsigned int force = 0;
	struct task_struct *p = NULL;

	/* 1 */
	if (!hmp_cpu_is_slowest(this_cpu))
		hmp_domain = hmp_slower_domain(this_cpu);
	if (!hmp_domain)
		return 0;

	if (!spin_trylock(&hmp_force_migration))
		return 0;

	/* first select a task */
	/* 2 */
	for_each_cpu(cpu, &hmp_domain->cpus) {
		rq = cpu_rq(cpu);
		raw_spin_lock_irqsave(&rq->lock, flags);
		curr = rq->cfs.curr;
		if (!curr) {
			raw_spin_unlock_irqrestore(&rq->lock, flags);
			continue;
		}
		/* 2a */
		if (!entity_is_task(curr)) {
			struct cfs_rq *cfs_rq;

			cfs_rq = group_cfs_rq(curr);
			while (cfs_rq) {
				curr = cfs_rq->curr;
				if (!entity_is_task(curr))
					cfs_rq = group_cfs_rq(curr);
				else
					cfs_rq = NULL;
			}
		}
		orig = curr;
		/* 2b */
		curr = hmp_get_heaviest_task(curr, this_cpu);
		/* check if heaviest eligible task on this
		 * CPU is heavier than previous task
		 */
		/* 2c */
		if (curr && hmp_task_eligible_for_up_migration(curr) &&
			curr->avg.load_avg_ratio > ratio &&
			cpumask_test_cpu(this_cpu,
					 tsk_cpus_allowed(task_of(curr)))) {
			p = task_of(curr);
			target = rq;
			ratio = curr->avg.load_avg_ratio;
		}
		raw_spin_unlock_irqrestore(&rq->lock, flags);
	}

	if (!p)
		goto done;

	/* now we have a candidate */
	/* 3 */
	raw_spin_lock_irqsave(&target->lock, flags);
	if (!target->active_balance && task_rq(p) == target) {
		get_task_struct(p);
		target->push_cpu = this_cpu;
		target->migrate_task = p;
		trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_IDLE_PULL);
		hmp_next_up_delay(&p->se, target->push_cpu);
		/*
		 * if the task isn't running move it right away.
		 * Otherwise setup the active_balance mechanic and let
		 * the CPU stopper do its job.
		 */
		/* 3a */
		if (!task_running(target, p)) {
			trace_sched_hmp_migrate_idle_running(p, 0);
			hmp_migrate_runnable_task(target);
		/* 3b */
		} else {
			target->active_balance = 1;
			force = 1;
		}
	}
	raw_spin_unlock_irqrestore(&target->lock, flags);

	/* 4 */
	if (force) {
		/* start timer to keep us awake */
		hmp_cpu_keepalive_trigger();

		stop_one_cpu_nowait(cpu_of(target),
			hmp_active_task_migration_cpu_stop,
			target, &target->active_balance_work);
	}
done:
	spin_unlock(&hmp_force_migration);
	return force;
}
The logic of this function:
1. If the currently idle CPU is itself a little core, return immediately; otherwise set hmp_domain to the hmp_domain of the little cores.
2. Then comes a loop over every CPU in hmp_domain, picking the task with the heaviest load to migrate up to the big core. For each CPU n it does the following:
2a. This branch handles the case where CPU n's current scheduling entity is a group; ignore that for now and assume the running entity is an ordinary task, so this step is skipped.
2b. Pick the heaviest-loaded task on CPU n and assign it to curr.
2c. Compare the task selected in the previous step against the heaviest tasks found on the other CPUs so far. load_avg_ratio represents the task's load. hmp_task_eligible_for_up_migration checks whether the task's load exceeds a threshold; only a task whose load exceeds that value is a candidate for migration to a big core.
3. Reaching here means a candidate has been found: target is the runqueue of the CPU holding the heaviest task, p is the selected task, and this_cpu is the idle big core. If target->active_balance is non-zero, an earlier attempt to pull a task from target's CPU failed for some reason, and target will, after a while, actively push its own tasks to a relatively idle CPU; in that case there is no point trying to pull from target again, and hmp_idle_pull simply exits.
3a. Reaching here, if p is not currently running, it is migrated to this_cpu right away. hmp_migrate_runnable_task first performs some checks and then does the migration, which is straightforward here: take p off target's runqueue, insert it into this_cpu's runqueue, and then call check_preempt_curr on this_cpu to see whether preemption is needed, so the task gets a chance to run on its new CPU immediately.
3b. Reaching here means p is currently running, which is harder to handle; the following two variables are set to request a forced migration:
target->active_balance = 1;
force = 1;
4. Reaching here means a forced migration is needed. How is it done? It is actually simple: every CPU has a cpu_stopper_task, a task whose job is to stop the CPU; you can hand it a work item to execute once the CPU is stopped. The implementation of stop_one_cpu_nowait is therefore straightforward: it prepares a work item and wakes up the cpu_stopper_task on the target CPU. cpu_stopper_task is a real-time task, so once woken it runs immediately. After waking it, hmp_idle_pull on this_cpu, our big core, is finished.
void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
			struct cpu_stop_work *work_buf)
{
	*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
	cpu_stop_queue_work(cpu, work_buf);
}

/* queue @work to @stopper. if offline, @work is completed immediately */
static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	struct task_struct *p = per_cpu(cpu_stopper_task, cpu);
	unsigned long flags;

	spin_lock_irqsave(&stopper->lock, flags);
	if (stopper->enabled) {
		list_add_tail(&work->list, &stopper->works);
		wake_up_process(p);
	} else
		cpu_stop_signal_done(work->done, false);
	spin_unlock_irqrestore(&stopper->lock, flags);
}
As described above, the cpu_stopper_task will immediately execute hmp_active_task_migration_cpu_stop. This function runs on the target CPU: it takes the previously recorded task off the target CPU's runqueue and inserts it onto the big core that ran hmp_idle_pull earlier.
Finally, here are the related cpu_stopper_task code snippets, included without further analysis:
static void cpu_stop_create(unsigned int cpu)
{
	sched_set_stop_task(cpu, per_cpu(cpu_stopper_task, cpu));
}

void sched_set_stop_task(int cpu, struct task_struct *stop)
{
	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
	struct task_struct *old_stop = cpu_rq(cpu)->stop;

	if (stop) {
		/*
		 * Make it appear like a SCHED_FIFO task, its something
		 * userspace knows about and won't get confused about.
		 *
		 * Also, it will make PI more or less work without too
		 * much confusion -- but then, stop work should not
		 * rely on PI working anyway.
		 */
		sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);

		stop->sched_class = &stop_sched_class;
	}

	cpu_rq(cpu)->stop = stop;

	if (old_stop) {
		/*
		 * Reset it back to a normal scheduling class so that
		 * it can die in pieces.
		 */
		old_stop->sched_class = &rt_sched_class;
	}
}
One more addition: the code that checks for preemption when a task is woken up.
void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
{
	const struct sched_class *class;

	if (p->sched_class == rq->curr->sched_class) {
		rq->curr->sched_class->check_preempt_curr(rq, p, flags);
	} else {
		for_each_class(class) {
			if (class == rq->curr->sched_class)
				break;
			/* 1 */
			if (class == p->sched_class) {
				resched_task(rq->curr);
				break;
			}
		}
	}

	/*
	 * A queue event has occurred, and we're going to schedule. In
	 * this case, we can save a useless back to back clock update.
	 */
	if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
		rq->skip_clock_update = 1;
}
p is the task being woken up, and rq is the runqueue of the CPU currently executing this function. If p has the same scheduling class as the currently running task, the check is delegated to that class's own check_preempt_curr; otherwise, if p is for example a real-time task (a higher class than curr's), the loop immediately reaches the code marked 1 and reschedules the current task.