Linux的进程管理之相关数据结构—3


先回顾下之前的定义,进程调度是指操作系统按照某种策略或者规则选择进程占用CPU进行运行的过程;
即:什么时候调度(调度时机)、怎么调度(上下文切换)、按照什么方式调度(调度策略)

1.调度策略

linux将进程主要划分为实时进程与普通进程;内核中一共有五种调度器:STOP、RT、DEADLINE、CFS、IDLE调度器;
调度策略:SCHED_FIFO
SCHED_RR
SCHED_IDLE
SCHED_NORMAL
SCHED_BATCH
其中STOP、IDLE调度器仅供内核使用,用户态常用的为CFS、RT调度器,其中RT常用于嵌入式系统。
Deadline调度器是linux3.14版本引入的,本文分析所使用的是linux3.0.20版本,因此还未引入,不过道理都是类似的,我们重点介绍CFS 与RT调度器

2.调度的数据结构

抽象体          对应结构体
CPU             struct rq
调度队列        rq->cfs_rq、rq->rt_rq
进程            struct task_struct
调度实体        task_struct->sched_entity、task_struct->sched_rt_entity
调度类          task_struct->sched_class

内核通过rq task_struct sched_class等结构体进行抽象调度器,每个cpu抽象了一个rq结构体,进程提取了一个task_struct,sched_class代表一种调度类,以下为几者的关系:

在这里插入图片描述

【1】图示左上,对cpu的内核核进行的抽象,提取为struct rq 结构体,cpu可以认为是一个具有一定算力的贪吃蛇,对cpu来说,可以反复不断运行进程,这两个进程队列cfs_rq、rt_rq已经指向了进程实体,cpu只需要按照优先级反复执行即可。
【2】图示上中,数据结构,可以看见内核选取了红黑树作为CFS调度器的数据结构,选择了优先级数组(位图+按优先级组织的链表,即rt_prio_array)作为rt调度器的数据结构。
【3】图示右上,进程抽象,其中包含着普通进程调度实体与实时进程调度实体,通过调度实体找到了进程task_struct就完成了进程的调度。
【4】图示下,调度器,对调度器的抽象,调度器是承载在进程task_struct上,这样就可以通过配置调度策略来选择对应的调度器。
可以看出,内核背后的抽象思维非常值得我们学习,可以在做实际项目中体会这种思维;

3. cpu抽象rq结构体

此结构体主要是对CPU进行抽象,主要为cpu上的运行队列,主要以单核调度进行说明,多核与组调度,在此并不作为重点进行说明;

/*
 * This is the main, per-CPU runqueue data structure.
 *
 * Locking rule: those places that want to lock multiple runqueues
 * (such as the load balancing or the thread migration code), lock
 * acquire operations must be ordered by ascending &runqueue.
 */
struct rq {
	/* runqueue lock: */
	raw_spinlock_t lock;

	/*
	 * nr_running and cpu_load should be in the same cacheline because
	 * remote CPUs use both these fields when doing load calculation.
	 */
	  运行队列上调度实体的个数,是所有子调度器类中就绪实体之和
	unsigned long nr_running;
	#define CPU_LOAD_IDX_MAX 5
	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
	unsigned long last_load_update_tick;
#ifdef CONFIG_NO_HZ
	u64 nohz_stamp;
	unsigned char nohz_balance_kick;
#endif
	int skip_clock_update;

	/* capture load from *all* tasks on this cpu: */
	// 表示 rq 的权重,对于每个调度实体,都有一个权重值来表示进程的优先级,这里的 load 是整个队列上的总 load 值,反映了当前 runqueue 上进程的总体权重信息.
    struct load_weight load;
	struct load_weight load;
	// 负载的统计次数
	unsigned long nr_load_updates;
	// 该运行队列上进程的切换次数
	u64 nr_switches;

    // cfs 调度器类的就绪队列
    struct cfs_rq cfs;
    // rt 调度器类的就绪队列
	struct rt_rq rt;

#ifdef CONFIG_FAIR_GROUP_SCHED
	/* list of leaf cfs_rq on this cpu: */
	struct list_head leaf_cfs_rq_list;
#endif
#ifdef CONFIG_RT_GROUP_SCHED
	struct list_head leaf_rt_rq_list;
#endif

	/*
	 * This is part of a global counter where only the total sum
	 * over all CPUs matters. A task can increase this counter on
	 * one CPU and if it got migrated afterwards it may decrease
	 * it on another CPU. Always updated under the runqueue lock:
	 */
	unsigned long nr_uninterruptible;
// 保存的进程指针,分别对应当前执行进程 curr,idle 进程(空闲时调用),stop进程(用于停止 CPU)
	struct task_struct *curr, *idle, *stop;
	unsigned long next_balance;
	struct mm_struct *prev_mm;

	u64 clock;
	u64 clock_task;

	atomic_t nr_iowait;

#ifdef CONFIG_SMP
	struct root_domain *rd;
	struct sched_domain *sd;

	unsigned long cpu_power;

	unsigned char idle_at_tick;
	/* For active balancing */
	int post_schedule;
	int active_balance;
	int push_cpu;
	struct cpu_stop_work active_balance_work;
	/* cpu of this runqueue: */
	int cpu;
	int online;

	unsigned long avg_load_per_task;

	u64 rt_avg;
	u64 age_stamp;
	u64 idle_stamp;
	u64 avg_idle;
#endif

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
	u64 prev_irq_time;
#endif

	/* calc_load related fields */
	unsigned long calc_load_update;
	long calc_load_active;

#ifdef CONFIG_SCHED_HRTICK
#ifdef CONFIG_SMP
	int hrtick_csd_pending;
	struct call_single_data hrtick_csd;
#endif
	struct hrtimer hrtick_timer;
#endif

#ifdef CONFIG_SCHEDSTATS
	/* latency stats */
	struct sched_info rq_sched_info;
	unsigned long long rq_cpu_time;
	/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */

	/* sys_sched_yield() stats */
	unsigned int yld_count;

	/* schedule() stats */
	unsigned int sched_switch;
	unsigned int sched_count;
	unsigned int sched_goidle;

	/* try_to_wake_up() stats */
	unsigned int ttwu_count;
	unsigned int ttwu_local;
#endif

#ifdef CONFIG_SMP
	struct task_struct *wake_list;
#endif
};

4.调度队列cfs_rq rt_rq

每个CPU上管理这两个调度队列,实时调度队列,CFS调度队列;其目的就是用来管理调度实体的,将调度实体按照规则进行组织;
cfs_rq即普通进程运行队列,管理着普通任务,cfs使用红黑树进行管理,运行队列指向其红黑树根节点以及最左边的叶子节点,即下一个要调度的节点;
rt_rq即实时进程运行队列,管理着实时任务,rt使用优先级数组(位图+按优先级组织的链表)进行管理,运行队列指向各优先级对应的链表,进行调度节点管理。

/* CFS-related fields in a runqueue */
/* CFS-related fields in a runqueue */
struct cfs_rq {
	/* total weight of all entities queued on this cfs_rq */
	struct load_weight load;
	/* number of runnable entities on this cfs_rq */
	unsigned long nr_running;

	/* accumulated wall-clock execution time of this queue */
	u64 exec_clock;
	/* monotonically tracked minimum vruntime of the queue; used as the
	 * baseline when placing newly woken or forked entities */
	u64 min_vruntime;
#ifndef CONFIG_64BIT
	u64 min_vruntime_copy;
#endif

	/* red-black tree of runnable entities, keyed by vruntime */
	struct rb_root tasks_timeline;
	/* cached leftmost node of the tree (smallest vruntime, i.e. the
	 * next entity to run) — avoids walking the tree on every pick */
	struct rb_node *rb_leftmost;

	struct list_head tasks;
	struct list_head *balance_iterator;

	/*
	 * 'curr' points to currently running entity on this cfs_rq.
	 * It is set to NULL otherwise (i.e when none are currently running).
	 */
	struct sched_entity *curr, *next, *last, *skip;

#ifdef	CONFIG_SCHED_DEBUG
	unsigned int nr_spread_over;
#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
	struct rq *rq;	/* cpu runqueue to which this cfs_rq is attached */

	/*
	 * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
	 * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
	 * (like users, containers etc.)
	 *
	 * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
	 * list is used during load balance.
	 */
	int on_list;
	struct list_head leaf_cfs_rq_list;
	struct task_group *tg;	/* group that "owns" this runqueue */

#ifdef CONFIG_SMP
	/*
	 * the part of load.weight contributed by tasks
	 */
	unsigned long task_weight;

	/*
	 *   h_load = weight * f(tg)
	 *
	 * Where f(tg) is the recursive weight fraction assigned to
	 * this group.
	 */
	unsigned long h_load;

	/*
	 * Maintaining per-cpu shares distribution for group scheduling
	 *
	 * load_stamp is the last time we updated the load average
	 * load_last is the last time we updated the load average and saw load
	 * load_unacc_exec_time is currently unaccounted execution time
	 */
	u64 load_avg;
	u64 load_period;
	u64 load_stamp, load_last, load_unacc_exec_time;

	unsigned long load_contribution;
#endif
#endif
};

/* Real-Time classes' related field in a runqueue: */
struct rt_rq {
	struct rt_prio_array active;
	unsigned long rt_nr_running;
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
	struct {
		int curr; /* highest queued rt task prio */
#ifdef CONFIG_SMP
		int next; /* next highest */
#endif
	} highest_prio;
#endif
#ifdef CONFIG_SMP
	unsigned long rt_nr_migratory;
	unsigned long rt_nr_total;
	int overloaded;
	struct plist_head pushable_tasks;
#endif
	int rt_throttled;
	u64 rt_time;
	u64 rt_runtime;
	/* Nests inside the rq lock: */
	raw_spinlock_t rt_runtime_lock;

#ifdef CONFIG_RT_GROUP_SCHED
	unsigned long rt_nr_boosted;

	struct rq *rq;
	struct list_head leaf_rt_rq_list;
	struct task_group *tg;
#endif
};

5.调度实体sched_entity sched_rt_entity

调度实体,每一种调度器的调度实体是不一样的,是每一种调度器关键的调度载体。
cfs调度器,使用sched_entity调度实体,其中包含虚拟的运行时间,总执行时间等信息。
rt调度器,使用sched_rt_entity调度实体,其中包括时间片等信息。

/*
 * Scheduling entity used by the CFS (fair) scheduler class; embedded in
 * task_struct and queued on a cfs_rq's red-black tree via run_node.
 */
struct sched_entity {
	struct load_weight	load;		/* for load-balancing */
	/* node in the cfs_rq red-black tree, ordered by vruntime */
	struct rb_node		run_node;
	struct list_head	group_node;
	/* non-zero while this entity is queued on a runqueue */
	unsigned int		on_rq;

	/* timestamp when the entity last started executing */
	u64			exec_start;
	/* total real execution time accumulated by this entity */
	u64			sum_exec_runtime;
	/* weighted virtual runtime — the red-black tree sort key */
	u64			vruntime;
	/* sum_exec_runtime snapshot taken when last scheduled in; the
	 * difference gives the time consumed in the current slice */
	u64			prev_sum_exec_runtime;

	u64			nr_migrations;

#ifdef CONFIG_SCHEDSTATS
	struct sched_statistics statistics;
#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
	struct sched_entity	*parent;
	/* rq on which this entity is (to be) queued: */
	struct cfs_rq		*cfs_rq;
	/* rq "owned" by this entity/group: */
	struct cfs_rq		*my_q;
#endif
};
/*
 * Scheduling entity used by the real-time scheduler class; embedded in
 * task_struct and linked into the rt_rq priority array via run_list.
 */
struct sched_rt_entity {
	/* link into the rt_prio_array list for this entity's priority */
	struct list_head run_list;
	struct sched_rt_entity *back;
#ifdef CONFIG_RT_GROUP_SCHED
	struct sched_rt_entity	*parent;
	/* rq on which this entity is (to be) queued: */
	struct rt_rq		*rt_rq;
	/* rq "owned" by this entity/group: */
	struct rt_rq		*my_q;
#endif
	unsigned long timeout;
	/* remaining time slice (relevant for SCHED_RR round-robin) */
	unsigned int time_slice;
	int nr_cpus_allowed;
};

6.调度类sched_class

每个 CPU 拥有各自的 runqueue,而 runqueue 中维护了各个调度器类的相关信息:包括 cfs_rq,rt_rq.
每个不同的调度器类按照优先级排列依次为: stop_sched_class->rt_sched_class->fair_sched_class->idle_sched_class,
当高优先级调度器中存在就绪任务时,就不会轮到低优先级调度器中的任务执行;
内核对实时进程设置了运行占比为0.95,即当实时进程一直占用 CPU 时,会强行给非实时任务留出 5% 的执行时间,当然也是可以配置的,使用sysctl指令进行配置;

/*
 * Scheduling-class operations table.  Each scheduler class (stop, rt,
 * fair, idle) provides one instance; classes form a singly linked,
 * priority-ordered list through 'next', which the core scheduler walks
 * from highest to lowest priority when picking the next task.
 */
struct sched_class {
	const struct sched_class *next;

	/* add task p to this class's runqueue */
	void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
	/* remove task p from this class's runqueue */
	void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
	/* yield the CPU: requeue the current task behind its peers */
	void (*yield_task) (struct rq *rq);
	bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt);

	/* check whether the newly woken task p should preempt rq->curr */
	void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);

	/* pick the next task to run from this class's runqueue */
	struct task_struct * (*pick_next_task) (struct rq *rq);
	/* bookkeeping when the previously running task is switched out */
	void (*put_prev_task) (struct rq *rq, struct task_struct *p);

#ifdef CONFIG_SMP
	/* choose the target runqueue/CPU for task p (wakeup/fork/exec) */
	int  (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);

	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
	void (*post_schedule) (struct rq *this_rq);
	void (*task_waking) (struct task_struct *task);
	void (*task_woken) (struct rq *this_rq, struct task_struct *task);

	void (*set_cpus_allowed)(struct task_struct *p,
				 const struct cpumask *newmask);

	void (*rq_online)(struct rq *rq);
	void (*rq_offline)(struct rq *rq);
#endif

	void (*set_curr_task) (struct rq *rq);
	/* periodic tick hook: account runtime, check for preemption */
	void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
	void (*task_fork) (struct task_struct *p);

	/* notifications when a task leaves/enters this scheduling class */
	void (*switched_from) (struct rq *this_rq, struct task_struct *task);
	void (*switched_to) (struct rq *this_rq, struct task_struct *task);
	/* notification that the task's priority has changed (oldprio is
	 * the previous priority) — not a setter */
	void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
			     int oldprio);

	/* report the round-robin time slice for the given task */
	unsigned int (*get_rr_interval) (struct rq *rq,
					 struct task_struct *task);

#ifdef CONFIG_FAIR_GROUP_SCHED
	void (*task_move_group) (struct task_struct *p, int on_rq);
#endif
};
  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值