进程调度子系统（1）调度器数据结构

最新推荐文章于 2022-12-25 23:11:04 发布

真胖子

最新推荐文章于 2022-12-25 23:11:04 发布

阅读量1.2k

点赞数

分类专栏： linux调度子系统

本文链接：https://blog.csdn.net/figtingforlove/article/details/20904587

版权

linux调度子系统专栏收录该内容

3 篇文章 0 订阅

订阅专栏

调度器的任务是在进程间共享CPU时间，创造并行执行的错觉。该任务分为两个不同的部分：一个涉及调度策略，另一个涉及上下文切换。

1.相关数据结构

1.1 task_struct相关成员

1042 struct task_struct {

1058         int on_rq; 
1059 
1060         int prio, static_prio, normal_prio;//优先级

1061         unsigned int rt_priority;
1062         const struct sched_class *sched_class;//调度类
1063         struct sched_entity se; //调度实体
1064         struct sched_rt_entity rt;

1078         unsigned int policy;   //调度策略
1079         int nr_cpus_allowed;   
1080         cpumask_t cpus_allowed;  //cpu位图

}

1.2 优先级表示

用户进程优先级表示：prio和normal_prio为动态优先级，static_prio为静态优先级。

static_prio是进程创建时分配的优先级，如果不人为的更改（nice和sched_setscheduler系统调用），那么在这个进程运行期间不会发生变化。

normal_prio是基于static_prio和调度策略计算出的优先级。

prio是调度器类考虑的优先级，某些情况下需要暂时提高进程的优先级(实时互斥量)，因此有此变量，对于优先级未动态提高的进程来说这三个值是相等的。

以上三个优先级值越小，代表进程的优先级越高（普通进程对应的nice值范围为-20~19）。实时进程优先级表示：
rt_priority表示实时进程的优先级（0～99），该值与prio，normal_prio和static_prio不同，值越大代表实时进程的优先级越高。内核如何处理这些优先级之间的关系呢？其实，内核使用0～139表示内部优先级，值越低优先级越高。其中0～99为实时进程，100～139为非实时进程。

/include/linux/sched/rt.h

 17 #define MAX_USER_RT_PRIO        100
 18 #define MAX_RT_PRIO             MAX_USER_RT_PRIO
 19 
 20 #define MAX_PRIO                (MAX_RT_PRIO + 40)
 21 #define DEFAULT_PRIO            (MAX_RT_PRIO + 20)

静态优先级转换

kernel/sched.c

 90 #define NICE_TO_PRIO(nice)      (MAX_RT_PRIO + (nice) + 20)
 91 #define PRIO_TO_NICE(prio)      ((prio) - MAX_RT_PRIO - 20)
 92 #define TASK_NICE(p)            PRIO_TO_NICE((p)->static_prio)

三种优先级的计算：

当static_prio分配好后，prio和normal_prio计算方法实现如下：
首先，大家都知道进程创建过程中do_fork会调用wake_up_new_task，在该函数中会调用static int effective_prio(struct task_struct *p)函数。

void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
    {
          unsigned long flags;
           struct rq *rq;
       ...
          p->prio =effective_prio(p);  
       ...
    }

   

1953 static int effective_prio(struct task_struct *p)
1954 {
1955         p->normal_prio = normal_prio(p);
1956         /*
              *如果是实时进程或是已经提高到了实时优先级，保持优先级不变。否则，返回普通优先级
1960          */
1961         if (!rt_prio(p->prio))
1962                 return p->normal_prio;//非实时进程返回普通优先级
1963         return p->prio; //实时情况，保持优先级不变
1964 }

1923 static inline int __normal_prio(struct task_struct *p)
1924 {
1925         return p->static_prio;//返回静态优先级
1926 }
1927 

1935 static inline int normal_prio(struct task_struct *p)
1936 {
1937         int prio;
1938 
1939         if (task_has_rt_policy(p)) //是否是实时进程
1940                 prio = MAX_RT_PRIO-1 - p->rt_priority; //实时优先级数值转换为内核表示
1941         else
1942                 prio = __normal_prio(p); //普通进程
1943         return prio;
1944 }

可见，对于普通进程的prio，normal_prio和static_prio是一样的，但是也有特殊情况，当使用实时互斥量时普通进程的prio会暂时发生变化。
注意，子进程分支时，静态优先级继承自父进程，prio为父进程的normal_prio。这样确保实时互斥量引起的优先级提高不会传递给子进程。

1.3 调度类

调度类提供了调度器和各个调度方法之间的关联，对于不同的调度策略内核提供了不同的调度类，调度类采用平坦结构按进程的重要性顺序连接。实时调度类在完全公平调度类之前，最后是空闲进程调度类（这个层次结构在编译时建立，没有运行时动态增加的机制）。

调度器类提供了对进程的管理

kernel/sched/sched.h

994 struct sched_class {
995         const struct sched_class *next;
996 
997         void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
998         void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
999         void (*yield_task) (struct rq *rq);
1000         bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt);
1001 
1002         void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
1003 
1004         struct task_struct * (*pick_next_task) (struct rq *rq);
1005         void (*put_prev_task) (struct rq *rq, struct task_struct *p);
1006 
             .....//SMP相关函数
1022 
1023         void (*set_curr_task) (struct rq *rq);
1024         void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
1025         void (*task_fork) (struct task_struct *p);
1026 
1027         void (*switched_from) (struct rq *this_rq, struct task_struct *task);
1028         void (*switched_to) (struct rq *this_rq, struct task_struct *task);
1029         void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
1030                              int oldprio);
1031 
1032         unsigned int (*get_rr_interval) (struct rq *rq,
1033                                          struct task_struct *task);
1034 
            .....//组调度相关
1038 };

具体的函数后面分析

1.4 调度实体

调度器不限于调度进程，还可以处理更大的实体（实现组调度）。这个一般性要求调度器不直接操作进程，而是处理可调度实体。最简单的情况下，单个进程可以看作一个可调度实体（内嵌在task_struct结构）。

985 struct sched_entity {
986         struct load_weight      load;           /* for load-balancing */
987         struct rb_node          run_node;  //红黑树节点

989         unsigned int            on_rq;  //当前实体是否在就绪队列上
990         /*完全公平调度器所需的时间*/
991         u64                     exec_start;  //进程开始执行的时间
992         u64                     sum_exec_runtime;//进程在cpu上运行的时间总和
993         u64                     vruntime;  //虚拟时间，用于在红黑树排队（完全公平调度需要）
994         u64                     prev_sum_exec_runtime; //进程上次在cpu上运行的时间总和
995 
996         u64                     nr_migrations;
997 
          ...// 统计信息
1001 
          ... //组调度相关
1009 
          ...//SMP相关
1014 };

struct sched_entity中含有struct rb_node的实例，struct rb_node是红黑树（就绪队列的内核组织结构）的节点类型，这样在红黑树中也是通过container_of机制找到struct sched_entity实体的。

1.5 就绪队列

就绪队列是核心调度器用于管理活动进程的主要数据结构，各cpu有自身的就绪队列，每个活动进程只能出现在一个就绪队列中（在多个cpu上同时运行同一进程是不可能的）。

注意，进程不是由就绪队列成员直接管理的，这是调度器类的责任，因此在就绪队列中嵌入了特定于调度器类的子就绪队列。

struct rq {

411         unsigned int nr_running; //可运行进程数

416         #define CPU_LOAD_IDX_MAX 5
417         unsigned long cpu_load[CPU_LOAD_IDX_MAX]; 
418         unsigned long last_load_update_tick;


426         int skip_clock_update;
427 
428         /* capture load from *all* tasks on this cpu: */
429         struct load_weight load;  //该cpu队列上的权重
430         unsigned long nr_load_updates;
431         u64 nr_switches;
432 
433         struct cfs_rq cfs;  //完全公平调度子队列
434         struct rt_rq rt;
435 
           ...//组调度相关
452 
453         struct task_struct *curr, *idle, *stop; //进程结构指针
454         unsigned long next_balance;
455         struct mm_struct *prev_mm;   //上一个内存上下文
456 
457         u64 clock;       //就绪队列自身时钟（实际的时钟滴答）     
458         u64 clock_task;
459 
460         atomic_t nr_iowait;
461 
            ...//SMP 及 一些统计信息相关
534 };

1.6 负荷权重的计算
进程的重要性不但由优先级指定，还需要考虑保存在task_struct->se.load的负荷权重。

linux/sched.h

932 struct load_weight {
933         unsigned long weight;
934         u32 inv_weight; //被负荷权重除的结果
935 };


kernel/sched.c

//静态优先级（nice值）和权重转换表

955 static const int prio_to_weight[40] = {
956  /* -20 */     88761,     71755,     56483,     46273,     36291,
957  /* -15 */     29154,     23254,     18705,     14949,     11916,
958  /* -10 */      9548,      7620,      6100,      4904,      3906,
959  /*  -5 */      3121,      2501,      1991,      1586,      1277,
960  /*   0 */      1024,       820,       655,       526,       423,
961  /*   5 */       335,       272,       215,       172,       137,
962  /*  10 */       110,        87,        70,        56,        45,
963  /*  15 */        36,        29,        23,        18,        15,
964 };

可见优先级越高（nice值越小），权重越大；在较早版本的内核中，实时进程的权重是普通进程的两倍；idle进程（SCHED_IDLE）的权重最小；默认权重（NICE_0_LOAD）是nice值为0时的权重1024。

权重和vruntime（虚拟运行时间）的关系：

    ideal_time = sum_runtime * se.load.weight / cfs_rq.load.weight
    1) vruntime += delta * NICE_0_LOAD / se.load.weight  //(if se.load.weight != NICE_0_LOAD)
    2) vruntime += delta;  //(if se.load.weight == NICE_0_LOAD)

所以，从上面来看，优先级越高，权重越大，虚拟时间增长得越慢（同样的实际运行时间所得到的虚拟时间越少），最后越靠近就绪队列红黑树的左边；否则，就越靠右边。
内核利用set_load_weight根据进程类型和静态优先级计算权重。

kernel/sched/sched.h

 940 #define WEIGHT_IDLEPRIO                3

       #define WMULT_IDLEPRIO         1431655765

 60 # define SCHED_LOAD_RESOLUTION  10
 61 # define scale_load(w)          ((w) << SCHED_LOAD_RESOLUTION)
 62 # define scale_load_down(w)     ((w) >> SCHED_LOAD_RESOLUTION)
 63 #else
 64 # define SCHED_LOAD_RESOLUTION  0
 65 # define scale_load(w)          (w)
 66 # define scale_load_down(w)     (w)
 67 #endif
 68
 69 #define SCHED_LOAD_SHIFT        (10 + SCHED_LOAD_RESOLUTION)
 70 #define SCHED_LOAD_SCALE        (1L << SCHED_LOAD_SHIFT)
 71
 72 #define NICE_0_LOAD             SCHED_LOAD_SCALE
 73 #define NICE_0_SHIFT            SCHED_LOAD_SHIFT

kernel/sched/core.c

745 static void set_load_weight(struct task_struct *p)
746 {
747         int prio = p->static_prio - MAX_RT_PRIO;
748         struct load_weight *load = &p->se.load;
749
750         /*
751          * SCHED_IDLE tasks get minimal weight:
752          */
753         if (p->policy == SCHED_IDLE) {
754                 load->weight = scale_load(WEIGHT_IDLEPRIO);
755                 load->inv_weight = WMULT_IDLEPRIO;
756                 return;
757         }
758
759         load->weight = scale_load(prio_to_weight[prio]);
760         load->inv_weight = prio_to_wmult[prio];
761 }

借两个图看一下总体结构

图调度子系统数据结构

图调度器框架

真胖子

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
进程调度子系统（1）调度器数据结构

调度器的任务，在进程间共享cpu时间，创造并行执行的错觉。该任务分为两个不同的部分：一个涉及调度策略，另一个涉及上下文切换。1.相关数据结构1.1task_struct相关成员1042 struct task_struct {1058 int on_rq; 1059 1060 int prio, static_prio, normal_prio;
复制链接

扫一扫