A history of Linux schedule().



Let's take it slowly~~


Linux V0.11  


Supports timers (alarm) and signals.

Flow chart:


Source code:

void schedule(void)
{
	int i,next,c;
	struct task_struct ** p;

/* check alarm, wake up any interruptible tasks that have got a signal */

	for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
		if (*p) {
			if ((*p)->alarm && (*p)->alarm < jiffies) {
				(*p)->signal |= (1<<(SIGALRM-1));
				(*p)->alarm = 0;
			}
			if (((*p)->signal & ~(_BLOCKABLE & (*p)->blocked)) &&
			    (*p)->state==TASK_INTERRUPTIBLE)
				(*p)->state=TASK_RUNNING;
		}

/* this is the scheduler proper: */

	while (1) {
		c = -1;
		next = 0;
		i = NR_TASKS;
		p = &task[NR_TASKS];
		while (--i) {
			if (!*--p)
				continue;
			if ((*p)->state == TASK_RUNNING && (*p)->counter > c)
				c = (*p)->counter, next = i;
		}
		if (c) break;
		for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
			if (*p)
				(*p)->counter = ((*p)->counter >> 1) +
						(*p)->priority;
	}
	switch_to(next);
}


Recalculating the time slice:

counter = counter / 2 + priority
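
A quick sketch (ordinary user-space C, not kernel code) of how this recurrence behaves: for a task that never gets to run, counter climbs toward 2*priority and stops there, so a starved task's claim on the CPU grows but stays bounded.

#include <stdio.h>

int main(void)
{
	int counter = 0, priority = 15;
	int i;

	for (i = 0; i < 8; i++) {
		counter = (counter >> 1) + priority;
		printf("round %d: counter = %d\n", i, counter);
	}
	/* prints 15, 22, 26, 28, 29, 29, 29, 29 -- converging
	 * just below 2*priority = 30 because of integer division */
	return 0;
}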


Related interfaces:

1. pause: temporarily gives up the CPU; the sleep is interruptible.

int sys_pause(void)
{
	current->state = TASK_INTERRUPTIBLE;
	schedule();
	return 0;
}

2. sleep_on: uninterruptible sleep, used for example while waiting for a resource.


void sleep_on(struct task_struct **p)
{
	struct task_struct *tmp;

	if (!p)
		return;
	if (current == &(init_task.task))
		panic("task[0] trying to sleep");
	tmp = *p;
	*p = current;
	current->state = TASK_UNINTERRUPTIBLE;
	schedule();
	if (tmp)
		tmp->state=0;
}

3. interruptible_sleep_on: interruptible sleep, mostly used when several processes wait for the same resource; the sleepers form a wait queue.

void interruptible_sleep_on(struct task_struct **p)
{
	struct task_struct *tmp;

	if (!p)
		return;
	if (current == &(init_task.task))
		panic("task[0] trying to sleep");
	tmp=*p;
	*p=current;
repeat:	current->state = TASK_INTERRUPTIBLE;
	schedule();
	if (*p && *p != current) {
		(**p).state=0;
		goto repeat;
	}
	*p=NULL;
	if (tmp)
		tmp->state=0;
}

4. wake_up: wakes a process regardless of its current state.

void wake_up(struct task_struct **p)
{
	if (p && *p) {
		(**p).state=0;
		*p=NULL;
	}
}


5. nice: lowers the priority, making the process more willing to yield the CPU. The increment must be strictly smaller than the current priority value.

int sys_nice(long increment)
{
    if (current->priority-increment>0)
        current->priority -= increment;
    return 0;
}


Callers of schedule() (system_call.s):

reschedule:
    pushl $ret_from_sys_call
    jmp _schedule
_system_call:
	cmpl $nr_system_calls-1,%eax    # check the system-call number
	ja bad_sys_call
	...                             # push registers
	call _sys_call_table(,%eax,4)   # call the matching handler
	pushl %eax
	movl _current,%eax              # get the current task_struct pointer
	cmpl $0,state(%eax)		# not runnable any more? go reschedule
	jne reschedule
	cmpl $0,counter(%eax)		# time slice just ran out? go reschedule
	je reschedule
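
Rendered as C (a sketch of what the assembly does, not actual kernel source):

/* after every system call returns: */
if (current->state != TASK_RUNNING)	/* blocked or stopped            */
	goto reschedule;		/* schedule(), then ret_from_sys_call */
if (current->counter == 0)		/* time slice exhausted          */
	goto reschedule;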

Related data structures:

An array of task pointers. All processes live in one fixed-size array, so the maximum number of processes the system supports is fixed.

The tasks are also chained to one another by pointers, forming a list.

struct task_struct * task[NR_TASKS] = {&(init_task.task), };


Linux V0.12

Adds support for timeouts, e.g. select can now specify a maximum wait time.

schedule() flow chart:




Source code:


void schedule(void)
{
	int i,next,c;
	struct task_struct ** p;

/* check alarm, wake up any interruptible tasks that have got a signal */

	for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
		if (*p) {
			if ((*p)->timeout && (*p)->timeout < jiffies) {
				(*p)->timeout = 0;
				if ((*p)->state == TASK_INTERRUPTIBLE)
					(*p)->state = TASK_RUNNING;
			}
			if ((*p)->alarm && (*p)->alarm < jiffies) {
				(*p)->signal |= (1<<(SIGALRM-1));
				(*p)->alarm = 0;
			}
			if (((*p)->signal & ~(_BLOCKABLE & (*p)->blocked)) &&
			(*p)->state==TASK_INTERRUPTIBLE)
				(*p)->state=TASK_RUNNING;
		}

/* this is the scheduler proper: */

	while (1) {
		c = -1;
		next = 0;
		i = NR_TASKS;
		p = &task[NR_TASKS];
		while (--i) {
			if (!*--p)
				continue;
			if ((*p)->state == TASK_RUNNING && (*p)->counter > c)
				c = (*p)->counter, next = i;
		}
		if (c) break;
		for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
			if (*p)
				(*p)->counter = ((*p)->counter >> 1) +
						(*p)->priority;
	}
	switch_to(next);
}

Related function changes:

1. Adds __sleep_on; both sleep_on and interruptible_sleep_on now call it directly.

static inline void __sleep_on(struct task_struct **p, int state)
{
	struct task_struct *tmp;

	if (!p)
		return;
	if (current == &(init_task.task))
		panic("task[0] trying to sleep");
	tmp = *p;
	*p = current;
	current->state = state;
repeat:	schedule();
	if (*p && *p != current) {
		(**p).state = 0;
		current->state = TASK_UNINTERRUPTIBLE;
		goto repeat;
	}
	if (!*p)
		printk("Warning: *P = NULL\n\r");
	if (*p = tmp)		/* intentional assignment, not a typo for '==' */
		tmp->state=0;
}

void interruptible_sleep_on(struct task_struct **p)
{
	__sleep_on(p,TASK_INTERRUPTIBLE);
}

void sleep_on(struct task_struct **p)
{
	__sleep_on(p,TASK_UNINTERRUPTIBLE);
}

2. wake_up now warns when asked to wake a stopped process or a zombie:
void wake_up(struct task_struct **p)
{
	if (p && *p) {
		if ((**p).state == TASK_STOPPED)
			printk("wake_up: TASK_STOPPED");
		if ((**p).state == TASK_ZOMBIE)
			printk("wake_up: TASK_ZOMBIE");
		(**p).state=0;
	}
}


 

Linux V0.95

Only one small change, concerning the signal blocking mask:

if (((*p)->signal & ~(_BLOCKABLE & (*p)->blocked)) &&
			(*p)->state==TASK_INTERRUPTIBLE)
				(*p)->state=TASK_RUNNING;

==>

if (((*p)->signal & ~(*p)->blocked) &&
			(*p)->state==TASK_INTERRUPTIBLE)
				(*p)->state=TASK_RUNNING;

This is because in V0.95 sys_ssetmask already strips SIGKILL and SIGSTOP from the mask:

int sys_ssetmask(int newmask)
{
	int old=current->blocked;
	current->blocked = newmask & ~(1<<(SIGKILL-1)) & ~(1<<(SIGSTOP-1));
	return old;
}

Related interface changes:

1. A process that temporarily yields the CPU now blocks the signals whose handler is SIG_IGN:

int sys_pause(void)
{
	unsigned long old_blocked;
	unsigned long mask;
	struct sigaction * sa = current->sigaction;

	old_blocked = current->blocked;
	for (mask=1 ; mask ; sa++,mask += mask)
		if (sa->sa_handler == SIG_IGN)
			current->blocked |= mask;
	current->state = TASK_INTERRUPTIBLE;
	schedule();
	current->blocked = old_blocked;
	return -EINTR;
}

2. __sleep_on now protects the flags register: it saves EFLAGS on entry, restores it on exit, and uses sti() in between, closing the race window around the queue manipulation.

static inline void __sleep_on(struct task_struct **p, int state)
{
	struct task_struct *tmp;
	unsigned int flags;

	if (!p)
		return;
	if (current == &(init_task.task))
		panic("task[0] trying to sleep");
	__asm__("pushfl ; popl %0":"=r" (flags));
	tmp = *p;
	*p = current;
	current->state = state;
/* make sure interrupts are enabled: there should be no more races here */
	sti();
repeat:	schedule();
	if (*p && *p != current) {
		current->state = TASK_UNINTERRUPTIBLE;
		(**p).state = 0;
		goto repeat;
	}
	if (*p = tmp)
		tmp->state=0;
	__asm__("pushl %0 ; popfl"::"r" (flags));
}
3. sys_nice now handles an increment larger than the current priority value: the priority is simply clamped down to 1.

int sys_nice(long increment)
{
	if (increment < 0 && !suser())
		return -EPERM;
	if (increment > current->priority)
		increment = current->priority-1;
	current->priority -= increment;
	return 0;
}


Linux V0.95a

Fixes a bug in the timeout handling: the timeout is now cleared (and the task woken) only when the task is actually in TASK_INTERRUPTIBLE, instead of being cleared unconditionally:

if ((*p)->timeout && (*p)->timeout < jiffies) {
	(*p)->timeout = 0;
	if ((*p)->state == TASK_INTERRUPTIBLE)
		(*p)->state = TASK_RUNNING;
}


==>

if ((*p)->timeout && (*p)->timeout < jiffies)
	if ((*p)->state == TASK_INTERRUPTIBLE) {
		(*p)->timeout = 0;
		(*p)->state = TASK_RUNNING;
	}

Related interface changes:

1. Adds next_wait to struct task_struct and uses it to maintain the sleep wait list; zombie and stopped processes are no longer woken.

void wake_up(struct task_struct **p)
{
	struct task_struct * wakeup_ptr, * tmp;

	if (p && *p) {
		wakeup_ptr = *p;
		*p = NULL;
		while (wakeup_ptr && wakeup_ptr != task[0]) {
			if (wakeup_ptr->state == TASK_STOPPED)
				printk("wake_up: TASK_STOPPED\n");
			else if (wakeup_ptr->state == TASK_ZOMBIE)
				printk("wake_up: TASK_ZOMBIE\n");
			else
				wakeup_ptr->state = TASK_RUNNING;
			tmp = wakeup_ptr->next_wait;
			wakeup_ptr->next_wait = task[0];
			wakeup_ptr = tmp;
		}
	}
}

static inline void __sleep_on(struct task_struct **p, int state)
{
    unsigned int flags;

    if (!p)
        return;
    if (current == task[0])
        panic("task[0] trying to sleep");
    __asm__("pushfl ; popl %0":"=r" (flags));
    current->next_wait = *p;
    task[0]->next_wait = NULL;
    *p = current;
    current->state = state;
    sti();
    schedule();
    if (current->next_wait != task[0])
        wake_up(p);
    current->next_wait = NULL;
    __asm__("pushl %0 ; popfl"::"r" (flags));
}


Linux V0.95c

No relevant changes.


Linux V0.96a

Adds higher-priority preemption:

When a newly woken process has a higher priority, a reschedule is triggered.

Implementation:


1. schedule() merely gains one line at the top:

need_resched = 0;

2. wake_up checks whether the newly woken process has a higher priority than the current one; if so, it sets need_resched = 1:

void wake_up(struct task_struct **p)
{
	struct task_struct * wakeup_ptr, * tmp;

	if (p && *p) {
		wakeup_ptr = *p;
		*p = NULL;
		while (wakeup_ptr && wakeup_ptr != task[0]) {
			if (wakeup_ptr->state == TASK_ZOMBIE)
				printk("wake_up: TASK_ZOMBIE\n");
			else if (wakeup_ptr->state != TASK_STOPPED) {
				wakeup_ptr->state = TASK_RUNNING;
				if (wakeup_ptr->counter > current->counter)
					need_resched = 1;
			}
			tmp = wakeup_ptr->next_wait;
			wakeup_ptr->next_wait = task[0];
			wakeup_ptr = tmp;
		}
	}
}

3. system_call checks need_resched and reschedules if it is non-zero:

reschedule:
	pushl $ret_from_sys_call
	jmp _schedule
.align 2
_system_call:
	pushl %eax		# save orig_eax
	SAVE_ALL
	cmpl _NR_syscalls,%eax
	jae bad_sys_call
	call _sys_call_table(,%eax,4)
	movl %eax,EAX(%esp)		# save the return value
ret_from_sys_call:
	cmpw $0x0f,CS(%esp)		# was old code segment supervisor ?
	jne 2f
	cmpw $0x17,OLDSS(%esp)		# was stack segment = 0x17 ?
	jne 2f
1:	movl _current,%eax
	cmpl _task,%eax			# task[0] cannot have signals
	je 2f
	cmpl $0,_need_resched           # if need_resched is non-zero, reschedule
	jne reschedule
	cmpl $0,state(%eax)		# state
	jne reschedule
	cmpl $0,counter(%eax)		# counter
	je reschedule

Related interface changes:

1. sys_nice is changed so that the priority can never end up as 0:

int sys_nice(long increment)
{
	if (increment < 0 && !suser())
		return -EPERM;
	if (increment >= current->priority)      // previously: increment > current->priority
		increment = current->priority-1;
	current->priority -= increment;
	return 0;
}

2. system_call is modified: if the same process is still current after scheduling, its state and time slice are not checked again.

1:	movl _current,%eax
	cmpl _task,%eax			# task[0] cannot have signals
	je 2f
	cmpl $0,_need_resched
	jne reschedule
	cmpl $0,state(%eax)		# state
	jne reschedule
	cmpl $0,counter(%eax)		# counter
	je reschedule


Linux V0.96b

Moves the timer (alarm) handling out of schedule(); do_timer now takes over this work.


Flow chart:



Source code:

void schedule(void)
{
	int i,next,c;
	struct task_struct ** p;

/* check alarm, wake up any interruptible tasks that have got a signal */

	need_resched = 0;
	for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
		if (*p) {
			if ((*p)->timeout && (*p)->timeout < jiffies)
				if ((*p)->state == TASK_INTERRUPTIBLE) {
					(*p)->timeout = 0;
					(*p)->state = TASK_RUNNING;
				}
			if (((*p)->signal & ~(*p)->blocked) &&
			(*p)->state==TASK_INTERRUPTIBLE)
				(*p)->state=TASK_RUNNING;
		}

/* this is the scheduler proper: */

	while (1) {
		c = -1;
		next = 0;
		i = NR_TASKS;
		p = &task[NR_TASKS];
		while (--i) {
			if (!*--p)
				continue;
			if ((*p)->state == TASK_RUNNING && (*p)->counter > c)
				c = (*p)->counter, next = i;
		}
		if (c) break;
		for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
			if (*p)
				(*p)->counter = ((*p)->counter >> 1) +
						(*p)->priority;
	}
	switch_to(next);
}

Linux V0.96c

No relevant changes.

Linux V0.97

Adds the wake_one_task interface and fixes a schedule() bug, so that a higher-priority process woken during scheduling can also preempt.

1. The new schedule():

void schedule(void)
{
	int i,next,c;
	struct task_struct ** p;

/* check alarm, wake up any interruptible tasks that have got a signal */

	need_resched = 0;
	for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
		if (*p) {
			if ((*p)->timeout && (*p)->timeout < jiffies)
				if ((*p)->state == TASK_INTERRUPTIBLE) {
					(*p)->timeout = 0;
				wake_one_task(*p);          // previously set (*p)->state directly
				}
			if (((*p)->signal & ~(*p)->blocked) &&
			    (*p)->state==TASK_INTERRUPTIBLE)
				wake_one_task(*p);                  // previously set (*p)->state directly
		}

/* this is the scheduler proper: */

	while (1) {
		c = -1;
		next = 0;
		i = NR_TASKS;
		p = &task[NR_TASKS];
		while (--i) {
			if (!*--p)
				continue;
			if ((*p)->state == TASK_RUNNING && (*p)->counter > c)
				c = (*p)->counter, next = i;
		}
		if (c)
			break;
		for(p = &LAST_TASK ; p > &FIRST_TASK ; --p)
			if (*p)
				(*p)->counter = ((*p)->counter >> 1) +
						(*p)->priority;
	}
	sti();            // re-enable interrupts before the switch
	switch_to(next);
}

Helper interface:

void wake_one_task(struct task_struct * p)
{
	p->state = TASK_RUNNING;
	if (p->counter > current->counter)
		need_resched = 1;
}

2. Defines wait_queue. sched.h gains add_wait_queue and remove_wait_queue, which make manipulating this circular list much cleaner.
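
The structure itself, as declared in sched.h of this era, is just a task pointer plus a next pointer:

struct wait_queue {
	struct task_struct * task;	/* the sleeping process            */
	struct wait_queue * next;	/* next entry in the circular list */
};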

Interface code:

extern inline void add_wait_queue(struct wait_queue ** p, struct wait_queue * wait)
{
	unsigned long flags;
	struct wait_queue * tmp;

	__asm__ __volatile__("pushfl ; popl %0 ; cli":"=r" (flags));
	wait->next = *p;
	tmp = wait;
	while (tmp->next)
		if ((tmp = tmp->next)->next == *p)
			break;
	*p = tmp->next = wait;
	__asm__ __volatile__("pushl %0 ; popfl"::"r" (flags));
}

extern inline void remove_wait_queue(struct wait_queue ** p, struct wait_queue * wait)
{
	unsigned long flags;
	struct wait_queue * tmp;

	__asm__ __volatile__("pushfl ; popl %0 ; cli":"=r" (flags));
	if (*p == wait)
		if ((*p = wait->next) == wait)
			*p = NULL;
	tmp = wait;
	while (tmp && tmp->next != wait)
		tmp = tmp->next;
	if (tmp)
		tmp->next = wait->next;
	wait->next = NULL;
	__asm__ __volatile__("pushl %0 ; popfl"::"r" (flags));
}

The corresponding __sleep_on was then modified accordingly:

static inline void __sleep_on(struct wait_queue **p, int state)
{
	unsigned long flags;

	if (!p)
		return;
	if (current == task[0])
		panic("task[0] trying to sleep");
	if (current->wait.next)
		printk("__sleep_on: wait->next exists\n");
	__asm__ __volatile__("pushfl ; popl %0 ; cli":"=r" (flags));
	current->state = state;
	add_wait_queue(p,&current->wait);
	sti();
	schedule();
	remove_wait_queue(p,&current->wait);
	__asm__("pushl %0 ; popfl"::"r" (flags));
}

3. wake_up also changed, but only in that the while loop became a do ... while.


Linux V1.0 


First, schedule() takes back the timer handling, making the code more complex; the flow essentially returns to that of V0.12.

Second, to generate more efficient machine code, for(;;) replaces while and goto replaces if.

Finally, it no longer iterates over the task array to visit every process; it walks the circular task list instead.

Source code:

asmlinkage void schedule(void)
{
	int c;
	struct task_struct * p;
	struct task_struct * next;
	unsigned long ticks;

/* check alarm, wake up any interruptible tasks that have got a signal */

	cli();
	ticks = itimer_ticks;
	itimer_ticks = 0;
	itimer_next = ~0;
	sti();
	need_resched = 0;
	p = &init_task;
	for (;;) {
		if ((p = p->next_task) == &init_task)
			goto confuse_gcc1;
		if (ticks && p->it_real_value) {
			if (p->it_real_value <= ticks) {
				send_sig(SIGALRM, p, 1);
				if (!p->it_real_incr) {
					p->it_real_value = 0;
					goto end_itimer;
				}
				do {
					p->it_real_value += p->it_real_incr;
				} while (p->it_real_value <= ticks);
			}
			p->it_real_value -= ticks;
			if (p->it_real_value < itimer_next)
				itimer_next = p->it_real_value;
		}
end_itimer:
		if (p->state != TASK_INTERRUPTIBLE)
			continue;
		if (p->signal & ~p->blocked) {
			p->state = TASK_RUNNING;
			continue;
		}
		if (p->timeout && p->timeout <= jiffies) {
			p->timeout = 0;
			p->state = TASK_RUNNING;
		}
	}
confuse_gcc1:

/* this is the scheduler proper: */
#if 0
	/* give processes that go to sleep a bit higher priority.. */
	/* This depends on the values for TASK_XXX */
	/* This gives smoother scheduling for some things, but */
	/* can be very unfair under some circumstances, so.. */
 	if (TASK_UNINTERRUPTIBLE >= (unsigned) current->state &&
	    current->counter < current->priority*2) {
		++current->counter;
	}
#endif
	c = -1;
	next = p = &init_task;
	for (;;) {
		if ((p = p->next_task) == &init_task)
			goto confuse_gcc2;
		if (p->state == TASK_RUNNING && p->counter > c)
			c = p->counter, next = p;
	}
confuse_gcc2:
	if (!c) {
		for_each_task(p)
			p->counter = (p->counter >> 1) + p->priority;
	}
	if(current != next)
		kstat.context_swtch++;
	switch_to(next);
	/* Now maybe reload the debug registers */
	if(current->debugreg[7]){
		loaddebug(0);
		loaddebug(1);
		loaddebug(2);
		loaddebug(3);
		loaddebug(6);
	};
}

Related function changes:


1. wake_up wakes every sleeping process on the circular list:

void wake_up(struct wait_queue **q)
{
	struct wait_queue *tmp;
	struct task_struct * p;

	if (!q || !(tmp = *q))
		return;
	do {
		if ((p = tmp->task) != NULL) {
			if ((p->state == TASK_UNINTERRUPTIBLE) ||
			    (p->state == TASK_INTERRUPTIBLE)) {
				p->state = TASK_RUNNING;
				if (p->counter > current->counter)
					need_resched = 1;
			}
		}
		if (!tmp->next) {
			printk("wait_queue is bad (eip = %08lx)\n",((unsigned long *) q)[-1]);
			printk("        q = %p\n",q);
			printk("       *q = %p\n",*q);
			printk("      tmp = %p\n",tmp);
			break;
		}
		tmp = tmp->next;
	} while (tmp != *q);
}



2. Adds wake_up_interruptible, which wakes only processes in interruptible sleep:

void wake_up_interruptible(struct wait_queue **q)
{
	struct wait_queue *tmp;
	struct task_struct * p;

	if (!q || !(tmp = *q))
		return;
	do {
		if ((p = tmp->task) != NULL) {
			if (p->state == TASK_INTERRUPTIBLE) {
				p->state = TASK_RUNNING;
				if (p->counter > current->counter)
					need_resched = 1;
			}
		}
		if (!tmp->next) {
			printk("wait_queue is bad (eip = %08lx)\n",((unsigned long *) q)[-1]);
			printk("        q = %p\n",q);
			printk("       *q = %p\n",*q);
			printk("      tmp = %p\n",tmp);
			break;
		}
		tmp = tmp->next;
	} while (tmp != *q);
}

3. To protect contended resources, the __down interface implements a counting semaphore:

void __down(struct semaphore * sem)
{
	struct wait_queue wait = { current, NULL };
	add_wait_queue(&sem->wait, &wait);
	current->state = TASK_UNINTERRUPTIBLE;
	while (sem->count <= 0) {
		schedule();
		current->state = TASK_UNINTERRUPTIBLE;
	}
	current->state = TASK_RUNNING;
	remove_wait_queue(&sem->wait, &wait);
}
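
For context, a minimal sketch of the release side under the same assumptions; the real up() of this era is an arch-specific inline in asm headers, so this is illustrative only:

/* illustrative sketch, not the kernel's arch-specific up() */
static inline void up(struct semaphore * sem)
{
	sem->count++;		/* release one unit                       */
	wake_up(&sem->wait);	/* waiters in __down() re-test sem->count */
}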

4. sys_nice now clamps the priority value to the range 1-35:

asmlinkage int sys_nice(long increment)
{
	int newprio;

	if (increment < 0 && !suser())
		return -EPERM;
	newprio = current->priority - increment;
	if (newprio < 1)
		newprio = 1;
	if (newprio > 35)
		newprio = 35;
	current->priority = newprio;
	return 0;
}


Linux  V1.1

Adds a diagnostic message for scheduling inside an interrupt:

	if (intr_count) {
		printk("Aiee: scheduling in interrupt\n");
		intr_count = 0;
	}

Linux V1.2

Removes system_call.s and adds the arch directory; the system-call entry moves to each architecture's entry.S.


 Linux V1.3

1. schedule() now runs the tq_scheduler task queue:

run_task_queue(&tq_scheduler);
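
A hedged usage sketch, assuming the task-queue API of that era (struct tq_struct plus queue_task); my_deferred_work and my_tq are hypothetical names. Work queued here runs in process context on the next schedule() call:

/* hypothetical driver snippet: defer work to the next schedule() */
static void my_deferred_work(void *data)
{
	/* runs in process context, from run_task_queue(&tq_scheduler) */
}

static struct tq_struct my_tq = {
	NULL,			/* next: linked in by queue_task() */
	0,			/* sync: 0 while not queued        */
	my_deferred_work,	/* routine to run                  */
	NULL			/* data passed to the routine      */
};

	/* in interrupt or syscall context: */
	queue_task(&my_tq, &tq_scheduler);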

2. The number of currently runnable processes is now counted on each pass:

nr_running = 0;
...
nr_running++;

Linux V2.0

Many changes. Source code first, then the explanations one by one:

asmlinkage void schedule(void)
{
	int c;
	struct task_struct * p;
	struct task_struct * prev, * next;
	unsigned long timeout = 0;
	int this_cpu=smp_processor_id();

/* check alarm, wake up any interruptible tasks that have got a signal */

	if (intr_count)
		goto scheduling_in_interrupt;

	if (bh_active & bh_mask) {
		intr_count = 1;
		do_bottom_half();         // 1. bottom-half support added
		intr_count = 0;
	}

	run_task_queue(&tq_scheduler);    

	need_resched = 0;
	prev = current;
	cli();
	/* move an exhausted RR process to be last.. */
	if (!prev->counter && prev->policy == SCHED_RR) {
		prev->counter = prev->priority;
		move_last_runqueue(prev);                   // 2. the run-queue concept appears
	}
	switch (prev->state) {
		case TASK_INTERRUPTIBLE:
			if (prev->signal & ~prev->blocked)
				goto makerunnable;
			timeout = prev->timeout;
			if (timeout && (timeout <= jiffies)) {
				prev->timeout = 0;
				timeout = 0;
		makerunnable:
				prev->state = TASK_RUNNING;
				break;
			}
		default:
			del_from_runqueue(prev);
		case TASK_RUNNING:
	}
	p = init_task.next_run;
	sti();
	
#ifdef __SMP__
	/*
	 *	This is safe as we do not permit re-entry of schedule()
	 */
	prev->processor = NO_PROC_ID;
#define idle_task (task[cpu_number_map[this_cpu]])
#else
#define idle_task (&init_task)
#endif	

/*
 * Note! there may appear new tasks on the run-queue during this, as
 * interrupts are enabled. However, they will be put on front of the
 * list, so our list starting at "p" is essentially fixed.
 */
/* this is the scheduler proper: */
	c = -1000;
	next = idle_task;
	while (p != &init_task) {
		int weight = goodness(p, prev, this_cpu);            // 3. new priority calculation
		if (weight > c)
			c = weight, next = p;
		p = p->next_run;
	}

	/* if all runnable processes have "counter == 0", re-calculate counters */
	if (!c) {
		for_each_task(p)
			p->counter = (p->counter >> 1) + p->priority;
	}
#ifdef __SMP__                                                                // 4. SMP support
	/*
	 *	Allocate process to CPU
	 */
	 
	 next->processor = this_cpu;
	 next->last_processor = this_cpu;
#endif	 
#ifdef __SMP_PROF__ 
	/* mark processor running an idle thread */
	if (0==next->pid)
		set_bit(this_cpu,&smp_idle_map);
	else
		clear_bit(this_cpu,&smp_idle_map);
#endif
	if (prev != next) {
		struct timer_list timer;

		kstat.context_swtch++;
		if (timeout) {
			init_timer(&timer);
			timer.expires = timeout;
			timer.data = (unsigned long) prev;
			timer.function = process_timeout;
			add_timer(&timer);
		}
		get_mmu_context(next);
		switch_to(prev,next);
		if (timeout)
			del_timer(&timer);
	}
	return;

scheduling_in_interrupt:
	printk("Aiee: scheduling in interrupt %p\n",
		__builtin_return_address(0));
}


1. Adds bottom-half support.

Two masks, bh_active and bh_mask, record pending soft-interrupt work; each call to schedule() drains all of it in one place. A sketch of the mechanism follows the snippet below.

	if (bh_active & bh_mask) {
		intr_count = 1;
		do_bottom_half();
		intr_count = 0;
	}
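
An illustrative sketch of the 32-slot bottom-half mechanism; names follow the bh API of that era (init_bh/mark_bh), but this is a sketch, not the kernel's exact code:

void (*bh_base[32])(void);	/* handlers registered via init_bh()      */
unsigned long bh_active;	/* bits set by mark_bh() in interrupt code */
unsigned long bh_mask;		/* bits enabled for registered slots       */

void mark_bh(int nr)		/* an interrupt handler marks its work     */
{
	bh_active |= 1UL << nr;
}

void do_bottom_half(void)	/* schedule() calls this                   */
{
	unsigned long active = bh_active & bh_mask;
	int i;

	for (i = 0; i < 32; i++)
		if (active & (1UL << i)) {
			bh_active &= ~(1UL << i);
			bh_base[i]();	/* run with interrupts enabled */
		}
}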


2. Defines the run queue (task_struct gains two pointers) and adds add_to_runqueue, del_from_runqueue and move_last_runqueue to operate on it.

Apart from the doubly linked circular-list manipulation, most of the code below is SMP support.

static inline void add_to_runqueue(struct task_struct * p)
{
#ifdef __SMP__
	int cpu=smp_processor_id();
#endif	
#if 1	/* sanity tests */
	if (p->next_run || p->prev_run) {
		printk("task already on run-queue\n");
		return;
	}
#endif
	if (p->counter > current->counter + 3)
		need_resched = 1;
	nr_running++;
	(p->prev_run = init_task.prev_run)->next_run = p;
	p->next_run = &init_task;
	init_task.prev_run = p;
#ifdef __SMP__
	/* this is safe only if called with cli()*/
	while(set_bit(31,&smp_process_available))
	{
		while(test_bit(31,&smp_process_available))
		{
			if(clear_bit(cpu,&smp_invalidate_needed))
			{
				local_flush_tlb();
				set_bit(cpu,&cpu_callin_map[0]);
			}
		}
	}
	smp_process_available++;
	clear_bit(31,&smp_process_available);
	if ((0!=p->pid) && smp_threads_ready)
	{
		int i;
		for (i=0;i<smp_num_cpus;i++)
		{
			if (0==current_set[cpu_logical_map[i]]->pid) 
			{
				smp_message_pass(cpu_logical_map[i], MSG_RESCHEDULE, 0L, 0);
				break;
			}
		}
	}
#endif
}

static inline void del_from_runqueue(struct task_struct * p)
{
	struct task_struct *next = p->next_run;
	struct task_struct *prev = p->prev_run;

#if 1	/* sanity tests */
	if (!next || !prev) {
		printk("task not on run-queue\n");
		return;
	}
#endif
	if (p == &init_task) {
		static int nr = 0;
		if (nr < 5) {
			nr++;
			printk("idle task may not sleep\n");
		}
		return;
	}
	nr_running--;
	next->prev_run = prev;
	prev->next_run = next;
	p->next_run = NULL;
	p->prev_run = NULL;
}

static inline void move_last_runqueue(struct task_struct * p)
{
	struct task_struct *next = p->next_run;
	struct task_struct *prev = p->prev_run;

	/* remove from list */
	next->prev_run = prev;
	prev->next_run = next;
	/* add back to list */
	p->next_run = &init_task;
	prev = init_task.prev_run;
	init_task.prev_run = p;
	p->prev_run = prev;
	prev->next_run = p;
}


3. Adds a new priority calculation.

Introduces the concept of scheduling policies:

/*
 * Scheduling policies
 */
#define SCHED_OTHER        0 // ordinary processes
#define SCHED_FIFO         1 // real-time: runs until it blocks or yields
#define SCHED_RR           2 // real-time: fixed time slices, round-robin


Adds the goodness() priority-calculation function:

/*
 * This is the function that decides how desirable a process is..
 * You can weigh different processes against each other depending
 * on what CPU they've run on lately etc to try to handle cache
 * and TLB miss penalties.
 *
 * Return values:
 *	 -1000: never select this
 *	     0: out of time, recalculate counters (but it might still be
 *		selected)
 *	   +ve: "goodness" value (the larger, the better)
 *	 +1000: realtime process, select this.
 */
static inline int goodness(struct task_struct * p, struct task_struct * prev, int this_cpu)
{
	int weight;

#ifdef __SMP__	
	/* We are not permitted to run a task someone else is running */
	if (p->processor != NO_PROC_ID)
		return -1000;                    // already running on another CPU: never select
#ifdef PAST_2_0		
	/* This process is locked to a processor group */
	if (p->processor_mask && !(p->processor_mask & (1<<this_cpu)))
		return -1000;                    // bound to other CPUs: not for this CPU
#endif		
#endif

	/*
	 * Realtime process, select the first one on the
	 * runqueue (taking priorities within processes
	 * into account).
	 */
	if (p->policy != SCHED_OTHER)
		return 1000 + p->rt_priority;    // real-time process: select immediately

	/*
	 * Give the process a first-approximation goodness value
	 * according to the number of clock-ticks it has left.
	 *
	 * Don't do any other calculations if the time slice is
	 * over..
	 */
	weight = p->counter;
	if (weight) {
			
#ifdef __SMP__
		/* Give a largish advantage to the same processor...   */
		/* (this is equivalent to penalizing other processors) */
		if (p->last_processor == this_cpu)
			weight += PROC_CHANGE_PENALTY;      // same CPU: cache/TLB advantage
#endif

		/* .. and a slight advantage to the current process */
		if (p == prev)
			weight += 1;                       // still the previous process: slight bonus
	}

	return weight;
}

4. SMP scheduling support.


Related interface changes:


Adds wake_up_process, built on add_to_runqueue; wake_up and wake_up_interruptible now call it instead of setting p->state = TASK_RUNNING directly.

inline void wake_up_process(struct task_struct * p)
{
	unsigned long flags;

	save_flags(flags);
	cli();
	p->state = TASK_RUNNING;
	if (!p->next_run)
		add_to_runqueue(p);
	restore_flags(flags);
}

sys_nice's policy changes: the priority value is clamped to the range 1 to DEF_PRIORITY*2. See the worked example after the code.

asmlinkage int sys_nice(int increment)
{
	unsigned long newprio;
	int increase = 0;

	newprio = increment;
	if (increment < 0) {
		if (!suser())
			return -EPERM;
		newprio = -increment;
		increase = 1;
	}
	if (newprio > 40)
		newprio = 40;
	/*
	 * do a "normalization" of the priority (traditionally
	 * unix nice values are -20..20, linux doesn't really
	 * use that kind of thing, but uses the length of the
	 * timeslice instead (default 150 msec). The rounding is
	 * why we want to avoid negative values.
	 */
	newprio = (newprio * DEF_PRIORITY + 10) / 20;
	increment = newprio;
	if (increase)
		increment = -increment;
	newprio = current->priority - increment;
	if (newprio < 1)
		newprio = 1;
	if (newprio > DEF_PRIORITY*2)
		newprio = DEF_PRIORITY*2;
	current->priority = newprio;
	return 0;
}
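
A worked example of the normalization, assuming HZ = 100 so that DEF_PRIORITY = 20 ticks, for a task currently at the default priority of 20:

nice(10): newprio = (10*20 + 10) / 20 = 10
          priority = 20 - 10 = 10           (half the default time slice)

nice(20): newprio = (20*20 + 10) / 20 = 20
          priority = 20 - 20 = 0  -> clamped up to 1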


Linux V2.1


Related function changes:

The wake-up functions change to uniformly use wait queues:

void wake_up(struct wait_queue **q)
{
    struct wait_queue *next;
    struct wait_queue *head;

    if (!q || !(next = *q))
        return;
    head = WAIT_QUEUE_HEAD(q);
    while (next != head) {
        struct task_struct *p = next->task;
        next = next->next;
        if (p != NULL) {
            if ((p->state == TASK_UNINTERRUPTIBLE) ||
                (p->state == TASK_INTERRUPTIBLE))
                wake_up_process(p);
        }
        if (!next)
            goto bad;
    }
    return;
bad:
    printk("wait_queue is bad (eip = %p)\n",
        __builtin_return_address(0));
    printk("        q = %p\n",q);
    printk("       *q = %p\n",*q);
}

void wake_up_interruptible(struct wait_queue **q)
{
    struct wait_queue *next;
    struct wait_queue *head;

    if (!q || !(next = *q))
        return;
    head = WAIT_QUEUE_HEAD(q);
    while (next != head) {
        struct task_struct *p = next->task;
        next = next->next;
        if (p != NULL) {
            if (p->state == TASK_INTERRUPTIBLE)
                wake_up_process(p);
        }
        if (!next)
            goto bad;
    }
    return;
bad:
    printk("wait_queue is bad (eip = %p)\n",
        __builtin_return_address(0));
    printk("        q = %p\n",q);
    printk("       *q = %p\n",*q);
}

The sleep_on interfaces gain atomic protection.

Fixes a bug in sys_nice (newprio is unsigned, so the comparison against 1 must be done as signed):

    if (newprio < 1)

==>

	if ((signed) newprio < 1)

Linux V2.2 

Substantial changes.


1. More SMP support.

2. Each queue operation is now protected by locks (spinlocks).


Source code:

asmlinkage void schedule(void)
{
	struct schedule_data * sched_data;
	struct task_struct * prev, * next;
	int this_cpu;

	prev = current;
	this_cpu = prev->processor;
	/*
	 * 'sched_data' is protected by the fact that we can run
	 * only one process per CPU.
	 */
	sched_data = & aligned_data[this_cpu].schedule_data;

	if (in_interrupt())
		goto scheduling_in_interrupt;
	release_kernel_lock(prev, this_cpu);

	/* Do "administrative" work here while we don't hold any locks */
	if (bh_active & bh_mask)
		do_bottom_half();
	run_task_queue(&tq_scheduler);

	spin_lock(&scheduler_lock);
	spin_lock_irq(&runqueue_lock);

	/* move an exhausted RR process to be last.. */
	prev->need_resched = 0;

	if (!prev->counter && prev->policy == SCHED_RR) {
		prev->counter = prev->priority;
		move_last_runqueue(prev);
	}

	switch (prev->state) {
		case TASK_INTERRUPTIBLE:
			if (signal_pending(prev)) {
				prev->state = TASK_RUNNING;
				break;
			}
		default:
			del_from_runqueue(prev);
		case TASK_RUNNING:
	}

	sched_data->prevstate = prev->state;

	{
		struct task_struct * p = init_task.next_run;
		/*
		 * This is subtle.
		 * Note how we can enable interrupts here, even
		 * though interrupts can add processes to the run-
		 * queue. This is because any new processes will
		 * be added to the front of the queue, so "p" above
		 * is a safe starting point.
		 * run-queue deletion and re-ordering is protected by
		 * the scheduler lock
		 */
		spin_unlock_irq(&runqueue_lock);
#ifdef __SMP__
		prev->has_cpu = 0;
#endif
	
/*
 * Note! there may appear new tasks on the run-queue during this, as
 * interrupts are enabled. However, they will be put on front of the
 * list, so our list starting at "p" is essentially fixed.
 */
/* this is the scheduler proper: */
		{
			int c = -1000;
			next = idle_task;
			while (p != &init_task) {
				if (can_schedule(p)) {
					int weight = goodness(p, prev, this_cpu);
					if (weight > c)
						c = weight, next = p;
				}
				p = p->next_run;
			}

			/* Do we need to re-calculate counters? */
			if (!c) {
				struct task_struct *p;
				read_lock(&tasklist_lock);
				for_each_task(p)
					p->counter = (p->counter >> 1) + p->priority;
				read_unlock(&tasklist_lock);
			}
		}
	}

 	/*
 	 * maintain the per-process 'average timeslice' value.
 	 * (this has to be recalculated even if we reschedule to
 	 * the same process) Currently this is only used on SMP:
 	 */
#ifdef __SMP__
	{
		cycles_t t, this_slice;

		t = get_cycles();
		this_slice = t - sched_data->last_schedule;
		sched_data->last_schedule = t;

		/*
		 * Simple, exponentially fading average calculation:
		 */
		prev->avg_slice = this_slice + prev->avg_slice;
		prev->avg_slice >>= 1;
	}

	/*
	 * We drop the scheduler lock early (it's a global spinlock),
	 * thus we have to lock the previous process from getting
	 * rescheduled during switch_to().
	 */
	prev->has_cpu = 1;

 	next->has_cpu = 1;
 	next->processor = this_cpu;
	spin_unlock(&scheduler_lock);
#endif /* __SMP__ */
 	if (prev != next) {
#ifdef __SMP__
		sched_data->prev = prev;
#endif
	 	kstat.context_swtch++;
		get_mmu_context(next);
		switch_to(prev,next);

		__schedule_tail();
	}
  
	reacquire_kernel_lock(current);
	return;

scheduling_in_interrupt:
	printk("Scheduling in interrupt\n");
	*(int *)0 = 0;
}

Related interface changes:

Adds reschedule_idle, which does more work between waking a process and deciding to reschedule:

void wake_up_process(struct task_struct * p)
{
	unsigned long flags;

	spin_lock_irqsave(&runqueue_lock, flags);
	p->state = TASK_RUNNING;
	if (!p->next_run) {
		add_to_runqueue(p);
		reschedule_idle(p);
	}
	spin_unlock_irqrestore(&runqueue_lock, flags);
}
This is mainly SMP support. "Higher priority" also gets a concrete threshold: the woken process's counter must exceed the current process's by more than 3.

static inline void reschedule_idle(struct task_struct * p)
{

	if (p->policy != SCHED_OTHER || p->counter > current->counter + 3) {
		current->need_resched = 1;
		return;
	}

#ifdef __SMP__
	/*
	 * ("wakeup()" should not be called before we've initialized
	 * SMP completely.
	 * Basically a not-yet initialized SMP subsystem can be
	 * considered as a not-yet working scheduler, simply dont use
	 * it before it's up and running ...)
	 *
	 * SMP rescheduling is done in 2 passes:
	 *  - pass #1: faster: 'quick decisions'
	 *  - pass #2: slower: 'lets try and find another CPU'
	 */

	/*
	 * Pass #1
	 *
	 * There are two metrics here:
	 *
	 * first, a 'cutoff' interval, currently 0-200 usecs on
	 * x86 CPUs, depending on the size of the 'SMP-local cache'.
	 * If the current process has longer average timeslices than
	 * this, then we utilize the idle CPU.
	 *
	 * second, if the wakeup comes from a process context,
	 * then the two processes are 'related'. (they form a
	 * 'gang')
	 *
	 * An idle CPU is almost always a bad thing, thus we skip
	 * the idle-CPU utilization only if both these conditions
	 * are true. (ie. a 'process-gang' rescheduling with rather
	 * high frequency should stay on the same CPU).
	 *
	 * [We can switch to something more finegrained in 2.3.]
	 */
	if ((current->avg_slice < cacheflush_time) && related(current, p))
		return;

	reschedule_idle_slow(p);
#endif /* __SMP__ */
} 


Adds new sleep interfaces that support sleeping with a timeout:

signed long schedule_timeout(signed long timeout)
{
    struct timer_list timer;
    unsigned long expire;

    switch (timeout)
    {
    case MAX_SCHEDULE_TIMEOUT:
        /*
         * These two special cases are useful to be comfortable
         * in the caller. Nothing more. We could take
         * MAX_SCHEDULE_TIMEOUT from one of the negative value
         * but I' d like to return a valid offset (>=0) to allow
         * the caller to do everything it want with the retval.
         */
        schedule();
        goto out;
    default:
        /*
         * Another bit of PARANOID. Note that the retval will be
         * 0 since no piece of kernel is supposed to do a check
         * for a negative retval of schedule_timeout() (since it
         * should never happens anyway). You just have the printk()
         * that will tell you if something is gone wrong and where.
         */
        if (timeout < 0)
        {
            printk(KERN_ERR "schedule_timeout: wrong timeout "
                   "value %lx from %p\n", timeout,
                   __builtin_return_address(0));
            goto out;
        }
    }

    expire = timeout + jiffies;

    init_timer(&timer);
    timer.expires = expire;
    timer.data = (unsigned long) current;
    timer.function = process_timeout;

    add_timer(&timer);
    schedule();
    del_timer(&timer);

    timeout = expire - jiffies;

 out:
    return timeout < 0 ? 0 : timeout;
}
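
Typical usage is a sketch like the following: the caller sets its own state first, then calls schedule_timeout(); the return value is the number of ticks left if it was woken early.

/* sketch: sleep for up to one second, or until woken */
signed long remaining;

current->state = TASK_INTERRUPTIBLE;
remaining = schedule_timeout(HZ);	/* HZ ticks == 1 second */
if (remaining > 0) {
	/* woken early: 'remaining' ticks had not yet elapsed */
}

The per-CPU scheduling data and the __schedule_tail helper that appear alongside it: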

/*
 * This one aligns per-CPU data on cacheline boundaries.
 */
static union {
    struct schedule_data {
        struct task_struct * prev;
        long prevstate;
        cycles_t last_schedule;
    } schedule_data;
    char __pad [L1_CACHE_BYTES];
} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};


static inline void __schedule_tail (void)
{
#ifdef __SMP__
    struct schedule_data * sched_data;

    /*
     * We might have switched CPUs:
     */
    sched_data = & aligned_data[smp_processor_id()].schedule_data;

    /*
     * Subtle. In the rare event that we got a wakeup to 'prev' just
     * during the reschedule (this is possible, the scheduler is pretty
     * parallel), we should do another reschedule in the next task's
     * context. schedule() will do the right thing next time around.
     * this is equivalent to 'delaying' the wakeup until the reschedule
     * has finished.
     */
    if (sched_data->prev->state != sched_data->prevstate)
        current->need_resched = 1;

    /*
     * Release the previous process ...
     *
     * We have dropped all locks, and we must make sure that we
     * only mark the previous process as no longer having a CPU
     * after all other state has been seen by other CPU's. Thus
     * the write memory barrier!
     */
    wmb();
    sched_data->prev->has_cpu = 0;
#endif /* __SMP__ */
}


void interruptible_sleep_on(struct wait_queue **p)
{
	SLEEP_ON_VAR

	current->state = TASK_INTERRUPTIBLE;

	SLEEP_ON_HEAD
	schedule();
	SLEEP_ON_TAIL
}

long interruptible_sleep_on_timeout(struct wait_queue **p, long timeout)
{
	SLEEP_ON_VAR

	current->state = TASK_INTERRUPTIBLE;

	SLEEP_ON_HEAD
	timeout = schedule_timeout(timeout);
	SLEEP_ON_TAIL

	return timeout;
}

void sleep_on(struct wait_queue **p)
{
	SLEEP_ON_VAR
	
	current->state = TASK_UNINTERRUPTIBLE;

	SLEEP_ON_HEAD
	schedule();
	SLEEP_ON_TAIL
}

long sleep_on_timeout(struct wait_queue **p, long timeout)
{
	SLEEP_ON_VAR
	
	current->state = TASK_UNINTERRUPTIBLE;

	SLEEP_ON_HEAD
	timeout = schedule_timeout(timeout);
	SLEEP_ON_TAIL

	return timeout;
}



Linux V2.3


1. More SMP changes, not discussed here.

2. The logic of schedule() itself barely changes, but goto now replaces many of the original if { ... } blocks.

3. Heavier use of locking.

Source code:

asmlinkage void schedule(void)
{
	struct schedule_data * sched_data;
	struct task_struct *prev, *next, *p;
	int this_cpu, c;

	if (tq_scheduler)
		goto handle_tq_scheduler;
tq_scheduler_back:

	prev = current;
	this_cpu = prev->processor;

	if (in_interrupt())
		goto scheduling_in_interrupt;

	release_kernel_lock(prev, this_cpu);

	/* Do "administrative" work here while we don't hold any locks */
	if (bh_mask & bh_active)
		goto handle_bh;
handle_bh_back:

	/*
	 * 'sched_data' is protected by the fact that we can run
	 * only one process per CPU.
	 */
	sched_data = & aligned_data[this_cpu].schedule_data;

	spin_lock_irq(&runqueue_lock);

	/* move an exhausted RR process to be last.. */
	if (prev->policy == SCHED_RR)
		goto move_rr_last;
move_rr_back:

	switch (prev->state) {
		case TASK_INTERRUPTIBLE:
			if (signal_pending(prev)) {
				prev->state = TASK_RUNNING;
				break;
			}
		default:
			del_from_runqueue(prev);
		case TASK_RUNNING:
	}
	prev->need_resched = 0;

repeat_schedule:

	/*
	 * this is the scheduler proper:
	 */

	p = init_task.next_run;
	/* Default process to select.. */
	next = idle_task(this_cpu);
	c = -1000;
	if (prev->state == TASK_RUNNING)
		goto still_running;
still_running_back:

	/*
	 * This is subtle.
	 * Note how we can enable interrupts here, even
	 * though interrupts can add processes to the run-
	 * queue. This is because any new processes will
	 * be added to the front of the queue, so "p" above
	 * is a safe starting point.
	 * run-queue deletion and re-ordering is protected by
	 * the scheduler lock
	 */
/*
 * Note! there may appear new tasks on the run-queue during this, as
 * interrupts are enabled. However, they will be put on front of the
 * list, so our list starting at "p" is essentially fixed.
 */
	while (p != &init_task) {
		if (can_schedule(p)) {
			int weight = goodness(prev, p, this_cpu);
			if (weight > c)
				c = weight, next = p;
		}
		p = p->next_run;
	}

	/* Do we need to re-calculate counters? */
	if (!c)
		goto recalculate;
	/*
	 * from this point on nothing can prevent us from
	 * switching to the next task, save this fact in
	 * sched_data.
	 */
	sched_data->curr = next;
#ifdef __SMP__
 	next->has_cpu = 1;
	next->processor = this_cpu;
#endif
	spin_unlock_irq(&runqueue_lock);

	if (prev == next)
		goto same_process;

#ifdef __SMP__
 	/*
 	 * maintain the per-process 'average timeslice' value.
 	 * (this has to be recalculated even if we reschedule to
 	 * the same process) Currently this is only used on SMP,
	 * and it's approximate, so we do not have to maintain
	 * it while holding the runqueue spinlock.
 	 */
	{
		cycles_t t, this_slice;

		t = get_cycles();
		this_slice = t - sched_data->last_schedule;
		sched_data->last_schedule = t;

		/*
		 * Exponentially fading average calculation, with
		 * some weight so it doesnt get fooled easily by
		 * smaller irregularities.
		 */
		prev->avg_slice = (this_slice*1 + prev->avg_slice*1)/2;
	}

	/*
	 * We drop the scheduler lock early (it's a global spinlock),
	 * thus we have to lock the previous process from getting
	 * rescheduled during switch_to().
	 */

#endif /* __SMP__ */

	kstat.context_swtch++;
	get_mmu_context(next);
	switch_to(prev, next, prev);
	__schedule_tail(prev);

same_process:
  
	reacquire_kernel_lock(current);
	return;

recalculate:
	{
		struct task_struct *p;
		spin_unlock_irq(&runqueue_lock);
		read_lock(&tasklist_lock);
		for_each_task(p)
			p->counter = (p->counter >> 1) + p->priority;
		read_unlock(&tasklist_lock);
		spin_lock_irq(&runqueue_lock);
		goto repeat_schedule;
	}

still_running:
	c = prev_goodness(prev, prev, this_cpu);
	next = prev;
	goto still_running_back;

handle_bh:
	do_bottom_half();
	goto handle_bh_back;

handle_tq_scheduler:
	run_task_queue(&tq_scheduler);
	goto tq_scheduler_back;

move_rr_last:
	if (!prev->counter) {
		prev->counter = prev->priority;
		move_last_runqueue(prev);
	}
	goto move_rr_back;

scheduling_in_interrupt:
	printk("Scheduling in interrupt\n");
	*(int *)0 = 0;
	return;
}

Related function changes:

The wake-up function:

void __wake_up(struct wait_queue **q, unsigned int mode)
{
	struct task_struct *p;
	struct wait_queue *head, *next;

        if (!q)
		goto out;
	/*
	 * this is safe to be done before the check because it
	 * means no deference, just pointer operations.
	 */
	head = WAIT_QUEUE_HEAD(q);

	read_lock(&waitqueue_lock);
	next = *q;
	if (!next)
		goto out_unlock;

	while (next != head) {
		p = next->task;
		next = next->next;
		if (p->state & mode) {
			/*
			 * We can drop the read-lock early if this
			 * is the only/last process.
			 */
			if (next == head) {
				read_unlock(&waitqueue_lock);
				wake_up_process(p);
				goto out;
			}
			wake_up_process(p);
		}
	}
out_unlock:
	read_unlock(&waitqueue_lock);
out:
	return;
}

__schedule_tail now also calls reschedule_idle, again for SMP support:

/*
 * schedule_tail() is getting called from the fork return path. This
 * cleans up all remaining scheduler things, without impacting the
 * common case.
 */
static inline void __schedule_tail (struct task_struct *prev)
{
#ifdef __SMP__
	if ((prev->state == TASK_RUNNING) &&
			(prev != idle_task(smp_processor_id())))
		reschedule_idle(prev);
	wmb();
	prev->has_cpu = 0;
#endif /* __SMP__ */
}

void schedule_tail (struct task_struct *prev)
{
	__schedule_tail(prev);
}


Linux V2.4


1. schedule() now asserts that the current process has a valid memory context:

	if (!current->active_mm) BUG();

2. Adds the prepare_to_switch part, including lazy-TLB handling: a kernel thread (next->mm == NULL) borrows the previous task's active_mm instead of switching page tables:

	prepare_to_switch();
	{
		struct mm_struct *mm = next->mm;
		struct mm_struct *oldmm = prev->active_mm;
		if (!mm) {
			if (next->active_mm) BUG();
			next->active_mm = oldmm;
			atomic_inc(&oldmm->mm_count);
			enter_lazy_tlb(oldmm, next, this_cpu);
		} else {
			if (next->active_mm != mm) BUG();
			switch_mm(oldmm, mm, next, this_cpu);
		}

		if (!prev->mm) {
			prev->active_mm = NULL;
			mmdrop(oldmm);
		}
	}

schedule() source:

asmlinkage void schedule(void)
{
	struct schedule_data * sched_data;
	struct task_struct *prev, *next, *p;
	struct list_head *tmp;
	int this_cpu, c;

	if (!current->active_mm) BUG();
need_resched_back:
	prev = current;
	this_cpu = prev->processor;

	if (in_interrupt())
		goto scheduling_in_interrupt;

	release_kernel_lock(prev, this_cpu);

	/* Do "administrative" work here while we don't hold any locks */
	if (softirq_active(this_cpu) & softirq_mask(this_cpu))
		goto handle_softirq;
handle_softirq_back:

	/*
	 * 'sched_data' is protected by the fact that we can run
	 * only one process per CPU.
	 */
	sched_data = & aligned_data[this_cpu].schedule_data;

	spin_lock_irq(&runqueue_lock);

	/* move an exhausted RR process to be last.. */
	if (prev->policy == SCHED_RR)
		goto move_rr_last;
move_rr_back:

	switch (prev->state) {
		case TASK_INTERRUPTIBLE:
			if (signal_pending(prev)) {
				prev->state = TASK_RUNNING;
				break;
			}
		default:
			del_from_runqueue(prev);
		case TASK_RUNNING:
	}
	prev->need_resched = 0;

	/*
	 * this is the scheduler proper:
	 */

repeat_schedule:
	/*
	 * Default process to select..
	 */
	next = idle_task(this_cpu);
	c = -1000;
	if (prev->state == TASK_RUNNING)
		goto still_running;

still_running_back:
	list_for_each(tmp, &runqueue_head) {
		p = list_entry(tmp, struct task_struct, run_list);
		if (can_schedule(p, this_cpu)) {
			int weight = goodness(p, this_cpu, prev->active_mm);
			if (weight > c)
				c = weight, next = p;
		}
	}

	/* Do we need to re-calculate counters? */
	if (!c)
		goto recalculate;
	/*
	 * from this point on nothing can prevent us from
	 * switching to the next task, save this fact in
	 * sched_data.
	 */
	sched_data->curr = next;
#ifdef CONFIG_SMP
 	next->has_cpu = 1;
	next->processor = this_cpu;
#endif
	spin_unlock_irq(&runqueue_lock);

	if (prev == next)
		goto same_process;

#ifdef CONFIG_SMP
 	/*
 	 * maintain the per-process 'last schedule' value.
 	 * (this has to be recalculated even if we reschedule to
 	 * the same process) Currently this is only used on SMP,
	 * and it's approximate, so we do not have to maintain
	 * it while holding the runqueue spinlock.
 	 */
 	sched_data->last_schedule = get_cycles();

	/*
	 * We drop the scheduler lock early (it's a global spinlock),
	 * thus we have to lock the previous process from getting
	 * rescheduled during switch_to().
	 */

#endif /* CONFIG_SMP */

	kstat.context_swtch++;
	/*
	 * there are 3 processes which are affected by a context switch:
	 *
	 * prev == .... ==> (last => next)
	 *
	 * It's the 'much more previous' 'prev' that is on next's stack,
	 * but prev is set to (the just run) 'last' process by switch_to().
	 * This might sound slightly confusing but makes tons of sense.
	 */
	prepare_to_switch();
	{
		struct mm_struct *mm = next->mm;
		struct mm_struct *oldmm = prev->active_mm;
		if (!mm) {
			if (next->active_mm) BUG();
			next->active_mm = oldmm;
			atomic_inc(&oldmm->mm_count);
			enter_lazy_tlb(oldmm, next, this_cpu);
		} else {
			if (next->active_mm != mm) BUG();
			switch_mm(oldmm, mm, next, this_cpu);
		}

		if (!prev->mm) {
			prev->active_mm = NULL;
			mmdrop(oldmm);
		}
	}

	/*
	 * This just switches the register state and the
	 * stack.
	 */
	switch_to(prev, next, prev);
	__schedule_tail(prev);

same_process:
	reacquire_kernel_lock(current);
	if (current->need_resched)
		goto need_resched_back;

	return;

recalculate:
	{
		struct task_struct *p;
		spin_unlock_irq(&runqueue_lock);
		read_lock(&tasklist_lock);
		for_each_task(p)
			p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
		read_unlock(&tasklist_lock);
		spin_lock_irq(&runqueue_lock);
	}
	goto repeat_schedule;

still_running:
	c = goodness(prev, this_cpu, prev->active_mm);
	next = prev;
	goto still_running_back;

handle_softirq:
	do_softirq();
	goto handle_softirq_back;

move_rr_last:
	if (!prev->counter) {
		prev->counter = NICE_TO_TICKS(prev->nice);
		move_last_runqueue(prev);
	}
	goto move_rr_back;

scheduling_in_interrupt:
	printk("Scheduling in interrupt\n");
	BUG();
	return;
}


Related code changes:

1. The run-queue operations change: the doubly linked list manipulation is abstracted into list.h, which provides generic interfaces. A condensed sketch of those primitives follows the examples.

Usage examples:

static inline void add_to_runqueue(struct task_struct * p)
{
	list_add(&p->run_list, &runqueue_head);
	nr_running++;
}

static inline void move_last_runqueue(struct task_struct * p)
{
	list_del(&p->run_list);
	list_add_tail(&p->run_list, &runqueue_head);
}

static inline void move_first_runqueue(struct task_struct * p)
{
	list_del(&p->run_list);
	list_add(&p->run_list, &runqueue_head);
}
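
The idea behind list.h, in a condensed sketch: the list_head is embedded inside the object, and list_entry() recovers the containing structure from a node pointer (this is how schedule() above turns a run_list node back into a task_struct).

/* condensed sketch of the list.h primitives used above */
struct list_head {
	struct list_head *next, *prev;
};

/* recover the enclosing structure from an embedded list_head */
#define list_entry(ptr, type, member) \
	((type *)((char *)(ptr) - (unsigned long)(&((type *)0)->member)))

/* iterate over every node hanging off 'head' */
#define list_for_each(pos, head) \
	for (pos = (head)->next; pos != (head); pos = pos->next)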

2. reschedule_idle does more work for SMP.

3. Adds a synchronous wake-up interface that skips the reschedule check after the wakeup:

static inline void wake_up_process_synchronous(struct task_struct * p)
{
	unsigned long flags;

	/*
	 * We want the common case fall through straight, thus the goto.
	 */
	spin_lock_irqsave(&runqueue_lock, flags);
	p->state = TASK_RUNNING;
	if (task_on_runqueue(p))
		goto out;
	add_to_runqueue(p);
out:
	spin_unlock_irqrestore(&runqueue_lock, flags);
}



4. Adds a family of wake-up interfaces that, depending on the mode, can choose the CPU and the wake-up style:

static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
                     unsigned int wq_mode, const int sync)
{
    struct list_head *tmp, *head;
    struct task_struct *p, *best_exclusive;
    unsigned long flags;
    int best_cpu, irq;

    if (!q)
        goto out;

    best_cpu = smp_processor_id();
    irq = in_interrupt();
    best_exclusive = NULL;
    wq_write_lock_irqsave(&q->lock, flags);

#if WAITQUEUE_DEBUG
    CHECK_MAGIC_WQHEAD(q);
#endif

    head = &q->task_list;
#if WAITQUEUE_DEBUG
        if (!head->next || !head->prev)
                WQ_BUG();
#endif
    tmp = head->next;
    while (tmp != head) {
        unsigned int state;
                wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);

        tmp = tmp->next;

#if WAITQUEUE_DEBUG
        CHECK_MAGIC(curr->__magic);
#endif
        p = curr->task;
        state = p->state;
        if (state & mode) {
#if WAITQUEUE_DEBUG
            curr->__waker = (long)__builtin_return_address(0);
#endif
            /*
             * If waking up from an interrupt context then
             * prefer processes which are affine to this
             * CPU.
             */
            if (irq && (curr->flags & wq_mode & WQ_FLAG_EXCLUSIVE)) {
                if (!best_exclusive)
                    best_exclusive = p;
                if (p->processor == best_cpu) {
                    best_exclusive = p;
                    break;
                }
            } else {
                if (sync)
                    wake_up_process_synchronous(p);
                else
                    wake_up_process(p);
                if (curr->flags & wq_mode & WQ_FLAG_EXCLUSIVE)
                    break;
            }
        }
    }
    if (best_exclusive) {
        if (sync)
            wake_up_process_synchronous(best_exclusive);
        else
            wake_up_process(best_exclusive);
    }
    wq_write_unlock_irqrestore(&q->lock, flags);
out:
    return;
}

void __wake_up(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode)
{
    __wake_up_common(q, mode, wq_mode, 0);
}

void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode)
{
    __wake_up_common(q, mode, wq_mode, 1);
}


Linux V2.5

1. goto is no longer used heavily; instead, if conditions are annotated with unlikely().
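
likely()/unlikely() wrap gcc's __builtin_expect so the compiler places the expected path on the straight-line (fall-through) side; roughly:

/* roughly as defined in the kernel headers of this era */
#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)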

Source code:

asmlinkage void schedule(void)
{
	struct schedule_data * sched_data;
	struct task_struct *prev, *next, *p;
	struct list_head *tmp;
	int this_cpu, c;


	spin_lock_prefetch(&runqueue_lock);

	if (!current->active_mm) BUG();
need_resched_back:
	prev = current;
	this_cpu = prev->processor;

	if (unlikely(in_interrupt())) {
		printk("Scheduling in interrupt\n");
		BUG();
	}

	release_kernel_lock(prev, this_cpu);

	/*
	 * 'sched_data' is protected by the fact that we can run
	 * only one process per CPU.
	 */
	sched_data = & aligned_data[this_cpu].schedule_data;

	spin_lock_irq(&runqueue_lock);

	/* move an exhausted RR process to be last.. */
	if (unlikely(prev->policy == SCHED_RR))
		if (!prev->counter) {
			prev->counter = NICE_TO_TICKS(prev->nice);
			move_last_runqueue(prev);
		}

	switch (prev->state) {
		case TASK_INTERRUPTIBLE:
			if (signal_pending(prev)) {
				prev->state = TASK_RUNNING;
				break;
			}
		default:
			del_from_runqueue(prev);
		case TASK_RUNNING:;
	}
	prev->need_resched = 0;

	/*
	 * this is the scheduler proper:
	 */

repeat_schedule:
	/*
	 * Default process to select..
	 */
	next = idle_task(this_cpu);
	c = -1000;
	list_for_each(tmp, &runqueue_head) {
		p = list_entry(tmp, struct task_struct, run_list);
		if (can_schedule(p, this_cpu)) {
			int weight = goodness(p, this_cpu, prev->active_mm);
			if (weight > c)
				c = weight, next = p;
		}
	}

	/* Do we need to re-calculate counters? */
	if (unlikely(!c)) {
		struct task_struct *p;

		spin_unlock_irq(&runqueue_lock);
		read_lock(&tasklist_lock);
		for_each_task(p)
			p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
		read_unlock(&tasklist_lock);
		spin_lock_irq(&runqueue_lock);
		goto repeat_schedule;
	}

	/*
	 * from this point on nothing can prevent us from
	 * switching to the next task, save this fact in
	 * sched_data.
	 */
	sched_data->curr = next;
	task_set_cpu(next, this_cpu);
	spin_unlock_irq(&runqueue_lock);

	if (unlikely(prev == next)) {
		/* We won't go through the normal tail, so do this by hand */
		prev->policy &= ~SCHED_YIELD;
		goto same_process;
	}

#ifdef CONFIG_SMP
 	/*
 	 * maintain the per-process 'last schedule' value.
 	 * (this has to be recalculated even if we reschedule to
 	 * the same process) Currently this is only used on SMP,
	 * and it's approximate, so we do not have to maintain
	 * it while holding the runqueue spinlock.
 	 */
 	sched_data->last_schedule = get_cycles();

	/*
	 * We drop the scheduler lock early (it's a global spinlock),
	 * thus we have to lock the previous process from getting
	 * rescheduled during switch_to().
	 */

#endif /* CONFIG_SMP */

	kstat.context_swtch++;
	/*
	 * there are 3 processes which are affected by a context switch:
	 *
	 * prev == .... ==> (last => next)
	 *
	 * It's the 'much more previous' 'prev' that is on next's stack,
	 * but prev is set to (the just run) 'last' process by switch_to().
	 * This might sound slightly confusing but makes tons of sense.
	 */
	prepare_to_switch();
	{
		struct mm_struct *mm = next->mm;
		struct mm_struct *oldmm = prev->active_mm;
		if (!mm) {
			if (next->active_mm) BUG();
			next->active_mm = oldmm;
			atomic_inc(&oldmm->mm_count);
			enter_lazy_tlb(oldmm, next, this_cpu);
		} else {
			if (next->active_mm != mm) BUG();
			switch_mm(oldmm, mm, next, this_cpu);
		}

		if (!prev->mm) {
			prev->active_mm = NULL;
			mmdrop(oldmm);
		}
	}

	/*
	 * This just switches the register state and the
	 * stack.
	 */
	switch_to(prev, next, prev);
	__schedule_tail(prev);

same_process:
	reacquire_kernel_lock(current);
	if (current->need_resched)
		goto need_resched_back;
	return;
}

Related function changes:

1. The wake-up functions are consolidated into try_to_wake_up, with parameter-validity checks added:

static inline int try_to_wake_up(struct task_struct * p, int synchronous)
{
	unsigned long flags;
	int success = 0;

	/*
	 * We want the common case fall through straight, thus the goto.
	 */
	spin_lock_irqsave(&runqueue_lock, flags);
	p->state = TASK_RUNNING;
	if (task_on_runqueue(p))
		goto out;
	add_to_runqueue(p);
	if (!synchronous || !(p->cpus_allowed & (1 << smp_processor_id())))
		reschedule_idle(p);
	success = 1;
out:
	spin_unlock_irqrestore(&runqueue_lock, flags);
	return success;
}

inline int wake_up_process(struct task_struct * p)
{
	return try_to_wake_up(p, 0);
}

void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr)
{
	if (q) {
		unsigned long flags;
		wq_read_lock_irqsave(&q->lock, flags);
		__wake_up_common(q, mode, nr, 0);
		wq_read_unlock_irqrestore(&q->lock, flags);
	}
}

void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)
{
	if (q) {
		unsigned long flags;
		wq_read_lock_irqsave(&q->lock, flags);
		__wake_up_common(q, mode, nr, 1);
		wq_read_unlock_irqrestore(&q->lock, flags);
	}
}

