1 重要结构体介绍(定义进程块)
/* Number of task (PCB) slots managed by this toy kernel. */
#define MAX_TASK_NUM 4
/*
 * Size in bytes of each task's kernel stack.
 * The expansion is parenthesized so the macro stays a single value inside
 * larger expressions such as `x % KERNEL_STACK_SIZE` or `x / KERNEL_STACK_SIZE`.
 */
#define KERNEL_STACK_SIZE (1024*8)
/* Saved CPU state of a task: where it resumes and which stack it uses. */
struct Thread {
unsigned long ip; /* saved instruction pointer (eip) */
unsigned long sp; /* saved stack pointer (esp) */
};
----------------------
//定义进程管理相关的数据结构
/* Process control block: everything the toy scheduler keeps per task. */
typedef struct PCB {
int pid; /* process ID */
volatile long state; /* process state: -1 unrunnable, 0 runnable, >0 stopped */
char stack[KERNEL_STACK_SIZE]; /* this task's kernel stack */
struct Thread thread; /* saved ip/sp used when switching to or away from this task */
unsigned long task_entry; /* address of the task's entry function */
struct PCB *next; /* next task in the linked list of PCBs */
}tPCB;
2 内核代码入口my_start_kernel分析
my_start_kernel主要完成两件事:初始化所有进程,并启动进程0。先贴代码:
/*
 * Kernel entry point of the toy kernel: fill in every PCB in task[], link
 * them into a circular list, then jump into task 0 on task 0's own stack.
 */
void __init my_start_kernel(void)
{
int pid = 0;
int i;
/* Initialize process 0: runnable, entry and resume address both my_process */
task[pid].pid = pid;
task[pid].state = 0;/* -1 unrunnable, 0 runnable, >0 stopped */
task[pid].task_entry = task[pid].thread.ip = (unsigned long)my_process;
/* initial sp is the address of the last byte of this task's stack array */
task[pid].thread.sp = (unsigned long)&task[pid].stack[KERNEL_STACK_SIZE-1];
/* a single-element circular list: task 0 points at itself */
task[pid].next = &task[pid];
/*
 * Create the remaining tasks: copy task[0], then give each copy its own pid
 * and stack pointer and insert it into the list right after task[i-1].
 */
for(i=1;i<MAX_TASK_NUM;i++)
{
memcpy(&task[i],&task[0],sizeof(tPCB));
task[i].pid = i;
task[i].thread.sp = (unsigned long)(&task[i].stack[KERNEL_STACK_SIZE-1]);
task[i].next = task[i-1].next;
task[i-1].next = &task[i];
}
/* start process 0 by task[0] */
pid = 0;
my_current_task = &task[pid];
asm volatile(
"movl %1,%%esp\n\t" /* esp = task[pid].thread.sp: switch to the task's own (still empty) stack */
"pushl %1\n\t" /* push that same sp value; it stands in for a saved ebp (the operand is thread.sp, not the ebp register) */
"pushl %0\n\t" /* push task[pid].thread.ip, the address where the task starts executing */
"ret\n\t" /* pop the pushed address into eip, i.e. jump to task[pid].thread.ip */
:
: "c" (task[pid].thread.ip),"d" (task[pid].thread.sp) /* inputs: "c" = %ecx holds thread.ip (%0), "d" = %edx holds thread.sp (%1) */
);
}
- a.从上面的代码可以看出,内联汇编之前的部分(约前20行)初始化了4个进程,并把它们加入循环链表,方便在进程调度时切换进程。这不是my_start_kernel的核心,就不赘述了。
- b.最后的四句内联汇编是my_start_kernel的核心代码,作用是启动并执行进程0。请看下面的堆栈变化过程分析。
2.1 进程初始化堆栈变化过程
- 将esp寄存器指向进程0的堆栈栈底,task[0].thread.sp的初始值即为进程0的堆栈栈底。
- 按惯例这一步应将当前ebp寄存器的值入栈;因为是空栈,esp和ebp相同,所以这里为了简化,代码实际压入的是进程堆栈栈顶的值task[0].thread.sp(pushl %1)。压栈后esp寄存器指向的位置也相应发生变化。
- 将当前进程的eip(这里是初始化的值my_process(void)函数的位置)入栈,相应的esp寄存器指向的位置也发生了变化。
- ret指令将栈顶位置的task[0].thread.ip,也就是my_process(void)函数的位置放入eip寄存器中,相应的esp寄存器指向的位置也发生了变化。
- 可能上面的描述听着还有些不明白我们来复习一下ret指令。
- ret指令可以分解为:
- popl %eip,popl指令又可以分解为
- movl (%esp), %eip ——>eip = *(uint32_t *)esp
- addl $4, %esp ——>esp = esp + 4
- eip永远指向cpu的下一条指令,此时eip的值是task[0].thread.ip,也就是my_process(void)函数的位置。故ret完后就马上会运行my_process,达到启动0进程的目的。
3 进程调度代码分析
- 接下来运行my_process函数每1000000次判断一次是否需要调用my_schedule进行进程主动调度。
/*
 * Body shared by every task: spin forever, and once every 1000000
 * iterations report which task is running and, if the timer has requested
 * it, yield the CPU through my_schedule().
 */
void my_process(void)
{
    for (;;) {
        ++i;
        /* only act on every 1000000th pass through the loop */
        if (i % 1000000 != 0)
            continue;

        printk(KERN_NOTICE "this is process %d -\n", my_current_task->pid);
        if (my_need_sched == 1) {
            /* clear the request before switching so it is handled once */
            my_need_sched = 0;
            my_schedule();
        }
        printk(KERN_NOTICE "this is process %d +\n", my_current_task->pid);
    }
}
- my_need_sched标志位由定时器中断改变,设置时间片的大小,时间片用完时设置一下调度标志my_need_sched。
/*
 * Timer tick handler: on every 1000th tick, if no reschedule is already
 * pending, log a message and raise my_need_sched. Always counts the tick.
 */
void my_timer_handler(void)
{
    if (time_count % 1000 == 0) {
        /* do not log or re-raise while a request is still pending */
        if (my_need_sched != 1) {
            printk(KERN_NOTICE ">>>my_timer_handler here<<<\n");
            my_need_sched = 1;
        }
    }
    time_count++;
}
- 当my_need_sched置1后调用my_schedule进行进程调度,先贴代码,理解下面进程调度代码的关键是理解这些汇编做了什么。
/*
 * Switch from the current task to the next one in the PCB list.
 *
 * Does nothing if there is no current task or it has no successor. If the
 * successor's state is not 0 (runnable), only the ">>>my_schedule<<<" message
 * is printed and my_current_task is left unchanged.
 *
 * NOTE(review): `cur` is an ordinary named label inside the asm text; if the
 * compiler ever emits this asm statement more than once, the assembler would
 * presumably see a duplicate symbol. A local numeric label is the usual way
 * to avoid that -- confirm before reusing this code.
 * NOTE(review): the asm changes esp/ebp and writes memory but declares no
 * clobbers; verify this is acceptable for the compiler flags in use.
 */
void my_schedule(void)
{
tPCB * next;
tPCB * prev;
if(my_current_task == NULL
|| my_current_task->next == NULL)
{
return;
}
printk(KERN_NOTICE ">>>my_schedule<<<\n");
/* schedule: candidate is simply the next PCB in the list */
next = my_current_task->next;
prev = my_current_task;
if(next->state == 0)/* -1 unrunnable, 0 runnable, >0 stopped */
{
my_current_task = next;
printk(KERN_NOTICE ">>>switch %d to %d<<<\n",prev->pid,next->pid);
/* switch to next process */
asm volatile(
"pushl %%ebp\n\t" /* save prev's ebp on prev's stack */
"movl %%esp,%0\n\t" /* save prev's esp into prev->thread.sp */
"movl %2,%%esp\n\t" /* load next->thread.sp into esp: from here on we run on next's stack */
"movl $cur, %1\n\t" /* store the address of label cur in prev->thread.ip, so prev resumes at cur when it is switched back in */
"pushl %3\n\t" /* push next->thread.ip, the address where next continues */
"ret\n\t" /* pop that address into eip, i.e. jump to next->thread.ip */
"cur:\t" /* label cur: a task that was switched out here resumes at this point */
"popl %%ebp\n\t" /* restore the ebp that was pushed before the task was switched out */
: "=m" (prev->thread.sp),"=m" (prev->thread.ip)
: "m" (next->thread.sp),"m" (next->thread.ip)
);
}
return;
}
3.1 进程调度时堆栈的变化过程。
3.1.1 执行pushl %%ebp前的堆栈状态
3.1.2 pushl %%ebp 保存process 0进程上下文,将process 0的ebp寄存器压栈保存起来。注意此时ebp寄存器指向my_schedule函数的栈顶,这也很好理解:因为此时进程0刚好执行到my_schedule,my_schedule也有自己的堆栈。
3.1.3 movl %%esp, %0 把当前esp的值保存到prev->thread.sp,作用就是保存进程0的上文。由于prev指向的是全局变量,所以当前堆栈无变化。
prev->thread.sp = my_schedule 0(ebp)
3.1.4 movl %2, %%esp,将next->thread.sp放入esp寄存器,即将工作堆栈切换到next进程。
3.1.5 movl $cur, %1, 将标号cur保存到prev->thread.ip,即保存进程0的下文。prev是指向全局变量所以堆栈没变化。
3.1.6 pushl %3 将进程1的入口函数压栈
3.1.7 ret 进程1的入口赋值给eip,有没有很熟悉在初始化进程的时候也是用同样的套路执行进程0的。
eip = my_process1
- 至此已完成了进程0——>进程1的切换,从上面的堆栈示意图中可以看出,这跟我们传统理解的堆栈排布有些不一样。ebp没有指向进程1的栈顶,其实这是没关系的,因为在调用my_process1的时候会重新给ebp和esp赋值。如果为了规范,可以在movl %2, %%esp后面加一句movl %2, %%ebp(个人感觉比较鸡肋(汇编代码要尽量刀刀到肉))。
3.2 进程1执行过程发生调度,恢复进程0上下文并执行进程0
- 为了简便,假设系统只有两个进程,分别是进程0和进程1。进程0由内核启动时初始化并执行,随后发生调度,开始执行进程1(进程1此前从未被执行过,这是它第一次被调度执行)。下面分析进程1在运行过程中再次发生调度、切换回进程0时堆栈的变化。
3.2.1 执行 pushl %%ebp前堆栈的状态
3.2.2 pushl %%ebp 保存process 1进程上下文,将process 1的ebp寄存器压栈保存起来,注意此时ebp寄存器指向my_schedule函数的栈顶。
3.2.3 movl %%esp, %0 把当前esp的值保存到prev->thread.sp,作用就是保存进程1的上文。此时prev指的是进程1,next指的是进程0。由于prev指向的是全局变量,所以当前堆栈无变化。
prev->thread.sp = my_schedule 1(ebp)
3.2.4 movl %2, %%esp,将next->thread.sp放入esp寄存器,即:将工作堆栈切换到next进程(进程0)而进程0已经运行过一次next->thread.sp等于my_schedule0(ebp)。
3.2.5 movl $cur, %1 将标号cur保存到prev->thread.ip,即保存进程1的下文。prev(进程1)是指向全局变量所以堆栈没变化。
3.2.6 pushl %3 将next->thread.ip压栈,而next->thread.ip等于标号cur
3.2.7 ret 将标号cur出栈赋给eip。
eip = $cur
3.2.8 执行标号cur即,popl %%ebp
ebp = my_schedule(ebp)
- 至此就恢复了进程0的上下文环境,进程0继续执行剩下的代码。多个进程也是一样的分析方法这里就不展开了。
4 学习视频:
5 代码补丁
From 0e09ae188864056ba96fa5dc2ec13818d34d9c22 Mon Sep 17 00:00:00 2001
From: weidonghui <weidonghui@allwinnertech.com>
Date: Tue, 26 May 2020 00:19:42 +0800
Subject: [PATCH] process scheduling and process context switching
To: shuge <shuge@allwinnertech.com>
Cc: kevin <kevin@allwinnertech.com>,
sunny <sunny@allwinnertech.com>,
leafy <leafy.myeh@allwinnertech.com>,
liugang <liugang@allwinnertech.com>,
others
Signed-off-by: weidonghui <weidonghui@allwinnertech.com>
---
mykernel/myinterrupt.c | 87 ++++++++++++++++++++------------
mykernel/mymain.c | 131 ++++++++++++++++++++++---------------------------
mykernel/mypcb.h | 28 +++++++++++
3 files changed, 144 insertions(+), 102 deletions(-)
create mode 100644 mykernel/mypcb.h
diff --git a/mykernel/myinterrupt.c b/mykernel/myinterrupt.c
index a7433dc..19cacaf 100644
--- a/mykernel/myinterrupt.c
+++ b/mykernel/myinterrupt.c
@@ -2,46 +2,71 @@
* linux/mykernel/myinterrupt.c
*
* Kernel internal my_timer_handler
+ * Change IA32 to x86-64 arch, 2020/4/26
*
- * Copyright (C) 2013 Mengning
+ * Copyright (C) 2013, 2020 Mengning
*
*/
-#include <linux/kernel_stat.h>
-#include <linux/export.h>
-#include <linux/interrupt.h>
-#include <linux/percpu.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/pid_namespace.h>
-#include <linux/notifier.h>
-#include <linux/thread_info.h>
-#include <linux/time.h>
-#include <linux/jiffies.h>
-#include <linux/posix-timers.h>
-#include <linux/cpu.h>
-#include <linux/syscalls.h>
-#include <linux/delay.h>
-#include <linux/tick.h>
-#include <linux/kallsyms.h>
-#include <linux/irq_work.h>
-#include <linux/sched.h>
-#include <linux/sched/sysctl.h>
-#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/tty.h>
+#include <linux/vmalloc.h>
-#include <asm/uaccess.h>
-#include <asm/unistd.h>
-#include <asm/div64.h>
-#include <asm/timex.h>
-#include <asm/io.h>
+#include "mypcb.h"
-#define CREATE_TRACE_POINTS
-#include <trace/events/timer.h>
+extern tPCB task[MAX_TASK_NUM];
+extern tPCB * my_current_task;
+extern volatile int my_need_sched;
+volatile int time_count = 0;
/*
* Called by timer interrupt.
+ * it runs in the name of current running process,
+ * so it use kernel stack of current running process
*/
void my_timer_handler(void)
{
- printk(KERN_NOTICE "\n>>>>>>>>>>>>>>>>>my_timer_handler here<<<<<<<<<<<<<<<<<<\n\n");
+ if(time_count%1000 == 0 && my_need_sched != 1)
+ {
+ printk(KERN_NOTICE ">>>my_timer_handler here<<<\n");
+ my_need_sched = 1;
+ }
+ time_count ++ ;
+ return;
+}
+
+void my_schedule(void)
+{
+ tPCB * next;
+ tPCB * prev;
+
+ if(my_current_task == NULL
+ || my_current_task->next == NULL)
+ {
+ return;
+ }
+ printk(KERN_NOTICE ">>>my_schedule<<<\n");
+ /* schedule */
+ next = my_current_task->next;
+ prev = my_current_task;
+ if(next->state == 0)/* -1 unrunnable, 0 runnable, >0 stopped */
+ {
+ my_current_task = next;
+ printk(KERN_NOTICE ">>>switch %d to %d<<<\n",prev->pid,next->pid);
+ /* switch to next process */
+ asm volatile(
+ "pushl %%ebp\n\t" /* save ebp of prev 保存当前ebp到堆栈中*/
+ "movl %%esp,%0\n\t" /* save esp of prev 保存当前esp到当前进程pcb中*/
+ "movl %2,%%esp\n\t" /* restore rsp of next 将next进程到堆栈栈顶到值存在esp寄存器*/
+ "movl $cur, %1\n\t" /* save eip of prev 保存当前进程到eip值,下次恢复进程后在标号1开始执行*/
+ "pushl %3\n\t" /* 将next进程继续执行到代码位置(标号1)压栈*/
+ "ret\n\t" /* restore eip of next 出栈标号1到eip寄存器*/
+ "cur:\t" /* next process start here 标号1,即next进程开始执行的位置*/
+ "popl %%ebp\n\t" /*恢复ebp寄存器的值*/
+ : "=m" (prev->thread.sp),"=m" (prev->thread.ip)
+ : "m" (next->thread.sp),"m" (next->thread.ip)
+ );
+ }
+ return;
}
diff --git a/mykernel/mymain.c b/mykernel/mymain.c
index a1551a3..9337838 100644
--- a/mykernel/mymain.c
+++ b/mykernel/mymain.c
@@ -2,92 +2,81 @@
* linux/mykernel/mymain.c
*
* Kernel internal my_start_kernel
+ * Change IA32 to x86-64 arch, 2020/4/26
*
- * Copyright (C) 2013 Mengning
+ * Copyright (C) 2013, 2020 Mengning
*
*/
#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/kernel.h>
-#include <linux/syscalls.h>
-#include <linux/stackprotector.h>
#include <linux/string.h>
#include <linux/ctype.h>
-#include <linux/delay.h>
-#include <linux/ioport.h>
-#include <linux/init.h>
-#include <linux/initrd.h>
-#include <linux/bootmem.h>
-#include <linux/acpi.h>
#include <linux/tty.h>
-#include <linux/percpu.h>
-#include <linux/kmod.h>
#include <linux/vmalloc.h>
-#include <linux/kernel_stat.h>
-#include <linux/start_kernel.h>
-#include <linux/security.h>
-#include <linux/smp.h>
-#include <linux/profile.h>
-#include <linux/rcupdate.h>
-#include <linux/moduleparam.h>
-#include <linux/kallsyms.h>
-#include <linux/writeback.h>
-#include <linux/cpu.h>
-#include <linux/cpuset.h>
-#include <linux/cgroup.h>
-#include <linux/efi.h>
-#include <linux/tick.h>
-#include <linux/interrupt.h>
-#include <linux/taskstats_kern.h>
-#include <linux/delayacct.h>
-#include <linux/unistd.h>
-#include <linux/rmap.h>
-#include <linux/mempolicy.h>
-#include <linux/key.h>
-#include <linux/buffer_head.h>
-#include <linux/page_cgroup.h>
-#include <linux/debug_locks.h>
-#include <linux/debugobjects.h>
-#include <linux/lockdep.h>
-#include <linux/kmemleak.h>
-#include <linux/pid_namespace.h>
-#include <linux/device.h>
-#include <linux/kthread.h>
-#include <linux/sched.h>
-#include <linux/signal.h>
-#include <linux/idr.h>
-#include <linux/kgdb.h>
-#include <linux/ftrace.h>
-#include <linux/async.h>
-#include <linux/kmemcheck.h>
-#include <linux/sfi.h>
-#include <linux/shmem_fs.h>
-#include <linux/slab.h>
-#include <linux/perf_event.h>
-#include <linux/file.h>
-#include <linux/ptrace.h>
-#include <linux/blkdev.h>
-#include <linux/elevator.h>
-#include <asm/io.h>
-#include <asm/bugs.h>
-#include <asm/setup.h>
-#include <asm/sections.h>
-#include <asm/cacheflush.h>
-#ifdef CONFIG_X86_LOCAL_APIC
-#include <asm/smp.h>
-#endif
+#include "mypcb.h"
+
+tPCB task[MAX_TASK_NUM];
+tPCB * my_current_task = NULL;
+volatile int my_need_sched = 0;
+
+void my_process(void);
+
void __init my_start_kernel(void)
{
- int i = 0;
+ int pid = 0;
+ int i;
+ /* Initialize process 0*/
+ task[pid].pid = pid;
+ task[pid].state = 0;/* -1 unrunnable, 0 runnable, >0 stopped */
+ task[pid].task_entry = task[pid].thread.ip = (unsigned long)my_process;
+ task[pid].thread.sp = (unsigned long)&task[pid].stack[KERNEL_STACK_SIZE-1];
+ task[pid].next = &task[pid];
+ /*fork more process */
+ for(i=1;i<MAX_TASK_NUM;i++)
+ {
+ memcpy(&task[i],&task[0],sizeof(tPCB));
+ task[i].pid = i;
+ task[i].thread.sp = (unsigned long)(&task[i].stack[KERNEL_STACK_SIZE-1]);
+ task[i].next = task[i-1].next;
+ task[i-1].next = &task[i];
+ }
+ /* start process 0 by task[0] */
+ pid = 0;
+ my_current_task = &task[pid];
+ asm volatile(
+ "movl %1,%%esp\n\t" /* set task[pid].thread.sp to esp 将进程原堆栈栈顶到地址(这里是初始化到值)存入esp寄存器*/
+ "pushl %1\n\t" /* push rbp 将当前ebp寄存器值入栈*/
+ "pushl %0\n\t" /* push task[pid].thread.ip 将当前进程到eip(这里是初始化到值)入栈*/
+ "ret\n\t" /* pop task[pid].thread.ip to rip ret命令正好可以让入栈到进程eip保存到eip寄存器中*/
+ :
+ : "c" (task[pid].thread.ip),"d" (task[pid].thread.sp) /* input c or d mean %ecx/%edx*/
+ );
+}
+
+int i = 0;
+
+void my_process(void)
+{
+ int inied_pid=0xff;
while(1)
{
i++;
- if(i%100000 == 0)
- printk(KERN_NOTICE "my_start_kernel here %d \n",i);
-
+ if(i%10000000 == 0)
+ {
+ if (inied_pid != my_current_task->pid)
+ printk(KERN_NOTICE "this is process %d entry\n",my_current_task->pid);
+ if(my_need_sched == 1)
+ {
+ my_need_sched = 0;
+ my_schedule();
+ printk(KERN_NOTICE "cur process %d sched finish\n", my_current_task->pid);
+ }
+ if (inied_pid != my_current_task->pid) {
+ printk(KERN_NOTICE "this is process %d end\n",my_current_task->pid);
+ inied_pid = my_current_task->pid;
+ }
+ }
}
}
diff --git a/mykernel/mypcb.h b/mykernel/mypcb.h
new file mode 100644
index 0000000..e5bfa16
--- /dev/null
+++ b/mykernel/mypcb.h
@@ -0,0 +1,28 @@
+/* linux/mykernel/mypcb.h
+ *
+ * Kernel internal PCB types
+ *
+ * Copyright (C) 2013 Menging
+ * */
+
+#define MAX_TASK_NUM 4
+#define KERNEL_STACK_SIZE 1024*8
+
+
+/* CPU-specifice state of this task */
+
+struct Thread {
+ unsigned long ip;
+ unsigned long sp;
+};
+
+typedef struct PCB {
+ int pid;
+ volatile long state; /*-1 unrunnable, 0 runnable, >0 stopped*/
+ char stack[KERNEL_STACK_SIZE]; /* CPU-specifice state of this task */
+ struct Thread thread;
+ unsigned long task_entry;
+ struct PCB *next;
+}tPCB;
+
+void my_schedule(void);
--
2.7.4