白杰 + 原创作品转载请注明出处 + 《Linux内核分析》MOOC课程http://mooc.study.163.com/course/USTC-1000029000
一、理论分析
- 进程控制块(PCB)是操作系统中最重要的数据结构之一,存放着操作系统用于描述进程情况及控制进程运行所需的全部信息。Linux内核中的PCB由结构体task_struct描述,该结构体定义在:
/include/linux/sched.h
中,主要包含以下内容:
- 进程状态
- 进程的内核堆栈
- 进程的标志位
- 运行队列
- 进程优先级
- 进程调度
- 进程链表
- 进程地址空间
- 进程内存管理
- 进程标识符
- 父子进程关系
- 进程间通信
- 进程切换
- 文件系统
- 其他信息
- 相关代码如下:
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped, 用来描述进程的状态*/
void *stack; /*指定进程的内核堆栈*/
atomic_t usage;
unsigned int flags; /* per process flags, defined below */
unsigned int ptrace;
...
int on_rq; /*运行队列*/
/*优先级相关*/
int prio, static_prio, normal_prio;
unsigned int rt_priority;
/*进程调度*/
const struct sched_class *sched_class;
struct sched_entity se;
struct sched_rt_entity rt;
...
...
struct list_head tasks; /*所有进程的链表*/
struct mm_struct *mm, *active_mm; /*进程地址空间和内存管理,每个进程有自己独立的地址空间*/
...
...
/*定义进程标识符*/
pid_t pid;
pid_t tgid;
...
...
/*进程父子关系管理*/
1337 /*
1338 * pointers to (original) parent process, youngest child, younger sibling,
1339 * older sibling, respectively. (p->father can be replaced with
1340 * p->real_parent->pid)
1341 */
1342 struct task_struct __rcu *real_parent; /* real parent process */
1343 struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
1344 /*
1345 * children/sibling forms the list of my natural children
1346 */
1347 struct list_head children; /* list of my children */
1348 struct list_head sibling; /* linkage in my parent's children list */
1349 struct task_struct *group_leader; /* threadgroup leader */
1350 /*ptrace相关,用于调试*/
1351 /*
1352 * ptraced is the list of tasks this task is using ptrace on.
1353 * This includes both natural children and PTRACE_ATTACH targets.
1354 * p->ptrace_entry is p's link on the p->parent->ptraced list.
1355 */
1356 struct list_head ptraced;
1357 struct list_head ptrace_entry;
1358
1359 /* PID/PID hash table linkage. */
1360 struct pid_link pids[PIDTYPE_MAX];
1361 struct list_head thread_group;
1362 struct list_head thread_node;
1363
1364 struct completion *vfork_done; /* for vfork() */
1365 int __user *set_child_tid; /* CLONE_CHILD_SETTID */
1366 int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
1367 /*时间相关代码*/
1368 cputime_t utime, stime, utimescaled, stimescaled;
1369 cputime_t gtime;
...
...
1411/* CPU-specific state of this task */
1412 struct thread_struct thread; /*在进程切换时起作用*/
1413/* filesystem information */
1414 struct fs_struct *fs;
1415/* open file information */
1416 struct files_struct *files;
1417/* namespaces */
1418 struct nsproxy *nsproxy;
1419/* signal handlers */
1420 struct signal_struct *signal;
1421 struct sighand_struct *sighand;
1422
1423 sigset_t blocked, real_blocked;
1424 sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
1425 struct sigpending pending;
1426
1427 unsigned long sas_ss_sp;
1428 size_t sas_ss_size;
1429 int (*notifier)(void *priv);
1430 void *notifier_data;
1431 sigset_t *notifier_mask;
1432 struct callback_head *task_works;
1433
1434 struct audit_context *audit_context;
1435 ...
...
1595 /*
1596 * cache last used pipe for splice
1597 */
1598 struct pipe_inode_info *splice_pipe;
1599
1600 struct page_frag task_frag;
...
};
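内核中进程的状态转换如下图所示:新创建的进程处于TASK_RUNNING(就绪);被调度器选中后占有CPU运行,状态值仍是TASK_RUNNING;等待某个事件或资源时转入TASK_INTERRUPTIBLE或TASK_UNINTERRUPTIBLE,被唤醒后回到TASK_RUNNING(就绪);进程终止后进入僵尸状态,等待父进程回收。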
在/include/linux/sched.h中定义了很多task的状态,主要的几个状态如下:
#define TASK_RUNNING 0
#define TASK_INTERRUPTIBLE 1
#define TASK_UNINTERRUPTIBLE 2
- 子进程的创建
fork、vfork和clone都是用户态程序用来创建子进程的系统调用,它们进入内核后最终都通过调用do_fork()来创建新进程。
- 用户态中fork系统调用通过int 0x80陷入内核态,并保存现场。
- 执行do_fork()来创建子进程,过程如下:
- 复制一个PCB
- 分配一个新的内核堆栈
- 修改复制过来的数据,比如pid、进程链表、eip等
- 父进程从ISR中返回,恢复现场,通过IRET回到用户态,从fork的下一条指令继续执行