参见 include/linux/sched.h
//进程描述符task_structstruct task_struct {
/* * offsets of these are hardcoded elsewhere - touch with care
*/ volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ //-1 不能运行 0 运行 >0 停止
unsigned long flags; /* per process flags, defined below *///进程标志,在下面定义
int sigpending; //进程上是否有待处理的信号
mm_segment_t addr_limit; /* thread address space:进程地址空间
0-0xBFFFFFFF for user-thead
0-0xFFFFFFFF for kernel-thread
*/
volatile long need_resched; //调度标志,表示该进程是否需要重新调度,若非0,则当从内核态返回到用户态,会发生调度
int lock_depth; /* Lock depth *///锁深度
/* * offset 32 begins here on 32-bit platforms. We keep
* all fields in a single cacheline that are needed for
* the goodness() loop in schedule().
*/ long counter; //进程可运行的时间量
long nice; //进程的基本时间片
unsigned long policy; //进程的调度策略,有三种,实时进程:SCHED_FIFO,SCHED_RR;分时进程:SCHED_OTHER;
struct mm_struct *mm; //进程内存管理信息
int processor;
/* * cpus_runnable is ~0 if the process is not running on any
* CPU. It's (1 << cpu) if it's running on a CPU. This mask
* is updated under the runqueue lock.
* * To determine whether a process might run on a CPU, this
* mask is AND-ed with cpus_allowed.
* 若进程不在任何CPU上运行,cpus_runnable 的值是0,否则是1。这个值在运行 *队列被锁时更新;*/
unsigned long cpus_runnable, cpus_allowed;
/* * (only the 'next' pointer fits into the cacheline, but
* that's just fine.)
*/
struct list_head run_list; //指向运行队列的指针
unsigned long sleep_time; //进程的睡眠时间
struct task_struct *next_task, *prev_task; //用于将系统中所有的进程连成一个双向循环链表,其根是init_task.
struct mm_struct *active_mm;
struct list_head local_pages; //指向本地页面
unsigned int allocation_order, nr_local_pages;
/* task state */
struct linux_binfmt *binfmt; //进程所运行的可执行文件的格式
int exit_code, exit_signal;
int pdeath_signal; /* The signal sent when the parent dies *///父进程终止是向子进程发送的信号
/* ??? */
unsigned long personality; //Linux可以运行由其他UNIX操作系统生成的符合iBCS2标准的程序
int did_exec:1; //按POSIX要求设计的布尔量,区分进程正在执行从父进程中继承的代码,还是执行由execve装入的新程序代码
pid_t pid; //进程标识符,用来代表一个进程
pid_t pgrp; //进程组标识,表示进程所属的进程组
pid_t tty_old_pgrp; //进程控制终端所在的组标识
pid_t session; //进程的会话标识
pid_t tgid;
/* boolean value for session group leader */
int leader; //标志,表示进程是否为会话主管
/*
* pointers to (original) parent process, youngest child, younger sibling,
* older sibling, respectively. (p->father can be replaced with
* p->p_pptr->pid)
*///指针指向(原始的)父进程,孩子进程,比自己年轻的兄弟进程,比自己年长的兄弟进程
//(p->father能被p->p_pptr->pid代替)
struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr;
struct list_head thread_group; //线程链表
/* PID hash table linkage. *///进程散列表指针
struct task_struct *pidhash_next; //用于将进程链入HASH表pidhash
struct task_struct **pidhash_pprev;
wait_queue_head_t wait_chldexit; /* for wait4() *///wait4()使用
struct completion *vfork_done; /* for vfork() */// vfork() 使用
unsigned long rt_priority; //实时优先级,用它计算实时进程调度时的weight值
//it_real_value,it_real_incr用于REAL定时器,单位为jiffies。系统根据it_real_value //设置定时器的第一个终止时间。在定时器到期时,向进程发送SIGALRM信号,同时根据it_real_incr重置终止时间
//it_prof_value,it_prof_incr用于Profile定时器,单位为jiffies。当进程运行时,不管在何种状态下,每个tick都使
//it_prof_value值减一,当减到0时,向进程发送信号SIGPROF,并根据it_prof_incr重置时间
//it_virt_value,it_virt_value用于Virtual定时器,单位为jiffies。当进程运行时,不管在何种状态下,每个tick都使
//it_virt_value值减一,当减到0时,向进程发送信号SIGVTALRM,根据it_virt_incr重置初值。
//Real定时器根据系统时间实时更新,不管进程是否在运行
//Virtual定时器只在进程运行时,根据进程在用户态消耗的时间更新
//Profile定时器在进程运行时,根据进程消耗的时(不管在用户态还是内核态)更新
unsigned long it_real_value, it_prof_value, it_virt_value;
unsigned long it_real_incr, it_prof_incr, it_virt_value;
struct timer_list real_timer;//指向实时定时器的指针
struct tms times; //记录进程消耗的时间,
unsigned long start_time;//进程创建的时间
long per_cpu_utime[NR_CPUS], per_cpu_stime[NR_CPUS]; //记录进程在每个CPU上所消耗的用户态时间和核心态时间
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
//内存缺页和交换信息:
//min_flt, maj_flt累计进程的次缺页数(Copy on Write页和匿名页)和主缺页数(从映射文件或交换设备读入的页面数);
//nswap记录进程累计换出的页面数,即写到交换设备上的页面数。
//cmin_flt, cmaj_flt, cnswap记录本进程为祖先的所有子孙进程的累计次缺页数,主缺页数和换出页面数。在父进程
//回收终止的子进程时,父进程会将子进程的这些信息累计到自己结构的这些域中
unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
int swappable:1; //表示进程的虚拟地址空间是否允许换出
/* process credentials *进程认证信息
//uid,gid为运行该进程的用户的用户标识符和组标识符,通常是进程创建者的uid,gid //euid,egid为有效uid,gid
//fsuid,fsgid为文件系统uid,gid,这两个ID号通常与有效uid,gid相等,在检查对于文件系统的访问权限时使用他们。
//suid,sgid为备份uid,gid
uid_t uid,euid,suid,fsuid;
gid_t gid,egid,sgid,fsgid;
int ngroups; //记录进程在多少个用户组中
gid_t groups[NGROUPS]; //记录进程所在的组
kernel_cap_t cap_effective, cap_inheritable, cap_permitted;//进程的权能,分别是有效位集合,继承位集合,允许位集合
int keep_capabilities:1;
struct user_struct *user;
/* limits */
struct rlimit rlim[RLIM_NLIMITS]; //与进程相关的资源限制信息
unsigned short used_math; //是否使用FPU
char comm[16]; //进程正在运行的可执行文件名
/* file system info *///文件系统信息
int link_count, total_link_count;
struct tty_struct *tty; /* NULL if no tty 进程所在的控制终端,如果不需要控制终端,则该指针为空*/
unsigned int locks; /* How many file locks are being held */
/* ipc stuff *///进程间通信信息
struct sem_undo *semundo; //进程在信号灯上的所有undo操作
struct sem_queue *semsleeping; //当进程因为信号灯操作而挂起时,他在该队列中记录等待的操作
/* CPU-specific state of this task *///进程的CPU状态,切换时,要保存到停止进程的task_struct中
struct thread_struct thread;
/* filesystem information文件系统信息*/
struct fs_struct *fs;
/* open file information *///打开文件信息
struct files_struct *files;
/* signal handlers *///信号处理函数
spinlock_t sigmask_lock; /* Protects signal and blocked */
struct signal_struct *sig; //信号处理函数,
sigset_t blocked; //进程当前要阻塞的信号,每个信号对应一位
truct sigpending pending; //进程上是否有待处理的信号
unsigned long sas_ss_sp;
size_t sas_ss_size;
int (*notifier)(void *priv);
void *notifier_data;
sigset_t *notifier_mask;
/* Thread group tracking */
u32 parent_exec_id;
u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty */
spinlock_t alloc_lock;
/* journalling filesystem info */
void *journal_info;
};
mm_struct 数据结构用于描述一个任务或进程的虚拟内存
参见 include/linux/sched.h
struct mm_struct {int count;
pgd_t * pgd; /* 进程页目录的起始地址*/
unsigned long context;
unsigned long start_code, end_code, start_data, end_data;
unsigned long start_brk, brk, start_stack, start_mmap;
unsigned long arg_start, arg_end, env_start, env_end;
unsigned long rss, total_vm, locked_vm;
unsigned long def_flags;
struct vm_area_struct * mmap; /* 指向vma双向链表的指针 */
struct vm_area_struct * mmap_avl; /* 指向vma AVL树的指针 */
struct semaphore mmap_sem;
}
start_code、end_code:进程代码段的起始地址和结束地址。
start_data、end_data:进程数据段的起始地址和结束地址。
arg_start、arg_end:调用参数区的起始地址和结束地址。
env_start、env_end:进程环境区的起始地址和结束地址。
rss:进程内容驻留在物理内存的页面总数。
vm_area_struct 数据结构描述一个进程的一个虚拟内存区
虚存段vma由数据结构vm_area_struct(include/linux/mm.h)描述:
struct vm_area_struct {
struct mm_struct * vm_mm; /* VM area parameters */
unsigned long vm_start;
unsigned long vm_end;
pgprot_t vm_page_prot;
unsigned short vm_flags;
/* AVL tree of VM areas per task, sorted by address */
short vm_avl_height;
struct vm_area_struct * vm_avl_left;
struct vm_area_struct * vm_avl_right;
/* linked list of VM areas per task, sorted by address */
struct vm_area_struct * vm_next;
/* for areas with inode, the circular list inode->i_mmap */
/* for shm areas, the circular list of attaches */
/* otherwise unused */
struct vm_area_struct * vm_next_share;
struct vm_area_struct * vm_prev_share;
/* more */
struct vm_operations_struct * vm_ops;
unsigned long vm_offset;
struct inode * vm_inode;
unsigned long vm_pte; /* shared mem */
};
vm_start;//所对应内存区域的开始地址
vm_end; //所对应内存区域的结束地址
vm_flags; //进程对所对应内存区域的访问权限
vm_avl_height;//avl树的高度
vm_avl_left; //avl树的左儿子
vm_avl_right; //avl树的右儿子
vm_next;// 进程所使用的按地址排序的vm_area链表指针
vm_ops;//一组对内存的操作
这些对内存的操作是当对虚存进行操作的时候Linux系统必须使用的一组方法。比如说,当进程准备访问某一虚存区域但是发现此区域在物理内存不存在时(缺页中断),就激发某种对内存的操作执行正确的行为。这种操作是空页(nopage)操作。当Linux系统按需调度可执行的页面映象进入内存时就使用这种空页(nopage)操作。
当一个可执行的页面映象映射到进程的虚存地址时,一组vm_area_struct结构的数据结构(vma)就会生成。每一个 vm_area_struct的数据结构(vma)代表可执行的页面映象的一部分:可执行代码,初始化数据(变量),非初始化数据等等。Linux系统可以支持大量的标准虚存操作,当vm_area_struct数据结构(vma)一被创建,它就对应于一组正确的虚存操作。
属于同一进程的vma段通过vm_next指针连接,组成链表。如图2-3所示,struct mm_struct结构的成员struct vm_area_struct * mmap 表示进程的vma链表的表头。
为了提高对vma段 查询、插入、删除操作的速度,LINUX同时维护了一个AVL(Adelson-Velskii and Landis)树。在树中,所有的vm_area_struct虚存段均有左指针vm_avl_left指向相邻的低地址虚存段,右指针 vm_avl_right指向相邻的高地址虚存段,如图2-5。struct mm_struct结构的成员struct vm_area_struct * mmap_avl表示进程的AVL树的根,vm_avl_height表示AVL树的高度。
对平衡树mmap_avl的任何操作必须满足平衡树的一些规则:
Consistency and balancing rulesJ(一致性和平衡规则):
tree->vm_avl_height==1+max(heightof(tree->vm_avl_left),heightof(
tree->vm_avl_right))
abs( heightof(tree->vm_avl_left) - heightof(tree->vm_avl_right) ) <= 1
foreach node in tree->vm_avl_left: node->vm_avl_key <= tree->vm_avl_key, foreach node in tree->vm_avl_right: node->vm_avl_key >= tree->vm_avl_key.
注:其中node->vm_avl_key= node->vm_end
对vma可以进行加锁、加保护、共享和动态扩展等操作。
file
每一个打开的文件、 socket 等等都用一个 file 数据结构代表
参见 include/linux/fs.h
struct file {
mode_t f_mode;
loff_t f_pos;
unsigned short f_flags;
unsigned short f_count;
unsigned long f_reada, f_ramax, f_raend, f_ralen, f_rawin;
struct file *f_next, *f_prev;
int f_owner; /* pid or -pgrp where SIGIO should be sent */
struct inode * f_inode;
struct file_operations * f_op;
unsigned long f_version;
void *private_data; /* needed for tty driver, and maybe others */
};
file_struct 数据结构描述了一个进程打开的文件
参见 include/linux/sched.h
struct files_struct {
int count;
fd_set close_on_exec;
fd_set open_fds;
struct file * fd[NR_OPEN];
};
file_operations 文件操作
struct file_operations {
struct module *owner;
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char *, size_t, loff_t *);
int (*readdir) (struct file *, void *, filldir_t);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *);
int (*release) (struct inode *, struct file *);
int (*fsync) (struct file *, struct dentry *, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *);
ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
#ifdef MAGIC_ROM_PTR
int (*romptr) (struct file *, struct vm_area_struct *);
#endif /* MAGIC_ROM_PTR */
};
fs_struct
参见 include/linux/sched.h
struct fs_struct {int count;
unsigned short umask;
struct inode * root, * pwd;
} ;