趣谈Linux操作系统随笔——7.0 进程的数据结构

进程的数据结构

  • 软件平台:运行于VMware Workstation 12 Player下UbuntuLTS16.04_x64 系统
  • 开发环境:Linux-4.19-rc3内核,glibc-2.9


在linux中,对于进程的管理是通过一个task_struct结构体描述

1、任务链表描述——tasks

struct list_head		tasks;

2、任务ID

在内核中,进程和线程都会被描述为任务。

对于每个任务,都需要一个ID来描述和识别它们,代码如下:

pid_t pid;
pid_t tgid;
struct task_struct *group_leader; 
  • pid(process id):描述的是线程的id
  • tgid(thread group id):描述的是进程的主线程的id
  • group_leader:指向进程的主线程的task_struct
  1. 对于一个只有一个线程的进程

    只有主线程,那pid是自己,tgid是自己,group_leader指向的还是自己

  2. 对于一个有多个线程的进程

    线程有自己的pid,tgid就是进程的主线程的pid,group_leader指向的就是进程的主线程

通过比较三者,主要是pid和tgid,可得知task_struct代表的是一个进程还是代表一个线程。

3、信号处理

对于每个线程或进程,会对系统的相关信号进行处理。

在该结构体中,有如下的代码描述:

/* Signal handlers: */
struct signal_struct		*signal;	/* Related signals; it contains a struct sigpending shared_pending that is shared by the thread group */
struct sighand_struct		*sighand;	/* Which signals are being handled by signal handler functions */
sigset_t					blocked;	/* Which signals are blocked and not handled for now */
sigset_t					real_blocked;
sigset_t					saved_sigmask;
struct sigpending			pending;	/* Which signals are still waiting to be handled */

/* The user-mode function stack is used by default; a new stack dedicated to signal handling can also be set up */
unsigned long				sas_ss_sp;
size_t						sas_ss_size;
unsigned int				sas_ss_flags;

在这里也可以通过如下结构可区分线程与进程:

  • struct sigpending pending——本任务;
  • struct signal_struct *signal->struct sigpending shared_pending——线程组共享的;

4、任务状态

 volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped */
 int exit_state;
 unsigned int flags;
  • 对于状态的描述有如下宏定义:其是通过bitset的方式来进行设置的,即每一位对应一个状态

    /*
     * Task state values. Each state other than TASK_RUNNING (0) occupies one
     * bit, so several of them can be OR-ed together into the convenience
     * masks defined further down.
     */
    /* Used in tsk->state: */
    #define TASK_RUNNING			0x0000
    #define TASK_INTERRUPTIBLE		0x0001
    #define TASK_UNINTERRUPTIBLE	0x0002
    #define __TASK_STOPPED			0x0004
    #define __TASK_TRACED			0x0008
    /* Used in tsk->exit_state: */
    #define EXIT_DEAD			0x0010
    #define EXIT_ZOMBIE			0x0020
    #define EXIT_TRACE			(EXIT_ZOMBIE | EXIT_DEAD)
    /* Used in tsk->state again: */
    #define TASK_PARKED			0x0040
    #define TASK_DEAD			0x0080
    #define TASK_WAKEKILL		0x0100
    #define TASK_WAKING			0x0200
    #define TASK_NOLOAD			0x0400
    #define TASK_NEW			0x0800
    #define TASK_STATE_MAX		0x1000
    
    /* Convenience macros for the sake of set_current_state: */
    #define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
    #define TASK_STOPPED		(TASK_WAKEKILL | __TASK_STOPPED)
    #define TASK_TRACED			(TASK_WAKEKILL | __TASK_TRACED)
    
    #define TASK_IDLE			(TASK_UNINTERRUPTIBLE | TASK_NOLOAD)
    
    /* Convenience macros for the sake of wake_up(): */
    #define TASK_NORMAL			(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
    
    /*
     * get_task_state(): mask of the states that are reported.
     * The last term and the closing parenthesis were missing from the listing,
     * which left the macro unterminated.
     */
    #define TASK_REPORT			(TASK_RUNNING | TASK_INTERRUPTIBLE | \
    					 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
    					 __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
    					 TASK_PARKED)
    
    • TASK_RUNNING:表示进程在时刻准备运行的状态;

      当处于此状态的进程获得时间片的时候,就是在运行中;如果没有获得时间片,就说明它被其他进程抢占了,在等待再次分配时间片

      进程的状态
    • TASK_INTERRUPTIBLE可中断的睡眠状态。是一种浅睡眠的状态,即可以通过一个信号来唤醒进程;

    • TASK_UNINTERRUPTIBLE不可中断的睡眠状态。是一种深度睡眠状态,不可被信号唤醒,只能死等I/O操作完成;

    • TASK_KILLABLE:可以终止的新睡眠状态。通过定义可知,其运行原理类似TASK_UNINTERRUPTIBLE,只可以响应致命信号;

    • TASK_STOPPED:进程接收到SIGSTOP、SIGTTIN、SIGTSTP或者SIGTTOU信号之后进入;

    • TASK_TRACED:进程被debugger等进程监视,进程执行被调试程序所停止。当一个进程被另外的进程所监视,每一个信号都会让进程进入该状态;

    • EXIT_ZOMBIE:僵尸状态。若一个进程结束时,其父进程未调用wait()系统调用来获知它的终止信息,此时进程就成了僵尸进程;

    • EXIT_DEAD:是进程的最终状态

  • 上述EXIT_ZOMBIE和EXIT_DEAD可用于设置exit_state

  • 对于flags字段,使用如下宏定义进行设置:

    /*
     * Per process flags
     */
    #define PF_IDLE				0x00000002	/* I am an IDLE thread */
    #define PF_EXITING			0x00000004	/* Getting shut down */
    #define PF_EXITPIDONE		0x00000008	/* PI exit done on shut down */
    #define PF_VCPU				0x00000010	/* I'm a virtual CPU */
    #define PF_WQ_WORKER		0x00000020	/* I'm a workqueue worker */
    #define PF_FORKNOEXEC		0x00000040	/* Forked but didn't exec */
    #define PF_MCE_PROCESS		0x00000080  /* Process policy on mce errors */
    #define PF_SUPERPRIV		0x00000100	/* Used super-user privileges */
    #define PF_DUMPCORE			0x00000200	/* Dumped core */
    #define PF_SIGNALED			0x00000400	/* Killed by a signal */
    #define PF_MEMALLOC			0x00000800	/* Allocating memory */
    #define PF_NPROC_EXCEEDED	0x00001000	/* set_user() noticed that RLIMIT_NPROC was exceeded */
    #define PF_USED_MATH		0x00002000	/* If unset the fpu must be initialized before use */
    #define PF_USED_ASYNC		0x00004000	/* Used async_schedule*(), used by module init */
    #define PF_NOFREEZE			0x00008000	/* This thread should not be frozen */
    #define PF_FROZEN			0x00010000	/* Frozen for system suspend */
    #define PF_KSWAPD			0x00020000	/* I am kswapd */
    #define PF_MEMALLOC_NOFS	0x00040000	/* All allocation requests will inherit GFP_NOFS */
    #define PF_MEMALLOC_NOIO	0x00080000	/* All allocation requests will inherit GFP_NOIO */
    #define PF_LESS_THROTTLE	0x00100000	/* Throttle me less: I clean memory */
    #define PF_KTHREAD			0x00200000	/* I am a kernel thread */
    #define PF_RANDOMIZE		0x00400000	/* Randomize virtual address space */
    #define PF_SWAPWRITE		0x00800000	/* Allowed to write to swap */
    #define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_allowed */
    #define PF_MCE_EARLY		0x08000000  /* Early kill for mce process policy */
    #define PF_MUTEX_TESTER		0x20000000	/* Thread belongs to the rt mutex tester */
    #define PF_FREEZER_SKIP		0x40000000	/* Freezer should not count it as freezable */
    #define PF_SUSPEND_TASK		0x80000000  /* This thread called freeze_processes() and should not be frozen */
    
    • PF_EXITING:表示正在退出。

      当有这个flag的时候,在函数find_alive_thread中,找活着的线程,遇到有这个flag的,就直接跳过;

      /*
       * Iterate over the threads of p with for_each_thread() and return the
       * first one whose flags do not have PF_EXITING set. Returns NULL when
       * every thread visited has PF_EXITING set.
       */
      static struct task_struct *find_alive_thread(struct task_struct *p)
      {
      	struct task_struct *t;
      
      	for_each_thread(p, t) {
      		/* Threads flagged PF_EXITING are skipped. */
      		if (!(t->flags & PF_EXITING))
      			return t;
      	}
      	return NULL;
      }
      
    • PF_VCPU:表示进程运行在虚拟CPU上。

      在函数account_system_time中,统计进程的系统运行时间,如果有这个flag,就调用account_guest_time,按照客户机的时间进行统计;

      /*
       * Account guest CPU time to a process.
       * @p: the process that the CPU time gets accounted to
       * @cputime: the CPU time spent in virtual machine since the last update
       *
       * The same amount is added to the task's utime and gtime and to two
       * per-CPU cpustat counters chosen by the task's nice value.
       */
      void account_guest_time(struct task_struct *p, u64 cputime)
      {
      	u64 *cpustat = kcpustat_this_cpu->cpustat;
      
      	/* Add guest time to process. */
      	/* Guest time is counted in utime as well as in gtime. */
      	p->utime += cputime;
      	account_group_user_time(p, cputime);
      	p->gtime += cputime;
      
      	/* Add guest time to cpustat. */
      	/* A positive nice value selects the NICE/GUEST_NICE counters, otherwise USER/GUEST. */
      	if (task_nice(p) > 0) {
      		cpustat[CPUTIME_NICE] += cputime;
      		cpustat[CPUTIME_GUEST_NICE] += cputime;
      	} else {
      		cpustat[CPUTIME_USER] += cputime;
      		cpustat[CPUTIME_GUEST] += cputime;
      	}
      }
      
      /*
       * Account system CPU time to a process.
       * @p: the process that the CPU time gets accounted to
       * @hardirq_offset: the offset to subtract from hardirq_count()
       * @cputime: the CPU time spent in kernel space since the last update
       */
      void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime)
      {
      	int index;
      
      	/*
      	 * A task with PF_VCPU set, when irq_count() equals hardirq_offset,
      	 * has the time handed to account_guest_time() and nothing else is
      	 * accounted here.
      	 */
      	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
      		account_guest_time(p, cputime);
      		return;
      	}
      
      	/* Otherwise pick the counter: IRQ first, then SOFTIRQ, else SYSTEM. */
      	if (hardirq_count() - hardirq_offset)
      		index = CPUTIME_IRQ;
      	else if (in_serving_softirq())
      		index = CPUTIME_SOFTIRQ;
      	else
      		index = CPUTIME_SYSTEM;
      
      	account_system_index_time(p, cputime, index);
      }
      
    • PF_FORKNOEXEC:表示fork完了,还没有exec。

      在_do_fork函数里面调用copy_process,这个时候把flag设置为PF_FORKNOEXEC。当exec中调用了load_elf_binary的时候,又把这个flag去掉;

      /*
       * Excerpt of copy_process(). Comments consisting of "......" mark code
       * that this listing omits. The part shown sets PF_FORKNOEXEC on the
       * new task p after clearing PF_SUPERPRIV, PF_WQ_WORKER and PF_IDLE.
       */
      static __latent_entropy struct task_struct *copy_process(
      					unsigned long clone_flags,
      					unsigned long stack_start,
      					unsigned long stack_size,
      					int __user *child_tidptr,
      					struct pid *pid,
      					int trace,
      					unsigned long tls,
      					int node)
      {
      	int retval;
      	struct task_struct *p;
      	struct multiprocess_signals delayed;

      	/* ...... */
      	/* some checks and handling */
      	/* ...... */

      	retval = -EAGAIN;
      	if (atomic_read(&p->real_cred->user->processes) >=
      			task_rlimit(p, RLIMIT_NPROC)) {
      		if (p->real_cred->user != INIT_USER &&
      		    !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
      			goto bad_fork_free;
      	}
      	current->flags &= ~PF_NPROC_EXCEEDED;

      	retval = copy_creds(p, clone_flags);
      	if (retval < 0)
      		goto bad_fork_free;

      	/*
      	 * If multiple threads are within copy_process(), then this check
      	 * triggers too late. This doesn't hurt, the check is only there
      	 * to stop root fork bombs.
      	 */
      	retval = -EAGAIN;
      	if (nr_threads >= max_threads)
      		goto bad_fork_cleanup_count;

      	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
      	p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE);
      	p->flags |= PF_FORKNOEXEC;
      	INIT_LIST_HEAD(&p->children);
      	INIT_LIST_HEAD(&p->sibling);
      	rcu_copy_process(p);
      	p->vfork_done = NULL;
      	spin_lock_init(&p->alloc_lock);

      	/* ...... */
      }
      

5、进程调度

这里只列出部分字段供理解

//是否在运行队列上int				on_rq;//优先级int				prio;int				static_prio;int				normal_prio;unsigned int	rt_priority;//调度器类const struct sched_class	*sched_class;//调度实体struct sched_entity		se;struct sched_rt_entity	rt;struct sched_dl_entity	dl;//调度策略unsigned int			policy;//可以使用哪些CPUint						nr_cpus_allowed;cpumask_t				cpus_allowed;struct sched_info		sched_info;

6、运行统计信息

对于每一个任务,有时候需要了解运行时的信息。

/*
 * Run-time statistics members of task_struct.
 * Trailing comments use the block form and each member has its own line, so
 * no declaration ends up inside a comment.
 */
u64				utime;			/* CPU time consumed in user mode */
u64				stime;			/* CPU time consumed in kernel mode */
unsigned long			nvcsw;			/* voluntary context switch count */
unsigned long			nivcsw;			/* involuntary context switch count */
u64				start_time;		/* process start time, not including sleep time */
u64				real_start_time;	/* process start time, including sleep time */

7、进程的亲缘关系

对于Linux上的所有进程,其创建时都是有父进程的。

/* Parent/child relationship members of task_struct. */
struct task_struct __rcu	*real_parent;	/* real parent process */
struct task_struct __rcu	*parent;	/* recipient of SIGCHLD, wait4() reports */
struct list_head		children;	/* list of my children */
struct list_head		sibling;	/* linkage in my parent's children list */
  • **real_parent:**指向真正的父进程(亲爹,即调用fork()函数的进程)
  • **parent:**指向其父进程(干爹)。当它终止时,必须向它的父进程发送信号;
  • **children:**表示链表的头部。链表中的所有元素都是它的子进程;
  • **sibling:**用于把当前进程插入到兄弟链表中。

对于real_parent和parent,其通俗的理解如下:

  • real_parent是亲爹,调fork的那个
  • parent是干爹
  • 大部分情况下亲爹干爹是一个人,ps看到的是干爹
  • 亲爹干爹不一样的:比如有一种情况,比如亲爹死了,但是又得有一个父进程,比如1号进程就会被当成父进程。但进程不是1号进程fork出来的

对于亲缘关系,有如下的图表示(树形结构):

进程亲缘关系

8、进程的权限

对于每个进程,都需要规定其权限来决定其可做什么不可做什么,以此来保证程序的安全。

	/* Tracer's credentials at attach: */
	const struct cred __rcu		*ptracer_cred;

	/* Objective and real subjective task credentials (COW): who may act on me */
	const struct cred __rcu		*real_cred;

	/* Effective (overridable) subjective task credentials (COW): whom I may act on */
	const struct cred __rcu		*cred;

对于struct cred结构体,其定义如下:

struct cred {
	atomic_t	usage;
#ifdef CONFIG_DEBUG_CREDENTIALS
	atomic_t	subscribers;	/* number of processes subscribed */
	void		*put_addr;
	unsigned	magic;
#define CRED_MAGIC	0x43736564
#define CRED_MAGIC_DEAD	0x44656144
#endif
	kuid_t		uid;		/* real UID of the task */
	kgid_t		gid;		/* real GID of the task */
	kuid_t		suid;		/* saved UID of the task */
	kgid_t		sgid;		/* saved GID of the task */
	kuid_t		euid;		/* effective UID of the task */
	kgid_t		egid;		/* effective GID of the task */
	kuid_t		fsuid;		/* UID for VFS ops */
	kgid_t		fsgid;		/* GID for VFS ops */
	unsigned	securebits;	/* SUID-less security management */
	kernel_cap_t	cap_inheritable; /* caps our children can inherit */
	kernel_cap_t	cap_permitted;	/* caps we're permitted */
	kernel_cap_t	cap_effective;	/* caps we can actually use */
	kernel_cap_t	cap_bset;	/* capability bounding set */
	kernel_cap_t	cap_ambient;	/* Ambient capability set */
#ifdef CONFIG_KEYS
	unsigned char	jit_keyring;	/* default keyring to attach requested
					 * keys to */
	struct key __rcu *session_keyring; /* keyring inherited over fork */
	struct key	*process_keyring; /* keyring private to this process */
	struct key	*thread_keyring; /* keyring private to this thread */
	struct key	*request_key_auth; /* assumed request_key authority */
#endif
#ifdef CONFIG_SECURITY
	void		*security;	/* subjective LSM security */
#endif
	struct user_struct *user;	/* real user ID subscription */
	struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
	struct group_info *group_info;	/* supplementary groups for euid/fsgid */
	struct rcu_head	rcu;		/* RCU deletion hook */
} __randomize_layout;

  • uid和gid,注释是real user/group id。一般情况下,谁启动的进程,就是谁的ID。但是权限审核的时候,往往不比较这两个,也就是说不大起作用。
  • euid和egid,注释是effective user/group id。一看这个名字,就知道这个是起“作用”的。当这个进程要操作消息队列、共享内存、信号量等对象的时候,其实就是在比较这个用户和组是否有权限
  • fsuid和fsgid,也就是filesystem user/group id。这个是对文件操作会审核的权限
  • suid和sgid:会把用户id和组id保存在一个地方,也就是saved uid和saved gid。

对于一般的进程,其需要做特定的事时并不需要完整的权限,只需要给定特定权限即可,故引入capabilities机制(用位图表示权限)。

	kernel_cap_t	cap_inheritable; /* caps our children can inherit */
	kernel_cap_t	cap_permitted;	/* caps we're permitted */
	kernel_cap_t	cap_effective;	/* caps we can actually use */
	kernel_cap_t	cap_bset;	/* capability bounding set */
	kernel_cap_t	cap_ambient;	/* Ambient capability set */
  • cap_permitted:表示进程能够使用的权限。但是真正起作用的是cap_effective。cap_permitted中可以包含cap_effective中没有的权限;

  • cap_effective:表示进程起作用的权限

  • cap_inheritable:表示当可执行文件的扩展属性

    设置了inheritable位时,调用exec执行该程序会继承调用者的inheritable集合,并将其加入到permitted集合。但在非root用户下执行exec时,通常不会保留inheritable集合,但是往往又是非root用户,才想保留权限,所以非常鸡肋。

  • cap_bset,也就是capability bounding set,是系统中所有进程允许保留的权限。如果这个集合中不存在某个权限,那么系统中的所有进程都没有这个权限。即使以超级用户权限执行的进程,也是一样的。

这样有很多好处。例如,系统启动以后,将加载内核模块的权限去掉,那所有进程都不能加载内核模块。这样,即便这台机器被攻破,也做不了太多有害的事情。

  • cap_ambient:就是为了解决cap_inheritable鸡肋的状况,也就是,非root用户进程使用exec执行一个程序的时候,如何保留权限的问题。当执行exec的时候,cap_ambient会被添加到cap_permitted,同时设置到cap_effective

9、内存管理

对于每个进程,其都需要一个独立的虚拟内存空间,用以保存相关数据。

    struct mm_struct                *mm;
    struct mm_struct                *active_mm;

10、文件与文件系统

在Linux中万物皆文件,需要有一个结构体去描述该文件系统与一个打开的文件

    /* Filesystem information: */
    struct fs_struct                *fs;
    /* Open file information: */
    struct files_struct             *files;

11、函数栈(用户态与内核态)

上两节,我们解读了task_struct的大部分的成员变量。这样一个任务执行的方方面面,都可以很好地管理起来,但是其中有一个问题我们没有谈。在程序执行过程中,一旦调用到系统调用,就需要进入内核继续执行。那如何将用户态的执行和内核态的执行串起来呢?

这就需要以下两个重要的成员变量:

struct thread_info        thread_info;
void  *stack;
用户态函数栈

在用户态中,程序的执行往往是一个函数调用另一个函数。函数调用都是通过栈来进行的。我们前面大致讲过函数栈的原理,今天我们仔细分析一下。

函数调用其实也很简单。如果你去看汇编语言的代码,其实就是指令跳转,从代码的一个地方跳到另外一个地方。这里比较棘手的问题是,参数和返回地址应该怎么传递过去呢?

我们看函数的调用过程,A调用B、调用C、调用D,然后返回C、返回B、返回A,这是一个后进先出的过程。有没有觉得这个过程很熟悉?没错,咱们数据结构里学的栈,也是后进先出的,所以用栈保存这些最合适。

在进程的内存空间里面,栈是一个从高地址到低地址,往下增长的结构,也就是上面是栈底,下面是栈顶,入栈和出栈的操作都是从下面的栈顶开始的。

img

我们先来看32位操作系统的情况。在CPU里,ESP(Extended Stack Pointer)是栈顶指针寄存器,入栈操作Push和出栈操作Pop指令,会自动调整ESP的值。另外有一个寄存器EBP(Extended Base Pointer),是栈基地址指针寄存器,指向当前栈帧的最底部。

例如,A调用B,A的栈里面包含A函数的局部变量,然后是调用B的时候要传给它的参数,然后返回A的地址,这个地址也应该入栈,这就形成了A的栈帧。接下来就是B的栈帧部分了,先保存的是A栈帧的栈底位置,也就是EBP。因为在B函数里面获取A传进来的参数,就是通过这个指针获取的,接下来保存的是B的局部变量等等。

当B返回的时候,返回值会保存在EAX寄存器中,从栈中弹出返回地址,将指令跳转回去,参数也从栈中弹出,然后继续执行A。

对于64位操作系统,模式多少有些不一样。因为64位操作系统的寄存器数目比较多。rax用于保存函数调用的返回结果。栈顶指针寄存器变成了rsp,指向栈顶位置。堆栈的Pop和Push操作会自动调整rsp,栈基指针寄存器变成了rbp,指向当前栈帧的起始位置。

改变比较多的是参数传递。rdi、rsi、rdx、rcx、r8、r9这6个寄存器,用于传递存储函数调用时的6个参数。如果超过6的时候,还是需要放到栈里面。

然而,前6个参数有时候需要进行寻址,但是如果在寄存器里面,是没有地址的,因而还是会放到栈里面,只不过放到栈里面的操作是被调用函数做的。

img

以上的栈操作,都是在进程的内存空间里面进行的。

内核态函数栈

接下来,我们通过系统调用,从进程的内存空间到内核中了。内核中也有各种各样的函数调用来调用去的,也需要这样一个机制,这该怎么办呢?

这时候,上面的成员变量stack,也就是内核栈,就派上了用场。

Linux给每个task都分配了内核栈。在32位系统上arch/x86/include/asm/page_32_types.h,是这样定义的:一个PAGE_SIZE是4K,左移一位就是乘以2,也就是8K。

/* Order 1: THREAD_SIZE is PAGE_SIZE shifted left by one, i.e. two pages (8K when PAGE_SIZE is 4K). */
#define THREAD_SIZE_ORDER    1
#define THREAD_SIZE        (PAGE_SIZE << THREAD_SIZE_ORDER)

内核栈在64位系统上arch/x86/include/asm/page_64_types.h,是这样定义的:在PAGE_SIZE的基础上左移两位,也即16K,并且要求起始地址必须是8192的整数倍。

/* With CONFIG_KASAN the stack order is raised by one. */
#ifdef CONFIG_KASAN
#define KASAN_STACK_ORDER 1
#else
#define KASAN_STACK_ORDER 0
#endif


/* Order 2 without KASAN: THREAD_SIZE is PAGE_SIZE << 2 (16K when PAGE_SIZE is 4K); order 3 with KASAN. */
#define THREAD_SIZE_ORDER    (2 + KASAN_STACK_ORDER)
#define THREAD_SIZE  (PAGE_SIZE << THREAD_SIZE_ORDER)

内核栈是一个非常特殊的结构,如下图所示:

img

这段空间的最低位置,是一个thread_info结构。这个结构是对task_struct结构的补充。因为task_struct结构庞大但是通用,不同的体系结构就需要保存不同的东西,所以往往与体系结构有关的,都放在thread_info里面。

在内核代码里面有这样一个union,将thread_info和stack放在一起,在include/linux/sched.h文件中就有。

/*
 * Overlays thread_info and the stack array on the same storage.
 * The stack array spans THREAD_SIZE bytes; thread_info is a member only
 * when CONFIG_THREAD_INFO_IN_TASK is not defined.
 */
union thread_union {
#ifndef CONFIG_THREAD_INFO_IN_TASK
    struct thread_info thread_info;
#endif
    unsigned long stack[THREAD_SIZE/sizeof(long)];
};

这个union就是这样定义的:在未开启CONFIG_THREAD_INFO_IN_TASK时,thread_info和stack共用同一段内存,thread_info位于这段空间的起始(最低地址)处,其余部分作为栈使用。

在内核栈的最高地址端,存放的是另一个结构pt_regs,定义如下。其中,32位和64位的定义不一样。

#ifdef __i386__
struct pt_regs {
    unsigned long bx;
    unsigned long cx;
    unsigned long dx;
    unsigned long si;
    unsigned long di;
    unsigned long bp;
    unsigned long ax;
    unsigned long ds;
    unsigned long es;
    unsigned long fs;
    unsigned long gs;
    unsigned long orig_ax;
    unsigned long ip;
    unsigned long cs;
    unsigned long flags;
    unsigned long sp;
    unsigned long ss;
};
#else 
struct pt_regs {
    unsigned long r15;
    unsigned long r14;
    unsigned long r13;
    unsigned long r12;
    unsigned long bp;
    unsigned long bx;
    unsigned long r11;
    unsigned long r10;
    unsigned long r9;
    unsigned long r8;
    unsigned long ax;
    unsigned long cx;
    unsigned long dx;
    unsigned long si;
    unsigned long di;
    unsigned long orig_ax;
    unsigned long ip;
    unsigned long cs;
    unsigned long flags;
    unsigned long sp;
    unsigned long ss;
/* top of stack page */
};
#endif 

看到这个是不是很熟悉?咱们在讲系统调用的时候,已经多次见过这个结构。当系统调用从用户态到内核态的时候,首先要做的第一件事情,就是将用户态运行过程中的CPU上下文保存起来,其实主要就是保存在这个结构的寄存器变量里。这样当从内核系统调用返回的时候,才能让进程在刚才的地方接着运行下去。

如果我们对比系统调用那一节的内容,你会发现系统调用的时候,压栈的值的顺序和struct pt_regs中寄存器定义的顺序是一样的。

在内核中,CPU的寄存器ESP或者RSP,已经指向内核栈的栈顶,在内核态里的调用都有和用户态相似的过程。

通过task_struct找内核栈

如果有一个task_struct的stack指针在手,你可以通过下面的函数找到这个线程内核栈:

/* Return the stack pointer stored in the given task_struct (task->stack). */
static inline void *task_stack_page(const struct task_struct *task)
{
    return task->stack;
}

从task_struct如何得到相应的pt_regs呢?我们可以通过下面的函数:

/*
 * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
 * This is necessary to guarantee that the entire "struct pt_regs"
 * is accessible even if the CPU haven't stored the SS/ESP registers
 * on the stack (interrupt gate does not save these registers
 * when switching to the same priv ring).
 * Therefore beware: accessing the ss/esp fields of the
 * "struct pt_regs" is possible, but they may contain the
 * completely wrong values.
 */
/*
 * task_pt_regs(task): take the address returned by task_stack_page(task),
 * add THREAD_SIZE minus TOP_OF_KERNEL_STACK_PADDING, then step back by one
 * struct pt_regs. The result is a struct pt_regs pointer.
 */
#define task_pt_regs(task) \
({                                    \
    unsigned long __ptr = (unsigned long)task_stack_page(task);    \
    __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;        \
    ((struct pt_regs *)__ptr) - 1;                    \
})

你会发现,这是先从task_struct找到内核栈的开始位置。然后这个位置加上THREAD_SIZE就到了最后的位置,然后转换为struct pt_regs,再减一,就相当于减少了一个pt_regs的位置,就到了这个结构的首地址。

这里面有一个TOP_OF_KERNEL_STACK_PADDING,这个的定义如下:

/*
 * Padding in bytes at the top of the kernel stack:
 * 16 on CONFIG_X86_32 with CONFIG_VM86, 8 on CONFIG_X86_32 without it,
 * and 0 otherwise.
 */
#ifdef CONFIG_X86_32
# ifdef CONFIG_VM86
#  define TOP_OF_KERNEL_STACK_PADDING 16
# else
#  define TOP_OF_KERNEL_STACK_PADDING 8
# endif
#else
# define TOP_OF_KERNEL_STACK_PADDING 0
#endif

也就是说,32位机器上是8(开启CONFIG_VM86时为16),其他是0。这是为什么呢?因为压栈pt_regs有两种情况。我们知道,CPU用ring来区分权限,从而Linux可以区分内核态和用户态。

因此,第一种情况,我们拿涉及从用户态到内核态的变化的系统调用来说。因为涉及权限的改变,会压栈保存SS、ESP寄存器的,这两个寄存器共占用8个byte。

另一种情况是,不涉及权限的变化,就不会压栈这8个byte。这样就会使得两种情况不兼容。如果没有压栈还访问,就会报错,所以还不如预留在这里,保证安全。在64位上,修改了这个问题,变成了定长的。

好了,现在如果你task_struct在手,就能够轻松得到内核栈和内核寄存器。

通过内核栈找task_struct

那如果一个当前在某个CPU上执行的进程,想知道自己的task_struct在哪里,又该怎么办呢?

这个艰巨的任务要交给thread_info这个结构。

/* Earlier layout of thread_info: it carries a pointer back to the task_struct. */
struct thread_info {
    struct task_struct    *task;        /* main task structure */
    __u32            flags;        /* low level flags */
    __u32            status;        /* thread synchronous flags */
    __u32            cpu;        /* current CPU */
    mm_segment_t        addr_limit;
    unsigned int        sig_on_uaccess_error:1;
    unsigned int        uaccess_err:1;    /* uaccess failed */
};

这里面有个成员变量task指向task_struct,所以我们常用current_thread_info()->task来获取task_struct。

/*
 * Return the current thread_info, computed as the value of
 * current_top_of_stack() minus THREAD_SIZE.
 */
static inline struct thread_info *current_thread_info(void)
{
    return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
}

而thread_info的位置就是内核栈的最高位置,减去THREAD_SIZE,就到了thread_info的起始地址。

但是现在变成这样了,只剩下一个flags。

/* Reduced thread_info: only the flags word remains. */
struct thread_info {
        unsigned long           flags;          /* low level flags */
};

那这时候怎么获取当前运行中的task_struct呢?current_thread_info有了新的实现方式。

在include/linux/thread_info.h中定义了current_thread_info。

/*
 * When thread_info is embedded in task_struct, current_thread_info() is the
 * current task pointer cast to struct thread_info *.
 * Each preprocessor directive is on its own line, and the opening #ifdef
 * that the trailing #endif belongs to is restored.
 */
#ifdef CONFIG_THREAD_INFO_IN_TASK
#include <asm/current.h>
#define current_thread_info() ((struct thread_info *)current)
#endif

那current又是什么呢?在arch/x86/include/asm/current.h中定义了。

struct task_struct;


/* Per-CPU variable holding a pointer to a task_struct. */
DECLARE_PER_CPU(struct task_struct *, current_task);


/* Read this CPU's current_task pointer via this_cpu_read_stable(). */
static __always_inline struct task_struct *get_current(void)
{
    return this_cpu_read_stable(current_task);
}


/*
 * current must expand to a call: it is used as a task_struct pointer
 * (e.g. current->flags), so the parentheses are required.
 */
#define current get_current()

到这里,你会发现,新的机制里面,每个CPU运行的task_struct不通过thread_info获取了,而是直接放在Per CPU 变量里面了。

多核情况下,CPU是同时运行的,但是它们共同使用其他的硬件资源的时候,我们需要解决多个CPU之间的同步问题。

Per CPU变量是内核中一种重要的同步机制。顾名思义,Per CPU变量就是为每个CPU构造一个变量的副本,这样多个CPU各自操作自己的副本,互不干涉。比如,当前进程的变量current_task就被声明为Per CPU变量。

要使用Per CPU变量,首先要声明这个变量,在arch/x86/include/asm/current.h中有:

DECLARE_PER_CPU(struct task_struct *, current_task);

然后是定义这个变量,在arch/x86/kernel/cpu/common.c中有:

DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;

也就是说,系统刚刚初始化的时候,current_task都指向init_task。

当某个CPU上的进程进行切换的时候,current_task被修改为将要切换到的目标进程。例如,进程切换函数__switch_to就会改变current_task。

/*
 * Excerpt of __switch_to(); the "......" lines stand for code omitted from
 * this listing. The part shown writes next_p to the per-CPU current_task
 * variable and returns prev_p.
 */
__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
......
/* Record next_p as current_task for this CPU. */
this_cpu_write(current_task, next_p);
......
return prev_p;
}

当要获取当前的运行中的task_struct的时候,就需要调用this_cpu_read_stable进行读取。

#define this_cpu_read_stable(var)       percpu_stable_op("mov", var)

好了,现在如果你是一个进程,正在某个CPU上运行,就能够轻松得到task_struct了。

总结时刻

这一节虽然只介绍了内核栈,但是内容更加重要。如果说task_struct的其他成员变量都是和进程管理有关的,内核栈是和进程运行有关系的。

我这里画了一张图总结一下32位和64位的工作模式,左边是32位的,右边是64位的。

  • 在用户态,应用程序进行了至少一次函数调用。32位和64位的传递参数的方式稍有不同,32位的就是用函数栈,64位的前6个参数用寄存器,其他的用函数栈。
  • 在内核态,32位和64位都使用内核栈,格式也稍有不同,主要集中在pt_regs结构上。
  • 在内核态,32位和64位的内核栈和task_struct的关联关系不同。32位主要靠thread_info,64位主要靠Per-CPU变量。

img

附录:task_struct结构体源码

struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/*
	 * For reasons of header soup (see current_thread_info()), this
	 * must be the first element of task_struct.
	 */
	struct thread_info		thread_info;
#endif
	/* -1 unrunnable, 0 runnable, >0 stopped: */
	volatile long			state;

	/*
	 * This begins the randomizable portion of task_struct. Only
	 * scheduling-critical items should be added above here.
	 */
	randomized_struct_fields_start

	void				*stack;
	atomic_t			usage;
	/* Per task flags (PF_*), defined further below: */
	unsigned int			flags;
	unsigned int			ptrace;

#ifdef CONFIG_SMP
	struct llist_node		wake_entry;
	int				on_cpu;
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/* Current CPU: */
	unsigned int			cpu;
#endif
	unsigned int			wakee_flips;
	unsigned long			wakee_flip_decay_ts;
	struct task_struct		*last_wakee;

	/*
	 * recent_used_cpu is initially set as the last CPU used by a task
	 * that wakes affine another task. Waker/wakee relationships can
	 * push tasks around a CPU where each wakeup moves to the next one.
	 * Tracking a recently used CPU allows a quick search for a recently
	 * used CPU that may be idle.
	 */
	int				recent_used_cpu;
	int				wake_cpu;
#endif
	int				on_rq;

	int				prio;
	int				static_prio;
	int				normal_prio;
	unsigned int			rt_priority;

	const struct sched_class	*sched_class;
	struct sched_entity		se;
	struct sched_rt_entity		rt;
#ifdef CONFIG_CGROUP_SCHED
	struct task_group		*sched_task_group;
#endif
	struct sched_dl_entity		dl;

#ifdef CONFIG_PREEMPT_NOTIFIERS
	/* List of struct preempt_notifier: */
	struct hlist_head		preempt_notifiers;
#endif

#ifdef CONFIG_BLK_DEV_IO_TRACE
	unsigned int			btrace_seq;
#endif

	unsigned int			policy;
	int				nr_cpus_allowed;
	cpumask_t			cpus_allowed;

#ifdef CONFIG_PREEMPT_RCU
	int				rcu_read_lock_nesting;
	union rcu_special		rcu_read_unlock_special;
	struct list_head		rcu_node_entry;
	struct rcu_node			*rcu_blocked_node;
#endif /* #ifdef CONFIG_PREEMPT_RCU */

#ifdef CONFIG_TASKS_RCU
	unsigned long			rcu_tasks_nvcsw;
	u8				rcu_tasks_holdout;
	u8				rcu_tasks_idx;
	int				rcu_tasks_idle_cpu;
	struct list_head		rcu_tasks_holdout_list;
#endif /* #ifdef CONFIG_TASKS_RCU */

	struct sched_info		sched_info;

	struct list_head		tasks;
#ifdef CONFIG_SMP
	struct plist_node		pushable_tasks;
	struct rb_node			pushable_dl_tasks;
#endif

	struct mm_struct		*mm;
	struct mm_struct		*active_mm;

	/* Per-thread vma caching: */
	struct vmacache			vmacache;

#ifdef SPLIT_RSS_COUNTING
	struct task_rss_stat		rss_stat;
#endif
	int				exit_state;
	int				exit_code;
	int				exit_signal;
	/* The signal sent when the parent dies: */
	int				pdeath_signal;
	/* JOBCTL_*, siglock protected: */
	unsigned long			jobctl;

	/* Used for emulating ABI behavior of previous Linux versions: */
	unsigned int			personality;

	/* Scheduler bits, serialized by scheduler locks: */
	unsigned			sched_reset_on_fork:1;
	unsigned			sched_contributes_to_load:1;
	unsigned			sched_migrated:1;
	unsigned			sched_remote_wakeup:1;
	/* Force alignment to the next boundary: */
	unsigned			:0;

	/* Unserialized, strictly 'current' */

	/* Bit to tell LSMs we're in execve(): */
	unsigned			in_execve:1;
	unsigned			in_iowait:1;
#ifndef TIF_RESTORE_SIGMASK
	unsigned			restore_sigmask:1;
#endif
#ifdef CONFIG_MEMCG
	unsigned			in_user_fault:1;
#ifdef CONFIG_MEMCG_KMEM
	unsigned			memcg_kmem_skip_account:1;
#endif
#endif
#ifdef CONFIG_COMPAT_BRK
	unsigned			brk_randomized:1;
#endif
#ifdef CONFIG_CGROUPS
	/* disallow userland-initiated cgroup migration */
	unsigned			no_cgroup_migration:1;
#endif
#ifdef CONFIG_BLK_CGROUP
	/* to be used once the psi infrastructure lands upstream. */
	unsigned			use_memdelay:1;
#endif

	unsigned long			atomic_flags; /* Flags requiring atomic access. */

	struct restart_block		restart_block;

	pid_t				pid;
	pid_t				tgid;

#ifdef CONFIG_STACKPROTECTOR
	/* Canary value for the -fstack-protector GCC feature: */
	unsigned long			stack_canary;
#endif
	/*
	 * Pointers to the (original) parent process, youngest child, younger sibling,
	 * older sibling, respectively.  (p->father can be replaced with
	 * p->real_parent->pid)
	 */

	/* Real parent process: */
	struct task_struct __rcu	*real_parent;

	/* Recipient of SIGCHLD, wait4() reports: */
	struct task_struct __rcu	*parent;

	/*
	 * Children/sibling form the list of natural children:
	 */
	struct list_head		children;
	struct list_head		sibling;
	struct task_struct		*group_leader;

	/*
	 * 'ptraced' is the list of tasks this task is using ptrace() on.
	 *
	 * This includes both natural children and PTRACE_ATTACH targets.
	 * 'ptrace_entry' is this task's link on the p->parent->ptraced list.
	 */
	struct list_head		ptraced;
	struct list_head		ptrace_entry;

	/* PID/PID hash table linkage. */
	struct pid			*thread_pid;
	struct hlist_node		pid_links[PIDTYPE_MAX];
	struct list_head		thread_group;
	struct list_head		thread_node;

	struct completion		*vfork_done;

	/* CLONE_CHILD_SETTID: */
	int __user			*set_child_tid;

	/* CLONE_CHILD_CLEARTID: */
	int __user			*clear_child_tid;

	u64				utime;
	u64				stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
	u64				utimescaled;
	u64				stimescaled;
#endif
	u64				gtime;
	struct prev_cputime		prev_cputime;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
	struct vtime			vtime;
#endif

#ifdef CONFIG_NO_HZ_FULL
	atomic_t			tick_dep_mask;
#endif
	/* Context switch counts: */
	unsigned long			nvcsw;
	unsigned long			nivcsw;

	/* Monotonic time in nsecs: */
	u64				start_time;

	/* Boot based time in nsecs: */
	u64				real_start_time;

	/* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */
	unsigned long			min_flt;
	unsigned long			maj_flt;

#ifdef CONFIG_POSIX_TIMERS
	struct task_cputime		cputime_expires;
	struct list_head		cpu_timers[3];
#endif

	/* Process credentials: */

	/* Tracer's credentials at attach: */
	const struct cred __rcu		*ptracer_cred;

	/* Objective and real subjective task credentials (COW): */
	const struct cred __rcu		*real_cred;

	/* Effective (overridable) subjective task credentials (COW): */
	const struct cred __rcu		*cred;

	/*
	 * executable name, excluding path.
	 *
	 * - normally initialized setup_new_exec()
	 * - access it with [gs]et_task_comm()
	 * - lock it with task_lock()
	 */
	char				comm[TASK_COMM_LEN];

	struct nameidata		*nameidata;

#ifdef CONFIG_SYSVIPC
	struct sysv_sem			sysvsem;
	struct sysv_shm			sysvshm;
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
	unsigned long			last_switch_count;
	unsigned long			last_switch_time;
#endif
	/* Filesystem information: */
	struct fs_struct		*fs;

	/* Open file information: */
	struct files_struct		*files;

	/* Namespaces: */
	struct nsproxy			*nsproxy;

	/* Signal handlers: */
	struct signal_struct		*signal;
	struct sighand_struct		*sighand;
	sigset_t			blocked;
	sigset_t			real_blocked;
	/* Restored if set_restore_sigmask() was used: */
	sigset_t			saved_sigmask;
	struct sigpending		pending;
	unsigned long			sas_ss_sp;
	size_t				sas_ss_size;
	unsigned int			sas_ss_flags;

	struct callback_head		*task_works;

	struct audit_context		*audit_context;
#ifdef CONFIG_AUDITSYSCALL
	kuid_t				loginuid;
	unsigned int			sessionid;
#endif
	struct seccomp			seccomp;

	/* Thread group tracking: */
	u32				parent_exec_id;
	u32				self_exec_id;

	/* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */
	spinlock_t			alloc_lock;

	/* Protection of the PI data structures: */
	raw_spinlock_t			pi_lock;

	struct wake_q_node		wake_q;

#ifdef CONFIG_RT_MUTEXES
	/* PI waiters blocked on a rt_mutex held by this task: */
	struct rb_root_cached		pi_waiters;
	/* Updated under owner's pi_lock and rq lock */
	struct task_struct		*pi_top_task;
	/* Deadlock detection and priority inheritance handling: */
	struct rt_mutex_waiter		*pi_blocked_on;
#endif

#ifdef CONFIG_DEBUG_MUTEXES
	/* Mutex deadlock detection: */
	struct mutex_waiter		*blocked_on;
#endif

#ifdef CONFIG_TRACE_IRQFLAGS
	unsigned int			irq_events;
	unsigned long			hardirq_enable_ip;
	unsigned long			hardirq_disable_ip;
	unsigned int			hardirq_enable_event;
	unsigned int			hardirq_disable_event;
	int				hardirqs_enabled;
	int				hardirq_context;
	unsigned long			softirq_disable_ip;
	unsigned long			softirq_enable_ip;
	unsigned int			softirq_disable_event;
	unsigned int			softirq_enable_event;
	int				softirqs_enabled;
	int				softirq_context;
#endif

#ifdef CONFIG_LOCKDEP
# define MAX_LOCK_DEPTH			48UL
	u64				curr_chain_key;
	int				lockdep_depth;
	unsigned int			lockdep_recursion;
	struct held_lock		held_locks[MAX_LOCK_DEPTH];
#endif

#ifdef CONFIG_UBSAN
	unsigned int			in_ubsan;
#endif

	/* Journalling filesystem info: */
	void				*journal_info;

	/* Stacked block device info: */
	struct bio_list			*bio_list;

#ifdef CONFIG_BLOCK
	/* Stack plugging: */
	struct blk_plug			*plug;
#endif

	/* VM state: */
	struct reclaim_state		*reclaim_state;

	struct backing_dev_info		*backing_dev_info;

	struct io_context		*io_context;

	/* Ptrace state: */
	unsigned long			ptrace_message;
	siginfo_t			*last_siginfo;

	struct task_io_accounting	ioac;
#ifdef CONFIG_TASK_XACCT
	/* Accumulated RSS usage: */
	u64				acct_rss_mem1;
	/* Accumulated virtual memory usage: */
	u64				acct_vm_mem1;
	/* stime + utime since last update: */
	u64				acct_timexpd;
#endif
#ifdef CONFIG_CPUSETS
	/* Protected by ->alloc_lock: */
	nodemask_t			mems_allowed;
	/* Seqence number to catch updates: */
	seqcount_t			mems_allowed_seq;
	int				cpuset_mem_spread_rotor;
	int				cpuset_slab_spread_rotor;
#endif
#ifdef CONFIG_CGROUPS
	/* Control Group info protected by css_set_lock: */
	struct css_set __rcu		*cgroups;
	/* cg_list protected by css_set_lock and tsk->alloc_lock: */
	struct list_head		cg_list;
#endif
#ifdef CONFIG_INTEL_RDT
	u32				closid;
	u32				rmid;
#endif
#ifdef CONFIG_FUTEX
	struct robust_list_head __user	*robust_list;
#ifdef CONFIG_COMPAT
	struct compat_robust_list_head __user *compat_robust_list;
#endif
	struct list_head		pi_state_list;
	struct futex_pi_state		*pi_state_cache;
#endif
#ifdef CONFIG_PERF_EVENTS
	struct perf_event_context	*perf_event_ctxp[perf_nr_task_contexts];
	struct mutex			perf_event_mutex;
	struct list_head		perf_event_list;
#endif
#ifdef CONFIG_DEBUG_PREEMPT
	unsigned long			preempt_disable_ip;
#endif
#ifdef CONFIG_NUMA
	/* Protected by alloc_lock: */
	struct mempolicy		*mempolicy;
	short				il_prev;
	short				pref_node_fork;
#endif
#ifdef CONFIG_NUMA_BALANCING
	int				numa_scan_seq;
	unsigned int			numa_scan_period;
	unsigned int			numa_scan_period_max;
	int				numa_preferred_nid;
	unsigned long			numa_migrate_retry;
	/* Migration stamp: */
	u64				node_stamp;
	u64				last_task_numa_placement;
	u64				last_sum_exec_runtime;
	struct callback_head		numa_work;

	struct numa_group		*numa_group;

	/*
	 * numa_faults is an array split into four regions:
	 * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
	 * in this precise order.
	 *
	 * faults_memory: Exponential decaying average of faults on a per-node
	 * basis. Scheduling placement decisions are made based on these
	 * counts. The values remain static for the duration of a PTE scan.
	 * faults_cpu: Track the nodes the process was running on when a NUMA
	 * hinting fault was incurred.
	 * faults_memory_buffer and faults_cpu_buffer: Record faults per node
	 * during the current scan window. When the scan completes, the counts
	 * in faults_memory and faults_cpu decay and these values are copied.
	 */
	unsigned long			*numa_faults;
	unsigned long			total_numa_faults;

	/*
	 * numa_faults_locality tracks if faults recorded during the last
	 * scan window were remote/local or failed to migrate. The task scan
	 * period is adapted based on the locality of the faults with different
	 * weights depending on whether they were shared or private faults
	 */
	unsigned long			numa_faults_locality[3];

	unsigned long			numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */

#ifdef CONFIG_RSEQ
	struct rseq __user *rseq;
	u32 rseq_len;
	u32 rseq_sig;
	/*
	 * RmW on rseq_event_mask must be performed atomically
	 * with respect to preemption.
	 */
	unsigned long rseq_event_mask;
#endif

	struct tlbflush_unmap_batch	tlb_ubc;

	struct rcu_head			rcu;

	/* Cache last used pipe for splice(): */
	struct pipe_inode_info		*splice_pipe;

	struct page_frag		task_frag;

#ifdef CONFIG_TASK_DELAY_ACCT
	struct task_delay_info		*delays;
#endif

#ifdef CONFIG_FAULT_INJECTION
	int				make_it_fail;
	unsigned int			fail_nth;
#endif
	/*
	 * When (nr_dirtied >= nr_dirtied_pause), it's time to call
	 * balance_dirty_pages() for a dirty throttling pause:
	 */
	int				nr_dirtied;
	int				nr_dirtied_pause;
	/* Start of a write-and-pause period: */
	unsigned long			dirty_paused_when;

#ifdef CONFIG_LATENCYTOP
	int				latency_record_count;
	struct latency_record		latency_record[LT_SAVECOUNT];
#endif
	/*
	 * Time slack values; these are used to round up poll() and
	 * select() etc timeout values. These are in nanoseconds.
	 */
	u64				timer_slack_ns;
	u64				default_timer_slack_ns;

#ifdef CONFIG_KASAN
	unsigned int			kasan_depth;
#endif

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	/* Index of current stored address in ret_stack: */
	int				curr_ret_stack;

	/* Stack of return addresses for return function tracing: */
	struct ftrace_ret_stack		*ret_stack;

	/* Timestamp for last schedule: */
	unsigned long long		ftrace_timestamp;

	/*
	 * Number of functions that haven't been traced
	 * because of depth overrun:
	 */
	atomic_t			trace_overrun;

	/* Pause tracing: */
	atomic_t			tracing_graph_pause;
#endif

#ifdef CONFIG_TRACING
	/* State flags for use by tracers: */
	unsigned long			trace;

	/* Bitmask and counter of trace recursion: */
	unsigned long			trace_recursion;
#endif /* CONFIG_TRACING */

#ifdef CONFIG_KCOV
	/* Coverage collection mode enabled for this task (0 if disabled): */
	unsigned int			kcov_mode;

	/* Size of the kcov_area: */
	unsigned int			kcov_size;

	/* Buffer for coverage collection: */
	void				*kcov_area;

	/* KCOV descriptor wired with this task or NULL: */
	struct kcov			*kcov;
#endif

#ifdef CONFIG_MEMCG
	struct mem_cgroup		*memcg_in_oom;
	gfp_t				memcg_oom_gfp_mask;
	int				memcg_oom_order;

	/* Number of pages to reclaim on returning to userland: */
	unsigned int			memcg_nr_pages_over_high;

	/* Used by memcontrol for targeted memcg charge: */
	struct mem_cgroup		*active_memcg;
#endif

#ifdef CONFIG_BLK_CGROUP
	struct request_queue		*throttle_queue;
#endif

#ifdef CONFIG_UPROBES
	struct uprobe_task		*utask;
#endif
#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
	unsigned int			sequential_io;
	unsigned int			sequential_io_avg;
#endif
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
	unsigned long			task_state_change;
#endif
	int				pagefault_disabled;
#ifdef CONFIG_MMU
	struct task_struct		*oom_reaper_list;
#endif
#ifdef CONFIG_VMAP_STACK
	struct vm_struct		*stack_vm_area;
#endif
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/* A live task holds one reference: */
	atomic_t			stack_refcount;
#endif
#ifdef CONFIG_LIVEPATCH
	int patch_state;
#endif
#ifdef CONFIG_SECURITY
	/* Used by LSM modules for access restriction: */
	void				*security;
#endif

	/*
	 * New fields for task_struct should be added above here, so that
	 * they are included in the randomized portion of task_struct.
	 */
	randomized_struct_fields_end

	/* CPU-specific state of this task: */
	struct thread_struct		thread;

	/*
	 * WARNING: on x86, 'thread_struct' contains a variable-sized
	 * structure.  It *MUST* be at the end of 'task_struct'.
	 *
	 * Do not put anything below here!
	 */
};

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值