进程控制块学习

橘子真好吃耶

已于 2023-10-11 13:47:25 修改

阅读量112

点赞数

分类专栏：进程管理文章标签：学习 linux

于 2023-10-07 13:33:54 首次发布

本文链接：https://blog.csdn.net/qq_43573047/article/details/133637182

版权

进程管理专栏收录该内容

3 篇文章 0 订阅

订阅专栏

通过top命令可以看到进程的相关信息

在 Ubuntu 下，top 命令可以监视即时的进程状态。通过man top查看了top的基本用法，在 top 中，按 u，再输入你的用户名，可以限定只显示以你的身份运行的进程，更方便观察。按 h 可得到帮助。

在这里插入图片描述

打印task_struct字段信息

可选：通过传递模块的参数，打印特定进程的相关信息

操作系统为了对进程更好的管理，专门用一个结构体来保存进程的相关的信息，这个结构体叫task_struct

在源码中查找有关这个结构体的信息，在/include/linux/sched.h中定义如下：

/*
 * Process control block (PCB): one instance describes one task.
 *
 * This listing appears to be an abridged copy of the definition in
 * include/linux/sched.h; configuration-dependent members are not shown.
 */
struct task_struct {

	unsigned int			__state;
	/*
	 * This begins the randomizable portion of task_struct. Only
	 * scheduling-critical items should be added above here.
	 */
	randomized_struct_fields_start

	void				*stack;
	refcount_t			usage;
	/* Per task flags (PF_*), defined further below: */
	unsigned int			flags;
	unsigned int			ptrace;
	int				on_rq;

	int				prio;
	int				static_prio;
	int				normal_prio;
	unsigned int			rt_priority;

	struct sched_entity		se;
	struct sched_rt_entity		rt;
	struct sched_dl_entity		dl;
	const struct sched_class	*sched_class;

	struct sched_statistics         stats;
	unsigned int			policy;
	int				nr_cpus_allowed;
	const cpumask_t			*cpus_ptr;
	cpumask_t			*user_cpus_ptr;
	cpumask_t			cpus_mask;
	void				*migration_pending;

	unsigned short			migration_flags;
	struct sched_info		sched_info;

	/* List node linking this task into the list of all processes (head: init_task.tasks). */
	struct list_head		tasks;
	struct mm_struct		*mm;
	struct mm_struct		*active_mm;

	int				exit_state;
	int				exit_code;
	int				exit_signal;
	/* The signal sent when the parent dies: */
	int				pdeath_signal;
	/* JOBCTL_*, siglock protected: */
	unsigned long			jobctl;

	/* Used for emulating ABI behavior of previous Linux versions: */
	unsigned int			personality;

	/* Scheduler bits, serialized by scheduler locks: */
	unsigned			sched_reset_on_fork:1;
	unsigned			sched_contributes_to_load:1;
	unsigned			sched_migrated:1;

	/* Force alignment to the next boundary: */
	unsigned			:0;

	/* Unserialized, strictly 'current' */

	/*
	 * This field must not be in the scheduler word above due to wakelist
	 * queueing no longer being serialized by p->on_cpu. However:
	 *
	 * p->XXX = X;			ttwu()
	 * schedule()			  if (p->on_rq && ..) // false
	 *   smp_mb__after_spinlock();	  if (smp_load_acquire(&p->on_cpu) && //true
	 *   deactivate_task()		      ttwu_queue_wakelist())
	 *     p->on_rq = 0;			p->sched_remote_wakeup = Y;
	 *
	 * guarantees all stores of 'current' are visible before
	 * ->sched_remote_wakeup gets used, so it can be in this word.
	 */
	unsigned			sched_remote_wakeup:1;

	/* Bit to tell LSMs we're in execve(): */
	unsigned			in_execve:1;
	unsigned			in_iowait:1;
	unsigned long			atomic_flags; /* Flags requiring atomic access. */

	struct restart_block		restart_block;

	pid_t				pid;		/* ID of this task */
	pid_t				tgid;		/* thread group ID (shared by all threads of one process) */

	/* Real parent process: */
	struct task_struct __rcu	*real_parent;	/* the process that created this one */

	/* Recipient of SIGCHLD, wait4() reports: */
	struct task_struct __rcu	*parent;	/* current ("adoptive") parent */

	/*
	 * Children/sibling form the list of natural children:
	 */
	struct list_head		children;	/* head of the list of child processes */
	struct list_head		sibling;	/* node in the parent's children list */
	struct task_struct		*group_leader;	/* leader of this task's thread group */

	/*
	 * 'ptraced' is the list of tasks this task is using ptrace() on.
	 *
	 * This includes both natural children and PTRACE_ATTACH targets.
	 * 'ptrace_entry' is this task's link on the p->parent->ptraced list.
	 */
	struct list_head		ptraced;
	struct list_head		ptrace_entry;

	/* PID/PID hash table linkage. */
	struct pid			*thread_pid;
	struct hlist_node		pid_links[PIDTYPE_MAX];
	struct list_head		thread_group;
	struct list_head		thread_node;

	struct completion		*vfork_done;

	/* CLONE_CHILD_SETTID: */
	int __user			*set_child_tid;

	/* CLONE_CHILD_CLEARTID: */
	int __user			*clear_child_tid;

	/* PF_KTHREAD | PF_IO_WORKER */
	void				*worker_private;

	u64				utime;
	u64				stime;

	u64				gtime;
	struct prev_cputime		prev_cputime;

	/* Context switch counts: */
	unsigned long			nvcsw;
	unsigned long			nivcsw;

	/* Monotonic time in nsecs: */
	u64				start_time;

	/* Boot based time in nsecs: */
	u64				start_boottime;

	/* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */
	unsigned long			min_flt;
	unsigned long			maj_flt;

	/* Empty if CONFIG_POSIX_CPUTIMERS=n */
	struct posix_cputimers		posix_cputimers;

	/* Process credentials: */

	/* Tracer's credentials at attach: */
	const struct cred __rcu		*ptracer_cred;

	/* Objective and real subjective task credentials (COW): */
	const struct cred __rcu		*real_cred;

	/* Effective (overridable) subjective task credentials (COW): */
	const struct cred __rcu		*cred;

	/* Executable name, excluding the path. */
	char				comm[TASK_COMM_LEN];

	struct nameidata		*nameidata;
	/* Filesystem information: */
	struct fs_struct		*fs;

	/* Open file information: */
	struct files_struct		*files;
	/* Namespaces: */
	struct nsproxy			*nsproxy;

	/* Signal handlers: */
	struct signal_struct		*signal;
	struct sighand_struct __rcu		*sighand;
	sigset_t			blocked;
	sigset_t			real_blocked;
	/* Restored if set_restore_sigmask() was used: */
	sigset_t			saved_sigmask;
	struct sigpending		pending;
	unsigned long			sas_ss_sp;
	size_t				sas_ss_size;
	unsigned int			sas_ss_flags;

	struct callback_head		*task_works;
	struct seccomp			seccomp;
	struct syscall_user_dispatch	syscall_dispatch;

	/* Thread group tracking: */
	u64				parent_exec_id;
	u64				self_exec_id;

	/* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */
	spinlock_t			alloc_lock;

	/* Protection of the PI data structures: */
	raw_spinlock_t			pi_lock;

	struct wake_q_node		wake_q;
	/* Journalling filesystem info: */
	void				*journal_info;

	/* Stacked block device info: */
	struct bio_list			*bio_list;

	/* Stack plugging: */
	struct blk_plug			*plug;

	/* VM state: */
	struct reclaim_state		*reclaim_state;

	struct io_context		*io_context;
	/* Ptrace state: */
	unsigned long			ptrace_message;
	kernel_siginfo_t		*last_siginfo;

	struct task_io_accounting	ioac;
	struct tlbflush_unmap_batch	tlb_ubc;

	/* Cache last used pipe for splice(): */
	struct pipe_inode_info		*splice_pipe;

	struct page_frag		task_frag;

	/*
	 * When (nr_dirtied >= nr_dirtied_pause), it's time to call
	 * balance_dirty_pages() for a dirty throttling pause:
	 */
	int				nr_dirtied;
	int				nr_dirtied_pause;
	/* Start of a write-and-pause period: */
	unsigned long			dirty_paused_when;
	/*
	 * Time slack values; these are used to round up poll() and
	 * select() etc timeout values. These are in nanoseconds.
	 */
	u64				timer_slack_ns;
	u64				default_timer_slack_ns;

	struct rcu_head			rcu;
	refcount_t			rcu_users;
	int				pagefault_disabled;

	/*
	 * Closes the randomizable portion opened by
	 * randomized_struct_fields_start above; the two markers must be paired.
	 */
	randomized_struct_fields_end
};

思路

系统中的进程数量巨大，为了方便管理，内核把所有进程的PCB串成一个进程链表：每个task_struct中都嵌入了一个list_head类型的tasks字段，各个进程通过这个字段首尾相连，构成一个循环双向链表。其大概结构如图所示：

在这里插入图片描述

其中进程链表的头指针和尾指针均是init_task，这个PCB是0号进程的，0号进程是一直存在于系统中的，不会被撤销。因此可以通过以前学习的链表的相关知识，遍历系统中的进程链表，进而访问每一个进程的PCB，从而打印进程的相关信息。

可以看出来，task_struct的成员有很多个，在这块我将打印以下几个属性：

cur->pid 进程号

cur->comm 进程名

cur->__state 进程状态

cur->exit_state 进程退出的状态

cur->exit_code  进程正常终止的状态码

cur->exit_signal 进程异常终止的信号

(cur->parent)->pid 父进程的pid
(cur->parent)->comm 父进程名

(cur->real_parent)->pid  亲生父亲进程的pid
(cur->real_parent)->comm  亲生父亲进程名

//在较新的内核中（包括上文task_struct所属的版本），utime和stime是以纳秒为单位的u64值；早期内核中它们以jiffies为单位。jiffies在早期内核的 kernel/sched.c 文件中定义为一个全局变量：
	long volatile jiffies=0;
它记录了从开机到当前时间的时钟中断发生次数
typedef unsigned long long	u64;


u64				utime;//运行在用户空间的CPU时间
u64				stime;//运行在内核空间的CPU时间

编写代码

Makefile

# Kbuild input: build prmod.c into the loadable module prmod.ko.
obj-m += prmod.o

# Directory that holds the module sources (where make is invoked).
CURRENT_PATH := $(shell pwd)
# Release string of the running kernel.
LINUX_KERNEL := $(shell uname -r)
# Build tree of the running kernel.
LINUX_KERNEL_PATH := /lib/modules/$(LINUX_KERNEL)/build

.PHONY: all clean

# Use $(MAKE) rather than a literal "make" so that options such as -j and -n
# are passed down to the kernel build; use CURRENT_PATH instead of the
# environment variable PWD, which is not guaranteed to be set (e.g. under sudo).
all:
	$(MAKE) -C $(LINUX_KERNEL_PATH) M=$(CURRENT_PATH) modules

clean:
	$(MAKE) -C $(LINUX_KERNEL_PATH) M=$(CURRENT_PATH) clean

prmod.c

#include <linux/module.h>
#include <linux/init.h>
#include <linux/init_task.h>
#include<linux/list.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/kernel.h> 
/*
 * my_print_init() - module entry point.
 *
 * Walks the list of all processes that starts at init_task (the PCB of
 * process 0) and logs a selection of task_struct fields for each entry,
 * followed by the total number of processes seen.
 *
 * Return: always 0, so the module stays loaded.
 */
static int __init my_print_init(void)
{
	struct task_struct *p;
	int count = 0;	/* number of processes visited */

	pr_info("there are some infomation about processes\n");

	/*
	 * Tasks can exit while we walk the list. Holding the RCU read lock
	 * keeps every task_struct we look at (including ->parent) from being
	 * freed under us. The kernel's own helper for this walk is
	 * for_each_process() from <linux/sched/signal.h>.
	 */
	rcu_read_lock();
	/* The walk starts after the head, so init_task itself is not printed. */
	list_for_each_entry(p, &init_task.tasks, tasks) {
		const struct task_struct *parent = rcu_dereference(p->parent);

		count++;
		pr_info("第%d个进程信息如下：\n", count);
		/* __state is unsigned int (%u); utime/stime are u64 (%llu). */
		pr_info("name: %s, pid: %d, state: %u, exit_state: %d, exit_code: %d, exit_signal: %d, parent_pid: %d, parent_name: %s,  utime: %llu, stime: %llu\n",
			p->comm, p->pid, READ_ONCE(p->__state), p->exit_state,
			p->exit_code, p->exit_signal, parent->pid, parent->comm,
			p->utime, p->stime);
	}
	rcu_read_unlock();

	pr_info("总共有%d个进程\n", count);
	return 0;
}
/* Module exit point: nothing was allocated at load time, so it only logs. */
static void __exit my_print_exit(void)
{
	/* pr_info() attaches an explicit log level, unlike a bare printk(). */
	pr_info("Finished!\n");
}

/* Register the load/unload handlers and declare the module license. */
module_init(my_print_init);
module_exit(my_print_exit);
MODULE_LICENSE("GPL");

运行结果

在这里插入图片描述

传参访问特定的进程

思路

要实现对进程的快速查找，链表相对来说是要花费大量时间的，因此需要一种按pid直接索引的结构。早期的linux内核使用哈希表，通过宏pid_hashfn把进程的pid转化成表的索引；较新的内核（如下文源码所示）改用idr来完成从pid号到struct pid的映射。linux当中提供了一些从pid获取到pcb的接口函数，例如find_get_pid()和pid_task()。

在源码当中查找，发现find_get_pid定义在/kernel/pid.c中

/*
 * Look up the struct pid for PID number @nr via find_vpid() and take a
 * reference on it via get_pid(). Both steps run inside an RCU read-side
 * critical section. Returns whatever get_pid() returns.
 */
struct pid *find_get_pid(pid_t nr)
{
	struct pid *found;

	rcu_read_lock();
	found = find_vpid(nr);
	found = get_pid(found);
	rcu_read_unlock();

	return found;
}

再查看find_vpid和get_pid的代码：

// Find the struct pid for PID number nr, looking it up in the PID namespace
// that task_active_pid_ns() reports for the current task.
struct pid *find_vpid(int nr)
{
	return find_pid_ns(nr, task_active_pid_ns(current));
}

/*
 * Look up PID number @nr in namespace @ns. The namespace's idr maps IDs to
 * pointers; the entry stored under @nr is returned as a struct pid pointer.
 *
 * idr_find() - Return pointer for given ID.
 * @idr: IDR handle.
 * @id: Pointer ID.
 *
 * Looks up the pointer associated with this ID.  A %NULL pointer may
 * indicate that @id is not allocated or that the %NULL pointer was
 * associated with this ID.
 *
 * This function can be called under rcu_read_lock(), given that the leaf
 * pointers lifetimes are correctly managed.
 *
 * Return: The pointer associated with this ID.
 */
struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
{
	struct pid *entry;

	entry = idr_find(&ns->idr, nr);
	return entry;
}

// Return the PID namespace of the given task: take the task's struct pid
// (task_pid) and ask ns_of_pid() for the namespace it belongs to.
struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
{
	return ns_of_pid(task_pid(tsk));
}

/*
 * Return the PID namespace recorded in pid->numbers[] at index pid->level.
 * A NULL @pid yields NULL; nothing is assigned to @pid.
 */
static inline struct pid_namespace *ns_of_pid(struct pid *pid)
{
	if (!pid)
		return NULL;

	return pid->numbers[pid->level].ns;
}

// Return the struct pid pointer stored in the task's thread_pid field.
static inline struct pid *task_pid(struct task_struct *task)
{
	return task->thread_pid; 
}

find_vpid的调用顺序如下：

在这里插入图片描述

get_pid代码：

static inline struct pid *get_pid(struct pid *pid)
{
	if (pid)
		refcount_inc(&pid->count);//增加结构体的引用计数
	return pid;   //返回该pid结构体
}

因此，find_get_pid是给定pid号找到pid号对应的struct pid指针，并把该结构体的引用计数加1；调用者使用完毕后应当调用put_pid()把这个引用释放掉

pid_task也定义在kernel/pid.c中，查看源码：

/*
 * Return the task_struct attached to @pid for the given @type, i.e. the
 * task that owns the first node of the pid->tasks[type] list.
 * Returns NULL when @pid is NULL or that list is empty.
 */
struct task_struct *pid_task(struct pid *pid, enum pid_type type)
{
	struct task_struct *result = NULL;
	if (pid) {
		struct hlist_node *first;
		// rcu_dereference_check() is the macro used to read an RCU-protected pointer
		first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
		// second argument: lockdep condition telling whether the task-list lock is held
					      lockdep_tasklist_lock_is_held());
		if (first)
            // first is the pid_links[type] node embedded in a task_struct;
            // hlist_entry() converts it back to the enclosing task_struct
			result = hlist_entry(first, struct task_struct, pid_links[(type)]);
	}
	return result;
}

// Head of a hash list: holds only a pointer to the first node.
struct hlist_head {
	struct hlist_node *first;
};

// Node of a hash list: next points to the following node; pprev is a pointer
// to a node pointer (presumably the previous node's next field, or the
// head's first field for the first node).
struct hlist_node {
	struct hlist_node *next, **pprev;
};
// hlist_first_rcu(head): access the first field of a hash-list head through
// an __rcu-annotated pointer type.
#define hlist_first_rcu(head)	(*((struct hlist_node __rcu **)(&(head)->first)))
// hlist_entry(ptr, type, member): given a pointer to the member field, return
// a pointer to the enclosing structure of the given type (via container_of).
#define hlist_entry(ptr, type, member) container_of(ptr,type,member)

综上所述，通过find_get_pid获取指定pid的pid结构体指针，再通过pid_task查找哈希表，并返回对应的PCB，然后就可以访问该进程的一切信息啦

编写代码

code8.c

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h>  
#include <linux/types.h>
#include<linux/pid.h>

int my_pid = 5;
module_param(my_pid,int,0644);
/*
 * my_test_init() - module entry point.
 *
 * Resolves the module parameter my_pid to its task_struct (PID number ->
 * struct pid -> task_struct) and logs a selection of its fields. If no such
 * process exists, a notice is logged instead.
 *
 * Return: always 0, so the module stays loaded even when the PID is unknown.
 */
static int __init my_test_init(void)
{
	struct pid *pid;
	struct task_struct *p;

	/* find_get_pid() takes a reference on the struct pid; dropped below. */
	pid = find_get_pid(my_pid);

	pr_info("pid为%d的信息如下：\n", my_pid);

	/*
	 * pid_task() reads an RCU-protected pointer, and the task (and its
	 * parent) may exit at any time, so look it up and use it inside one
	 * RCU read-side critical section.
	 */
	rcu_read_lock();
	p = pid_task(pid, PIDTYPE_PID);
	if (p) {
		const struct task_struct *parent = rcu_dereference(p->parent);

		/* __state is unsigned int (%u); utime/stime are u64 (%llu). */
		pr_info("name: %s, pid: %d, state: %u, exit_state: %d, exit_code: %d, exit_signal: %d, parent_pid: %d, parent_name: %s,  utime: %llu, stime: %llu\n",
			p->comm, p->pid, READ_ONCE(p->__state), p->exit_state,
			p->exit_code, p->exit_signal, parent->pid, parent->comm,
			p->utime, p->stime);
	} else {
		pr_info("no process with pid %d was found\n", my_pid);
	}
	rcu_read_unlock();

	/* Release the reference taken by find_get_pid(); NULL is accepted. */
	put_pid(pid);

	return 0;
}
/* Module exit point: nothing was allocated at load time, so it only logs. */
static void __exit my_test_exit(void)
{
	/* pr_info() attaches an explicit log level, unlike a bare printk(). */
	pr_info("goodbye\n");
}

/* Register the load/unload handlers and declare the module license. */
module_init(my_test_init);
module_exit(my_test_exit);
MODULE_LICENSE("GPL");

Makefile

# Kbuild input: build code8.c into the loadable module code8.ko.
obj-m += code8.o

# Directory that holds the module sources (where make is invoked).
CURRENT_PATH := $(shell pwd)
# Release string of the running kernel.
LINUX_KERNEL := $(shell uname -r)
# Build tree of the running kernel.
LINUX_KERNEL_PATH := /lib/modules/$(LINUX_KERNEL)/build

.PHONY: all clean

# Use $(MAKE) rather than a literal "make" so that options such as -j and -n
# are passed down to the kernel build; use CURRENT_PATH instead of the
# environment variable PWD, which is not guaranteed to be set (e.g. under sudo).
all:
	$(MAKE) -C $(LINUX_KERNEL_PATH) M=$(CURRENT_PATH) modules

clean:
	$(MAKE) -C $(LINUX_KERNEL_PATH) M=$(CURRENT_PATH) clean

运行结果

插入模块后直接打印信息，打印的为pid为5的进程信息。

在这里插入图片描述

在插入模块的时候，重设要打印的pid为2，则打印pid为2的进程信息。

在这里插入图片描述

打印兄弟进程以及子进程

在这里插入图片描述

上图清晰的表明了task_struct结构中的parent、children、sibling之间的关系，可以看到：

（1）sibling.next在下一个兄弟进程存在时，指向下一个兄弟进程的sibling成员；若不存在下一个兄弟进程，则指向父进程的children成员（即链表头）。sibling.prev指向前一个兄弟进程的sibling成员；若没有前一个兄弟进程，同样指向父进程的children成员。

sibling是将所有的兄弟进程都链接成一个链表，链表头是父进程的children成员

（2）children.next指向该进程（作为父进程）的第一个子进程的sibling成员，而children.prev指向它的最后一个子进程的sibling成员；若该进程没有子进程，二者都指向children自身。

编写代码

# Kbuild input: build brother.c into the loadable module brother.ko.
obj-m += brother.o

# Directory that holds the module sources (where make is invoked).
CURRENT_PATH := $(shell pwd)
# Release string of the running kernel.
LINUX_KERNEL := $(shell uname -r)
# Build tree of the running kernel.
LINUX_KERNEL_PATH := /lib/modules/$(LINUX_KERNEL)/build

.PHONY: all clean

# Use $(MAKE) rather than a literal "make" so that options such as -j and -n
# are passed down to the kernel build; use CURRENT_PATH instead of the
# environment variable PWD, which is not guaranteed to be set (e.g. under sudo).
all:
	$(MAKE) -C $(LINUX_KERNEL_PATH) M=$(CURRENT_PATH) modules

clean:
	$(MAKE) -C $(LINUX_KERNEL_PATH) M=$(CURRENT_PATH) clean

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/init_task.h>   
#include <linux/types.h>
#include <linux/atomic.h>

/*
 * print_pcb() - module entry point.
 *
 * Walks the list of all processes that starts at init_task (the PCB of
 * process 0). For every process it logs the process itself, then each of its
 * children, then each of its siblings, and finally the number of processes.
 *
 * Return: always 0, so the module stays loaded.
 */
static int __init print_pcb(void)
{
	struct task_struct *p, *child, *bro;
	int count = 0;	/* number of processes visited */

	pr_info("progress begin...\n");

	/*
	 * The RCU read lock keeps the task_structs we touch from being freed
	 * while we read them.
	 * NOTE(review): the children/sibling lists are normally guarded by
	 * tasklist_lock, which is presumably not available to modules, so the
	 * family walk below is best-effort on a busy system.
	 */
	rcu_read_lock();
	/* The walk starts after the head, so init_task itself is not printed. */
	list_for_each_entry(p, &init_task.tasks, tasks) {
		/* A task is linked on its real parent's children list. */
		const struct task_struct *natural = rcu_dereference(p->real_parent);

		count++;
		pr_info("第%d个进程信息如下：\n", count);
		pr_info("name: %s, pid: %d, parent_pid: %d\n",
			p->comm, p->pid, rcu_dereference(p->parent)->pid);

		/* Children: nodes on p->children are the children's sibling fields. */
		list_for_each_entry(child, &p->children, sibling)
			pr_info("子进程信息======> name: %s, pid: %d, parent_pid: %d\n",
				child->comm, child->pid,
				rcu_dereference(child->parent)->pid);

		/* Siblings: every other entry on the real parent's children list. */
		list_for_each_entry(bro, &natural->children, sibling) {
			if (bro == p)
				continue;	/* a process is not its own sibling */
			pr_info("兄弟进程信息======> name: %s, pid: %d, parent_pid: %d\n",
				bro->comm, bro->pid,
				rcu_dereference(bro->parent)->pid);
		}
	}
	rcu_read_unlock();

	pr_info("进程的个数:%d\n", count);

	return 0;
}
/* Module exit point: nothing was allocated at load time, so it only logs. */
static void __exit exit_pcb(void)
{
	/* pr_info() attaches an explicit log level, unlike a bare printk(). */
	pr_info("Exiting...\n");
}

/* Register the load/unload handlers and declare the module license. */
module_init(print_pcb);
module_exit(exit_pcb);

MODULE_LICENSE("GPL");

运行结果

用pstree打印系统进程树如下：

在这里插入图片描述

可以看出，它把进程gvfsd-metadata的所有兄弟进程都打印出来了

小结

通过这次实验，首先复习了之前学习的链表以及模块传参部分内容，使我更加熟练的运用所学知识。其次学习了进程相关的代码，对书写代码方面有很大提升，在传参部分我遇到了问题，用了find_task_by_pid()这个函数后编译的时候发现报错，于是在源码里面搜索这个函数也没找到，然后问了chatgpt，大概的解释就是内核版本不同导致该函数已不存在，然后chatgpt又推荐了相关函数，我在源码里面找到之后学习了相关函数并应用到代码之中，最终能实现从给定的pid号找到对应的PCB，这次实验收获很大！还有对于进程之间的亲属关系，在徐同学的帮助下，我对进程的亲属关系理解得更加深刻！

橘子真好吃耶

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
1
评论
进程控制块学习

其次学习了进程相关的代码，对书写代码方面有很大提升，在传参部分我遇到了问题，用了find__task_by_pid()这个函数后编译的时候发现报错，于是在源码里面搜索这个函数也没找到，然后问了chatgpt，大概的解释就是版本问题或者操作系统，然后chatgpt又推荐了相关函数，我在源码里面找到之后学习了相关函数并应用到代码之中，最终能实现从给定的pid号找到对应的PCB，这次实验收获很大！因此可以通过以前学习的链表的相关知识，遍历系统中的进程链表，进而访问每一个进程的PCB，从而打印进程的相关信息。
复制链接

扫一扫