通过top命令可以看到进程的相关信息
在 Ubuntu 下,top
命令可以监视即时的进程状态。通过man top查看了top的基本用法,在 top 中,按 u,再输入你的用户名,可以限定只显示以你的身份运行的进程,更方便观察。按 h 可得到帮助。
打印task_struct字段信息
可选:通过传递模块的参数,打印特定进程的相关信息
操作系统为了对进程更好的管理,专门用一个结构体来保存进程的相关的信息,这个结构体叫task_struct
在源码中查找有关这个结构体的信息,在/include/linux/sched.h
中定义如下:
/*
 * Excerpt of struct task_struct as found in include/linux/sched.h: the
 * per-task descriptor (PCB) that the kernel keeps for each task.
 *
 * NOTE(review): this is an abridged listing; the in-tree definition may
 * contain additional members not reproduced here.
 */
struct task_struct {
	unsigned int			__state;

	/*
	 * This begins the randomizable portion of task_struct. Only
	 * scheduling-critical items should be added above here.
	 */
	randomized_struct_fields_start

	void				*stack;
	refcount_t			usage;
	/* Per task flags (PF_*), defined further below: */
	unsigned int			flags;
	unsigned int			ptrace;

	int				on_rq;

	int				prio;
	int				static_prio;
	int				normal_prio;
	unsigned int			rt_priority;

	struct sched_entity		se;
	struct sched_rt_entity		rt;
	struct sched_dl_entity		dl;
	const struct sched_class	*sched_class;

	struct sched_statistics		stats;

	unsigned int			policy;
	int				nr_cpus_allowed;
	const cpumask_t			*cpus_ptr;
	cpumask_t			*user_cpus_ptr;
	cpumask_t			cpus_mask;
	void				*migration_pending;
	unsigned short			migration_flags;

	struct sched_info		sched_info;

	/* List node linking this task into the process list headed by init_task.tasks. */
	struct list_head		tasks;

	struct mm_struct		*mm;
	struct mm_struct		*active_mm;

	int				exit_state;
	int				exit_code;
	int				exit_signal;
	/* The signal sent when the parent dies: */
	int				pdeath_signal;
	/* JOBCTL_*, siglock protected: */
	unsigned long			jobctl;

	/* Used for emulating ABI behavior of previous Linux versions: */
	unsigned int			personality;

	/* Scheduler bits, serialized by scheduler locks: */
	unsigned			sched_reset_on_fork:1;
	unsigned			sched_contributes_to_load:1;
	unsigned			sched_migrated:1;

	/* Force alignment to the next boundary: */
	unsigned			:0;

	/* Unserialized, strictly 'current' */

	/*
	 * This field must not be in the scheduler word above due to wakelist
	 * queueing no longer being serialized by p->on_cpu. However:
	 *
	 * p->XXX = X;                     ttwu()
	 * schedule()                        if (p->on_rq && ..) // false
	 *   smp_mb__after_spinlock();       if (smp_load_acquire(&p->on_cpu) && //true
	 *   deactivate_task()                   ttwu_queue_wakelist())
	 *     p->on_rq = 0;                       p->sched_remote_wakeup = Y;
	 *
	 * guarantees all stores of 'current' are visible before
	 * ->sched_remote_wakeup gets used, so it can be in this word.
	 */
	unsigned			sched_remote_wakeup:1;

	/* Bit to tell LSMs we're in execve(): */
	unsigned			in_execve:1;
	unsigned			in_iowait:1;

	unsigned long			atomic_flags; /* Flags requiring atomic access. */

	struct restart_block		restart_block;

	pid_t				pid;	/* ID of this task */
	pid_t				tgid;	/* ID of the thread group this task belongs to */

	/* Real parent process: */
	struct task_struct __rcu	*real_parent;	/* the "biological" parent */

	/* Recipient of SIGCHLD, wait4() reports: */
	struct task_struct __rcu	*parent;	/* the "adoptive" parent */

	/*
	 * Children/sibling form the list of natural children:
	 */
	struct list_head		children;	/* head of this task's list of children */
	struct list_head		sibling;	/* this task's link in its parent's children list */
	struct task_struct		*group_leader;	/* leader of the thread group */

	/*
	 * 'ptraced' is the list of tasks this task is using ptrace() on.
	 *
	 * This includes both natural children and PTRACE_ATTACH targets.
	 * 'ptrace_entry' is this task's link on the p->parent->ptraced list.
	 */
	struct list_head		ptraced;
	struct list_head		ptrace_entry;

	/* PID/PID hash table linkage. */
	struct pid			*thread_pid;
	struct hlist_node		pid_links[PIDTYPE_MAX];
	struct list_head		thread_group;
	struct list_head		thread_node;

	struct completion		*vfork_done;

	/* CLONE_CHILD_SETTID: */
	int __user			*set_child_tid;

	/* CLONE_CHILD_CLEARTID: */
	int __user			*clear_child_tid;

	/* PF_KTHREAD | PF_IO_WORKER */
	void				*worker_private;

	u64				utime;
	u64				stime;
	u64				gtime;
	struct prev_cputime		prev_cputime;

	/* Context switch counts: */
	unsigned long			nvcsw;
	unsigned long			nivcsw;

	/* Monotonic time in nsecs: */
	u64				start_time;

	/* Boot based time in nsecs: */
	u64				start_boottime;

	/* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */
	unsigned long			min_flt;
	unsigned long			maj_flt;

	/* Empty if CONFIG_POSIX_CPUTIMERS=n */
	struct posix_cputimers		posix_cputimers;

	/* Process credentials: */

	/* Tracer's credentials at attach: */
	const struct cred __rcu		*ptracer_cred;

	/* Objective and real subjective task credentials (COW): */
	const struct cred __rcu		*real_cred;

	/* Effective (overridable) subjective task credentials (COW): */
	const struct cred __rcu		*cred;

	/* Executable name, excluding the path. */
	char				comm[TASK_COMM_LEN];

	struct nameidata		*nameidata;

	/* Filesystem information: */
	struct fs_struct		*fs;

	/* Open file information: */
	struct files_struct		*files;

	/* Namespaces: */
	struct nsproxy			*nsproxy;

	/* Signal handlers: */
	struct signal_struct		*signal;
	struct sighand_struct __rcu	*sighand;
	sigset_t			blocked;
	sigset_t			real_blocked;
	/* Restored if set_restore_sigmask() was used: */
	sigset_t			saved_sigmask;
	struct sigpending		pending;
	unsigned long			sas_ss_sp;
	size_t				sas_ss_size;
	unsigned int			sas_ss_flags;

	struct callback_head		*task_works;

	struct seccomp			seccomp;
	struct syscall_user_dispatch	syscall_dispatch;

	/* Thread group tracking: */
	u64				parent_exec_id;
	u64				self_exec_id;

	/* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */
	spinlock_t			alloc_lock;

	/* Protection of the PI data structures: */
	raw_spinlock_t			pi_lock;

	struct wake_q_node		wake_q;

	/* Journalling filesystem info: */
	void				*journal_info;

	/* Stacked block device info: */
	struct bio_list			*bio_list;

	/* Stack plugging: */
	struct blk_plug			*plug;

	/* VM state: */
	struct reclaim_state		*reclaim_state;

	struct io_context		*io_context;

	/* Ptrace state: */
	unsigned long			ptrace_message;
	kernel_siginfo_t		*last_siginfo;

	struct task_io_accounting	ioac;

	struct tlbflush_unmap_batch	tlb_ubc;

	/* Cache last used pipe for splice(): */
	struct pipe_inode_info		*splice_pipe;

	struct page_frag		task_frag;

	/*
	 * When (nr_dirtied >= nr_dirtied_pause), it's time to call
	 * balance_dirty_pages() for a dirty throttling pause:
	 */
	int				nr_dirtied;
	int				nr_dirtied_pause;
	/* Start of a write-and-pause period: */
	unsigned long			dirty_paused_when;

	/*
	 * Time slack values; these are used to round up poll() and
	 * select() etc timeout values. These are in nanoseconds.
	 */
	u64				timer_slack_ns;
	u64				default_timer_slack_ns;

	struct rcu_head			rcu;
	refcount_t			rcu_users;
	int				pagefault_disabled;
};
思路
系统中的进程数量巨大,为了方便管理,于是推出了进程链表的概念,每个进程链表由指向PCB的指针组成,在struct task_struct中定义为tasks
字段。其大概结构如图所示:
其中进程链表的头指针和尾指针均是init_task,这个PCB是0号进程的,0号进程是一直存在于系统中的,不会被撤销。因此可以通过以前学习的链表的相关知识,遍历系统中的进程链表,进而访问每一个进程的PCB,从而打印进程的相关信息。
可以看出来,task_struct的成员有很多个,在这块我将打印以下几个属性:
cur->pid 进程号
cur->comm 进程名
cur->__state 进程状态
cur->exit_state 进程退出的状态
cur->exit_code 进程正常终止的状态码
cur->exit_signal 进程退出时发送给父进程的信号(通常为SIGCHLD)
(cur->parent)->pid 父进程的pid
(cur->parent)->comm 父进程名
(cur->real_parent)->pid 亲生父亲进程的pid
(cur->real_parent)->comm 亲生父亲进程名
//在早期内核中utime和stime的单位为jiffies(自4.11内核起,task_struct中的utime和stime改为以纳秒计);jiffies在早期内核的 kernel/sched.c 文件中定义为一个全局变量:
long volatile jiffies=0;
它记录了从开机到当前时间的时钟中断发生次数
/* u64 is an alias for unsigned long long. */
typedef unsigned long long u64;
u64 utime;/* CPU time spent running in user space */
u64 stime;/* CPU time spent running in kernel space */
编写代码
- Makefile
# Out-of-tree kbuild Makefile for the prmod module.
obj-m += prmod.o

# Resolve the module directory ourselves: $(PWD) is inherited from the
# environment and can be empty or stale (e.g. under sudo).
CURRENT_PATH := $(shell pwd)
LINUX_KERNEL := $(shell uname -r)
# Header package location, kept for reference; the build goes through the
# /lib/modules/<release>/build link below.
LINUX_KERNEL_PATH := /usr/src/linux-headers-$(LINUX_KERNEL)
KERNEL_BUILD_DIR := /lib/modules/$(LINUX_KERNEL)/build

.PHONY: all clean

# $(MAKE) (not a bare "make") forwards flags and the jobserver to the sub-make.
all:
	$(MAKE) -C $(KERNEL_BUILD_DIR) M=$(CURRENT_PATH) modules

clean:
	$(MAKE) -C $(KERNEL_BUILD_DIR) M=$(CURRENT_PATH) clean
- prmod.c
#include <linux/module.h>
#include <linux/init.h>
#include <linux/init_task.h>
#include<linux/list.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/kernel.h>
/*
 * Module entry point: walk the process list that starts at init_task and
 * log one summary line per task, followed by the total count.
 *
 * Always returns 0, so the module stays loaded after printing.
 */
static int __init my_print_init(void)
{
	struct task_struct *p;
	int count = 0;	/* number of tasks visited so far */

	printk("there are some infomation about processes\n");

	/*
	 * Tasks may be created or reaped while we iterate. Holding the RCU
	 * read lock keeps every task_struct reached through the list (and
	 * through ->parent) from being freed until rcu_read_unlock().
	 */
	rcu_read_lock();

	/* init_task (pid 0) is the list head, so the walk starts at the entry after it. */
	list_for_each_entry_rcu(p, &init_task.tasks, tasks) {
		/* ->parent is an __rcu pointer and must be fetched with rcu_dereference(). */
		const struct task_struct *parent = rcu_dereference(p->parent);

		count++;
		printk("第%d个进程信息如下:\n", count);
		/* __state is unsigned int (%u); utime/stime are u64 and need %llu, not %d. */
		printk("name: %s, pid: %d, state: %u, exit_state: %d, exit_code: %d, "
		       "exit_signal: %d, parent_pid: %d, parent_name: %s, "
		       "utime: %llu, stime: %llu\n",
		       p->comm, p->pid, READ_ONCE(p->__state), p->exit_state,
		       p->exit_code, p->exit_signal, parent->pid, parent->comm,
		       p->utime, p->stime);
	}

	rcu_read_unlock();

	printk("总共有%d个进程\n", count);
	return 0;
}
/* Module exit hook: logs a single closing message. */
static void __exit my_print_exit(void)
{
	printk("Finished!\n");
}
/* Licence tag, then registration of the load/unload hooks. */
MODULE_LICENSE("GPL");
module_init(my_print_init);
module_exit(my_print_exit);
运行结果
传参访问特定的进程
思路
要实现对进程的快速查找,遍历链表相对来说要花费大量时间,因此引入了哈希表的概念:通过哈希函数把进程的pid转化成表的索引,早期的linux使用宏pid_hashfn来实现;从下文的源码可以看到,较新的内核改用idr(整数ID到指针的映射)来完成这一查找。linux当中提供了一些从pid获取到pcb的接口函数,例如find_get_pid()和pid_task()。
- 在源码当中查找,发现find_get_pid定义在
/kernel/pid.c
中
/*
 * Look up the struct pid for the numeric id @nr and pass it through
 * get_pid(), which bumps its reference count when it is non-NULL.
 * Both steps run inside an RCU read-side critical section.
 */
struct pid *find_get_pid(pid_t nr)
{
	struct pid *found;

	rcu_read_lock();
	found = find_vpid(nr);
	found = get_pid(found);
	rcu_read_unlock();

	return found;
}
再查看find_vpid和get_pid的代码:
/*
 * Resolve the pid number @nr to its struct pid, searching the pid
 * namespace obtained from the current task.
 */
struct pid *find_vpid(int nr)
{
	struct pid_namespace *active_ns = task_active_pid_ns(current);

	return find_pid_ns(nr, active_ns);
}
/*
 * Look up @nr in the IDR (ID-to-pointer map) of namespace @ns and return
 * the struct pid pointer stored there.
 *
 * Kernel-doc of the helper used below, kept here for reference:
 *
 * idr_find() - Return pointer for given ID.
 * @idr: IDR handle.
 * @id: Pointer ID.
 *
 * Looks up the pointer associated with this ID. A %NULL pointer may
 * indicate that @id is not allocated or that the %NULL pointer was
 * associated with this ID.
 *
 * This function can be called under rcu_read_lock(), given that the leaf
 * pointers lifetimes are correctly managed.
 *
 * Return: The pointer associated with this ID.
 */
struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
{
	struct pid *entry = idr_find(&ns->idr, nr);

	return entry;
}
/* Return the pid namespace of the struct pid attached to task @tsk. */
struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
{
	struct pid *tsk_pid = task_pid(tsk);

	return ns_of_pid(tsk_pid);
}
/*
 * Return the namespace recorded at @pid's own level
 * (pid->numbers[pid->level].ns), or NULL when @pid is NULL.
 */
static inline struct pid_namespace *ns_of_pid(struct pid *pid)
{
	return pid ? pid->numbers[pid->level].ns : NULL;
}
/* Return the struct pid pointer stored in @task's thread_pid field. */
static inline struct pid *task_pid(struct task_struct *task)
{
	struct pid *tpid = task->thread_pid;

	return tpid;
}
find_vpid的调用顺序如下:
get_pid代码:
static inline struct pid *get_pid(struct pid *pid)
{
if (pid)
refcount_inc(&pid->count);//增加结构体的引用计数
return pid; //返回该pid结构体
}
因此,find_get_pid的作用是:给定pid号,找到其对应的struct pid指针,并将该结构体的引用计数加1(使用完毕后应调用put_pid()释放这次引用)
- pid_task也定义在
kernel/pid.c
中,查看源码:
/*
 * Return the task_struct linked to @pid through the list of kind @type,
 * or NULL when @pid is NULL or that list has no first node.
 */
struct task_struct *pid_task(struct pid *pid, enum pid_type type)
{
struct task_struct *result = NULL;
if (pid) {
struct hlist_node *first;
// rcu_dereference_check() is a macro for reading RCU-protected data
first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
// lockdep_tasklist_lock_is_held() reports whether the task-list lock is currently held
lockdep_tasklist_lock_is_held());
if (first)
// map the list node back to the task_struct that embeds it as pid_links[type]
result = hlist_entry(first, struct task_struct, pid_links[(type)]);
}
return result;
}
// Head of an hlist chain: holds only a pointer to the first node
struct hlist_head {
struct hlist_node *first;
};
/*
 * Node of an hlist chain.
 * NOTE(review): pprev presumably points at the previous link's `next`
 * pointer; confirm against include/linux/types.h.
 */
struct hlist_node {
	struct hlist_node *next;
	struct hlist_node **pprev;
};
// hlist_first_rcu(head): yields the `first` pointer of @head, accessed through an __rcu-annotated pointer type
#define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first)))
// hlist_entry(ptr, type, member): given @ptr to the @member field, recover a pointer to the enclosing @type via container_of()
#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
综上所述,通过find_get_pid获取指定pid号对应的pid结构体指针,再通过pid_task从该pid结构体的tasks[type]链表(hlist)中取出第一个节点,并返回该节点所在的PCB(task_struct),然后就可以访问该进程的一切信息啦
编写代码
- code8.c
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/types.h>
#include<linux/pid.h>
int my_pid = 5;
module_param(my_pid,int,0644);
/*
 * Module entry point: resolve my_pid to its task_struct and log a summary
 * line for that process, or a "not found" line when no such task exists.
 *
 * Always returns 0, so the module loads even when the pid is not found.
 */
static int __init my_test_init(void)
{
	struct task_struct *p;
	/* find_get_pid() takes a reference on the struct pid; it is dropped below. */
	struct pid *pid = find_get_pid(my_pid);

	printk("pid为%d的信息如下:\n", my_pid);

	/*
	 * pid_task() must run under RCU, and the task it returns is only
	 * guaranteed to stay allocated until rcu_read_unlock().
	 */
	rcu_read_lock();
	p = pid_task(pid, PIDTYPE_PID);
	if (p) {
		/* ->parent is an __rcu pointer and must be fetched with rcu_dereference(). */
		const struct task_struct *parent = rcu_dereference(p->parent);

		/* __state is unsigned int (%u); utime/stime are u64 and need %llu, not %d. */
		printk("name: %s, pid: %d, state: %u, exit_state: %d, exit_code: %d, "
		       "exit_signal: %d, parent_pid: %d, parent_name: %s, "
		       "utime: %llu, stime: %llu\n",
		       p->comm, p->pid, READ_ONCE(p->__state), p->exit_state,
		       p->exit_code, p->exit_signal, parent->pid, parent->comm,
		       p->utime, p->stime);
	} else {
		printk("未找到pid为%d的进程\n", my_pid);
	}
	rcu_read_unlock();

	/* Balance the reference taken by find_get_pid(); put_pid() accepts NULL. */
	put_pid(pid);
	return 0;
}
/* Module exit hook: logs a single farewell message. */
static void __exit my_test_exit(void)
{
	printk("goodbye\n");
}
/* Licence tag, then registration of the load/unload hooks. */
MODULE_LICENSE("GPL");
module_init(my_test_init);
module_exit(my_test_exit);
- Makefile
# Out-of-tree kbuild Makefile for the code8 module.
obj-m += code8.o

# Resolve the module directory ourselves: $(PWD) is inherited from the
# environment and can be empty or stale (e.g. under sudo).
CURRENT_PATH := $(shell pwd)
LINUX_KERNEL := $(shell uname -r)
# Header package location, kept for reference; the build goes through the
# /lib/modules/<release>/build link below.
LINUX_KERNEL_PATH := /usr/src/linux-headers-$(LINUX_KERNEL)
KERNEL_BUILD_DIR := /lib/modules/$(LINUX_KERNEL)/build

.PHONY: all clean

# $(MAKE) (not a bare "make") forwards flags and the jobserver to the sub-make.
all:
	$(MAKE) -C $(KERNEL_BUILD_DIR) M=$(CURRENT_PATH) modules

clean:
	$(MAKE) -C $(KERNEL_BUILD_DIR) M=$(CURRENT_PATH) clean
运行结果
插入模块后直接打印信息,打印的为pid为5的进程信息。
在插入模块的时候,重设要打印的pid为2,则打印pid为2的进程信息。
打印兄弟进程以及子进程
上图清晰的表明了task_struct结构中的parent、children、sibling之间的关系,可以看到:
(1)sibling.next:当下一个兄弟进程存在时,指向下一个兄弟进程的sibling成员;若不存在,则指向父进程的children成员(即链表头)。sibling.prev指向前一个兄弟进程的sibling成员;若没有前一个兄弟进程,同样指向父进程的children成员。
sibling将所有的兄弟进程链接成一个链表,链表头是父进程的children成员
(2)children.next
是指向parent的第一个子进程的sibling成员,而children.prev
是指向parent的最后一个子进程的sibling
成员。
编写代码
# Out-of-tree kbuild Makefile for the brother module.
obj-m += brother.o

# Resolve the module directory ourselves: $(PWD) is inherited from the
# environment and can be empty or stale (e.g. under sudo).
CURRENT_PATH := $(shell pwd)
LINUX_KERNEL := $(shell uname -r)
# Header package location, kept for reference; the build goes through the
# /lib/modules/<release>/build link below.
LINUX_KERNEL_PATH := /usr/src/linux-headers-$(LINUX_KERNEL)
KERNEL_BUILD_DIR := /lib/modules/$(LINUX_KERNEL)/build

.PHONY: all clean

# $(MAKE) (not a bare "make") forwards flags and the jobserver to the sub-make.
all:
	$(MAKE) -C $(KERNEL_BUILD_DIR) M=$(CURRENT_PATH) modules

clean:
	$(MAKE) -C $(KERNEL_BUILD_DIR) M=$(CURRENT_PATH) clean
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/init_task.h>
#include <linux/types.h>
#include <linux/atomic.h>
/*
 * Module entry point: for every task on the process list, log the task
 * itself, each of its children, and each of its siblings (the other
 * entries on the same children list), then log the total task count.
 *
 * Always returns 0, so the module stays loaded after printing.
 */
static int __init print_pcb(void)
{
	struct task_struct *p, *child, *bro;
	int count = 0;	/* number of tasks visited so far */

	printk("progress begin...\n");

	/*
	 * The RCU read lock keeps the task_structs we touch from being freed
	 * while we look at them.
	 * NOTE(review): the children/sibling lists are normally guarded by
	 * tasklist_lock, which a module presumably cannot take; the nested
	 * walks below are therefore best-effort snapshots - confirm before
	 * relying on them.
	 */
	rcu_read_lock();

	/* init_task (pid 0) is the list head, so the walk starts at the entry after it. */
	list_for_each_entry_rcu(p, &init_task.tasks, tasks) {
		const struct task_struct *parent = rcu_dereference(p->parent);
		/*
		 * The sibling link sits on the list of "natural children", i.e.
		 * the real parent's children list; ->parent can differ from
		 * ->real_parent (it is the SIGCHLD recipient).
		 */
		struct task_struct *real_parent = rcu_dereference(p->real_parent);

		count++;
		printk("第%d个进程信息如下:\n", count);
		printk("name: %s, pid: %d, parent_pid: %d\n",
		       p->comm, p->pid, parent->pid);

		/* Children: every entry on p->children is linked through its sibling member. */
		list_for_each_entry(child, &p->children, sibling) {
			printk("子进程信息======> name: %s, pid: %d, parent_pid: %d\n",
			       child->comm, child->pid,
			       rcu_dereference(child->parent)->pid);
		}

		/* Siblings: the other entries on the real parent's children list. */
		list_for_each_entry(bro, &real_parent->children, sibling) {
			/* p is on that list too; a task is not its own sibling. */
			if (bro == p)
				continue;
			printk("兄弟进程信息======> name: %s, pid: %d, parent_pid: %d\n",
			       bro->comm, bro->pid,
			       rcu_dereference(bro->parent)->pid);
		}
	}

	rcu_read_unlock();

	printk("进程的个数:%d\n", count);
	return 0;
}
/* Module exit hook: logs a single closing message. */
static void __exit exit_pcb(void)
{
	printk("Exiting...\n");
}
/* Licence tag, then registration of the load/unload hooks. */
MODULE_LICENSE("GPL");
module_init(print_pcb);
module_exit(exit_pcb);
运行结果
用pstree打印系统进程树如下:
可以看出,它把进程gvfsd-metadata的所有兄弟进程都打印出来了
小结
通过这次实验,首先复习了之前学习的链表以及模块传参部分内容,使我更加熟练地运用所学知识。其次学习了进程相关的代码,对书写代码方面有很大提升。在传参部分我遇到了问题:用了find_task_by_pid()这个函数后,编译的时候发现报错,于是在源码里面搜索这个函数也没找到,然后问了chatgpt,大概的解释就是内核版本或者操作系统不同导致的问题,然后chatgpt又推荐了相关函数,我在源码里面找到之后学习了相关函数并应用到代码之中,最终能实现从给定的pid号找到对应的PCB,这次实验收获很大!还有对于进程之间的亲属关系,在徐同学的帮助下,我理解得更加深刻!