Linux 进程 PID 管理

JiMoKuangXiangQu

已于 2024-07-15 09:46:36 修改

阅读量596

点赞数 22

分类专栏： # 进程调度文章标签： Linux PID 管理 pid_namespace

于 2024-07-12 16:56:34 首次发布

本文链接：https://blog.csdn.net/JiMoKuangXiangQu/article/details/140342904

版权

进程调度专栏收录该内容

9 篇文章 0 订阅

订阅专栏

文章目录

1. 前言
2. 进程 PID 相关数据结构
3. 进程 PID 的构建
4. 进程 PID 管理相关接口
5. 进程 PID 的层级结构
6. PID 用户空间接口
7. 命名空间观察工具
8. 后记

1. 前言

限于作者能力水平，本文可能存在谬误，因此而给读者带来的损失，作者不做任何承诺。

2. 进程 PID 相关数据结构

/* include/linux/pid_namespace.h */

/* 进程 PID 命名空间对象 */
struct pid_namespace {
	...
	struct pidmap pidmap[PIDMAP_ENTRIES]; /* 空闲 PID 管理位图 */
	...
	struct kmem_cache *pid_cachep; /* 当前层级 struct pid 对象分配缓存，alloc_pid() 从中分配 struct pid 对象 */
	unsigned int level; /* PID 命名空间 层级编号，从 0 开始编号 */
	struct pid_namespace *parent; /* 父级 pid_namespace (@level - 1) */
	...
};

/* include/linux/nsproxy.h */

/* 命名空间代理对象: 包含指向各类型命名空间对象的指针 */
struct nsproxy {
	...
	struct pid_namespace *pid_ns_for_children; /* 进程后续创建的子进程将使用的 PID 命名空间(进程自身所处的 PID 命名空间由其 struct pid 决定) */
	...
};

enum pid_type
{
	PIDTYPE_PID, /* 进程 PID */
	PIDTYPE_PGID, /* 进程组 ID(进程组 领头进程的 PID) */
	PIDTYPE_SID, /* session ID */
	PIDTYPE_MAX,
	/* only valid to __task_pid_nr_ns() */
 	/*
	 * 线程组 ID(线程组 group leader 进程的 PIDTYPE_PID 类型 PID)，
	 * 可通过进程所在 线程组的 group leader 进程 task_struct::group_leader
	 * 的 task_struct::pids[PIDTYPE_PID] 信息获取，所以无需在进程中维护一个
	 * __PIDTYPE_TGID 的 pid 信息，这也是 __PIDTYPE_TGID 定义在 PIDTYPE_MAX
	 * 之后的原因。
	 * 但这一点，在更新版本的内核中已经有所变化。
	 */
	__PIDTYPE_TGID
};

/* include/linux/pid.h */
struct upid {
	/* Try to keep pid_chain in the same cacheline as nr for find_vpid */
	int nr; /* getpid(), gettid() 等 API 返回的值，来自于这里 */
	struct pid_namespace *ns; /* 关联的 PID 命名空间 */
	struct hlist_node pid_chain; /* 用于挂接到全局 PID 哈希表 pid_hash[] */
};

struct pid {
	...
	unsigned int level; /* 所属 pid_namespace 的层级编号。level 从 0 开始编号 */
	/*
	 * 使用当前 struct pid 的 任务列表：
	 * (1) tasks[PIDTYPE_PID]
	 *     使用当前 struct pid 作为 PID 的进程列表。
	 * (2) tasks[PIDTYPE_PGID]
	 *     使用当前 struct pid 作为 PGID 的进程列表。
	 *     同一进程组内的所有进程(线程组 group leader)共享进程组
	 *     领头进程的 struct pid 。
	 * (3) tasks[PIDTYPE_SID]
	 *     使用当前 struct pid 作为 SID 的进程列表。
	 *     同一 session 内的所有进程(线程组 group leader)共享 session
	 *     领头进程的 struct pid 。
	 * 
	 * 这 3 个哈希链表的构建细节参考函数 attach_pid(), 每个进程通过
	 * struct task_struct::pids[PIDTYPE_*].node 挂接到 struct pid::tasks[PIDTYPE_*] 
	 * 哈希链表，PIDTYPE_* 取值为 {PIDTYPE_PID, PIDTYPE_PGID, PIDTYPE_SID} 。
	 */
	struct hlist_head tasks[PIDTYPE_MAX];
	...
	/*
	 * numbers[] 的长度和其所属的 PID 命名空间的层级有关，其长度为 level + 1，
	 * 这由 struct upid 关联的 PID 命名空间 struct upid::ns 中，struct pid 
	 * 分配缓存 struct pid_namespace::pid_cachep 决定。
	 * 更多细节见后面的代码分析。
	 */
	struct upid numbers[1];
};

struct pid_link {
	struct hlist_node node; /* 用来将进程添加到所用 struct pid 的 tasks[PIDTYPE_*] 哈希链表 */
	struct pid *pid;
};

/* include/linux/sched.h */

/* 进程管理对象 */
struct task_struct {
	...
	struct task_struct  *group_leader; /* 【线程组】 leader 进程 */
	...
	struct nsproxy   *nsproxy; /* 进程关联的各种命名空间，这里只关注 PID 命名空间 (pid_namespace) */
	...
	struct pid_link   pids[PIDTYPE_MAX]; /* 进程的 PID, PGID, SID 管理数据 (数组长度为 PIDTYPE_MAX，不含 __PIDTYPE_TGID) */
	...
};

对上面这些数据结构的作用，择重做一个扼要介绍：

struct pid_namespace
PID 命名空间。一方面，PID 命名空间用来实现 PID 隔离，允许进程在不同的PID 命名空间中有各自独立的 PID；另一方面，PID 命名空间也实现了 PID 的层级结构。PID 命名空间 组织结构如下图：

在这里插入图片描述

上图中，数据标记的格式为：level.PID：

. {0.1，0.2，0.3} 表示有 3 个进程，位于 `level 0 PID 命名空间`，
  它们的 `PID` 分别为 {1，2，3}。
. {0.4 1.1, 0.5 1.2} 表示有 2 个进程，位于 `level 1 PID 命名空间`，
  它们在 `level 0 PID 命名空间` 的 PID 分别为 {4，5}；
  它们在 `level 1 PID 命名空间` 的 PID 分别为 {1，2}。
. {0.6 1.3 2.1，0.7 1.4 2.2} 表示有 2 个进程，位于 `level 2 PID 命名空间`，
  它们在 `level 0 PID 命名空间` 的 PID 分别为 {6，7}；
  它们在 `level 1 PID 命名空间` 的 PID 分别为 {3，4}；
  它们在 `level 2 PID 命名空间` 的 PID 分别为 {1，2}。

struct upid
主要用来记录进程在某一PID 命名空间中的 PID (struct upid::nr)，以及 PID 所在的PID 命名空间 (struct upid::ns)。
struct pid
主要用来记录进程所处PID 命名空间层级(struct pid::level)，以及在所有PID 命名空间层级中的 PID (struct pid::numbers[]) 。

3. 进程 PID 的构建

3.1 第一个进程 PID 构建

Linux 系统中第一个进程，init_task，其 PID 是静态构建的。细节如下：

/* init/init_task.c */

/* Initial task structure */
struct task_struct init_task = INIT_TASK(init_task);
EXPORT_SYMBOL(init_task);

/* include/linux/init_task.h */

/*
 *  INIT_TASK is used to set up the first task table, touch at
 * your own risk!. Base=0, limit=0x1fffff (=2MB)
 */
#define INIT_TASK(tsk) \
{
	... \
	.real_parent = &tsk,      \
	.parent  = &tsk,      \
	... \
	.group_leader = &tsk,      \
	... \
	.nsproxy = &init_nsproxy, /* 进程 PID 命名空间管理数据 */ \
	... \
	/* 进程 PID 管理数据 */ \
	.pids = {       \
		[PIDTYPE_PID]  = INIT_PID_LINK(PIDTYPE_PID),  \
		[PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID),  \
		[PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),  \
	},        \
	... \
}

init_task 的 PID 命名空间管理数据：

/* kernel/nsproxy.c */

struct nsproxy init_nsproxy = {
	...
	.pid_ns_for_children = &init_pid_ns, /* init_task 的 PID 命名空间管理数据 */
	...
};

/* kernel/pid.c */

struct pid_namespace init_pid_ns = { /* 系统第一个、位于 level 0 的 PID 命名空间对象 */
	...
	.level = 0, /* init_task 位于 level 0 PID 命名空间 */
	.child_reaper = &init_task,
	...
#ifdef CONFIG_PID_NS
	.ns.ops = &pidns_operations,
#endif
};

init_task 的 PID 管理数据：

/* include/linux/init_task.h */

#define INIT_PID_LINK(type)      \
{        \
	.node = {      \
		.next = NULL,     \
		.pprev = NULL,     \
	},       \
	.pid = &init_struct_pid,    \
}

/* kernel/pid.c */

struct pid init_struct_pid = INIT_STRUCT_PID;

#define INIT_STRUCT_PID {      \
	.count   = ATOMIC_INIT(1),    \
	.tasks  = {      \
		{ .first = NULL },     \
		{ .first = NULL },     \
		{ .first = NULL },     \
	},        \
	.level  = 0,  /* init_task 处于 level 0 的 PID 命名空间 */    \
	.numbers = { \
		{      \
			.nr  = 0,  /* init_task 的 PID, PGID, SID 均为 0 */   \
			.ns  = &init_pid_ns,  /* init_task 所属的 PID 命名空间 */  \
			.pid_chain = { .next = NULL, .pprev = NULL }, \
		}, \
	}        \
}

从上面的代码可以了解到：

. init_task 处于 level 0 的 PID 命名空间
. init_task 的 PID，PGID，SID 均为 0
. init_task 位于 PID 命名空间 init_pid_ns 内

说了半天，还不知道 Linux 系统中第一个进程，init_task，到底是哪位。嗯，start_kernel() 熟悉吧？在 BOOT CPU 上运行的 start_kernel() 所在的执行序列，就是 init_task。

3.2 第二个进程 PID 的构建过程

init_task 在 start_kernel() 中执行部分系统初始化工作后，将创建系统中第二个进程来执行剩余的初始化工作：

start_kernel()
	...
	/*
	 * 全局 PID 哈希表空间分配，用来存储系统中
	 * 所有层级 PID 命名空间中所有 struct upid 。
	 */
	pidhash_init()
		pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
				HASH_EARLY | HASH_SMALL | HASH_ZERO, &pidhash_shift, NULL, 0, 4096);
	...
	/*
	 * . 设置默认允许的最大、最小 PID 值
	 * . 分配 level 0 PID 命名空间 进程 PID 管理位图
	 * . 创建 level 0 PID 命名空间 的 struct pid 分配缓存
	 */
	pidmap_init()
		...
		/* bump default and minimum pid_max based on number of cpus */
		pid_max = min(pid_max_max, max_t(int, pid_max, 
				PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
		pid_max_min = max_t(int, pid_max_min,
				PIDS_PER_CPU_MIN * num_possible_cpus());
		pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);

		/* 分配 level 0 PID 命名空间 进程 PID 管理位图 */
		init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
		/* Reserve PID 0. We never call free_pidmap(0) */
		/* 保留 level 0 PID 命名空间 进程 PID 0 */
		set_bit(0, init_pid_ns.pidmap[0].page);
		atomic_dec(&init_pid_ns.pidmap[0].nr_free);

		/* 创建 level 0 PID 命名空间 的 struct pid 分配缓存 */
		init_pid_ns.pid_cachep = KMEM_CACHE(pid,
				SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
	...
	/* 创建系统中 第二个进程 来执行剩余的初始化工作 */
	rest_init()
		pid = kernel_thread(kernel_init, NULL, CLONE_FS);
			_do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
				(unsigned long)arg, NULL, NULL, 0);

/* kernel/fork.c */

long _do_fork(unsigned long clone_flags,
	unsigned long stack_start,
	unsigned long stack_size,
	int __user *parent_tidptr,
	int __user *child_tidptr,
	unsigned long tls)
{
	struct task_struct *p;
	...
	long nr;
	
	...
	p = copy_process(clone_flags, stack_start, stack_size,
		child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
	...

	if (!IS_ERR(p)) {
		...
		struct pid *pid;
		
		...
		
		pid = get_task_pid(p, PIDTYPE_PID);
		nr = pid_vnr(pid); /* 进程 在当前层级 PID 命名空间中 的 PID */

		...
		
		put_pid(pid);
	} else {
		nr = PTR_ERR(p);
	}
	return nr; /* 返回 进程 在当前层级 PID 命名空间 中 的 PID */
}

static __latent_entropy struct task_struct *copy_process(
					unsigned long clone_flags,
					unsigned long stack_start,
					unsigned long stack_size,
					int __user *child_tidptr,
					struct pid *pid,
					int trace,
					unsigned long tls,
					int node)
{
	int retval;
	struct task_struct *p;

	...
	/* 分配进程结构体 task_struct，复制当前进程 @current 的信息到新进程 @p (包括 PID 数据) */
	p = dup_task_struct(current, node);
	...
	/* 
	 * 在设置了 CLONE_NEWNS,...,CLONE_NEWPID,...  等标记的情形， 
	 * 按需新建各种 namespace (包括 PID 命名空间 pid_namespace) 。
	 * 
	 * 创建 第二个进程时没有设置对应标志位，所以不会创建新的 PID 命名空间。
	 */
	retval = copy_namespaces(clone_flags, p);
	...
	/* 为新进程分配 PID 管理数据 */
	if (pid != &init_struct_pid) {
		pid = alloc_pid(p->nsproxy->pid_ns_for_children);
		...
	}
	...
	/* ok, now we should be set up.. */
	p->pid = pid_nr(pid); /* 记录进程在 level 0 PID 命名空间 中 的 PID */
	if (clone_flags & CLONE_THREAD) { /* 线程组内 非 group leader 进程 */
		...
		/* 设置 线程组内 非 group leader 进程 的 group leader */
		p->group_leader = current->group_leader;
		p->tgid = current->tgid;
	} else { /* 线程组的 group leader 进程 */
		...
		p->group_leader = p; /* 线程组内 group leader 进程 的 group leader 为自身 */
		p->tgid = p->pid; /* 线程组 group leader 进程: tgid == pid */
	}
	...
	if (likely(p->pid)) {
		...
		init_task_pid(p, PIDTYPE_PID, pid); /* 设置进程 @p 的 PID 信息 */
		if (thread_group_leader(p)) { /* 如果是 线程组 group leader, */
			init_task_pid(p, PIDTYPE_PGID, task_pgrp(current)); /* 设置进程 @p 的 PGID 信息 */
			init_task_pid(p, PIDTYPE_SID, task_session(current)); /* 设置进程 @p 的 SID 信息 */
			...
			attach_pid(p, PIDTYPE_PGID); /* 将进程 @p 添加到关联 struct pid 的 PGID 类型哈希链表 */
			attach_pid(p, PIDTYPE_SID); /* 将进程 @p 添加到关联 struct pid 的 SID 类型哈希链表 */
			...
		} else {
			...
		}
		attach_pid(p, PIDTYPE_PID); /* 将进程 @p 添加到关联 struct pid 的 PID 类型哈希链表 */
		nr_threads++;
	}
	...
}

3.2.1 从当前进程复制进程 PID 信息

/* kernel/fork.c */

p = dup_task_struct(current, node);
	struct task_struct *tsk;
	
	...
	tsk = alloc_task_struct_node(node); /* 分配进程结构体 task_struct */
	...
	/* !!! 复制旧进程的 task_struct 数据 @orig 到 新进程 @tsk （包括 PID 信息） */
	err = arch_dup_task_struct(tsk, orig);
		*dst = *src;
		return 0;
	...

3.2.2 创建每进程的 PID 管理数据 (`struct pid`) 并初始化

通过接口 alloc_pid() 创建每进程的 PID 管理数据 (struct pid) 并初始化：

/* kernel/pid.c */

struct pid *alloc_pid(struct pid_namespace *ns)
{
	struct pid *pid;
	enum pid_type type;
	int i, nr;
	struct pid_namespace *tmp;
	struct upid *upid;
	int retval = -ENOMEM;

	pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL); /* 分配 struct pid 对象 */
	...

	tmp = ns;
	pid->level = ns->level;
	/* 在每个层级的 pid_namespace 中, 分配一个 进程 PID */
	for (i = ns->level; i >= 0; i--) {
		nr = alloc_pidmap(tmp); /* 从当前层级 @i 的 pid_namespace 中分配一个空闲的 进程 PID */
		...

		pid->numbers[i].nr = nr; /* 记录在当前层级 @i 的 pid_namespace 中的 进程 PID */
		pid->numbers[i].ns = tmp; /* 记录当前层级 @i 关联的 pid_namespace 对象 */
		tmp = tmp->parent; /* 进入父级 pid_namespace (@i - 1) */
	}

	...
	/* 使用 @pid 的 PID,PGID,SID 类型 进程列表 初始为空 */
	for (type = 0; type < PIDTYPE_MAX; ++type)
		INIT_HLIST_HEAD(&pid->tasks[type]);

	upid = pid->numbers + ns->level;
	spin_lock_irq(&pidmap_lock);
	...
	/* 将每层级 pid_namespace 为进程分配的 PID, 插入到全局 PID 哈希表 @pid_hash */
	for ( ; upid >= pid->numbers; --upid) {
		hlist_add_head_rcu(&upid->pid_chain,
				&pid_hash[pid_hashfn(upid->nr, upid->ns)]);
		upid->ns->nr_hashed++;
	}
	spin_unlock_irq(&pidmap_lock);

	return pid; /* 返回分配的 struct pid 对象 */

	...
}

3.2.3 绑定进程和其相关的 PID 管理数据

通过接口 init_task_pid() 设定进程 PID、PGID、SID 管理数据，实现将 PID 管理数据 struct pid 绑定到进程；通过接口 attach_pid() 将进程添加到 PID 管理数据 struct pid 的 PID、PGID、SID 类型哈希链表。如此，实现了进程和 PID 管理数据 struct pid 的双向绑定。其中，PGID、SID 的绑定，仅针对线程组的 group leader 进程；非 group leader 进程 pids[] 中的 PGID、SID 信息，是 dup_task_struct() 从创建它的进程的 task_struct 原样复制而来的(逐级追溯，最初来自 init_task 的 init_struct_pid)，而 task_pgrp()、task_session() 查询时总是通过 group_leader 获取，因此线程组内所有进程的 PGID、SID 都与其 group leader 保持一致。

init_task_pid(p, PIDTYPE_PID, pid); /* 设置进程 @p 的 PID 信息 */
init_task_pid(p, PIDTYPE_PGID, task_pgrp(current)); /* 设置进程 @p 的 PGID 信息 */
init_task_pid(p, PIDTYPE_SID, task_session(current)); /* 设置进程 @p 的 SID 信息 */

static inline struct pid *task_pgrp(struct task_struct *task)
{
	/* @group_leader 的设置，见前面 copy_process() */
	return task->group_leader->pids[PIDTYPE_PGID].pid;
}

static inline struct pid *task_session(struct task_struct *task)
{
	/* @group_leader 的设置，见前面 copy_process() */
	return task->group_leader->pids[PIDTYPE_SID].pid;
}

static inline void
init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
{
	task->pids[type].pid = pid;
}

attach_pid(p, PIDTYPE_PGID); /* 将进程 @p 添加到关联 struct pid 的 PGID 类型哈希链表 */
attach_pid(p, PIDTYPE_SID); /* 将进程 @p 添加到关联 struct pid 的 SID 类型哈希链表 */
attach_pid(p, PIDTYPE_PID); /* 将进程 @p 添加到关联 struct pid 的 PID 类型哈希链表 */

void attach_pid(struct task_struct *task, enum pid_type type)
{
	struct pid_link *link = &task->pids[type];
	hlist_add_head_rcu(&link->node, &link->pid->tasks[type]);
}

经过一番操作，在当前层级的 PID 命名空间，形成了如下的 PID 数据结构图(以 level 0 举例，这正是系统初始创建第二个进程后的情形)。视图一是从进程(struct task_struct)的角度出发进行观察，视图二是从 PID 管理数据 struct pid 的角度出发进行观察。结合视图一和二，就是完整的 PID 数据结构关系视图。

在这里插入图片描述

3.3 进程的 PID 建立过程一般化

系统中其它进程 PID 的建立过程，和 3.2 中讨论的系统中第二个进程的创建过程，并无本质的不同，读者可自行进行转化理解，本文将不再赘述。

4. 进程 PID 管理相关接口

本小节对 Linux 系统中的进程 PID 管理 API 接口择重进行一些扼要说明。

/* include/linux/pid.h */

/* 从 PID 命名空间 @ns 的 pid 缓存，创建一个 struct pid 对象，并插入到全局哈希表 @pid_hash */
extern struct pid *alloc_pid(struct pid_namespace *ns);

/* 将进程 @task 添加到 struct pid 类型 @pid_type 哈希表 头部 */
extern void attach_pid(struct task_struct *task, enum pid_type);

/* include/linux/sched.h */

/* 返回进程 @task 的 struct pid */
static inline struct pid *task_pid(struct task_struct *task)
{
	return task->pids[PIDTYPE_PID].pid;
}

...

/* 返回 进程 @task 所在 [进程组] 的 struct pid */
static inline struct pid *task_pgrp(struct task_struct *task)
{
	return task->group_leader->pids[PIDTYPE_PGID].pid;
}

/* 返回 进程 @task 所在 [session] 的 struct pid */
static inline struct pid *task_session(struct task_struct *task)
{
	return task->group_leader->pids[PIDTYPE_SID].pid;
}

/* 返回进程 @tsk 在 level 0 pid_namespace 中的 PID */
static inline pid_t task_pid_nr(struct task_struct *tsk)
{
	return tsk->pid;
}

/* 返回 进程 @tsk 在 指定的 pid_namespace @ns 中的 PID */
static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
{
	return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns);
}

/* 返回 进程 @tsk 在 当前进程(current) 所处 pid_namespace 中的 PID (@ns 传 NULL 表示使用 current 的 pid_namespace) */
static inline pid_t task_pid_vnr(struct task_struct *tsk)
{
	return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
}

...

5. 进程 PID 的层级结构

在章节 2. 对 struct pid_namespace 的描述中，用一张图片描述了 PID 的层次结构。struct pid_namespace 的如下字段，共同构建了 PID 的层次结构：

/* include/linux/pid_namespace.h */

struct pid_namespace {
	...
	struct kmem_cache *pid_cachep; /* alloc_pid() 分配缓存，决定了 struct pid::numbers[] 数组的长度 */
	unsigned int level; /* 所属层次编号，从 0 开始编号 */
	struct pid_namespace *parent; /* 父级 pid_namespace (@level - 1) */
	...
};

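新的 PID 命名空间，是通过系统调用 unshare() 或 clone() 带上 CLONE_NEWPID 标志位创建的；CLONE_NEWNS, CLONE_NEWUTS, CLONE_NEWIPC, CLONE_NEWNET, CLONE_NEWCGROUP 等标志位创建的是其它类型的命名空间，并不会创建 PID 命名空间。setns() 则用于让进程加入一个已经存在的命名空间，它本身不会新建 PID 命名空间，但同样会经过 create_new_namespaces() 这条代码路径。需要注意的是：copy_pid_ns() 只有在 flags 带有 CLONE_NEWPID 时才会调用 create_pid_namespace() 新建 PID 命名空间，否则只是增加原 PID 命名空间的引用计数并将其返回(下面引用的代码中省略了这一判断)。本文对 setns()、clone() 进入 PID 命名空间创建代码的过程做简要分析。先看一下 setns() 的调用过程：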

/* kernel/nsproxy.c */

SYSCALL_DEFINE2(setns, int, fd, int, nstype)
{
	struct task_struct *tsk = current;
	struct nsproxy *new_nsproxy;
	...

	...
	new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs);
	...
}

static struct nsproxy *create_new_namespaces(unsigned long flags,
	struct task_struct *tsk, struct user_namespace *user_ns,
	struct fs_struct *new_fs)
{
	struct nsproxy *new_nsp;
	...

	new_nsp = create_nsproxy();
	...

	/*
	 * 这里忽略其它类型 namespace 对象的创建过程，只关注 
	 * PID 命名空间对象 struct pid_namespace 的创建过程：
	 */
	new_nsp->pid_ns_for_children =
		copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children);

	...

	return new_nsp;

	...
}

/* kernel/pid_namespace.c */

struct pid_namespace *copy_pid_ns(unsigned long flags,
	struct user_namespace *user_ns, struct pid_namespace *old_ns)
{
	...
	return create_pid_namespace(user_ns, old_ns);
}

static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
	struct pid_namespace *parent_pid_ns)
{
	struct pid_namespace *ns;
	unsigned int level = parent_pid_ns->level + 1;
	struct ucounts *ucounts;
	int i;
	int err;

	...
	ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); 
	...

	ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); /* 分配 PID 分配管理位图空间 */
	...

	/*
	 * 为当前层级 @level 创建 struct pid 的 kmem_cache 。
	 * 这里的 level + 1 决定了 struct pid::numbers[] 数组的长度，
	 * 即每个 level 的 pid_namespace 有一个 PID，正如章节 2. 的图片
	 * 所描述的层次结构那样。
	 */
	ns->pid_cachep = create_pid_cachep(level + 1);

	...

	ns->level = level; /* 设置 pid_namespace 层级编号 */
	ns->parent = get_pid_ns(parent_pid_ns); /* 设置父级 pid_namespace (@level - 1) */
	...

	return ns; /* 返回新建的 PID 命名空间对象 */
}

static struct kmem_cache *create_pid_cachep(int nr_ids)
{
	struct pid_cache *pcache;
	struct kmem_cache *cachep;

	...
	pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL);
	...

	/* 
	 * 为 struct pid_namespace 创建 struct pid 分配缓存。
	 * 从这里看到，新建的 struct pid 分配缓存，struct pid::numbers[] 
	 * 数组的长度为 @nr_ids ，也即 @level + 1 。
	 */
	snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
	cachep = kmem_cache_create(pcache->name,
			sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
			0, SLAB_HWCACHE_ALIGN, NULL);
	...

	pcache->nr_ids = nr_ids;
	pcache->cachep = cachep;
	...

	...
	return pcache->cachep;
}

clone() 调用过程类似：

SYSCALL_DEFINE5(clone, ...)
{
	return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls);
}

_do_fork()
	copy_process()
		retval = copy_namespaces(clone_flags, p);
			/* 没有设置相应标志位，则仅仅增加当前 nsproxy 的引用计数 */
			if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
				CLONE_NEWPID | CLONE_NEWNET |
				CLONE_NEWCGROUP)))) {
				get_nsproxy(old_ns);
				return 0;
			}
			
			...
			
			// 后续过程同 setns()
			new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs);
			
			...

6. PID 用户空间接口

Linux 提供一组 API 供用户空间查询进程 PID，以及查询、设置进程的进程组 ID (PGID) 和 session ID (SID)。

#include <unistd.h>

pid_t getpid(void); /* 获取 当前进程 所在线程组 的 group leader 的 PID */
pid_t getppid(void); /* 获取 当前进程 父进程 的 PID */

#define _GNU_SOURCE
#include <unistd.h>

pid_t gettid(void); /* 获取 进程 的 PID */

#include <unistd.h>

pid_t getsid(pid_t pid); /* 获取进程 @pid 所在 session 的 PID，即 SID */
pid_t setsid(void); /* 参考 man 手册 */

#include <unistd.h>

int setpgid(pid_t pid, pid_t pgid); /* 设置 进程 @pid 进程组 PID 为 @pgid */
pid_t getpgid(pid_t pid); /* 获取 进程 @pid 进程组 PID */

pid_t getpgrp(void); /* getpgid(0): 获取 当前进程组 PID */
pid_t setpgrp(void); /* setpgid(0, 0): 将 当前进程 的 PGID 设为其自身的 PID，即让当前进程成为一个新进程组的领头进程 */