Namespace of Process/PID in Linux

命名空间是一种资源划分方案。资源有很多类别,比如:process IDs, hostnames, user IDs, file names, and some names associated with network access, and interprocess communication.

Process ID (pid)

一个PID命名空间具有独立的process IDs空间。PID命名空间是有层级的,上一级命名空间可以看到其所有下级的PIDs,从而最初始的命名空间可以看到所有的进程。PID命名空间中第一个进程的ID为1,它和init进程一样特殊,最典型的就是负责接管所有的孤儿进程(orphaned processes)。终止PID为1的进程将直接终止其所在的以及所有下级PID命名空间里的进程。

现在让我们走进Linux的实现:

1. 进程与命名空间 [1]

/* Abridged excerpt of the task descriptor; "..." marks omitted members. */
struct task_struct {
...
/* Namespaces: the per-process namespace set this task is bound to. */
struct nsproxy			*nsproxy;
...
};

每个进程绑定的命名空间[2]

/*
 * A structure to contain pointers to all per-process
 * namespaces - fs (mount), uts, network, sysvipc, etc.
 *
 * The pid namespace is an exception -- it's accessed using
 * task_active_pid_ns.  The pid namespace here is the
 * namespace that children will use.
 *
 * 'count' is the number of tasks holding a reference.
 * The count for each namespace, then, will be the number
 * of nsproxies pointing to it, not the number of tasks.
 *
 * The nsproxy is shared by tasks which share all namespaces.
 * As soon as a single namespace is cloned or unshared, the
 * nsproxy is copied.
 */
struct nsproxy {
	atomic_t count;                                     /* number of tasks holding a reference */
	struct uts_namespace *uts_ns;                       /* UTS = UNIX Timesharing System */
	struct ipc_namespace *ipc_ns;
	struct mnt_namespace *mnt_ns;
	struct pid_namespace *pid_ns_for_children;          /* pid namespace that children will use */
	struct net 	     *net_ns;                       /* network-related namespace parameters */
	struct time_namespace *time_ns;                
	struct time_namespace *time_ns_for_children;
	struct cgroup_namespace *cgroup_ns;
};
/* Declared here, defined elsewhere. */
extern struct nsproxy init_nsproxy;

上述结构只提供子进程的PID命名空间,进程自身的PID命名空间通过下面函数获得[3]

/*
 * task_active_pid_ns() - pid namespace that @tsk itself belongs to.
 *
 * Resolved in two steps: fetch the struct pid attached to @tsk, then
 * ask ns_of_pid() for the namespace that pid was allocated in. The
 * result is whatever ns_of_pid() returns for that pid.
 */
struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
{
	struct pid *own_pid = task_pid(tsk);

	return ns_of_pid(own_pid);
}
EXPORT_SYMBOL_GPL(task_active_pid_ns);


// https://github.com/torvalds/linux/blob/master/include/linux/sched.h#L1309
/* Return the struct pid stored in @task->thread_pid. */
static inline struct pid *task_pid(struct task_struct *task)
{
	return task->thread_pid;
}


// https://github.com/torvalds/linux/blob/master/include/linux/pid.h#L144
/*
 * ns_of_pid() - pid namespace in which @pid was allocated.
 *
 * The namespace is read from the entry of @pid->numbers selected by
 * @pid->level.
 *
 * NOTE:
 * 	Callers are expected to pass the 'struct pid' attached to a task
 * 	(see attach_pid(), detach_pid()), i.e. a non-NULL @pid. A NULL
 * 	@pid is tolerated and yields a NULL pid-ns, which the caller must
 * 	then handle.
 */
static inline struct pid_namespace *ns_of_pid(struct pid *pid)
{
	if (!pid)
		return NULL;

	return pid->numbers[pid->level].ns;
}


// 一个PID命名空间init进程的判断
/*
 * is_child_reaper() - is @pid the init process of its own namespace?
 *
 * The test is done on the pid number rather than on
 * pid_ns->child_reaper, because this may be called before
 * copy_process has assigned child_reaper. The number examined is the
 * one stored at index @pid->level of @pid->numbers.
 */
static inline bool is_child_reaper(struct pid *pid)
{
	int own_nr = pid->numbers[pid->level].nr;

	return own_nr == 1;
}

2. PID命名空间数据结构 [4]

/* Values for the hide_pid field of struct pid_namespace. */
enum {
	HIDEPID_OFF       = 0,
	HIDEPID_NO_ACCESS = 1,
	HIDEPID_INVISIBLE = 2,
};

/* Per-namespace state for one pid namespace. */
struct pid_namespace {
	struct kref kref;
	struct idr idr;
	struct rcu_head rcu;
	unsigned int pid_allocated;
	/* Task acting as the init process of this namespace. */
	struct task_struct *child_reaper;
	struct kmem_cache *pid_cachep;
	/* NOTE(review): presumably the nesting depth; init_pid_ns uses 0 -- confirm. */
	unsigned int level;
	struct pid_namespace *parent;        /* parent namespace */
#ifdef CONFIG_PROC_FS
	struct dentry *proc_self;
	struct dentry *proc_thread_self;
#endif
#ifdef CONFIG_BSD_PROCESS_ACCT
	struct fs_pin *bacct;
#endif
	struct user_namespace *user_ns;
	struct ucounts *ucounts;
	kgid_t pid_gid;
	int hide_pid;	/* one of the HIDEPID_* values */
	int reboot;	/* group exit code if this pidns was rebooted */
	struct ns_common ns;
} __randomize_layout;

/* Declared here, defined elsewhere. */
extern struct pid_namespace init_pid_ns;

2.1 初始化全局默认PID命名空间[5]

/*
 * PID-map pages start out as NULL, they get allocated upon
 * first use and are never deallocated. This way a low pid_max
 * value does not cause lots of bitmaps to be allocated, but
 * the scheme scales to up to 4 million PIDs, runtime.
 */
/* The initial (global default) pid namespace, defined statically. */
struct pid_namespace init_pid_ns = {
	.kref = KREF_INIT(2),
	.idr = IDR_INIT(init_pid_ns.idr),
	.pid_allocated = PIDNS_ADDING,
	/* Level 0; .parent is not listed and is therefore zero-initialized (NULL). */
	.level = 0,
	.child_reaper = &init_task,
	.user_ns = &init_user_ns,
	.ns.inum = PROC_PID_INIT_INO,
	/* The namespace operations are only set when CONFIG_PID_NS is defined. */
#ifdef CONFIG_PID_NS
	.ns.ops = &pidns_operations,
#endif
};
EXPORT_SYMBOL_GPL(init_pid_ns);

3. PID与Namespace的关联[6]

/* Statically initialized struct pid whose only number lives in init_pid_ns. */
struct pid init_struct_pid = {
	.count		= REFCOUNT_INIT(1),
	/*
	 * Three empty list heads are spelled out; any tasks[] entries not
	 * listed here (the array has PIDTYPE_MAX slots) are zero-initialized.
	 */
	.tasks		= {
		{ .first = NULL },
		{ .first = NULL },
		{ .first = NULL },
	},
	.level		= 0,
	/* A single upid entry, at index 0 to match .level above. */
	.numbers	= { {
		.nr		= 0,                   /* numeric PID */
		.ns		= &init_pid_ns,        /* namespace this number belongs to */
	}, }
};

3.1 Related Structs[7]

/*
 * Kinds of id a struct pid can be used as. PIDTYPE_MAX is not a kind of
 * its own: it is the number of kinds listed before it and is used to
 * size per-type arrays such as the tasks[] member of struct pid.
 */
enum pid_type {
	PIDTYPE_PID = 0,
	PIDTYPE_TGID,
	PIDTYPE_PGID,
	PIDTYPE_SID,
	PIDTYPE_MAX,	/* count of the entries above; keep last */
};

/*
 * struct upid is used to get the id of the struct pid, as it is
 * seen in particular namespace. Later the struct pid is found with
 * find_pid_ns() using the int nr and struct pid_namespace *ns.
 */

/* One (number, namespace) pair: the id a struct pid has in one namespace. */
struct upid {
	int nr;                        /* numeric id as seen in @ns */
	struct pid_namespace *ns;      /* namespace in which @nr is valid */
};

/*
 * Kernel-internal representation of a process id: a reference count,
 * per-type lists of the tasks using it, and its number in one or more
 * pid namespaces.
 */
struct pid
{
	refcount_t count;
	/* Index into numbers[] of the entry for the namespace this pid was allocated in. */
	unsigned int level;
	spinlock_t lock;
	/* lists of tasks that use this pid */
	struct hlist_head tasks[PIDTYPE_MAX];
	struct hlist_head inodes;
	/* wait queue for pidfd notifications */
	wait_queue_head_t wait_pidfd;
	struct rcu_head rcu;
	/*
	 * Declared with one element but indexed by 'level' elsewhere.
	 * NOTE(review): presumably allocated with level + 1 entries -- confirm.
	 */
	struct upid numbers[1];
};

4. PID命名空间的创建

两种方式:

(1)fork or clone: 使用特定选项。

(2)unshare系统调用将进程的某些部分从父进程分离,其中也包括命名空间。

预定义的Fork/Clone的命名空间FLAG:

// https://github.com/torvalds/linux/blob/master/include/uapi/linux/sched.h#L8
/*
 * cloning flags:
 */
...
#define CLONE_NEWNS		0x00020000	/* new mount namespace group */
...
/* Namespace-creating clone flags, one bit each. */
#define CLONE_NEWCGROUP  0x02000000  /* create a new cgroup namespace  */
#define CLONE_NEWUTS     0x04000000  /* create a new utsname namespace */
#define CLONE_NEWIPC     0x08000000  /* create a new ipc namespace     */
#define CLONE_NEWUSER    0x10000000  /* create a new user namespace    */
#define CLONE_NEWPID     0x20000000  /* create a new pid namespace     */
#define CLONE_NEWNET     0x40000000  /* create a new network namespace */

[1] https://github.com/torvalds/linux/blob/master/include/linux/sched.h#L922

[2] https://github.com/torvalds/linux/blob/master/include/linux/nsproxy.h#L16

[3] https://github.com/torvalds/linux/blob/master/kernel/pid.c#L487 

[4] https://github.com/torvalds/linux/blob/master/include/linux/pid_namespace.h

[5] https://github.com/torvalds/linux/blob/master/kernel/pid.c#L73

[6] https://github.com/torvalds/linux/blob/master/kernel/pid.c#L56

[7] https://github.com/torvalds/linux/blob/master/include/linux/pid.h#L48

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值