linux内核namespace,Linux内核的namespace机制分析

3.2  copy_process函数

在copy_process函数中调用copy_namespaces函数。

static struct task_struct *copy_process(unsigned long clone_flags,

unsigned long stack_start,

unsigned long stack_size,

int __user *child_tidptr,

struct pid *pid,

int trace)

{

int retval;

struct task_struct *p;

/*下面的代码是对clone_flag标志进行检查,有部分表示是互斥的,例如CLONE_NEWNS和CLONENEW_FS*/

if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))

return ERR_PTR(-EINVAL);

if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))

return ERR_PTR(-EINVAL);

if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))

return ERR_PTR(-EINVAL);

if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))

return ERR_PTR(-EINVAL);

if ((clone_flags & CLONE_PARENT) &&

current->signal->flags & SIGNAL_UNKILLABLE)

return ERR_PTR(-EINVAL);

……

retval = copy_namespaces(clone_flags, p);

if (retval)

goto bad_fork_cleanup_mm;

retval = copy_io(clone_flags, p);

if (retval)

goto bad_fork_cleanup_namespaces;

retval = copy_thread(clone_flags, stack_start, stack_size, p);

if (retval)

goto bad_fork_cleanup_io;

/*do_fork中调用copy_process函数,该函数中pid参数为NULL,所以这里的if判断是成立的。为进程所在的namespace分配pid,在3.0的内核之前还有一个关键函数,就是namespace创建后和cgroup的关系,

if (current->nsproxy != p->nsproxy) {

retval = ns_cgroup_clone(p, pid);

if (retval)

goto bad_fork_free_pid;

但在3.0内核以后给删掉了,具体请参考remove the ns_cgroup*/

if (pid != &init_struct_pid) {

retval = -ENOMEM;

pid = alloc_pid(p->nsproxy->pid_ns_for_children);

if (!pid)

goto bad_fork_cleanup_io;

}…..

}

c5cad66dc216b371607cd56423c02444.png

3.3  copy_namespaces 函数

在kernel/nsproxy.c文件中定义了copy_namespaces函数。

int copy_namespaces(unsigned long flags, struct task_struct *tsk)

{

struct nsproxy *old_ns = tsk->nsproxy;

struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);

struct nsproxy *new_ns;

/*首先检查flag,如果flag标志不是下面的五种之一,就会调用get_nsproxy对old_ns递减引用计数,然后直接返回0*/

if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |

CLONE_NEWPID | CLONE_NEWNET)))) {

get_nsproxy(old_ns);

return 0;

}

/*当前进程是否有超级用户的权限*/

if (!ns_capable(user_ns, CAP_SYS_ADMIN))

return -EPERM;

/*

* CLONE_NEWIPC must detach from the undolist: after switching

* to a new ipc namespace, the semaphore arrays from the old

* namespace are unreachable.  In clone parlance, CLONE_SYSVSEM

* means share undolist with parent, so we must forbid using

* it along with CLONE_NEWIPC.

对CLONE_NEWIPC进行特殊的判断,*/

if ((flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) ==

(CLONE_NEWIPC | CLONE_SYSVSEM))

return -EINVAL;

/*为进程创建新的namespace*/

new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs);

if (IS_ERR(new_ns))

return  PTR_ERR(new_ns);

tsk->nsproxy = new_ns;

return 0;

}

3.4  create_new_namespaces函数

create_new_namespaces创建新的namespace

static struct nsproxy *create_new_namespaces(unsigned long flags,

struct task_struct *tsk, struct user_namespace *user_ns,

struct fs_struct *new_fs)

{

struct nsproxy *new_nsp;

int err;

/*为新的nsproxy分配内存空间,并对其引用计数设置为初始1*/

new_nsp = create_nsproxy();

if (!new_nsp)

return ERR_PTR(-ENOMEM);

/*如果Namespace中的各个标志位进行了设置,则会调用相应的namespace进行创建*/

new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);

if (IS_ERR(new_nsp->mnt_ns)) {

err = PTR_ERR(new_nsp->mnt_ns);

goto out_ns;

}

new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns);

if (IS_ERR(new_nsp->uts_ns)) {

err = PTR_ERR(new_nsp->uts_ns);

goto out_uts;

}

new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns);

if (IS_ERR(new_nsp->ipc_ns)) {

err = PTR_ERR(new_nsp->ipc_ns);

goto out_ipc;

}

new_nsp->pid_ns_for_children =

copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children);

if (IS_ERR(new_nsp->pid_ns_for_children)) {

err = PTR_ERR(new_nsp->pid_ns_for_children);

goto out_pid;

}

new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);

if (IS_ERR(new_nsp->net_ns)) {

err = PTR_ERR(new_nsp->net_ns);

goto out_net;

}

return new_nsp;

out_net:

if (new_nsp->pid_ns_for_children)

put_pid_ns(new_nsp->pid_ns_for_children);

out_pid:

if (new_nsp->ipc_ns)

put_ipc_ns(new_nsp->ipc_ns);

out_ipc:

if (new_nsp->uts_ns)

put_uts_ns(new_nsp->uts_ns);

out_uts:

if (new_nsp->mnt_ns)

put_mnt_ns(new_nsp->mnt_ns);

out_ns:

kmem_cache_free(nsproxy_cachep, new_nsp);

return ERR_PTR(err);

}

3.4.1 create_nsproxy函数

static inline struct nsproxy *create_nsproxy(void)

{

struct nsproxy *nsproxy;

nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);

if (nsproxy)

atomic_set(&nsproxy->count, 1);

return nsproxy;

}

例子1:namespace pid的例子

#include

#include

#include

#include

#include

#include

#include

static int fork_child(void *arg)

{

int a = (int)arg;

int i;

pid_t pid;

char *cmd  = "ps -el;

printf("In the container, my pid is: %d\n", getpid());

/*ps命令是解析procfs的内容得到结果的,而procfs根目录的进程pid目录是基于mount当时的pid namespace的,这个在procfs的get_sb回调中体现的。因此只需要重新mount一下proc, mount -t proc proc /proc*/

mount("proc", "/proc", "proc", 0, "");

for (i = 0; i

pid = fork();

if (pid <0)

return pid;

else if (pid)

printf("pid of my child is %d\n", pid);

else if (pid == 0) {

sleep(30);

exit(0);

}

}

execl("/bin/bash", "/bin/bash","-c",cmd, NULL);

return 0;

}

int main(int argc, char *argv[])

{

int cpid;

void *childstack, *stack;

int flags;

int ret = 0;

int stacksize = getpagesize() * 4;

if (argc != 2) {

fprintf(stderr, "Wrong usage.\n");

return -1;

}

stack = malloc(stacksize);

if(stack == NULL)

{

return -1;

}

printf("Out of the container, my pid is: %d\n", getpid());

childstack = stack + stacksize;

flags = CLONE_NEWPID | CLONE_NEWNS;

cpid = clone(fork_child, childstack, flags, (void *)atoi(argv[1]));

printf("cpid: %d\n", cpid);

if (cpid <0) {

perror("clone");

ret = -1;

goto out;

}

fprintf(stderr, "Parent sleeping 20 seconds\n");

sleep(20);

ret = 0;

out:

free(stack);

return ret;

}

}运行结果:

root@Ubuntu:~/c_program# ./namespace 7

Out of the container, my pid is: 8684

cpid: 8685

Parent sleeping 20 seconds

In the container, my pid is: 1

pid of my child is 2

pid of my child is 3

pid of my child is 4

pid of my child is 5

pid of my child is 6

pid of my child is 7

pid of my child is 8

F S  UID  PID  PPID  C PRI  NI ADDR SZ WCHAN  TTY          TIME CMD

4 R    0    1    0  0  80  0 -  1085 -      pts/0    00:00:00 ps

1 S    0    2    1  0  80  0 -  458 hrtime pts/0    00:00:00 namespace

1 S    0    3    1  0  80  0 -  458 hrtime pts/0    00:00:00 namespace

1 S    0    4    1  0  80  0 -  458 hrtime pts/0    00:00:00 namespace

1 S    0    5    1  0  80  0 -  458 hrtime pts/0    00:00:00 namespace

1 S    0    6    1  0  80  0 -  458 hrtime pts/0    00:00:00 namespace

1 S    0    7    1  0  80  0 -  458 hrtime pts/0    00:00:00 namespace

1 S    0    8    1  0  80  0 -  458 hrtime pts/0    00:00:00 namespace

例子2:UTS的例子

#define _GNU_SOURCE

#include

#include

#include

#include

#include

#include

#include

#define errExit(msg)    do { perror(msg); exit(EXIT_FAILURE); \

} while (0)

static int              /* Start function for cloned child */

childFunc(void *arg)

{

struct utsname uts;

/* Change hostname in UTS namespace of child */

if (sethostname(arg, strlen(arg)) == -1)

errExit("sethostname");

/* Retrieve and display hostname */

if (uname(&uts) == -1)

errExit("uname");

printf("uts.nodename in child:  %s\n", uts.nodename);

/* Keep the namespace open for a while, by sleeping.

*              This allows some experimentation--for example, another

*                            process might join the namespace. */

sleep(200);

return 0;          /* Child terminates now */

}

#define STACK_SIZE (1024 * 1024)    /* Stack size for cloned child */

int

main(int argc, char *argv[])

{

char *stack;                    /* Start of stack buffer */

char *stackTop;                /* End of stack buffer */

pid_t pid;

struct utsname uts;

if (argc < 2) {

fprintf(stderr, "Usage: %s \n", argv[0]);

exit(EXIT_SUCCESS);

}

/* Allocate stack for child */

stack = malloc(STACK_SIZE);

if (stack == NULL)

errExit("malloc");

stackTop = stack + STACK_SIZE;  /* Assume stack grows downward */

/* Create child that has its own UTS namespace;

*              child commences execution in childFunc() */

pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);

if (pid == -1)

errExit("clone");

printf("clone() returned %ld\n", (long) pid);

/* Parent falls through to here */

sleep(1);          /* Give child time to change its hostname */

/* Display hostname in parent's UTS namespace. This will be

*              different from hostname in child's UTS namespace. */

if (uname(&uts) == -1)

errExit("uname");

printf("uts.nodename in parent: %s\n", uts.nodename);

if (waitpid(pid, NULL, 0) == -1)    /* Wait for child */

errExit("waitpid");

printf("child has terminated\n");

exit(EXIT_SUCCESS);

}

root@ubuntu:~/c_program# ./namespace_1 test

clone() returned 4101

uts.nodename in child:  test

uts.nodename in parent: ubuntu

0b1331709591d260c1c78e86d0c51c18.png

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值