runc rootless模式代码解析---创建dev设备节点文件

runc rootless模式代码解析

本来以为 user namespace 映射了 root 权限后,就可以直接 mount /proc 和 /dev,从而实现容器隔离。但实际测试发现,在 userns 中即使拥有 root 权限,也不能把 udev mount 到自身的 /dev。不过 docker 实现了类似的功能,所以这里研究了相应的实现。docker 通过 containerd 与 runc 交互来创建容器;而 runc 可以直接以 rootless 模式启动,把普通用户映射为容器内的 root,并生成 /dev 设备文件。这里研究 /dev 目录下文件的生成过程。
“libcontainer/rootfs_linux.go”


// mountToRootfs mounts m at its destination inside the container rootfs.
// NOTE(review): abridged excerpt; elided code is marked with "..." and the
// error checks after each "if err := ...;" were dropped.
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error {
	var (
		dest = m.Destination
	)
	// Place dest under rootfs unless it already carries the rootfs prefix.
	if !strings.HasPrefix(dest, rootfs) {
		dest = filepath.Join(rootfs, dest)
	}
	switch m.Device {
	...
		case "tmpfs":
		...
		// Perform the mount and apply its propagation flags.
		if err := mountPropagate(m, rootfs, mountLabel);
		if copyUp {
			// Copy the contents of dest into tmpDir, then move the tmpDir
			// mount onto dest (MS_MOVE).
			if err := fileutils.CopyDirectory(dest, tmpDir); 
			if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, "")
		}

函数 mount_root() 把 ROOT_DEV(真正的根文件系统所在块设备)对应的设备挂载到 rootfs 的 /root 目录下。根文件系统挂载后,进程的当前目录是 /root。sys_mount(".", "/", NULL, MS_MOVE, NULL); 将当前目录下的实际根文件系统移动到根目录 "/" 下。sys_chroot((const char __user __force *)"."); 将当前目录设为进程的根目录。

// mountPropagate mounts m.Source at dest and then applies each of the mount's
// propagation flags to dest.
// NOTE(review): abridged excerpt; dest, flags and data come from the elided
// code marked with "...".
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string)
{
//   Clean up the /dev directory.
	...
	// The actual mount of the source onto dest.
	if err := unix.Mount(m.Source, dest, m.Device, uintptr(flags), data); 

	// Apply every propagation flag as a separate mount call on dest; the
	// return values are not checked in this excerpt.
	for _, pflag := range m.PropagationFlags {
		unix.Mount("", dest, "", uintptr(pflag), "")
	}
	...
}
// CopyDirectory copies the files under the source directory
// to dest directory. The dest directory is created if it
// does not exist.
//
// NOTE(review): abridged excerpt; the error checks were dropped and the
// "if err != nil {" inside the walk callback is left unclosed.
func CopyDirectory(source string, dest string) error {
	fi, err := os.Stat(source)
	// Get owner.
	st, ok := fi.Sys().(*syscall.Stat_t)
	// We have to pick an owner here anyway.
	// Create dest with the mode and owner of the source directory.
	MkdirAllNewAs(dest, fi.Mode(), int(st.Uid), int(st.Gid));
	return filepath.Walk(source, func(path string, info os.FileInfo, err error) error {
		// Get the relative path
		relPath, err := filepath.Rel(source, path)
		if err != nil {

		if info.IsDir() {
			// Skip the source directory.
			if path != source {
				// Get the owner.
				st, ok := info.Sys().(*syscall.Stat_t)
				uid := int(st.Uid)
				gid := int(st.Gid)
				// Recreate the sub-directory under dest with the same mode
				// and owner as in the source tree.
				os.Mkdir(filepath.Join(dest, relPath), info.Mode());
				os.Lchown(filepath.Join(dest, relPath), uid, gid);
			}
			return nil
		}
		// Copy the file.
		CopyFile(path, filepath.Join(dest, relPath));
		return nil
	})
}

mknod 创建设备节点

这里创建节点后复制了host的设备文件。

// CopyFile copies the file at source to dest. Symlinks are recreated with the
// same target, block and character devices are recreated with mknod using the
// source's major/minor numbers, and regular files have their contents copied.
// Afterwards the source's owner is applied to dest and, for anything that is
// not a symlink, its mode as well.
//
// NOTE(review): abridged excerpt; the error checks were dropped and the
// "if err := os.Lchown(...);" statement is truncated.
func CopyFile(source string, dest string) error {
	// Lstat is used so a symlink is inspected itself, not its target.
	si, err := os.Lstat(source)
	st, ok := si.Sys().(*syscall.Stat_t)

	uid := int(st.Uid)
	gid := int(st.Gid)

	// Handle symlinks
	if si.Mode()&os.ModeSymlink != 0 {
		target, err := os.Readlink(source)
		os.Symlink(target, dest);
	}

	// Handle device files
	if st.Mode&syscall.S_IFMT == syscall.S_IFBLK || st.Mode&syscall.S_IFMT == syscall.S_IFCHR {
		devMajor := int64(major(uint64(st.Rdev)))
		devMinor := int64(minor(uint64(st.Rdev)))
		// Start from the low 12 bits of the file mode; the device type bit
		// is OR-ed in below.
		mode := uint32(si.Mode() & 07777)
		if st.Mode&syscall.S_IFMT == syscall.S_IFBLK {
			mode |= syscall.S_IFBLK
		}
		if st.Mode&syscall.S_IFMT == syscall.S_IFCHR {
			mode |= syscall.S_IFCHR
		}
		// Create the node at dest with the same device number as the source.
		syscall.Mknod(dest, mode, int(mkdev(devMajor, devMinor)));
	}

	// Handle regular files (copy the file contents)
	if si.Mode().IsRegular() {
		sf, err := os.Open(source)
		defer sf.Close()
		df, err := os.Create(dest)
		defer df.Close()
		_, err = io.Copy(df, sf)
	}

	// Chown the file
	if err := os.Lchown(dest, uid, gid); 

	// Chmod the file
	if !(si.Mode()&os.ModeSymlink == os.ModeSymlink) {
		os.Chmod(dest, si.Mode());
	}

	return nil
}

// CopyDirectory copi

建立dev目录的开始

mountToRootfs 再往上一层即是 prepareRootfs:prepareRootfs 调用 mountToRootfs 创建设备节点,并复制设备文件。


// prepareRootfs sets up the devices, mount points, and filesystems for use
// inside a new mount namespace. It doesn't set anything as ro. You must call
// finalizeRootfs after this function to finish setting up the rootfs.
//
// For every configured mount it runs the premount commands, mounts the entry
// into the rootfs, and then runs the postmount commands. When needsSetupDev
// reports true, it also creates the device nodes, sets up ptmx and creates
// the /dev symlinks. The first failing step aborts the setup and its error is
// returned wrapped with a short description of the step.
func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
	config := iConfig.Config
	if err := prepareRoot(config); err != nil {
		return newSystemErrorWithCause(err, "preparing rootfs")
	}

	hasCgroupns := config.Namespaces.Contains(configs.NEWCGROUP)
	setupDev := needsSetupDev(config)
	for _, m := range config.Mounts {
		for _, precmd := range m.PremountCmds {
			if err := mountCmd(precmd); err != nil {
				return newSystemErrorWithCause(err, "running premount command")
			}
		}
		// A failed mount must stop the setup; continuing would leave the
		// rootfs only partially populated.
		if err := mountToRootfs(m, config.Rootfs, config.MountLabel, hasCgroupns); err != nil {
			return newSystemErrorWithCause(err, "mounting to rootfs")
		}
		for _, postcmd := range m.PostmountCmds {
			if err := mountCmd(postcmd); err != nil {
				return newSystemErrorWithCause(err, "running postmount command")
			}
		}
	}

	if setupDev {
		if err := createDevices(config); err != nil {
			return newSystemErrorWithCause(err, "creating device nodes")
		}
		if err := setupPtmx(config); err != nil {
			return newSystemErrorWithCause(err, "setting up ptmx")
		}
		if err := setupDevSymlinks(config.Rootfs); err != nil {
			return newSystemErrorWithCause(err, "setting up /dev symlinks")
		}
	}

	return nil
}

这里还创建了设备节点。


// createDevices creates every device node listed in config.Devices inside the
// container rootfs. The process umask is cleared while the nodes are created
// and restored before returning. It returns the first error reported by
// createDeviceNode, or nil when all nodes were created.
func createDevices(config *configs.Config) error {
	useBindMount := system.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER)
	oldMask := unix.Umask(0000)
	// Restore the previous umask on every exit path.
	defer unix.Umask(oldMask)
	for _, node := range config.Devices {
		// containers running in a user namespace are not allowed to mknod
		// devices so we can just bind mount it from the host.
		if err := createDeviceNode(config.Rootfs, node, useBindMount); err != nil {
			return err
		}
	}
	return nil
}


// createDeviceNode creates the device node described by node at node.Path
// inside rootfs, creating the parent directory first.
//
// When bind is true the node is bind mounted via bindMountDeviceNode instead
// of being created with mknodDevice. Otherwise mknodDevice is tried first: an
// "already exists" error is treated as success, and a permission error falls
// back to the bind mount. Any other error, including a failure to create the
// parent directory, is returned to the caller.
func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
	dest := filepath.Join(rootfs, node.Path)
	if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
		return err
	}

	if bind {
		return bindMountDeviceNode(dest, node)
	}

	err := mknodDevice(dest, node)
	switch {
	case err == nil, os.IsExist(err):
		return nil
	case os.IsPermission(err):
		// mknod was refused, so fall back to bind mounting the node.
		return bindMountDeviceNode(dest, node)
	default:
		return err
	}
}

创建设备节点并绑定设备节点。


// bindMountDeviceNode makes sure a file exists at dest to serve as the mount
// point and then bind mounts node.Path onto it. A creation error other than
// "already exists" is returned; otherwise the result of the mount call is
// returned.
func bindMountDeviceNode(dest string, node *configs.Device) error {
	target, createErr := os.Create(dest)
	if createErr != nil {
		if !os.IsExist(createErr) {
			return createErr
		}
	}
	// The file is only needed as a mount target, so release the handle now.
	if target != nil {
		target.Close()
	}
	mountErr := unix.Mount(node.Path, dest, "bind", unix.MS_BIND, "")
	return mountErr
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值