kernel :5.0
当在cgroup某个子系统下创建一个目录时,该目录下会自动生成许多文件,这个过程由cgroup_mkdir完成。可通过ftrace(events/cgroup/cgroup_mkdir)跟踪。
cgroup_mkdir
/*
 * cgroup_mkdir - create a child cgroup together with its kernfs directory
 * @parent_kn: kernfs node of the directory the new one is created in
 * @name: name of the new directory; rejected if it contains '\n'
 * @mode: mode handed through to kernfs_create_dir()
 *
 * Obtains the parent cgroup via cgroup_kn_lock_live(), creates the new
 * cgroup with cgroup_create(), creates its directory, populates it and
 * finally calls kernfs_activate() on it.
 *
 * Returns 0 on success, -EINVAL for a name containing '\n', -ENODEV when
 * cgroup_kn_lock_live() yields no parent, -EAGAIN when
 * cgroup_check_hierarchy_limits() fails, or the error reported by the
 * step that failed.
 */
int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
{
struct cgroup *parent, *cgrp;
struct kernfs_node *kn;
int ret;
/* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */
if (strchr(name, '\n'))
return -EINVAL;
parent = cgroup_kn_lock_live(parent_kn, false); // original note: kernfs_node->priv points to a cgroup
if (!parent)
return -ENODEV;
if (!cgroup_check_hierarchy_limits(parent)) { // original note: checks the parent's descendant count and depth so that adding the new cgroup does not exceed the maximums
ret = -EAGAIN;
goto out_unlock;
}
/* nothing to destroy yet if this fails, so only out_unlock is needed */
cgrp = cgroup_create(parent);
if (IS_ERR(cgrp)) {
ret = PTR_ERR(cgrp);
goto out_unlock;
}
/* create the directory */
kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
if (IS_ERR(kn)) {
ret = PTR_ERR(kn);
goto out_destroy;
}
cgrp->kn = kn;
/*
* This extra ref will be put in cgroup_free_fn() and guarantees
* that @cgrp->kn is always accessible.
*/
kernfs_get(kn);
ret = cgroup_kn_set_ugid(kn);
if (ret)
goto out_destroy;
ret = css_populate_dir(&cgrp->self);// original note: creates the base attribute files
if (ret)
goto out_destroy;
ret = cgroup_apply_control_enable(cgrp);// original note: fills the cgroup's subsys array from the ss_mask assigned in cgroup_create and creates the subsystem attribute files
if (ret)
goto out_destroy;
TRACE_CGROUP_PATH(mkdir, cgrp);
/* let's create and online css's */
kernfs_activate(kn);
/* success: jump past out_destroy so that only cgroup_kn_unlock() runs */
ret = 0;
goto out_unlock;
/* failure after cgroup_create() succeeded: tear the new cgroup down */
out_destroy:
cgroup_destroy_locked(cgrp);
/* common exit, pairs with cgroup_kn_lock_live() above */
out_unlock:
cgroup_kn_unlock(parent_kn);
return ret;
}
cgroup_create(parent)
当cgroup_create完成后,新创建的cgroup的subsys数组并未填充。
/*
 * cgroup_create - allocate and initialize a new child cgroup of @parent
 * @parent: cgroup the new one is created under
 *
 * Allocates the cgroup, sets up its self css refcount, ID and the other
 * per-cgroup state, records the ancestor IDs, links it into @parent's
 * children list and publishes it in the root's cgroup_idr.
 *
 * Returns the new cgroup on success or an ERR_PTR() on failure. On
 * failure everything acquired so far is released through the out_* labels
 * at the bottom, which undo the setup steps in reverse order.
 */
static struct cgroup *cgroup_create(struct cgroup *parent)
{
struct cgroup_root *root = parent->root;
struct cgroup *cgrp, *tcgrp;
int level = parent->level + 1;
int ret;
/* allocate the cgroup and its ID, 0 is reserved for the root */
/* sized for level + 1 ancestor_ids entries, indexed by level 0..level */
cgrp = kzalloc(struct_size(cgrp, ancestor_ids, (level + 1)),
GFP_KERNEL);
if (!cgrp)
return ERR_PTR(-ENOMEM);
ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
if (ret)
goto out_free_cgrp;
/* rstat is only set up when the parent is on the default hierarchy */
if (cgroup_on_dfl(parent)) {
ret = cgroup_rstat_init(cgrp);
if (ret)
goto out_cancel_ref;
}
/*
* Temporarily set the pointer to NULL, so idr_find() won't return
* a half-baked cgroup.
*/
cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_KERNEL);
if (cgrp->id < 0) {
/* any allocation failure here is reported as -ENOMEM */
ret = -ENOMEM;
goto out_stat_exit;
}
init_cgroup_housekeeping(cgrp);
cgrp->self.parent = &parent->self;
cgrp->root = root;
cgrp->level = level;
ret = psi_cgroup_alloc(cgrp);
if (ret)
goto out_idr_free;
ret = cgroup_bpf_inherit(cgrp);
if (ret)
goto out_psi_free;
/*
 * Walk from the new cgroup up through its ancestors, recording each ID
 * at its level and bumping nr_descendants on every ancestor.
 */
for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) { // original note: increments the descendant count of every ancestor
cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
if (tcgrp != cgrp)
tcgrp->nr_descendants++;
}
/* copy these two flags from the parent when they are set there */
if (notify_on_release(parent))
set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
cgrp->self.serial_nr = css_serial_nr_next++;
/* allocation complete, commit to creation */
list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children); // original note: links the child css into the parent css's children list through its sibling node
atomic_inc(&root->nr_cgrps);
cgroup_get_live(parent);
/*
* @cgrp is now fully operational. If something fails after this
* point, it'll be released via the normal destruction path.
*/
cgroup_idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
/*
* On the default hierarchy, a child doesn't automatically inherit
* subtree_control from the parent. Each is configured manually.
*/
if (!cgroup_on_dfl(cgrp))
cgrp->subtree_control = cgroup_control(cgrp); // original note: returns the parent cgroup's subtree_control
cgroup_propagate_control(cgrp);// original note: sets cgrp's subsys_mask
return cgrp;
/* error unwinding: each label falls through to the ones below it */
out_psi_free:
psi_cgroup_free(cgrp);
out_idr_free:
cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
out_stat_exit:
/* mirrors the conditional cgroup_rstat_init() above */
if (cgroup_on_dfl(parent))
cgroup_rstat_exit(cgrp);
out_cancel_ref:
percpu_ref_exit(&cgrp->self.refcnt);
out_free_cgrp:
kfree(cgrp);
return ERR_PTR(ret);
}
总结
通过这个流程可以看出,cgroup树的层级关系由cgroup_subsys_state(css)维护:每个cgroup内嵌一个css(即cgrp->self),通过其parent、sibling、children成员维护这棵树;另外还有数个css,由对应子系统的css_alloc函数分配,其ss成员指向对应的子系统。
子系统的css_alloc函数会分配一个各子系统对应的cgroup结构(如mem_cgroup、blkcg等),其第一个成员就是一个css结构。