Android将需要mount的子系统和对应路径等信息,放在配置文件/system/core/libprocessgroup/profiles/cgroups.json中。init进程的SecondStage阶段调用函数SetupCgroupsAction()建立各子系统,主要是在ReadDescriptors()中解析配置文件,在SetupCgroup()中调用系统调用mount对应子系统。
// Entry point of init (excerpt): when argv[1] is "second_stage", control is
// handed to SecondStageMain() and its result becomes the process exit status.
int main(int argc, char** argv) {
.......................................
    if (!strcmp(argv[1], "second_stage")) {
        return SecondStageMain(argc, argv);
    }
.......................................
}
// Second-stage entry point of init (excerpt). The part shown here queues
// SetupCgroupsAction as a builtin action registered under the name
// "SetupCgroups"; the action itself runs later, when the action queue is
// processed.
int SecondStageMain(int argc, char** argv) {
.......................................
am.QueueBuiltinAction(SetupCgroupsAction, "SetupCgroups");
.......................................
}
// Builtin action queued by SecondStageMain() under the name "SetupCgroups".
// Creates the directory that will hold CGROUPS_RC_PATH (mode 0711) and then
// calls CgroupSetup() to mount the cgroup subsystems.
// Returns Success() when CgroupSetup() succeeds, otherwise an errno-carrying
// error with the message "Failed to setup cgroups".
static Result<Success> SetupCgroupsAction(const BuiltinArguments&) {
    make_dir(android::base::Dirname(CGROUPS_RC_PATH), 0711);
    if (!CgroupSetup()) {
        return ErrorErrno() << "Failed to setup cgroups";
    }
    return Success();
}
// Sets up the cgroup hierarchies described by the configuration file
// (excerpt). ReadDescriptors() fills `descriptors`, keyed by name; each
// descriptor is then passed to SetupCgroup(). Returns false if the
// descriptors cannot be read; the remaining return paths are elided here.
bool CgroupSetup() {
using namespace android::cgrouprc;
std::map<std::string, CgroupDescriptor> descriptors;
........................................................
if (!ReadDescriptors(&descriptors)) { // parse the configuration file
return false;
}
for (auto [name, descriptor] : descriptors) {
if (SetupCgroup(descriptor)) { // mount each subsystem
................................................
}
....................................................
}
........................................................
}
mount系统调用最终会走到cgroup_mount(),cgroup_mount()有两个重要任务:一是创建cgroup_root并初始化;二是创建子系统根目录。本文重点在Cgroup层次关系,主要分析cgroup_root初始化过程。cgroup_mount()首先为cgroup_root分配内存,然后调用init_cgroup_root()初始化cgroup_root,init_cgroup_root()在《Android Cgroup分析之Cgroup初始化》中已经分析过。
/*
 * cgroup_mount - mount handler for the cgroup filesystem types (excerpt)
 * @fs_type: filesystem type being mounted; compared against cgroup2_fs_type
 *           to decide whether this is a v2 mount
 * @flags: mount flags, forwarded to kernfs_mount()
 * @unused_dev_name: not used in the portion shown
 * @data: mount option data, parsed by parse_cgroupfs_options()
 *
 * In the portion shown: parses the mount options into @opts, allocates a
 * zeroed cgroup_root, initializes it with init_cgroup_root(), sets it up for
 * the requested subsystem mask via cgroup_setup_root(), and finally mounts
 * the root's kernfs tree with kernfs_mount().
 *
 * Returns the dentry produced by kernfs_mount(). Error handling between the
 * steps is elided in this excerpt.
 */
static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int flags, const char *unused_dev_name,
void *data)
{
bool is_v2 = fs_type == &cgroup2_fs_type;
struct super_block *pinned_sb = NULL;
struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
struct cgroup_subsys *ss;
struct cgroup_root *root;
struct cgroup_sb_opts opts;
struct dentry *dentry;
int ret;
int i;
bool new_sb;
...............................................................
ret = parse_cgroupfs_options(data, &opts);
...............................................................
/* zero-initialized allocation of the new hierarchy root */
root = kzalloc(sizeof(*root), GFP_KERNEL);
...............................................................
init_cgroup_root(root, &opts);
ret = cgroup_setup_root(root, opts.subsys_mask);
...............................................................
out_mount:
/* the superblock magic depends on whether this is a cgroup2 mount */
dentry = kernfs_mount(fs_type, flags, root->kf_root,
is_v2 ? CGROUP2_SUPER_MAGIC : CGROUP_SUPER_MAGIC,
&new_sb);
...............................................................
return dentry;
}
mount Cgroup子系统后,在对应文件夹能看到cgroup.procs、tasks等操作Cgroup的节点,这些节点的创建是在cgroup_setup_root()中完成的。首先调用kernfs_create_root()创建kernfs_root,kernfs_root代表一个kernfs层级的根(这里即该cgroup文件系统的根),kernfs_node代表一个目录或文件(kernfs_node->mode有标志位S_IFDIR为目录,否则为文件)。kernfs_root的成员syscall_ops指向cgroup_kf_syscall_ops,kernfs_root->kn->priv指向cgroup_root包含的struct cgroup。
/*
 * cgroup_setup_root - set up a newly initialized cgroup_root (excerpt)
 * @root: the hierarchy root to set up
 * @ss_mask: bitmask of subsystems to bind to @root
 *
 * In the portion shown: creates the kernfs root for the hierarchy with the
 * root cgroup as its private data, populates the root cgroup's directory via
 * css_populate_dir(), binds the requested subsystems via
 * rebind_subsystems(), then links @root onto the cgroup_roots list and bumps
 * cgroup_root_count.
 *
 * The return statement and the destroy_root error path are elided here.
 */
static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
{
LIST_HEAD(tmp_links);
struct cgroup *root_cgrp = &root->cgrp;
struct css_set *cset;
int i, ret;
...........................................................
// create kf_root
root->kf_root = kernfs_create_root(&cgroup_kf_syscall_ops,
KERNFS_ROOT_CREATE_DEACTIVATED,
root_cgrp);
/* the root cgroup shares the kernfs root's directory node */
root_cgrp->kn = root->kf_root->kn;
/* populate files for the root cgroup's own css (&root_cgrp->self) */
ret = css_populate_dir(&root_cgrp->self);
if (ret)
goto destroy_root;
ret = rebind_subsystems(root, ss_mask);
......................................................................
list_add(&root->root_list, &cgroup_roots);
cgroup_root_count++;
...............................................................
}
/*
 * kernfs_create_root - allocate and initialize a kernfs_root (excerpt)
 * @scops: syscall operations stored in root->syscall_ops
 * @flags: flags stored in root->flags
 * @priv: opaque pointer stored in the root directory node's ->priv
 *
 * Allocates a zeroed kernfs_root, initializes its inode-number IDA and its
 * list of superblocks, and creates the root directory node with an empty
 * name and mode S_IFDIR | S_IRUGO | S_IXUGO. The node and the root are then
 * linked to each other.
 *
 * Returns the new root, or ERR_PTR(-ENOMEM) if the root allocation fails.
 * Handling of a failed node allocation is elided in this excerpt.
 */
struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
unsigned int flags, void *priv)
{
struct kernfs_root *root;
struct kernfs_node *kn;
root = kzalloc(sizeof(*root), GFP_KERNEL);
if (!root)
return ERR_PTR(-ENOMEM);
ida_init(&root->ino_ida);
INIT_LIST_HEAD(&root->supers);
/* root directory node: empty name, directory type */
kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
KERNFS_DIR);
....................................................................
kn->priv = priv;
kn->dir.root = root;
root->syscall_ops = scops;
root->flags = flags;
root->kn = kn;
init_waitqueue_head(&root->deactivate_waitq);
....................................................................
return root;
}
cgroup_setup_root()创建kernfs_root后,第一次调用css_populate_dir()创建默认节点。此时传入的css是cgroup_root->cgrp.self,其ss成员为空,故进入if (!css->ss) 分支,根据cgroup_dfl_base_files或cgroup_legacy_base_files创建一些默认节点。Cgroup的参数以及操作接口,抽象为结构体struct cftype,cgroup_addrm_files()以cftype数组为参数创建kernfs文件节点,以供应用程序操作。
/*
 * css_populate_dir - create the interface files for a css (excerpt showing
 * the branch for a css with no subsystem)
 * @css: the css whose files are to be created
 *
 * When @css has no subsystem (css->ss is NULL), the base file set is chosen
 * according to cgroup_on_dfl(): cgroup_dfl_base_files if it returns true for
 * the cgroup, cgroup_legacy_base_files otherwise. The chosen files are added
 * through cgroup_addrm_files(), whose result is returned.
 *
 * The branch for a css that has a subsystem is elided in this excerpt.
 */
static int css_populate_dir(struct cgroup_subsys_state *css)
{
struct cgroup *cgrp = css->cgroup;
struct cftype *cfts, *failed_cfts;
int ret;
.................................................................
if (!css->ss) {
if (cgroup_on_dfl(cgrp))
cfts = cgroup_dfl_base_files;
else
cfts = cgroup_legacy_base_files;
/* base files are attached to the cgroup's own css (&cgrp->self) */
return cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
}
...............................................................
}
/*
 * cgroup_addrm_files - add or remove a set of files for a cgroup (excerpt)
 * @css: css passed to cgroup_add_file() when adding
 * @cgrp: cgroup whose files are added or removed
 * @cfts: array of cftypes, terminated by an entry with an empty name
 * @is_add: true to add each file, false to remove it
 *
 * Walks @cfts up to the terminating entry and calls cgroup_add_file() or
 * cgroup_rm_file() for each one. The caller must hold cgroup_mutex, as the
 * lockdep assertion checks.
 *
 * Returns the value left in ret. Per-entry filtering and the handling of a
 * failed add, including any use of the restart label and cft_end, are
 * elided in this excerpt.
 */
static int cgroup_addrm_files(struct cgroup_subsys_state *css,
struct cgroup *cgrp, struct cftype cfts[],
bool is_add)
{
struct cftype *cft, *cft_end = NULL;
int ret = 0;
lockdep_assert_held(&cgroup_mutex);
restart:
/* stop at cft_end (NULL on the first pass) or at the empty-name terminator */
for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
...............................................................
if (is_add) {
ret = cgroup_add_file(css, cgrp, cft);
...........................................................
} else {
cgroup_rm_file(cgrp, cft);
}
}
return ret;
}
下一步是调用rebind_subsystems()建立与子系统的关系:将子系统(struct cgroup_subsys)的root成员指向新创建的cgroup_root,将对应struct cgroup_subsys_state的cgroup成员指向新创建cgroup_root包含的cgroup,从而建立起新创建的cgroup与子系统的联系。
/*
 * rebind_subsystems - move subsystems onto a destination root (excerpt)
 * @dst_root: hierarchy root that the subsystems are moved to
 * @ss_mask: bitmask of the subsystems to move
 *
 * For every subsystem selected by @ss_mask, in the portion shown: clears the
 * subsystem's bit in its current (source) root's subsys_mask and applies
 * control on the source root cgroup, moves the subsystem's css pointer from
 * the source root cgroup to the destination root cgroup, repoints ss->root
 * and css->cgroup at the destination, and applies control on the destination
 * root cgroup. After the loop the destination's kernfs node is activated.
 *
 * Returns 0 on the path shown; checks before the loop and the handling of
 * ret inside the loop are elided in this excerpt.
 */
static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
{
struct cgroup *dcgrp = &dst_root->cgrp;
struct cgroup_subsys *ss;
int ssid, i, ret;
...................................................................
do_each_subsys_mask(ss, ssid, ss_mask) {
struct cgroup_root *src_root = ss->root;
struct cgroup *scgrp = &src_root->cgrp;
struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
struct css_set *cset;
/* the source must have a css for ss and the destination must not */
WARN_ON(!css || cgroup_css(dcgrp, ss));
/* disable from the source */
src_root->subsys_mask &= ~(1 << ssid);
WARN_ON(cgroup_apply_control(scgrp));
cgroup_finalize_control(scgrp, 0);
/* rebind */
RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
rcu_assign_pointer(dcgrp->subsys[ssid], css);
ss->root = dst_root;
css->cgroup = dcgrp;
...............................................................
ret = cgroup_apply_control(dcgrp);
...............................................................
} while_each_subsys_mask();
kernfs_activate(dcgrp->kn);
return 0;
}
rebind_subsystems()中调用的cgroup_apply_control()会进一步调用cgroup_apply_control_enable(),后者再次调用css_populate_dir()创建子系统的操作节点。这次调用时,参数css是子系统对应的css(不存在时由css_create()创建),其ss成员不为空,故会根据子系统成员cfts链表中的cftype来创建节点。
/*
 * cgroup_apply_control_enable - create csses and populate files for a subtree
 * @cgrp: root of the subtree to process
 *
 * Iterates with cgroup_for_each_live_descendant_pre() starting from @cgrp
 * and, for each cgroup visited, over every subsystem. A subsystem whose bit
 * is not set in cgroup_ss_mask() of that cgroup is skipped. For an enabled
 * subsystem, the css is created with css_create() if it does not exist yet,
 * and its interface files are populated with css_populate_dir() when
 * css_visible() reports true.
 *
 * Returns 0 on success, or the error from css_create() or
 * css_populate_dir() as soon as one of them fails.
 */
static int cgroup_apply_control_enable(struct cgroup *cgrp)
{
struct cgroup *dsct;
struct cgroup_subsys_state *d_css;
struct cgroup_subsys *ss;
int ssid, ret;
cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
for_each_subsys(ss, ssid) {
struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
/* an existing css is not expected to be dying at this point */
WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
/* subsystem not enabled for this cgroup */
if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
continue;
if (!css) {
css = css_create(dsct, ss);
if (IS_ERR(css))
return PTR_ERR(css);
}
if (css_visible(css)) {
ret = css_populate_dir(css);
if (ret)
return ret;
}
}
}
return 0;
}
/*
 * css_populate_dir - create the interface files for a css (excerpt showing
 * the path for a css that belongs to a subsystem)
 * @css: the css whose files are to be created
 *
 * Returns 0 immediately if @css is already flagged CSS_VISIBLE or its cgroup
 * has no kernfs node. Otherwise, in the portion shown, every cftype array on
 * the subsystem's cfts list is added through cgroup_addrm_files(), and the
 * css is then flagged CSS_VISIBLE.
 *
 * On a failed add, the failing array is recorded in failed_cfts and control
 * jumps to the err label. That label and the final return are elided in
 * this excerpt.
 */
static int css_populate_dir(struct cgroup_subsys_state *css)
{
struct cgroup *cgrp = css->cgroup;
struct cftype *cfts, *failed_cfts;
int ret;
/* nothing to do if already populated or there is no directory node */
if ((css->flags & CSS_VISIBLE) || !cgrp->kn)
return 0;
.........................................................
list_for_each_entry(cfts, &css->ss->cfts, node) {
ret = cgroup_addrm_files(css, cgrp, cfts, true);
if (ret < 0) {
failed_cfts = cfts;
goto err;
}
}
css->flags |= CSS_VISIBLE;
..........................................................
}