前面分析了cgroup机制的框架及数据结构之间的关系,现在看代码就比较简单了。
代码部分
在init/main.c的start_kernel函数中会调用cgroup_init_early函数进行cgroup的初始化。其实对cgroup的初始化分两次,分别是cgroup_init_early和cgroup_init。这么做的原因是,系统初始阶段需要使用一些 subsystem,先对这一部分进行初始化。
来看一下cgroup_init_early
/**
* cgroup_init_early - cgroup initialization at system boot
*
* Initialize cgroups at system boot, and initialize any
* subsystems that request early init.
*
* Sets up init_css_set and cgroup_dummy_root, ties them together through
* init_cgrp_cset_link, then validates every built-in subsystem and calls
* cgroup_init_subsys() for those with ->early_init set.
*
* Always returns 0; a built-in subsystem that fails validation hits
* BUG()/BUG_ON() instead of producing an error code.
*/
int __init cgroup_init_early(void)
{
struct cgroup_subsys *ss;
int i;
/* init_css_set starts with a refcount of 1, empty list heads and an
* initialized hash-list node */
atomic_set(&init_css_set.refcount, 1);
INIT_LIST_HEAD(&init_css_set.cgrp_links);
INIT_LIST_HEAD(&init_css_set.tasks);
INIT_HLIST_NODE(&init_css_set.hlist);
/* NOTE(review): presumably counts init_css_set itself - confirm */
css_set_count = 1;
init_cgroup_root(&cgroup_dummy_root);
/* NOTE(review): presumably counts cgroup_dummy_root itself - confirm */
cgroup_root_count = 1;
/* make init_task's cgroups pointer refer to init_css_set */
RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
/*
* init_cgrp_cset_link pairs init_css_set with cgroup_dummy_top and is
* added to one list on each side: the cgroup's cset_links and the
* css_set's cgrp_links.
*/
init_cgrp_cset_link.cset = &init_css_set;
init_cgrp_cset_link.cgrp = cgroup_dummy_top;
list_add(&init_cgrp_cset_link.cset_link, &cgroup_dummy_top->cset_links);
list_add(&init_cgrp_cset_link.cgrp_link, &init_css_set.cgrp_links);
/* at bootup time, we don't worry about modular subsystems */
for_each_builtin_subsys(ss, i) {
/* a subsystem must have a name within the length limit and must
* provide both css_alloc and css_free */
BUG_ON(!ss->name);
BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
BUG_ON(!ss->css_alloc);
BUG_ON(!ss->css_free);
/* subsys_id has to match the subsystem's index in this iteration */
if (ss->subsys_id != i) {
printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
ss->name, ss->subsys_id);
BUG();
}
/* only subsystems that set early_init are initialized here */
if (ss->early_init)
cgroup_init_subsys(ss);
}
return 0;
}
注释说得很清楚了:initialize any subsystems that request early init.
init_css_set是init进程使用的css_set结构体,先进行初始化:
设置init_css_set的引用计数为1.
初始化init_css_set.cgrp_links链表,该链表与cgrp_cset_link的cgrp_link相关联.
初始化init_css_set.tasks链表,该链表与所有引用init_css_set的task相关联.
初始化init_css_set.hlist节点(通过INIT_HLIST_NODE初始化的哈希链表节点,而不是链表头),与哈希表相关,没有深入研究.
css_set_count用来记录系统css_set结构体变量的个数.
初始化cgroup_dummy_root,定义为
static struct cgroupfs_root cgroup_dummy_root;
/*
* A cgroupfs_root represents the root of a cgroup hierarchy, and may be
* associated with a superblock to form an active hierarchy. This is
* internal to cgroup core. Don't access directly from controllers.
*/
struct cgroupfs_root {
/* The superblock this root may be associated with (see above) */
struct super_block *sb;
/* The bitmask of subsystems attached to this hierarchy */
unsigned long subsys_mask;
/* Unique id for this hierarchy. */
int hierarchy_id;
/* The root cgroup for this hierarchy (embedded by value, not a pointer) */
struct cgroup top_cgroup;
/* Tracks how many cgroups are currently defined in hierarchy. */
int number_of_cgroups;
/* A list running through the active hierarchies */
struct list_head root_list;
/* Hierarchy-specific flags */
unsigned long flags;
/* IDs for cgroups in this hierarchy */
struct idr cgroup_idr;
/* The path to use for release notifications. */
char release_agent_path[PATH_MAX];
/* The name for this hierarchy - may be empty */
char name[MAX_CGROUP_ROOT_NAMELEN];
};
/*
* init_cgroup_root - initialize a hierarchy root and its embedded top cgroup
* @root: the cgroupfs_root to set up
*
* Initializes root_list, number_of_cgroups and cgroup_idr on @root, and
* the root back-pointer, name and housekeeping state of @root->top_cgroup.
* Other fields of @root are not touched here.
*/
static void init_cgroup_root(struct cgroupfs_root *root)
{
/* the top cgroup is embedded in the root, so take its address */
struct cgroup *cgrp = &root->top_cgroup;
INIT_LIST_HEAD(&root->root_list);
/* NOTE(review): presumably accounts for top_cgroup itself - confirm */
root->number_of_cgroups = 1;
/* back-pointer from the top cgroup to the hierarchy that owns it */
cgrp->root = root;
/* the top cgroup's name points at root_cgroup_name (defined elsewhere) */
RCU_INIT_POINTER(cgrp->name, &root_cgroup_name);
/* helper defined elsewhere; see its definition for what it sets up */
init_cgroup_housekeeping(cgrp);
idr_init(&root->cgroup_idr);
}
初始化cgroup_dummy_root的root_list链表
将层次图中的cgroup个数设置为1
将顶层cgroup的root指针指回它所属的层次结构,即top_cgroup.root指向cgroup_dummy_root(top_cgroup是内嵌在cgroupfs_root中的结构体成员)
init_cgroup_housekeeping用于构建层次图的list变量和互斥变量
初始化cgroup_dummy_root.cgroup_idr(idr结构而非链表,按结构体注释,它用于管理该层次中cgroup的ID)
回到cgroup_init_early:
设置cgroup_root_count = 1;
接下来几行代码:
init_cgrp_cset_link.cset = &init_css_set;
init_cgrp_cset_link.cgrp = cgroup_dummy_top;
list_add(&init_cgrp_cset_link.cset_link, &cgroup_dummy_top->cset_links);
list_add(&init_cgrp_cset_link.cgrp_link, &init_css_set.cgrp_links);
内核定义
static struct cgrp_cset_link init_cgrp_cset_link;
/* dummy_top is a shorthand for the dummy hierarchy's top cgroup */
static struct cgroup * const cgroup_dummy_top = &cgroup_dummy_root.top_cgroup;
所以这里是将init进程的css_set(即init_css_set)与cgroup_dummy_root的top_cgroup(即cgroup_dummy_top)通过init_cgrp_cset_link相关联。
继续看cgroup_init_early的剩余代码
/* at bootup time, we don't worry about modular subsystems */
for_each_builtin_subsys(ss, i) {
BUG_ON(!ss->name);
BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
BUG_ON(!ss->css_alloc);
BUG_ON(!ss->css_free);
if (ss->subsys_id != i) {
printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
ss->name, ss->subsys_id);
BUG();
}
if (ss->early_init)
cgroup_init_subsys(ss);
}
这里是遍历所有builtin的subsystems,并对其中设置了early_init的子系统进行初始化;从注释不难看出,linux中subsystems可以分为builtin和modular两类。
其中会先做一些检查(name、css_alloc、css_free是否存在,name长度是否超限,subsys_id是否与数组下标一致),检查子系统的默认信息是否有误,出现问题时直接触发BUG,停止初始化。检查通过之后,只有ss->early_init被设置的子系统才会调用cgroup_init_subsys进行初始化。
这些builtin的subsystem从何而来?
/**
* for_each_builtin_subsys - iterate all built-in cgroup subsystems
* @ss: the iteration cursor
* @i: the index of @ss, CGROUP_BUILTIN_SUBSYS_COUNT after reaching the end
*
* Built-in subsystems are always present and iteration itself doesn't
* require any synchronization.
*
* The assignment to @ss is done inside the loop condition; the trailing
* "|| true" keeps the condition from evaluating false when
* cgroup_subsys[i] is NULL, so the loop is bounded only by @i.
*/
#define for_each_builtin_subsys(ss, i) \
for ((i) = 0; (i) < CGROUP_BUILTIN_SUBSYS_COUNT && \
(((ss) = cgroup_subsys[i]) || true); (i)++)
for_each_builtin_subsys是一个宏,展开并去掉BUG_ON信息后得到:
for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT &&
(((ss) = cgroup_subsys[i]) || true); i++){
if (ss->early_init)
cgroup_init_subsys(ss);
}
cgroup_subsys数组的定义在kernel/cgroup.c
/*
* Table of cgroup subsystem pointers with CGROUP_SUBSYS_COUNT slots.
* The initializer list is whatever <linux/cgroup_subsys.h> expands to
* when it is included at this point.
* NOTE(review): the SUBSYS() definition in effect here is not shown in
* this excerpt - check it to see how each entry is produced.
*/
static struct cgroup_subsys *cgroup_subsys[CGROUP_SUBSYS_COUNT] = {
#include <linux/cgroup_subsys.h>
};
数组的成员由#include <linux/cgroup_subsys.h>展开得到,该头文件的内容如下:
/*
* List of cgroup subsystems.
*
* DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS.
*/
#if IS_SUBSYS_ENABLED(CONFIG_CPUSETS)
SUBSYS(cpuset)
#endif
#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_DEBUG)
SUBSYS(debug)
#endif
#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_SCHED)
SUBSYS(cpu_cgroup)
#endif
#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_CPUACCT)
SUBSYS(cpuacct)
#endif
#if IS_SUBSYS_ENABLED(CONFIG_MEMCG)
SUBSYS(mem_cgroup)
#endif
#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_DEVICE)
SUBSYS(devices)
#endif
#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_FREEZER)
SUBSYS(freezer)
#endif
#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_NET_CLASSID)
SUBSYS(net_cls)
#endif
#if IS_SUBSYS_ENABLED(CONFIG_BLK_CGROUP)
SUBSYS(blkio)
#endif
#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_PERF)
SUBSYS(perf)
#endif
#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_NET_PRIO)
SUBSYS(net_prio)
#endif
#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_HUGETLB)
SUBSYS(hugetlb)
#endif
/*
* DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS.
*/
一共有CGROUP_BUILTIN_SUBSYS_COUNT个subsystem,与include/linux/cgroup_subsys.h中列出的cgroup subsystems的数目相等。这个数值的定义用了一种很巧妙的方法,有兴趣的同学可以看一下。
接下来看cgroup_init_subsys
/*
* cgroup_init_subsys - boot-time initialization of one built-in subsystem
* @ss: the subsystem to initialize
*
* With cgroup_mutex held: initializes @ss's cftsets, attaches @ss to
* cgroup_dummy_root, allocates its css for cgroup_dummy_top through
* ss->css_alloc(), records that css in init_css_set and brings it online.
* No error is returned; every failure path is a BUG_ON().
*/
static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
{
struct cgroup_subsys_state *css;
printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
mutex_lock(&cgroup_mutex);
/* init base cftset */
cgroup_init_cftsets(ss);
/* Create the top cgroup state for this subsystem */
ss->root = &cgroup_dummy_root;
css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss));
/* We don't handle early failures gracefully */
BUG_ON(IS_ERR(css));
init_css(css, ss, cgroup_dummy_top);
/* Update the init_css_set to contain a subsys
* pointer to this state - since the subsystem is
* newly registered, all tasks and hence the
* init_css_set is in the subsystem's top cgroup. */
init_css_set.subsys[ss->subsys_id] = css;
/* becomes non-zero once any subsystem provides a fork or exit callback */
need_forkexit_callback |= ss->fork || ss->exit;
/* At system boot, before all subsystems have been
* registered, no tasks have been forked, so we don't
* need to invoke fork callbacks here. */
BUG_ON(!list_empty(&init_task.tasks));
/* online_css() is invoked as the BUG_ON argument: the call itself is
* required, and a non-zero return is treated as fatal */
BUG_ON(online_css(css));
mutex_unlock(&cgroup_mutex);
/* this function shouldn't be used with modular subsystems, since they
* need to register a subsys_id, among other things */
BUG_ON(ss->module);
}
这里主要是做一些初始化,由于时间问题,先不去深究了。
这里我只是分析了一下cgroup的框架及初始化,其实还有很多值得研究的地方,比如subsys_id,比如cpu等cgroup是如何运作的,这些内容以后一定会补上。