linux qos 实现机制,linux的qos机制 - cgroup篇 (4)

下面来看各个子系统对cgroup的支持,第一篇先研究blkio子系统

blkio子系统支持三种类型的QoS控制:

blkio.weight, blkio.weight_device:这些是基于设备权重值的控制方式

blkio.throttle.read_bps_device,blkio.throttle.write_bps_device:这些是基于带宽的控制方式

blkio.throttle.read_iops_device,blkio.throttle.write_iops_device:这些是基于iops的控制方式

其中基于权重的控制方式,必须依赖于CFQ调度器,而基于throttle的控制方式则只需要在通用块层实现就可以了

1) 基于blkio的cgroup_subsys的定义如下:

/*
 * cgroup subsystem descriptor for the "blkio" controller.
 *
 * Hooks the blkiocg_* callbacks (create, can_attach_task, attach_task,
 * destroy, populate) into the cgroup core under the name "blkio".
 */
struct cgroup_subsys blkio_subsys = {

.name = "blkio",

.create = blkiocg_create,

.can_attach_task = blkiocg_can_attach_task,

.attach_task = blkiocg_attach_task,

.destroy = blkiocg_destroy,

.populate = blkiocg_populate,

#ifdef CONFIG_BLK_CGROUP

/* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */

.subsys_id = blkio_subsys_id,

#endif

/*
 * NOTE(review): presumably asks the cgroup core to allocate css IDs for
 * this subsystem (cf. blkio_group.blkcg_id) - confirm against the
 * cgroup core of the kernel version in use.
 */
.use_id = 1,

.module = THIS_MODULE,

};

blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup):初始化一个blkio_cgroup结构,并初始化blkio_cgroup->policy_list, blkio_cgroup->blkg_list

blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup):略过

blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup):为blkio_files数组中的每个struct cftype在cgroup文件系统下创建对应的配置文件

blkiocg_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk):

/*

* We cannot support shared io contexts, as we have no means to support

* two tasks with the same ioc in two different groups without major rework

* of the main cic data structures.  For now we allow a task to change

* its cgroup only if it's the only owner of its ioc.

*/

2) 基于blkio的policy的数据结构定义如下:

/*
 * One policy rule for one device (dev), tagged with the policy (plid)
 * and the cgroup file (fileid) it was configured through.
 */
struct blkio_policy_node {

/* List linkage for this rule */
struct list_head node;

/* Device this rule applies to */
dev_t dev;

/* This node belongs to max bw policy or proportional weight policy */

enum blkio_policy_id plid;

/* cgroup file to which this rule belongs */

int fileid;

/*
 * Rule value. NOTE(review): which member is meaningful presumably
 * follows from plid/fileid - confirm against the writers of val.
 */
union {

unsigned int weight;

/*
 * Read/write rate in bytes per second.
 * Whether this rate represents read or write is determined
 * by file type "fileid".
 */

u64 bps;

unsigned int iops;

} val;

};

/*
 * Callback table supplied by a blkio policy: one hook for unlinking a
 * group and one hook per updatable limit (weight, read/write bps,
 * read/write iops). A policy fills in only the hooks it implements.
 */
struct blkio_policy_ops {

blkio_unlink_group_fn *blkio_unlink_group_fn;

blkio_update_group_weight_fn *blkio_update_group_weight_fn;

blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;

blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;

blkio_update_group_read_iops_fn *blkio_update_group_read_iops_fn;

blkio_update_group_write_iops_fn *blkio_update_group_write_iops_fn;

};

/*
 * Identifies a blkio policy. Stored as "plid" in blkio_policy_node,
 * blkio_policy_type and blkio_group.
 */
enum blkio_policy_id {

BLKIO_POLICY_PROP = 0,/* Proportional Bandwidth division */

BLKIO_POLICY_THROTL,/* Throttling */

};

/*
 * Descriptor of one blkio policy: its callback table plus the policy id
 * the callbacks belong to.
 */
struct blkio_policy_type {

/* List linkage for this policy descriptor */
struct list_head list;

/* Callbacks implemented by this policy */
struct blkio_policy_ops ops;

/* Which policy this descriptor represents */
enum blkio_policy_id plid;

};

blkio_policy_node,基本上可以看做一个cgroup文件系统下的一个配置文件对应一个blkio_policy_node,一个cgroup目录的所有的policy_node都会被链在一个blkio_cgroup->policy_list的链表中

blkio_policy_type根据不同的blkio_policy_id有不同的blkio_policy_ops,blkio_policy_register在cfq_init,throtl_init时被调用,这两个初始化函数分别对应基于权重的控制和基于阈值的控制,目前有两个全局的blkio_policy_type的变量:

/*
 * Policy descriptor for proportional-weight control (BLKIO_POLICY_PROP).
 * Only the unlink and weight-update hooks are set; the bps/iops hooks
 * are left at their zero-initialized (NULL) defaults.
 */
static struct blkio_policy_type blkio_policy_cfq = {

.ops = {

.blkio_unlink_group_fn =cfq_unlink_blkio_group,

.blkio_update_group_weight_fn =cfq_update_blkio_group_weight,

},

.plid = BLKIO_POLICY_PROP,

};

以及

/*
 * Policy descriptor for throttling (BLKIO_POLICY_THROTL).
 * Sets the unlink hook and the four read/write bps/iops update hooks;
 * the weight-update hook is left at its zero-initialized (NULL) default.
 */
static struct blkio_policy_type blkio_policy_throtl = {

.ops = {

.blkio_unlink_group_fn = throtl_unlink_blkio_group,

.blkio_update_group_read_bps_fn =

throtl_update_blkio_group_read_bps,

.blkio_update_group_write_bps_fn =

throtl_update_blkio_group_write_bps,

.blkio_update_group_read_iops_fn =

throtl_update_blkio_group_read_iops,

.blkio_update_group_write_iops_fn =

throtl_update_blkio_group_write_iops,

},

.plid = BLKIO_POLICY_THROTL,

};

3) 基于blkio的cgroup文件系统的数据结构如下:

struct cftype blkio_files[] = {

{

.name = "weight_device",

.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,

BLKIO_PROP_weight_device),

.read_seq_string = blkiocg_file_read,

.write_string = blkiocg_file_write,

.max_write_len = 256,

},

{

.name = "weight",

.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,

BLKIO_PROP_weight),

.read_u64 = blkiocg_file_read_u64,

.write_u64 = blkiocg_file_write_u64,

},

{

.name = "throttle.read_bps_device",

.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,

BLKIO_THROTL_read_bps_device),

.read_seq_string = blkiocg_file_read,

.write_string = blkiocg_file_write,

.max_write_len = 256,

},

{

.name = "throttle.write_bps_device",

.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,

BLKIO_THROTL_write_bps_device),

.read_seq_string = blkiocg_file_read,

.write_string = blkiocg_file_write,

.max_write_len = 256,

},

{

.name = "throttle.read_iops_device",

.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,

BLKIO_THROTL_read_iops_device),

.read_seq_string = blkiocg_file_read,

.write_string = blkiocg_file_write,

.max_write_len = 256,

},

{

.name = "throttle.write_iops_device",

.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,

BLKIO_THROTL_write_iops_device),

.read_seq_string = blkiocg_file_read,

.write_string = blkiocg_file_write,

.max_write_len = 256,

},

基本上调用的都是blkiocg_file_read,blkiocg_file_write

blkio_files中的struct cftype有个private成员变量,通过BLKIOFILE_PRIVATE宏来赋值,e.g.

.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, BLKIO_PROP_weight_device)

之后可以通过BLKIOFILE_POLICY获取其policy类型:BLKIO_POLICY_THROTL或者BLKIO_POLICY_PROP,通过BLKIOFILE_ATTR获取其文件名,所有的配置文件都有如下定义:

/*
 * cgroup files owned by proportional weight policy.
 *
 * Passed as the second argument of BLKIOFILE_PRIVATE() together with
 * BLKIO_POLICY_PROP. Numbering starts at 1, not 0.
 */

enum blkcg_file_name_prop {

BLKIO_PROP_weight = 1,

BLKIO_PROP_weight_device,

BLKIO_PROP_io_service_bytes,

BLKIO_PROP_io_serviced,

BLKIO_PROP_time,

BLKIO_PROP_sectors,

BLKIO_PROP_unaccounted_time,

BLKIO_PROP_io_service_time,

BLKIO_PROP_io_wait_time,

BLKIO_PROP_io_merged,

BLKIO_PROP_io_queued,

BLKIO_PROP_avg_queue_size,

BLKIO_PROP_group_wait_time,

BLKIO_PROP_idle_time,

BLKIO_PROP_empty_time,

BLKIO_PROP_dequeue,

};

/*
 * cgroup files owned by throttle policy.
 *
 * Passed as the second argument of BLKIOFILE_PRIVATE() together with
 * BLKIO_POLICY_THROTL. No explicit initializer, so numbering starts
 * at 0 (unlike blkcg_file_name_prop, which starts at 1).
 */

enum blkcg_file_name_throtl {

BLKIO_THROTL_read_bps_device,

BLKIO_THROTL_write_bps_device,

BLKIO_THROTL_read_iops_device,

BLKIO_THROTL_write_iops_device,

BLKIO_THROTL_io_service_bytes,

BLKIO_THROTL_io_serviced,

};

static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft, struct seq_file *m):通过cftype得到POLICY_ID, POLICY_FILE_NAME,通过struct cgroup得到struct blkio_cgroup,然后调用blkio_read_policy_node_files,按照一定格式存到一个seq_file里面,可以参考blkio_print_policy_node函数

static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft, const char *buffer):先调用blkio_policy_parse_and_set生成一个新的blkio_policy_node,下面的步骤就是先删了已有的policy node,再把新的policy node插入到blkio_cgroup->policy_list里面,最后调用blkio_update_policy_node_blkg,该函数对blkio_cgroup下面的所有blkio_group,都调用blkio_update_blkg_policy,该函数会根据blkio_policy_node的plid, fileid,调用不同的 blkio_update_xxxxx函数,以weight为例,最终调用到blkio_update_group_weight,后者又调用cfq_update_blkio_group_weight(这是跟CFQ紧耦合的一个函数,这里不做介绍了)

4) 几个关键的数据结构blkio_cgroup和blkio_group

/*
 * Per-cgroup state of the blkio controller: the embedded cgroup
 * subsystem state, the cgroup's weight, and the lists of groups and
 * policy rules belonging to it.
 */
struct blkio_cgroup {

/* Embedded cgroup subsystem state */
struct cgroup_subsys_state css;

/* Weight of this cgroup */
unsigned int weight;

/* NOTE(review): presumably protects the two lists below - confirm */
spinlock_t lock;

/* hlist head; blkio_group has a matching hlist_node (blkcg_node) */
struct hlist_head blkg_list;

struct list_head policy_list; /* list of blkio_policy_node */

};

/*
 * Per-group state: identified by an opaque key, linked through an hlist
 * node, tagged with the owning policy and device, and carrying both
 * shared and per-cpu statistics.
 */
struct blkio_group {

/* An rcu protected unique identifier for the group */

void *key;

/* hlist linkage; blkio_cgroup has a matching hlist_head (blkg_list) */
struct hlist_node blkcg_node;

/* NOTE(review): presumably the id of the owning blkio_cgroup - confirm */
unsigned short blkcg_id;

/* Store cgroup path */

char path[128];

/* The device MKDEV(major, minor), this group has been created for */

dev_t dev;

/* policy which owns this blk group */

enum blkio_policy_id plid;

/* Need to serialize the stats in the case of reset/update */

spinlock_t stats_lock;

struct blkio_group_stats stats;

/* Per cpu stats pointer */

struct blkio_group_stats_cpu __percpu *stats_cpu;

};

blkio_cgroup代表了一个cgroup,但是这个cgroup里的进程有可能会读写多个块设备,所以每个块设备对应一个blkio_group,并以cfq_data或者throtl_data结构的指针作为blkio_group->key,多个blkio_group再通过blkcg_node挂到blkio_cgroup->blkg_list这个hlist上,从而关联到同一个blkio_cgroup中。每个cfq_data或者throtl_data(根据policy的不同)实际上代表了一个块设备

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值