源码视角看memory cgroup目录下创建目录以及读写节点发生了什么

1./dev/memcg/apps下创建test目录
(gdb) bt
#0  __kernfs_create_file (parent=0xffff888005bcaf00, name=0xffff888005bc3cf6 "blkio.throttle.write_iops_device", mode=420, uid=..., gid=..., size=0, ops=0xffffffff82666ce0 <cgroup_kf_single_ops>, 
    priv=0xffffffff826dce68 <throtl_legacy_files+648>, ns=0x0 <fixed_percpu_data>, key=0x0 <fixed_percpu_data>) at fs/kernfs/file.c:1002
#1  0xffffffff8117f277 in cgroup_add_file (cft=<optimized out>, cgrp=<optimized out>, css=<optimized out>) at kernel/cgroup/cgroup.c:1496
#2  cgroup_addrm_files (css=<optimized out>, cgrp=0xffff888005bc7800, cfts=<optimized out>, is_add=false) at kernel/cgroup/cgroup.c:3881
#3  0xffffffff8117f577 in css_populate_dir (css=0xffff888005b30c00) at kernel/cgroup/cgroup.c:1689
#4  0xffffffff81183977 in cgroup_apply_control_enable (cgrp=<optimized out>) at kernel/cgroup/cgroup.c:3067
#5  0xffffffff81185dd9 in cgroup_mkdir (parent_kn=0xffff888005ba3300, name=0xffff888005c8cdb8 "test", mode=<optimized out>) at kernel/cgroup/cgroup.c:5358
#6  0xffffffff81335c91 in kernfs_iop_mkdir (dir=<optimized out>, dentry=<optimized out>, mode=<optimized out>) at fs/kernfs/dir.c:1127
#7  0xffffffff812a5ffd in vfs_mkdir (dir=0xffff888005bcaf00, dentry=0xffff888005bc3cf6, mode=<optimized out>) at fs/namei.c:3649
#8  0xffffffff812a8d9b in do_mkdirat (dfd=96251648, pathname=0xffff888005bc3cf6 "blkio.throttle.write_iops_device", mode=<optimized out>) at fs/namei.c:3672
#9  0xffffffff812a8e66 in __do_sys_mkdir (mode=<optimized out>, pathname=<optimized out>) at fs/namei.c:3688
#10 __se_sys_mkdir (mode=<optimized out>, pathname=<optimized out>) at fs/namei.c:3686
#11 __x64_sys_mkdir (regs=<optimized out>) at fs/namei.c:3686
#12 0xffffffff81c71608 in do_syscall_64 (nr=<optimized out>, regs=0xffff888005bc3f58) at arch/x86/entry/common.c:46
#13 0xffffffff81e0007c in entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:118
#14 0x0000000000000000 in ?? ()
2.cat /dev/memcg/apps/test/cgroup.procs

读取文件需要经历两个阶段:

  1. open
  2. read

open的调用栈:

Delete all breakpoints? (y or n) y
(gdb) b kernfs_fop_open
Breakpoint 9 at 0xffffffff81336e10: file fs/kernfs/file.c, line 618.
(gdb) c
Continuing.

Breakpoint 9, kernfs_fop_open (inode=0xffff888005c93970, file=0xffff888005ad9400) at fs/kernfs/file.c:618
618		struct kernfs_node *kn = inode->i_private;
(gdb) bt
#0  kernfs_fop_open (inode=0xffff888005c93970, file=0xffff888005ad9400) at fs/kernfs/file.c:618
#1  0xffffffff81291b8d in do_dentry_open (f=0xffff888005ad9400, inode=0xffff888005c93970, open=0xffffffff81336e10 <kernfs_fop_open>) at fs/open.c:817
#2  0xffffffff81293898 in vfs_open (path=<optimized out>, file=<optimized out>) at ./include/linux/dcache.h:551
#3  0xffffffff812a81da in do_open (op=<optimized out>, op=<optimized out>, file=<optimized out>, nd=<optimized out>) at fs/namei.c:3251
#4  path_openat (nd=0xffff888005bc3da0, op=0xffff888005bc3ebc, flags=<optimized out>) at fs/namei.c:3368
#5  0xffffffff812aa16e in do_filp_open (dfd=<optimized out>, pathname=<optimized out>, op=0xffff888005bc3ebc) at fs/namei.c:3395
#6  0xffffffff81292788 in do_sys_openat2 (dfd=-100, filename=<optimized out>, how=<optimized out>) at fs/open.c:1168
#7  0xffffffff81293cd8 in do_sys_open (dfd=<optimized out>, filename=<optimized out>, flags=<optimized out>, mode=<optimized out>) at fs/open.c:1184
#8  0xffffffff81293d2c in __do_sys_open (mode=<optimized out>, flags=<optimized out>, filename=<optimized out>) at fs/open.c:1192
#9  __se_sys_open (mode=<optimized out>, flags=<optimized out>, filename=<optimized out>) at fs/open.c:1188
#10 __x64_sys_open (regs=<optimized out>) at fs/open.c:1188
#11 0xffffffff81c71608 in do_syscall_64 (nr=<optimized out>, regs=0xffff888005bc3f58) at arch/x86/entry/common.c:46
#12 0xffffffff81e0007c in entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:118

read的调用栈(从栈底可以看到,本次读取是经由sendfile64系统调用进入的,最终仍然走到kernfs_fop_read):

(gdb) b cgroup_pidlist_show
Note: breakpoint 4 also set at pc 0xffffffff811882d0.
Breakpoint 7 at 0xffffffff811882d0: file kernel/cgroup/cgroup-v1.c, line 481.
(gdb) c
Continuing.

Breakpoint 4, cgroup_pidlist_show (s=0xffff888005bcc000, v=0xffff888006077fe0) at kernel/cgroup/cgroup-v1.c:481
481		seq_printf(s, "%d\n", *(int *)v);
(gdb) bt
#0  cgroup_pidlist_show (s=0xffff888005bcc000, v=0xffff888006077fe0) at kernel/cgroup/cgroup-v1.c:481
#1  0xffffffff8117ea97 in cgroup_seqfile_show (m=0xffff888005bcc000, arg=<optimized out>) at kernel/cgroup/cgroup.c:3758
#2  0xffffffff813366a2 in kernfs_seq_show (sf=<optimized out>, v=<optimized out>) at fs/kernfs/file.c:167
#3  0xffffffff812c1140 in seq_read (file=<optimized out>, buf=<optimized out>, size=<optimized out>, ppos=<optimized out>) at fs/seq_file.c:208
#4  0xffffffff813373a0 in kernfs_fop_read (file=<optimized out>, user_buf=0xffff888006077fe0 "\001", count=<optimized out>, ppos=0xffff888005ba3300) at fs/kernfs/file.c:251
#5  0xffffffff81295b43 in do_loop_readv_writev (flags=<optimized out>, type=<optimized out>, ppos=<optimized out>, iter=<optimized out>, filp=<optimized out>) at fs/read_write.c:742
#6  do_loop_readv_writev (flags=<optimized out>, type=<optimized out>, ppos=<optimized out>, iter=<optimized out>, filp=<optimized out>) at fs/read_write.c:729
#7  do_iter_read (file=0xffff888005ad9300, iter=0xffff888005bc3ad8, pos=0xffff888005bc3c00, flags=<optimized out>) at fs/read_write.c:963
#8  0xffffffff81298478 in vfs_readv (file=0xffff888005ad9300, vec=<optimized out>, vlen=<optimized out>, pos=0xffff888005bc3c00, flags=0) at fs/read_write.c:1081
#9  0xffffffff812cf355 in kernel_readv (offset=<optimized out>, vlen=<optimized out>, vec=<optimized out>, file=<optimized out>) at fs/splice.c:355
#10 default_file_splice_read (in=<optimized out>, ppos=0xffff888005bc3de0, pipe=<optimized out>, len=<optimized out>, flags=<optimized out>) at fs/splice.c:412
#11 0xffffffff812cf538 in do_splice_to (in=0xffff888005ad9300, ppos=0xffff888005bc3de0, pipe=0xffff888005b34900, len=<optimized out>, flags=0) at fs/splice.c:891
#12 0xffffffff812cf622 in splice_direct_to_actor (in=<optimized out>, sd=0xffff888006077fe0, actor=<optimized out>) at fs/splice.c:970
#13 0xffffffff812cf823 in do_splice_direct (in=0xffff888005ad9300, ppos=0xffff888005bc3eb0, out=<optimized out>, opos=<optimized out>, len=<optimized out>, flags=<optimized out>) at fs/splice.c:1079
#14 0xffffffff81296ab9 in do_sendfile (out_fd=<optimized out>, in_fd=<optimized out>, ppos=0x0 <fixed_percpu_data>, count=16777216, max=<optimized out>) at fs/read_write.c:1548
#15 0xffffffff81297101 in __do_sys_sendfile64 (count=<optimized out>, offset=<optimized out>, in_fd=<optimized out>, out_fd=<optimized out>) at fs/read_write.c:1609
#16 __se_sys_sendfile64 (count=<optimized out>, offset=<optimized out>, in_fd=<optimized out>, out_fd=<optimized out>) at fs/read_write.c:1595
#17 __x64_sys_sendfile64 (regs=<optimized out>) at fs/read_write.c:1595
#18 0xffffffff81c71608 in do_syscall_64 (nr=<optimized out>, regs=0xffff888005bc3f58) at arch/x86/entry/common.c:46
#19 0xffffffff81e0007c in entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:118

3. 怎么通过cgroup中的seq_file获取到css

读取cgroup的节点的show函数经常如下:

/*
 * Typical shape of a cgroup file's show callback; unrelated parts of the
 * body are elided with "...". The mem_cgroup is looked up from the seq_file
 * by passing seq_css(m) to mem_cgroup_from_css().
 */
static int xxx_show(struct seq_file *m, void *v)                                                                                   
{
    struct mem_cgroup *memcg = NULL;
    ...
    /* Resolve the seq_file to its css, then to the owning mem_cgroup. */
    memcg = mem_cgroup_from_css(seq_css(m));
    ...
}

是否好奇memcg = mem_cgroup_from_css(seq_css(m))为什么能够从seq_file结构获取到对应的mem_cgroup结构,到底是什么时候把mem_cgroup设置进seq_file里面的?

关键就在seq_css、kernfs_fop_open和__kernfs_create_file这三个函数

/*
 * Map a cgroup seq_file to its css: the seq_file's private pointer is
 * handed to of_css() as the kernfs_open_file.
 */
static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq)
{
    struct kernfs_open_file *of = seq->private;

    return of_css(of);
}

/*
 * Return the css that an open cgroup file belongs to.
 *
 * The cftype comes from the file's own kernfs node (via of_cft()) and the
 * cgroup from the priv pointer of that node's parent.
 */
struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
{
    struct cftype *cft = of_cft(of);
    struct cgroup *cgrp = of->kn->parent->priv;

    /* A cftype without a subsystem resolves to the cgroup's own css. */
    if (!cft->ss)
        return &cgrp->self;

    /*
     * Open-coded, unprotected variant of cgroup_css(). seq_css() is only
     * reached from a kernfs file operation, which holds an active
     * reference on the file. Since all subsystem files are drained before
     * a css is disassociated from its cgroup, the entry in the cgroup's
     * subsys table is guaranteed to be valid, and to stay valid, until
     * the enclosing operation finishes.
     */
    return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
}

/* cft/css accessors for cftype->write() operation */
static inline struct cftype *of_cft(struct kernfs_open_file *of)                                                                               
{
    return of->kn->priv;
}

of_css的参数类型是struct kernfs_open_file *,而seq_css传给它的是seq->private,说明seq->private里保存的就是一个kernfs_open_file指针。那么seq_file->private是什么时候设置的?答案就在kernfs_fop_open:

/*
 * Excerpt of the kernfs open handler; parts not relevant here are elided
 * with "...". The visible code allocates a kernfs_open_file for this open,
 * records the kernfs node and the struct file in it, and stores it in the
 * seq_file's private pointer so that seq_css() can find it later.
 */
static int kernfs_fop_open(struct inode *inode, struct file *file)
{
    /* The kernfs node of the file being opened is kept in inode->i_private. */
    struct kernfs_node *kn = inode->i_private;
    struct kernfs_root *root = kernfs_root(kn);
    const struct kernfs_ops *ops;
    struct kernfs_open_file *of;
    bool has_read, has_write, has_mmap;
    int error = -EACCES;

    /* Fail with -ENODEV when kernfs_get_active() returns false for the node. */
    if (!kernfs_get_active(kn))
        return -ENODEV;

    ops = kernfs_ops(kn);
    ...
    of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL);
    ...
    /* Tie the open-file state to its kernfs node and to the VFS file. */
    of->kn = kn;
    of->file = file;

    /*
     * file->private_data is used as the seq_file here (presumably set up in
     * the elided code above; not visible in this excerpt). Pointing
     * seq_file->private back at 'of' is what seq_css() later reads.
     */
    of->seq_file = file->private_data;
    of->seq_file->private = of;
    ...
}

而of->kn = kn,那么kn->priv又来自于__kernfs_create_file:

struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
                     const char *name,
                     umode_t mode, kuid_t uid, kgid_t gid,
                     loff_t size,
                     const struct kernfs_ops *ops,
                     void *priv, const void *ns,
                     struct lock_class_key *key)
{
    struct kernfs_node *kn;
    unsigned flags;
    int rc;

    flags = KERNFS_FILE;

    kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG,
                 uid, gid, flags);
    if (!kn)
        return ERR_PTR(-ENOMEM);

    kn->attr.ops = ops;
    kn->attr.size = size;
    kn->ns = ns;
    kn->priv = priv;

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值