从zram压缩机制看swapon系统调用

1.swapon开启zram交换分区
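mkswap /dev/block/zram0
swapon /dev/block/zram0

上面的mkswap命令向块设备文件/dev/block/zram0写入swap_header信息;随后swapon命令调用linux的swapon系统调用,启用zram0交换分区。注意顺序:必须先mkswap再swapon,因为swapon系统调用会读取swap_header(见下文read_swap_header),下文的fs_mgr_swapon_all也是先执行mkswap再调用swapon。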

问题:实际安卓平台是在哪里触发mkswap和swapon调用的?我们以MTK8195平台为例:

init.xxx.rc:
    swapon_all /vendor/etc/fstab.enableswap

其中fstab.enableswap内容如下:
/dev/block/zram0 none swap defaults zramsize=xx%

那么swapon_all命令执行了哪里的代码呢?又是哪里解析fstab.enableswap文件的呢?答案是:

system/core/init/builtins.cpp:


/* swapon_all [ <fstab> ] */
/*
 * Handler for the init `swapon_all` command. Parses the arguments with
 * ParseSwaponAll(), loads an fstab, and hands it to fs_mgr_swapon_all().
 * Returns an Error describing the step that failed, or an empty (success)
 * Result otherwise.
 */
static Result<void> do_swapon_all(const BuiltinArguments& args) {
    auto swapon_all = ParseSwaponAll(args.args);
    if (!swapon_all.ok()) return swapon_all.error();

    Fstab fstab;
    // An empty parse result selects the default fstab; otherwise the parsed
    // value is used as the path of the fstab file to read.
    if (swapon_all->empty()) {
        if (!ReadDefaultFstab(&fstab)) {
            return Error() << "Could not read default fstab";
        }
    } else {
        if (!ReadFstabFromFile(*swapon_all, &fstab)) {
            return Error() << "Could not read fstab '" << *swapon_all << "'";
        }
    }

    // Hand the loaded fstab to fs_mgr, which runs mkswap + swapon per entry.
    if (!fs_mgr_swapon_all(fstab)) {
        return Error() << "fs_mgr_swapon_all() failed";
    }

    return {};
}

而fs_mgr_swapon_all实现:/system/core/fs_mgr/fs_mgr.cpp:

/*
 * Excerpt (parts elided with "..."): for an fstab entry, first runs the
 * mkswap binary on the entry's block device and then calls swapon() on it.
 * Any failure is logged and recorded in `ret`.
 */
bool fs_mgr_swapon_all(const Fstab& fstab) {
    ...
        // argv for mkswap: the binary path followed by the block device path.
        const char* mkswap_argv[2] = {
                MKSWAP_BIN,
                entry.blk_device.c_str(),
        };
        // Run mkswap before swapon; on failure skip swapon for this entry.
        int err = logwrap_fork_execvp(ARRAY_SIZE(mkswap_argv), mkswap_argv, nullptr, false,
                                      LOG_KLOG, false, nullptr);
        if (err) {
            LERROR << "mkswap failed for " << entry.blk_device;
            ret = false;
            continue;
        }

        /* If -1, then no priority was specified in fstab, so don't set
         * SWAP_FLAG_PREFER or encode the priority */
        int flags = 0;
        if (entry.swap_prio >= 0) {
            // Encode the priority into the flag bits and mark it as preferred.
            flags = (entry.swap_prio << SWAP_FLAG_PRIO_SHIFT) & SWAP_FLAG_PRIO_MASK;
            flags |= SWAP_FLAG_PREFER;
        } else {
            // Redundant: flags was already initialized to 0 above.
            flags = 0;
        }
        // Enable swap on the device with the computed flags.
        err = swapon(entry.blk_device.c_str(), flags);
        if (err) {
            LERROR << "swapon failed for " << entry.blk_device;
            ret = false;
        }
        ...
}
2.swapon系统调用
kernel-5.15/mm/swapfile.c
/*
 * Abridged excerpt of the swapon path (elided parts marked "..."): opens the
 * swap target, reads page 0 through the file's address_space as the swap
 * header, then sets up the swap map/extents and the swap address space.
 */
swapon(...) {
    /* Open the swap target named by `name` for read/write. */
    swap_file = file_open_name(name, O_RDWR|O_LARGEFILE, 0);
    p->swap_file = swap_file;
    /* The file's address_space; its a_ops are used for the header read below. */
    mapping = swap_file->f_mapping;
    dentry = swap_file->f_path.dentry;
    inode = mapping->host;

     /*
     * Read the swap header.
     */
    /* Reject mappings whose a_ops provide no readpage operation. */
    if (!mapping->a_ops->readpage) {
        error = -EINVAL;
        goto bad_swap_unlock_inode;
    }
    /* Read page index 0 of the mapping; it is mapped below as the swap header. */
    page = read_mapping_page(mapping, 0, swap_file);
    if (IS_ERR(page)) {
        error = PTR_ERR(page);
        goto bad_swap_unlock_inode;
    }
    swap_header = kmap(page);

    maxpages = read_swap_header(p, swap_header, inode);

    ...

    nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map,
        cluster_info, maxpages, &span);
    ...
    error = init_swap_address_space(p->type, maxpages);
    ...
}

上面代码中,read_mapping_page能够读取到内容,关键在于mapping对象:mapping->a_ops提供了readpage函数。但是这里的mapping和mapping->a_ops是什么时候设置的?先公布答案:mapping的a_ops指向的是def_blk_aops,定义在kernel-5.15/block/fops.c:

/*
 * address_space operations table named def_blk_aops. bdev_alloc() installs
 * it as inode->i_data.a_ops, so its .readpage entry (blkdev_readpage) is the
 * readpage operation that swapon checks for before reading the swap header.
 */
const struct address_space_operations def_blk_aops = {
    .set_page_dirty = __set_page_dirty_buffers,
    .readpage   = blkdev_readpage,
    .readahead  = blkdev_readahead,
    .writepage  = blkdev_writepage,
    .write_begin    = blkdev_write_begin,
    .write_end  = blkdev_write_end,
    .writepages = blkdev_writepages,
    .direct_IO  = blkdev_direct_IO,
    .migratepage    = buffer_migrate_page_norefs,
    .is_dirty_writeback = buffer_check_dirty_writeback,
};

但是这是什么代码路径设置进去的呢?我们就以zram为例:一切要从blk_alloc_disk函数说起

zram_drv.c:
/*
 * Excerpt (tail elided with "..."): allocates a zeroed struct zram, obtains
 * a device id from zram_index_idr, initializes its locks and allocates the
 * gendisk via blk_alloc_disk(). Returns -ENOMEM if the zram allocation fails.
 */
static int zram_add(void)
{
    struct zram *zram;
    int ret, device_id;

    zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
    if (!zram)
        return -ENOMEM;

    /* A non-negative idr_alloc() result is used as the device id. */
    ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
    if (ret < 0)
        goto out_free_dev;
    device_id = ret;

    init_rwsem(&zram->init_lock);
#ifdef CONFIG_ZRAM_WRITEBACK
    spin_lock_init(&zram->wb_limit_lock);
#endif

    /* gendisk structure */
    zram->disk = blk_alloc_disk(NUMA_NO_NODE);
    ...
}

__blk_alloc_disk(kernel-5.15/block/genhd.c)
    -->__alloc_disk_node(kernel-5.15/block/genhd.c)
        --->bdev_alloc(kernel-5.15/block/bdev.c)

/*
 * Allocates an inode from blockdev_superblock, marks it as a block-device
 * inode, installs def_blk_aops on its mapping, and initializes the
 * block_device obtained from that inode via I_BDEV() for the given
 * disk/partno. Returns the block_device, or NULL if the inode or the
 * per-CPU stats allocation fails.
 */
struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
{
    struct block_device *bdev;
    struct inode *inode;

    inode = new_inode(blockdev_superblock);
    if (!inode)
        return NULL;
    inode->i_mode = S_IFBLK;
    inode->i_rdev = 0;
    /* This is where the mapping's a_ops are pointed at def_blk_aops. */
    inode->i_data.a_ops = &def_blk_aops;
    mapping_set_gfp_mask(&inode->i_data, GFP_USER);

    bdev = I_BDEV(inode);
    mutex_init(&bdev->bd_fsfreeze_mutex);
    spin_lock_init(&bdev->bd_size_lock);
    bdev->bd_partno = partno;
    bdev->bd_inode = inode;
    bdev->bd_stats = alloc_percpu(struct disk_stats);
    if (!bdev->bd_stats) {
        /* Stats allocation failed: drop the inode and report failure. */
        iput(inode);
        return NULL;
    }
    bdev->bd_disk = disk;
    return bdev;
}

从bdev_alloc可以看到,inode->i_data.a_ops = &def_blk_aops就是设置的地方。同时这里还通过I_BDEV(inode)得到了block_device,我们再看下block_device是怎么生成的:关键在于上面的new_inode调用到了哪里。答案:kernel-5.15/fs/inode.c的new_inode函数。

kernel-5.15/fs/inode.c:

/*
 * Obtains an inode for superblock `sb` via new_inode_pseudo() and, when that
 * succeeds, passes it to inode_sb_list_add(). Returns the inode, or NULL if
 * new_inode_pseudo() returned NULL.
 */
struct inode *new_inode(struct super_block *sb)
{
    struct inode *inode;

    spin_lock_prefetch(&sb->s_inode_list_lock);

    inode = new_inode_pseudo(sb);
    if (inode)
        inode_sb_list_add(inode);
    return inode;
}

kernel-5.15/fs/inode.c:

/*
 * Allocates an inode with alloc_inode(); on success resets i_state to 0
 * while holding i_lock and initializes the i_sb_list list head. Returns the
 * inode, or NULL if alloc_inode() failed.
 */
struct inode *new_inode_pseudo(struct super_block *sb)
{
    struct inode *inode = alloc_inode(sb);

    if (inode) {
        spin_lock(&inode->i_lock);
        inode->i_state = 0;
        spin_unlock(&inode->i_lock);
        INIT_LIST_HEAD(&inode->i_sb_list);
    }
    return inode;
}

kernel-5.15/fs/inode.c:

/*
 * Allocates an inode for `sb`: uses the superblock's alloc_inode operation
 * when one is provided, otherwise allocates from inode_cachep. The inode is
 * then initialized with inode_init_always(); if that fails the inode is
 * released and NULL is returned.
 */
static struct inode *alloc_inode(struct super_block *sb)
{
    const struct super_operations *ops = sb->s_op;
    struct inode *inode;

    if (ops->alloc_inode)
        // Call the superblock's alloc_inode; in the scenario analysed here this is kernel-5.15/block/bdev.c:bdev_alloc_inode
        inode = ops->alloc_inode(sb);
    else
        inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);

    if (!inode)
        return NULL;

    // Important initialization routine: inode_init_always
    if (unlikely(inode_init_always(sb, inode))) {
        // Init failed: call destroy_inode when provided; if there is no
        // free_inode op after that, return without the i_callback() step.
        if (ops->destroy_inode) {
            ops->destroy_inode(inode);
            if (!ops->free_inode)
                return NULL;
        }
        // Record the free_inode op on the inode, then invoke i_callback() directly.
        inode->free_inode = ops->free_inode;
        i_callback(&inode->i_rcu);
        return NULL;
    }

    return inode;
}

最终new_inode通过alloc_inode调用到了kernel-5.15/block/bdev.c的bdev_alloc_inode函数,随后调用kernel-5.15/fs/inode.c的inode_init_always完成初始化,这两个函数如下:


/*
 * Allocates a struct bdev_inode from bdev_cachep, zeroes its embedded `bdev`
 * member and returns a pointer to its embedded `vfs_inode`. Returns NULL if
 * the allocation fails.
 */
static struct inode *bdev_alloc_inode(struct super_block *sb)
{
    struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);

    if (!ei)
        return NULL;
    /* Only the bdev member is cleared here, not the whole bdev_inode. */
    memset(&ei->bdev, 0, sizeof(ei->bdev));
    return &ei->vfs_inode;
}

/*
 * Excerpt (middle elided with "..."): resets the fields of `inode` to their
 * initial values for superblock `sb` and points the inode's own address_space
 * (i_data) back at the inode with empty_aops as its operations.
 */
int inode_init_always(struct super_block *sb, struct inode *inode)
{
    static const struct inode_operations empty_iops;
    static const struct file_operations no_open_fops = {.open = no_open};
    struct address_space *const mapping = &inode->i_data;

    inode->i_sb = sb;
    inode->i_blkbits = sb->s_blocksize_bits;
    inode->i_flags = 0;
    atomic64_set(&inode->i_sequence, 0);
    atomic_set(&inode->i_count, 1);
    inode->i_op = &empty_iops;
    inode->i_fop = &no_open_fops;
    inode->i_ino = 0;
    inode->__i_nlink = 1;
    inode->i_opflags = 0;
    if (sb->s_xattr)
        inode->i_opflags |= IOP_XATTR;
    i_uid_write(inode, 0);
    i_gid_write(inode, 0);
    atomic_set(&inode->i_writecount, 0);
    inode->i_size = 0;
    inode->i_write_hint = WRITE_LIFE_NOT_SET;
    inode->i_blocks = 0;
    inode->i_bytes = 0;
    inode->i_generation = 0;
    inode->i_pipe = NULL;
    inode->i_cdev = NULL;
    inode->i_link = NULL;
    inode->i_dir_seq = 0;
    inode->i_rdev = 0;
    inode->dirtied_when = 0;

    ...

    /*
     * Default a_ops are empty_aops. For block devices, bdev_alloc() overrides
     * this with def_blk_aops after new_inode() returns.
     */
    mapping->a_ops = &empty_aops;
    mapping->host = inode;
}

  • 4
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值