1.swapon开启zram交换分区
mkswap /dev/block/zram0
swapon /dev/block/zram0
mkswap命令向块设备文件/dev/block/zram0写入swap_header信息;swapon命令则调用linux的swapon系统调用启用zram0交换分区。注意顺序:必须先mkswap再swapon,因为swapon系统调用会读取swap_header(下文fs_mgr_swapon_all也是先执行mkswap再调用swapon)。
问题:实际安卓平台是在哪里触发mkswap和swapon调用的?我们以MTK8195平台为例:
init.xxx.rc:
swapon_all /vendor/etc/fstab.enableswap
其中fstab.enableswap内容如下:
/dev/block/zram0 none swap defaults zramsize=xx%
那么swapon_all命令执行了哪里的代码呢?又是哪里解析fstab.enableswap文件呢,答案是:
system/core/init/builtins.cpp:
/*
 * swapon_all [ <fstab> ]
 *
 * Handler for the `swapon_all` command: loads an fstab and hands it to
 * fs_mgr_swapon_all().
 *
 * args: builtin arguments; args.args is parsed by ParseSwaponAll() into the
 *       fstab path.
 * Returns an empty Result on success, or an Error when argument parsing,
 * reading the fstab, or fs_mgr_swapon_all() fails.
 */
static Result<void> do_swapon_all(const BuiltinArguments& args) {
auto swapon_all = ParseSwaponAll(args.args);
// Propagate the parse error unchanged.
if (!swapon_all.ok()) return swapon_all.error();
Fstab fstab;
// Empty parsed value: fall back to the default fstab.
if (swapon_all->empty()) {
if (!ReadDefaultFstab(&fstab)) {
return Error() << "Could not read default fstab";
}
} else {
// Otherwise read the fstab file named by the parsed value.
if (!ReadFstabFromFile(*swapon_all, &fstab)) {
return Error() << "Could not read fstab '" << *swapon_all << "'";
}
}
if (!fs_mgr_swapon_all(fstab)) {
return Error() << "fs_mgr_swapon_all() failed";
}
return {};
}
而fs_mgr_swapon_all的实现位于system/core/fs_mgr/fs_mgr.cpp:
// Excerpt of fs_mgr_swapon_all (elided parts are marked "..."): for an fstab
// entry, runs mkswap on its block device and then enables it with swapon().
// NOTE(review): `entry` and `ret` are declared in the elided part; the
// `continue` below implies this body sits inside a loop, presumably over the
// entries of `fstab` -- confirm against the full source.
bool fs_mgr_swapon_all(const Fstab& fstab) {
...
// argv for mkswap: the mkswap binary followed by the entry's block device.
const char* mkswap_argv[2] = {
MKSWAP_BIN,
entry.blk_device.c_str(),
};
int err = logwrap_fork_execvp(ARRAY_SIZE(mkswap_argv), mkswap_argv, nullptr, false,
LOG_KLOG, false, nullptr);
// mkswap failed: log, mark the overall result as failed, and skip swapon
// for this entry.
if (err) {
LERROR << "mkswap failed for " << entry.blk_device;
ret = false;
continue;
}
/* If -1, then no priority was specified in fstab, so don't set
* SWAP_FLAG_PREFER or encode the priority */
int flags = 0;
if (entry.swap_prio >= 0) {
// Encode the priority into the flags and mark it as preferred.
flags = (entry.swap_prio << SWAP_FLAG_PRIO_SHIFT) & SWAP_FLAG_PRIO_MASK;
flags |= SWAP_FLAG_PREFER;
} else {
flags = 0;
}
// Enable swapping on the block device with the computed flags.
err = swapon(entry.blk_device.c_str(), flags);
if (err) {
LERROR << "swapon failed for " << entry.blk_device;
ret = false;
}
...
}
2.swapon系统调用
kernel-5.15/mm/swapfile.c
/*
 * Abridged excerpt of the swapon path (signature and elided parts are shown
 * as "..."): opens the swap file by name, reads page 0 of its mapping as the
 * swap header, then sets up the swap map/extents and the swap address space.
 * `p`, `name` and the other locals are declared in the elided parts.
 */
swapon(...) {
/* Open the swap target read/write and remember the file in p. */
swap_file = file_open_name(name, O_RDWR|O_LARGEFILE, 0);
p->swap_file = swap_file;
/* The file's mapping supplies the a_ops used to read the header below. */
mapping = swap_file->f_mapping;
dentry = swap_file->f_path.dentry;
inode = mapping->host;
/*
* Read the swap header.
*/
/* Without a readpage operation the header cannot be read: -EINVAL. */
if (!mapping->a_ops->readpage) {
error = -EINVAL;
goto bad_swap_unlock_inode;
}
/* Read page index 0 of the mapping. */
page = read_mapping_page(mapping, 0, swap_file);
if (IS_ERR(page)) {
error = PTR_ERR(page);
goto bad_swap_unlock_inode;
}
/* Map the page and hand its contents to read_swap_header() as the header. */
swap_header = kmap(page);
maxpages = read_swap_header(p, swap_header, inode);
...
nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map,
cluster_info, maxpages, &span);
...
error = init_swap_address_space(p->type, maxpages);
...
}
上面代码中read_mapping_page之所以能够读取到内容,关键在于mapping对象:mapping->a_ops提供了readpage函数。但是这里的mapping和mapping->a_ops是什么时候设置的?先公布答案:mapping的a_ops指向的是def_blk_aops,定义在kernel-5.15/block/fops.c:
/*
 * address_space_operations table for block devices. bdev_alloc() installs it
 * as inode->i_data.a_ops; its .readpage entry (blkdev_readpage) is the hook
 * that swapon checks for before reading the swap header.
 */
const struct address_space_operations def_blk_aops = {
.set_page_dirty = __set_page_dirty_buffers,
.readpage = blkdev_readpage,
.readahead = blkdev_readahead,
.writepage = blkdev_writepage,
.write_begin = blkdev_write_begin,
.write_end = blkdev_write_end,
.writepages = blkdev_writepages,
.direct_IO = blkdev_direct_IO,
.migratepage = buffer_migrate_page_norefs,
.is_dirty_writeback = buffer_check_dirty_writeback,
};
但是这是什么代码路径设置进去的呢?我们就以zram为例:一切要从blk_alloc_disk函数说起
zram_drv.c:
/*
 * Excerpt of zram_add (the remainder is elided as "..."): allocates a zram
 * device structure, reserves an id for it, and allocates its gendisk.
 * Returns -ENOMEM when the zram structure cannot be allocated; the
 * out_free_dev error label lives in the elided part.
 */
static int zram_add(void)
{
struct zram *zram;
int ret, device_id;
/* Zero-initialized device structure. */
zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
if (!zram)
return -ENOMEM;
/* Reserve an id in zram_index_idr; a negative return is an error. */
ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
if (ret < 0)
goto out_free_dev;
device_id = ret;
init_rwsem(&zram->init_lock);
#ifdef CONFIG_ZRAM_WRITEBACK
spin_lock_init(&zram->wb_limit_lock);
#endif
/* gendisk structure */
zram->disk = blk_alloc_disk(NUMA_NO_NODE);
...
}
__blk_alloc_disk(kernel-5.15/block/genhd.c)
-->__alloc_disk_node(kernel-5.15/block/genhd.c)
--->bdev_alloc(kernel-5.15/block/bdev.c)
/*
 * Allocate and initialize a block_device for @disk / @partno.
 *
 * A new inode is allocated on blockdev_superblock, marked as a block-device
 * inode, and its page-cache operations are set to def_blk_aops. The
 * block_device is then obtained from that inode via I_BDEV() and initialized.
 *
 * Returns the new block_device, or NULL if the inode or the per-CPU stats
 * allocation fails (in the latter case the inode reference is dropped).
 */
struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
{
struct block_device *bdev;
struct inode *inode;
inode = new_inode(blockdev_superblock);
if (!inode)
return NULL;
inode->i_mode = S_IFBLK;
inode->i_rdev = 0;
/* This is where the mapping's a_ops becomes def_blk_aops. */
inode->i_data.a_ops = &def_blk_aops;
mapping_set_gfp_mask(&inode->i_data, GFP_USER);
/* NOTE(review): presumably the bdev member of the struct bdev_inode that
 * bdev_alloc_inode() allocated around this inode -- confirm in I_BDEV(). */
bdev = I_BDEV(inode);
mutex_init(&bdev->bd_fsfreeze_mutex);
spin_lock_init(&bdev->bd_size_lock);
bdev->bd_partno = partno;
bdev->bd_inode = inode;
bdev->bd_stats = alloc_percpu(struct disk_stats);
if (!bdev->bd_stats) {
/* Drop the inode reference taken above before bailing out. */
iput(inode);
return NULL;
}
bdev->bd_disk = disk;
return bdev;
}
从bdev_alloc可以看到,inode->i_data.a_ops = &def_blk_aops就是设置a_ops的地方。同时这里还得到了block_device,我们再看下block_device是怎么生成的。这里的关键是:上面的new_inode最终调用到了哪里?先看kernel-5.15/fs/inode.c中的new_inode函数:
kernel-5.15/fs/inode.c:
/*
 * Allocate a new inode for superblock @sb via new_inode_pseudo() and, on
 * success, add it with inode_sb_list_add().
 *
 * Returns the inode, or NULL if allocation failed.
 */
struct inode *new_inode(struct super_block *sb)
{
struct inode *inode;
/* Prefetch the list lock ahead of the list insertion below. */
spin_lock_prefetch(&sb->s_inode_list_lock);
inode = new_inode_pseudo(sb);
if (inode)
inode_sb_list_add(inode);
return inode;
}
kernel-5.15/fs/inode.c:
/*
 * Allocate an inode for @sb with alloc_inode(), then reset its i_state to 0
 * under i_lock and initialize its i_sb_list list head.
 *
 * Returns the inode, or NULL if alloc_inode() failed.
 */
struct inode *new_inode_pseudo(struct super_block *sb)
{
struct inode *inode = alloc_inode(sb);
if (inode) {
spin_lock(&inode->i_lock);
inode->i_state = 0;
spin_unlock(&inode->i_lock);
INIT_LIST_HEAD(&inode->i_sb_list);
}
return inode;
}
kernel-5.15/fs/inode.c:
/*
 * Allocate an inode for @sb and run the common initialization.
 *
 * The inode comes from the superblock's alloc_inode hook when one is set,
 * otherwise from the generic inode_cachep cache. It is then initialized with
 * inode_init_always(); if that fails the inode is torn down again.
 *
 * Returns the initialized inode, or NULL on allocation or init failure.
 */
static struct inode *alloc_inode(struct super_block *sb)
{
const struct super_operations *ops = sb->s_op;
struct inode *inode;
if (ops->alloc_inode)
// Call the superblock's own alloc_inode hook; in the scenario analyzed here
// this reaches kernel-5.15/block/bdev.c:bdev_alloc_inode
inode = ops->alloc_inode(sb);
else
inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
if (!inode)
return NULL;
// Important initialization step: inode_init_always
if (unlikely(inode_init_always(sb, inode))) {
// Init failed: run destroy_inode if provided; when there is no free_inode
// hook after that, nothing more is done here.
if (ops->destroy_inode) {
ops->destroy_inode(inode);
if (!ops->free_inode)
return NULL;
}
// Otherwise record the free_inode hook on the inode and invoke i_callback
// on its i_rcu member directly.
inode->free_inode = ops->free_inode;
i_callback(&inode->i_rcu);
return NULL;
}
return inode;
}
最终new_inode经alloc_inode中的ops->alloc_inode调用到了kernel-5.15/block/bdev.c的bdev_alloc_inode函数;随后alloc_inode再调用inode_init_always完成通用初始化。这两个函数如下:
/*
 * alloc_inode hook for block-device inodes: allocates a struct bdev_inode
 * from bdev_cachep, zeroes its embedded bdev member, and returns the embedded
 * vfs_inode. The inode and its block_device therefore share one allocation.
 *
 * Returns the embedded inode, or NULL if the cache allocation fails.
 */
static struct inode *bdev_alloc_inode(struct super_block *sb)
{
struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
if (!ei)
return NULL;
/* Only the bdev part is cleared here; vfs_inode is left untouched. */
memset(&ei->bdev, 0, sizeof(ei->bdev));
return &ei->vfs_inode;
}
/*
 * Excerpt of inode_init_always (parts elided as "...", including the return
 * statement): resets the fields of @inode to their defaults for superblock
 * @sb and initializes its embedded address_space (inode->i_data).
 *
 * Note that the mapping's a_ops is set to empty_aops here; for block devices
 * bdev_alloc() replaces it with def_blk_aops after new_inode() returns.
 */
int inode_init_always(struct super_block *sb, struct inode *inode)
{
static const struct inode_operations empty_iops;
static const struct file_operations no_open_fops = {.open = no_open};
/* The inode's own embedded address_space. */
struct address_space *const mapping = &inode->i_data;
inode->i_sb = sb;
inode->i_blkbits = sb->s_blocksize_bits;
inode->i_flags = 0;
atomic64_set(&inode->i_sequence, 0);
/* The new inode starts with a reference count of 1. */
atomic_set(&inode->i_count, 1);
/* Default operation tables: empty inode ops, and file ops whose open is no_open. */
inode->i_op = &empty_iops;
inode->i_fop = &no_open_fops;
inode->i_ino = 0;
inode->__i_nlink = 1;
inode->i_opflags = 0;
if (sb->s_xattr)
inode->i_opflags |= IOP_XATTR;
i_uid_write(inode, 0);
i_gid_write(inode, 0);
atomic_set(&inode->i_writecount, 0);
inode->i_size = 0;
inode->i_write_hint = WRITE_LIFE_NOT_SET;
inode->i_blocks = 0;
inode->i_bytes = 0;
inode->i_generation = 0;
inode->i_pipe = NULL;
inode->i_cdev = NULL;
inode->i_link = NULL;
inode->i_dir_seq = 0;
inode->i_rdev = 0;
inode->dirtied_when = 0;
...
/* Default page-cache ops; the mapping's host points back at this inode. */
mapping->a_ops = &empty_aops;
mapping->host = inode;
}