zswap是一个轻量级的、用于压缩swap page的cache。它可以将process中准备swap out的page压缩后保存在dram中。zswap基本上是以cpu密集型的计算来换取io密集型的操作(减少对swap设备的读写),以提高系统整体的效率。
zswap的源码在Linux/mm/zswap.c
1238 static int __init init_zswap(void)
1239 {
1240 struct zswap_pool *pool;
1241
1242 zswap_init_started = true;
1243
1244 if (zswap_entry_cache_create()) {
1245 pr_err("entry cache creation failed\n");
1246 goto cache_fail;
1247 }
1248
1249 if (zswap_cpu_dstmem_init()) {
1250 pr_err("dstmem alloc failed\n");
1251 goto dstmem_fail;
1252 }
1253
1254 pool = __zswap_pool_create_fallback();
1255 if (!pool) {
1256 pr_err("pool creation failed\n");
1257 goto pool_fail;
1258 }
1259 pr_info("loaded using pool %s/%s\n", pool->tfm_name,
1260 zpool_get_type(pool->zpool));
1261
1262 list_add(&pool->list, &zswap_pools);
1263
1264 frontswap_register_ops(&zswap_frontswap_ops);
1265 if (zswap_debugfs_init())
1266 pr_warn("debugfs initialization failed\n");
1267 return 0;
1268
1269 pool_fail:
1270 zswap_cpu_dstmem_destroy();
1271 dstmem_fail:
1272 zswap_entry_cache_destroy();
1273 cache_fail:
1274 return -ENOMEM;
1275 }
1276 /* must be late so crypto has time to come up */
1277 late_initcall(init_zswap);
我们可以看到是以late_initcall的方式call init_zswap的。也就是kernel在start_kernel的最后阶段会call init_zswap。
这个函数是zswap最重要的函数,里面call了很多函数,我们一个个看。
1244 if (zswap_entry_cache_create()) {
1245 pr_err("entry cache creation failed\n");
1246 goto cache_fail;
1247 }
这里只是调用zswap_entry_cache_create,通过slab(KMEM_CACHE)为struct zswap_entry创建一个cache,并保存在zswap_entry_cache中。创建失败时返回非0。
221 static int __init zswap_entry_cache_create(void)
222 {
223 zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
224 return zswap_entry_cache == NULL;
225 }
zswap_cpu_dstmem_init() 注册一个通知链,当cpu up(CPU_UP_PREPARE)的时候为percpu 变量zswap_dstmem 申请一个PAGE_SIZE * 2大小的buffer(page为4K时即8K)。当cpu dead(CPU_DEAD)或者cpu up被取消(CPU_UP_CANCELED)的时候释放这个buffer。
源码如下:
353 static DEFINE_PER_CPU(u8 *, zswap_dstmem);
354
355 static int __zswap_cpu_dstmem_notifier(unsigned long action, unsigned long cpu)
356 {
357 u8 *dst;
358
359 switch (action) {
360 case CPU_UP_PREPARE:
361 dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
362 if (!dst) {
363 pr_err("can't allocate compressor buffer\n");
364 return NOTIFY_BAD;
365 }
366 per_cpu(zswap_dstmem, cpu) = dst;
367 break;
368 case CPU_DEAD:
369 case CPU_UP_CANCELED:
370 dst = per_cpu(zswap_dstmem, cpu);
371 kfree(dst);
372 per_cpu(zswap_dstmem, cpu) = NULL;
373 break;
374 default:
375 break;
376 }
377 return NOTIFY_OK;
378 }
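__zswap_pool_create_fallback 决定压缩的格式和zpool的类型:如果指定的compressor不可用,则如629行所示回退到默认值zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
同样,如果指定的zpool类型不可用,则如640行所示回退到默认值zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT。如果连默认值都不可用则返回NULL,否则最后调用zswap_pool_create来创建zswap使用的pool。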
static __init struct zswap_pool *__zswap_pool_create_fallback(void)
619 {
620 if (!crypto_has_comp(zswap_compressor, 0, 0)) {
621 if (!strcmp(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT)) {
622 pr_err("default compressor %s not available\n",
623 zswap_compressor);
624 return NULL;
625 }
626 pr_err("compressor %s not available, using default %s\n",
627 zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
628 param_free_charp(&zswap_compressor);
629 zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
630 }
631 if (!zpool_has_pool(zswap_zpool_type)) {
632 if (!strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
633 pr_err("default zpool %s not available\n",
634 zswap_zpool_type);
635 return NULL;
636 }
637 pr_err("zpool %s not available, using default %s\n",
638 zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
639 param_free_charp(&zswap_zpool_type);
640 zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
641 }
642
643 return zswap_pool_create(zswap_zpool_type, zswap_compressor);
644 }
这个模块到这边已经初始化好了,具体要怎么工作呢?zswap是通过frontswap 来load或者store page的,所以调用frontswap_register_ops(&zswap_frontswap_ops); 把callback注册给frontswap使用。
1175 static struct frontswap_ops zswap_frontswap_ops = {
1176 .store = zswap_frontswap_store,
1177 .load = zswap_frontswap_load,
1178 .invalidate_page = zswap_frontswap_invalidate_page,
1179 .invalidate_area = zswap_frontswap_invalidate_area,
1180 .init = zswap_frontswap_init
1181 };
最重要的是zswap_frontswap_store 用于压缩buffer,zswap_frontswap_load用于解压buffer.
frontswap 使用的例子如下:
int __frontswap_load(struct page *page)
298 {
299 int ret = -1;
300 swp_entry_t entry = { .val = page_private(page), };
301 int type = swp_type(entry);
302 struct swap_info_struct *sis = swap_info[type];
303 pgoff_t offset = swp_offset(entry);
304 struct frontswap_ops *ops;
305
306 if (!frontswap_ops)
307 return -1;
308
309 BUG_ON(!PageLocked(page));
310 BUG_ON(sis == NULL);
311 if (!__frontswap_test(sis, offset))
312 return -1;
313
314 /* Try loading from each implementation, until one succeeds. */
315 for_each_frontswap_ops(ops) {
316 ret = ops->load(type, offset, page);
317 if (!ret) /* successful load */
318 break;
319 }
320 if (ret == 0) {
321 inc_frontswap_loads();
322 if (frontswap_tmem_exclusive_gets_enabled) {
323 SetPageDirty(page);
324 __frontswap_clear(sis, offset);
325 }
326 }
327 return ret;
328 }
329 EXPORT_SYMBOL(__frontswap_load);
最重要的是316行,通过ops->load调用到zswap_frontswap_load来解压buffer。
zswap 可以通过sys文件系统(/sys/module/zswap/parameters/max_pool_percent)设定max_pool_percent,表示压缩后的pool最多可以占用总memory的百分比。
max_pool_percent的默认值是20.
195 static bool zswap_is_full(void)
196 {
197 return totalram_pages * zswap_max_pool_percent / 100 <
198 DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
199 }
可以看到,zswap_is_full判断压缩pool占用的page数是否超过了总page数的max_pool_percent(默认20%),也就是说zswap的pool最多占用总memory的20%。
可以在kernel的启动参数(例如在uboot的bootargs)中加上zswap.enabled=1 来enable zswap,通过zswap.compressor=lzo 来选择压缩的格式。
zswap的源码在Linux/mm/zswap.c
1238 static int __init init_zswap(void)
1239 {
1240 struct zswap_pool *pool;
1241
1242 zswap_init_started = true;
1243
1244 if (zswap_entry_cache_create()) {
1245 pr_err("entry cache creation failed\n");
1246 goto cache_fail;
1247 }
1248
1249 if (zswap_cpu_dstmem_init()) {
1250 pr_err("dstmem alloc failed\n");
1251 goto dstmem_fail;
1252 }
1253
1254 pool = __zswap_pool_create_fallback();
1255 if (!pool) {
1256 pr_err("pool creation failed\n");
1257 goto pool_fail;
1258 }
1259 pr_info("loaded using pool %s/%s\n", pool->tfm_name,
1260 zpool_get_type(pool->zpool));
1261
1262 list_add(&pool->list, &zswap_pools);
1263
1264 frontswap_register_ops(&zswap_frontswap_ops);
1265 if (zswap_debugfs_init())
1266 pr_warn("debugfs initialization failed\n");
1267 return 0;
1268
1269 pool_fail:
1270 zswap_cpu_dstmem_destroy();
1271 dstmem_fail:
1272 zswap_entry_cache_destroy();
1273 cache_fail:
1274 return -ENOMEM;
1275 }
1276 /* must be late so crypto has time to come up */
1277 late_initcall(init_zswap);
我们可以看到是以late_initcall的方式call init_zswap的。也就是kernel在start_kernel的最后阶段会call init_zswap。
这个函数是zswap最重要的函数,里面call了很多函数,我们一个个看。
1244 if (zswap_entry_cache_create()) {
1245 pr_err("entry cache creation failed\n");
1246 goto cache_fail;
1247 }
这里只是调用zswap_entry_cache_create,通过slab(KMEM_CACHE)为struct zswap_entry创建一个cache,并保存在zswap_entry_cache中。创建失败时返回非0。
221 static int __init zswap_entry_cache_create(void)
222 {
223 zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
224 return zswap_entry_cache == NULL;
225 }
zswap_cpu_dstmem_init() 注册一个通知链,当cpu up(CPU_UP_PREPARE)的时候为percpu 变量zswap_dstmem 申请一个PAGE_SIZE * 2大小的buffer(page为4K时即8K)。当cpu dead(CPU_DEAD)或者cpu up被取消(CPU_UP_CANCELED)的时候释放这个buffer。
源码如下:
353 static DEFINE_PER_CPU(u8 *, zswap_dstmem);
354
355 static int __zswap_cpu_dstmem_notifier(unsigned long action, unsigned long cpu)
356 {
357 u8 *dst;
358
359 switch (action) {
360 case CPU_UP_PREPARE:
361 dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
362 if (!dst) {
363 pr_err("can't allocate compressor buffer\n");
364 return NOTIFY_BAD;
365 }
366 per_cpu(zswap_dstmem, cpu) = dst;
367 break;
368 case CPU_DEAD:
369 case CPU_UP_CANCELED:
370 dst = per_cpu(zswap_dstmem, cpu);
371 kfree(dst);
372 per_cpu(zswap_dstmem, cpu) = NULL;
373 break;
374 default:
375 break;
376 }
377 return NOTIFY_OK;
378 }
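__zswap_pool_create_fallback 决定压缩的格式和zpool的类型:如果指定的compressor不可用,则如629行所示回退到默认值zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
同样,如果指定的zpool类型不可用,则如640行所示回退到默认值zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT。如果连默认值都不可用则返回NULL,否则最后调用zswap_pool_create来创建zswap使用的pool。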
static __init struct zswap_pool *__zswap_pool_create_fallback(void)
619 {
620 if (!crypto_has_comp(zswap_compressor, 0, 0)) {
621 if (!strcmp(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT)) {
622 pr_err("default compressor %s not available\n",
623 zswap_compressor);
624 return NULL;
625 }
626 pr_err("compressor %s not available, using default %s\n",
627 zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT);
628 param_free_charp(&zswap_compressor);
629 zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
630 }
631 if (!zpool_has_pool(zswap_zpool_type)) {
632 if (!strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) {
633 pr_err("default zpool %s not available\n",
634 zswap_zpool_type);
635 return NULL;
636 }
637 pr_err("zpool %s not available, using default %s\n",
638 zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT);
639 param_free_charp(&zswap_zpool_type);
640 zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT;
641 }
642
643 return zswap_pool_create(zswap_zpool_type, zswap_compressor);
644 }
这个模块到这边已经初始化好了,具体要怎么工作呢?zswap是通过frontswap 来load或者store page的,所以调用frontswap_register_ops(&zswap_frontswap_ops); 把callback注册给frontswap使用。
1175 static struct frontswap_ops zswap_frontswap_ops = {
1176 .store = zswap_frontswap_store,
1177 .load = zswap_frontswap_load,
1178 .invalidate_page = zswap_frontswap_invalidate_page,
1179 .invalidate_area = zswap_frontswap_invalidate_area,
1180 .init = zswap_frontswap_init
1181 };
最重要的是zswap_frontswap_store 用于压缩buffer,zswap_frontswap_load用于解压buffer.
frontswap 使用的例子如下:
int __frontswap_load(struct page *page)
298 {
299 int ret = -1;
300 swp_entry_t entry = { .val = page_private(page), };
301 int type = swp_type(entry);
302 struct swap_info_struct *sis = swap_info[type];
303 pgoff_t offset = swp_offset(entry);
304 struct frontswap_ops *ops;
305
306 if (!frontswap_ops)
307 return -1;
308
309 BUG_ON(!PageLocked(page));
310 BUG_ON(sis == NULL);
311 if (!__frontswap_test(sis, offset))
312 return -1;
313
314 /* Try loading from each implementation, until one succeeds. */
315 for_each_frontswap_ops(ops) {
316 ret = ops->load(type, offset, page);
317 if (!ret) /* successful load */
318 break;
319 }
320 if (ret == 0) {
321 inc_frontswap_loads();
322 if (frontswap_tmem_exclusive_gets_enabled) {
323 SetPageDirty(page);
324 __frontswap_clear(sis, offset);
325 }
326 }
327 return ret;
328 }
329 EXPORT_SYMBOL(__frontswap_load);
最重要的是316行,通过ops->load调用到zswap_frontswap_load来解压buffer。