LZ4算法使用

LZ4下载编译

LZ4 是一种无损压缩算法,单核压缩速度可达 500 MB/s 以上,并可随多核 CPU 扩展。LZ4 的解压速度极快,单核解压速度可达 GB/s 级别,在多核系统上通常能达到 RAM 速度极限。

$ git clone https://github.com/lz4/lz4.git
$ cd lz4
$ make
$ make install

lz4算法的实现在lib目录下:lz4.c实现最基本的块格式压缩和解压,lz4hc.c实现高压缩率的块格式压缩和解压,lz4frame.c实现帧格式压缩和解压;为了方便使用,lz4file.c实现了文件的压缩和解压操作;xxhash.c是LZ4采用的哈希算法。

$ tree lib
lib
├── lz4.c
├── lz4.h
├── lz4file.c
├── lz4file.h
├── lz4frame.c
├── lz4frame.h
├── lz4frame_static.h
├── lz4hc.c
├── lz4hc.h
├── xxhash.c
├── xxhash.h

lz4命令行使用如下:

$ lz4 -h
*** LZ4 command line interface 64-bits v1.9.2, by Yann Collet ***
Usage : 
      lz4 [arg] [input] [output] 

input   : a filename 
          with no FILE, or when FILE is - or stdin, read standard input
Arguments : 
 -1     : Fast compression (default) 
 -9     : High compression 
 -d     : decompression (default for .lz4 extension)
 -z     : force compression 
 -D FILE: use FILE as dictionary 
 -f     : overwrite output without prompting 
 -k     : preserve source files(s)  (default) 
--rm    : remove source file(s) after successful de/compression 
 -h/-H  : display help/long help and exit 

Advanced arguments :
 -V     : display Version number and exit 
 -v     : verbose mode 
 -q     : suppress warnings; specify twice to suppress errors too
 -c     : force write to standard output, even if it is the console
 -t     : test compressed file integrity
 -m     : multiple input files (implies automatic output filenames)
 -r     : operate recursively on directories (sets also -m) 
 -l     : compress using Legacy format (Linux kernel compression)
 -B#    : cut file into blocks of size # bytes [32+] 
                     or predefined block size [4-7] (default: 7) 
 -BI    : Block Independence (default) 
 -BD    : Block dependency (improves compression ratio) 
 -BX    : enable block checksum (default:disabled) 
--no-frame-crc : disable stream checksum (default:enabled) 
--content-size : compressed frame includes original size (default:not present)
--list FILE : lists information about .lz4 files (useful for files compressed with --content-size flag)
--[no-]sparse  : sparse mode (default:enabled on file, disabled on stdout)
--favor-decSpeed: compressed files decompress faster, but are less compressed 
--fast[=#]: switch to ultra fast compression level (default: 1)
--best  : same as -12
Benchmark arguments : 
 -b#    : benchmark file(s), using # compression level (default : 1) 
 -e#    : test all compression levels from -bX to # (default : 1)
 -i#    : minimum evaluation time in seconds (default : 3s) 

使用示例

块格式

LZ4块格式压缩和解压的函数分别为:LZ4_compress_default 和 LZ4_decompress_safe。测试代码如下:

/**
 * Round-trip a short message through the LZ4 block API.
 *
 * Compresses `msg` with LZ4_compress_default() and compares the output with a
 * known-good block, then decompresses that reference block with
 * LZ4_decompress_safe() and checks that the original text comes back.
 *
 * @return Number of decompressed bytes on success, -1 on any failure.
 */
int lz4_block_compress_test(void)
{
    const char msg[] = "hello david, hello lily, hello tom, hello lucy, hello bob";
    const int msg_len = (int)strlen(msg);

    /*
     * Reference output in raw block format. Stored as uint8_t because some
     * bytes (e.g. 0xd2) do not fit in a signed char.
     */
    const uint8_t expected_compressed_data[] = {
        0xd2, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x64, 0x61, 0x76, 0x69, 0x64, 0x2c, 0x20, 0x0d, 0x00,
        0x44, 0x6c, 0x69, 0x6c, 0x79, 0x0c, 0x00, 0x34, 0x74, 0x6f, 0x6d, 0x0b, 0x00, 0x33, 0x6c, 0x75,
        0x63, 0x17, 0x00, 0x50, 0x6f, 0x20, 0x62, 0x6f, 0x62
    };
    const int expected_len = (int)sizeof(expected_compressed_data);
    char buf[64] = {0};

    /* Compress */
    int ret = LZ4_compress_default(msg, buf, msg_len, (int)sizeof(buf));
    if (ret <= 0) {
        printf("Failed to compresss data, ret=%d\n", ret);
        return -1;
    }
    if (ret != expected_len ||
            memcmp(buf, expected_compressed_data, sizeof(expected_compressed_data)) != 0) {
        printf("Compresss data is not expected\n");
        return -1;
    }
    hexdump("Compress data", (const uint8_t *)buf, (size_t)ret);

    /* Decompress the reference block back into the same scratch buffer */
    ret = LZ4_decompress_safe((const char *)expected_compressed_data, buf,
                              expected_len, (int)sizeof(buf));
    if (ret <= 0) {
        printf("Failed to decompresss data, ret=%d\n", ret);
        return -1;
    }
    if (ret != msg_len || memcmp(buf, msg, (size_t)msg_len) != 0) {
        printf("Decompresss data is not expected\n");
        return -1;
    }
    printf("Deompress data: %.*s\n", ret, buf);

    return ret;
}

Linux kernel块格式

由于下一节介绍的帧格式解压需要依赖动态内存分配,因此linux kernel的自解压代码采用的是块格式,只是在压缩数据最前面添加了4字节的压缩魔法数0x184C2102和4字节的压缩后数据长度。

$ echo -n "hello david, hello lily, hello tom, hello lucy, hello bob" > hello
# 注意使用-l参数,Linux kernel compression
$ lz4 -l -f -v hello
$ hexdump -C hello.lz4
00000000  02 21 4c 18 29 00 00 00  d2 68 65 6c 6c 6f 20 64  |.!L.)....hello d|
00000010  61 76 69 64 2c 20 0d 00  44 6c 69 6c 79 0c 00 34  |avid, ..Dlily..4|
00000020  74 6f 6d 0b 00 33 6c 75  63 17 00 50 6f 20 62 6f  |tom..3luc..Po bo|
00000030  62                                                |b|
00000031

测试代码如下:

/**
 * Round-trip a short message through the block format with an 8-byte header.
 *
 * Layout: 4-byte little-endian magic (ARCHIVE_MAGICNUMBER), 4-byte
 * little-endian compressed length, then one raw LZ4 block.
 * The test builds that stream from `msg`, compares it with a known-good
 * stream, then parses the reference stream and decompresses its block.
 *
 * @return Number of decompressed bytes on success, -1 on any failure.
 */
int lz4_block_compress_kernel_format_test(void)
{
    enum { HEADER_SIZE = 8 }; /* 4-byte magic + 4-byte compressed length */

    const char msg[] = "hello david, hello lily, hello tom, hello lucy, hello bob";
    const int msg_len = (int)strlen(msg);

    /*
     * Reference stream (header + block). Stored as uint8_t so that bytes
     * above 0x7f are represented exactly and never sign-extend when read.
     */
    const uint8_t expected_compressed_data[] = {
        0x02, 0x21, 0x4c, 0x18, 0x29, 0x00, 0x00, 0x00, 0xd2, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x64,
        0x61, 0x76, 0x69, 0x64, 0x2c, 0x20, 0x0d, 0x00, 0x44, 0x6c, 0x69, 0x6c, 0x79, 0x0c, 0x00, 0x34,
        0x74, 0x6f, 0x6d, 0x0b, 0x00, 0x33, 0x6c, 0x75, 0x63, 0x17, 0x00, 0x50, 0x6f, 0x20, 0x62, 0x6f,
        0x62
    };
    const size_t expected_len = sizeof(expected_compressed_data);
    char buf[64] = {0};

    /* Write the magic number */
    PUT_UINT32_LE(ARCHIVE_MAGICNUMBER, buf, 0);
    /* Compress directly behind the header */
    int ret = LZ4_compress_default(msg, buf + HEADER_SIZE, msg_len,
                                   (int)sizeof(buf) - HEADER_SIZE);
    if (ret <= 0) {
        printf("Failed to compresss data, ret=%d\n", ret);
        return -1;
    }
    /* Write the compressed length */
    PUT_UINT32_LE(ret, buf, 4);
    if ((size_t)ret + HEADER_SIZE != expected_len ||
            memcmp(buf, expected_compressed_data, expected_len) != 0) {
        printf("Compresss data is not expected\n");
        return -1;
    }
    hexdump("Compress data", (const uint8_t *)buf, (size_t)ret + HEADER_SIZE);

    /* Read and verify the magic number */
    const uint32_t magic = GET_UINT32_LE(expected_compressed_data, 0);
    if (magic != ARCHIVE_MAGICNUMBER) {
        printf("Invalid header\n");
        return -1;
    }
    /* Read the compressed length */
    const uint32_t chunksize = GET_UINT32_LE(expected_compressed_data, 4);
    /* Never hand the decoder a length that runs past the available data. */
    if (chunksize > expected_len - HEADER_SIZE) {
        printf("Invalid header\n");
        return -1;
    }
    /* Decompress */
    ret = LZ4_decompress_safe((const char *)expected_compressed_data + HEADER_SIZE, buf,
                              (int)chunksize, (int)sizeof(buf));
    if (ret <= 0) {
        printf("Failed to decompresss data, ret=%d\n", ret);
        return -1;
    }
    if (ret != msg_len || memcmp(buf, msg, (size_t)msg_len) != 0) {
        printf("Decompresss data is not expected\n");
        return -1;
    }
    printf("Deompress data: %.*s\n", ret, buf);

    return ret;
}

帧格式

LZ4帧格式压缩和解压的函数分别为:LZ4F_compressFrame 和 LZ4F_decompress。测试代码如下:

/**
 * Round-trip a short message through the LZ4 frame API.
 *
 * Compresses `msg` with LZ4F_compressFrame() (content checksum enabled) and
 * compares the output with a known-good frame, then decompresses that
 * reference frame with LZ4F_decompress() and checks that the original text
 * comes back.
 *
 * @return Number of decompressed bytes on success, -1 on any failure.
 */
int lz4_frame_compress_test(void)
{
    const char msg[] = "hello david, hello lily, hello tom, hello lucy, hello bob";
    const size_t msg_len = strlen(msg);

    /* Reference output in frame format */
    const uint8_t expected_compressed_data[] = {
        0x04, 0x22, 0x4d, 0x18, 0x64, 0x40, 0xa7, 0x29, 0x00, 0x00, 0x00, 0xd2, 0x68, 0x65, 0x6c, 0x6c,
        0x6f, 0x20, 0x64, 0x61, 0x76, 0x69, 0x64, 0x2c, 0x20, 0x0d, 0x00, 0x44, 0x6c, 0x69, 0x6c, 0x79,
        0x0c, 0x00, 0x34, 0x74, 0x6f, 0x6d, 0x0b, 0x00, 0x33, 0x6c, 0x75, 0x63, 0x17, 0x00, 0x50, 0x6f,
        0x20, 0x62, 0x6f, 0x62, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x04, 0x5b, 0x61
    };
    /*
     * Scratch buffer for both phases. LZ4F_compressFrame() rejects a
     * destination smaller than LZ4F_compressFrameBound(), and that bound is
     * larger than the final frame, so the buffer is sized generously and the
     * bound is verified at run time below.
     */
    char buf[256] = {0};

    /* Compress */
    LZ4F_preferences_t pref = LZ4F_INIT_PREFERENCES;
    pref.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; /* enable the content checksum */
    const size_t bound_size = LZ4F_compressFrameBound(msg_len, &pref);
    if (bound_size > sizeof(buf)) {
        printf("Compress buffer too small: need %zu bytes, have %zu\n", bound_size, sizeof(buf));
        return -1;
    }
    /* Pass the real capacity of buf, never more than it can hold. */
    const size_t compressed_len = LZ4F_compressFrame(buf, sizeof(buf), msg, msg_len, &pref);
    if (LZ4F_isError(compressed_len)) {
        printf("Failed to compresss data, err=%s\n", LZ4F_getErrorName(compressed_len));
        return -1;
    }
    if (compressed_len != sizeof(expected_compressed_data) ||
            memcmp(buf, expected_compressed_data, sizeof(expected_compressed_data)) != 0) {
        printf("Compresss data is not expected\n");
        return -1;
    }
    hexdump("Compress data", (const uint8_t *)buf, compressed_len);

    /* Decompress */
    LZ4F_dctx *dctx = NULL;
    size_t status = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION);
    if (LZ4F_isError(status)) {
        printf("Decompress context creation err=%s\n", LZ4F_getErrorName(status));
        return -1;
    }

    int result = -1;
    size_t in_len = sizeof(expected_compressed_data);
    size_t out_len = sizeof(buf);
    status = LZ4F_decompress(dctx, buf, &out_len, expected_compressed_data, &in_len, NULL);
    if (LZ4F_isError(status)) {
        printf("Failed to decompresss data, err=%s\n", LZ4F_getErrorName(status));
        goto exit;
    }
    /* A non-zero status is a hint that the frame is not fully decoded yet. */
    if (status != 0 || out_len != msg_len || memcmp(buf, msg, msg_len) != 0) {
        printf("Decompresss data is not expected\n");
        goto exit;
    }
    printf("Deompress data: %.*s\n", (int)out_len, buf);
    result = (int)out_len;

exit:
    LZ4F_freeDecompressionContext(dctx);
    return result;
}

完整代码

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "lz4.h"
#include "lz4frame.h"

/**
 * Print a labelled hex dump of a byte buffer to stdout, 16 bytes per row.
 *
 * @param name   Label printed in the banner line above the dump.
 * @param buffer Bytes to print; must be readable for `len` bytes.
 * @param len    Number of bytes to print.
 */
static void hexdump(const char *name, const uint8_t *buffer, size_t len)
{
    printf("****************%s****************\n", name);
    /* The index has the same type as len: a 16-bit counter would wrap and
     * loop forever once len exceeds 65535. */
    for (size_t i = 0; i < len; i++) {
        printf("%02x ", buffer[i]);
        if ((i + 1) % 16 == 0) {
            printf("\n");
        }
    }
    /* Terminate a partially filled last row. */
    if (len % 16 != 0) {
        printf("\n");
    }
}

/**
 * Round-trip a short message through the LZ4 block API.
 *
 * Compresses `msg` with LZ4_compress_default() and compares the output with a
 * known-good block, then decompresses that reference block with
 * LZ4_decompress_safe() and checks that the original text comes back.
 *
 * @return Number of decompressed bytes on success, -1 on any failure.
 */
int lz4_block_compress_test(void)
{
    const char msg[] = "hello david, hello lily, hello tom, hello lucy, hello bob";
    const int msg_len = (int)strlen(msg);

    /*
     * Reference output in raw block format. Stored as uint8_t because some
     * bytes (e.g. 0xd2) do not fit in a signed char.
     */
    const uint8_t expected_compressed_data[] = {
        0xd2, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x64, 0x61, 0x76, 0x69, 0x64, 0x2c, 0x20, 0x0d, 0x00,
        0x44, 0x6c, 0x69, 0x6c, 0x79, 0x0c, 0x00, 0x34, 0x74, 0x6f, 0x6d, 0x0b, 0x00, 0x33, 0x6c, 0x75,
        0x63, 0x17, 0x00, 0x50, 0x6f, 0x20, 0x62, 0x6f, 0x62
    };
    const int expected_len = (int)sizeof(expected_compressed_data);
    char buf[64] = {0};

    /* Compress */
    int ret = LZ4_compress_default(msg, buf, msg_len, (int)sizeof(buf));
    if (ret <= 0) {
        printf("Failed to compresss data, ret=%d\n", ret);
        return -1;
    }
    if (ret != expected_len ||
            memcmp(buf, expected_compressed_data, sizeof(expected_compressed_data)) != 0) {
        printf("Compresss data is not expected\n");
        return -1;
    }
    hexdump("Compress data", (const uint8_t *)buf, (size_t)ret);

    /* Decompress the reference block back into the same scratch buffer */
    ret = LZ4_decompress_safe((const char *)expected_compressed_data, buf,
                              expected_len, (int)sizeof(buf));
    if (ret <= 0) {
        printf("Failed to decompresss data, ret=%d\n", ret);
        return -1;
    }
    if (ret != msg_len || memcmp(buf, msg, (size_t)msg_len) != 0) {
        printf("Decompresss data is not expected\n");
        return -1;
    }
    printf("Deompress data: %.*s\n", ret, buf);

    return ret;
}


#ifndef PUT_UINT32_LE
/*
 * Store the low 32 bits of `n` into data[offset] .. data[offset + 3],
 * least-significant byte first.
 *
 * `n` is evaluated once and converted to uint32_t before shifting, so a
 * negative signed argument is handled with well-defined arithmetic.
 * `data` and `offset` are evaluated more than once; pass expressions without
 * side effects. The do/while(0) wrapper makes the macro a single statement,
 * safe in an unbraced if/else.
 */
#define PUT_UINT32_LE( n, data, offset )                                    \
do {                                                                        \
    const uint32_t put_uint32_le_value = (uint32_t) ( n );                  \
    ( data )[( offset )    ] = (uint8_t) ( put_uint32_le_value       );     \
    ( data )[( offset ) + 1] = (uint8_t) ( put_uint32_le_value >> 8  );     \
    ( data )[( offset ) + 2] = (uint8_t) ( put_uint32_le_value >> 16 );     \
    ( data )[( offset ) + 3] = (uint8_t) ( put_uint32_le_value >> 24 );     \
} while ( 0 )
#endif

#ifndef GET_UINT32_LE
/*
 * Read a 32-bit little-endian value from data[offset] .. data[offset + 3].
 *
 * Each element is narrowed to uint8_t before widening, so the result is
 * correct even when `data` is an array of (signed) char: a byte such as 0xff
 * must contribute 0xff, not a sign-extended 0xffffffff.
 * `data` and `offset` are evaluated more than once; pass expressions without
 * side effects.
 */
#define GET_UINT32_LE( data , offset )                                      \
    (                                                                       \
          ( (uint32_t) (uint8_t) ( data )[( offset )    ]       )           \
        | ( (uint32_t) (uint8_t) ( data )[( offset ) + 1] << 8  )           \
        | ( (uint32_t) (uint8_t) ( data )[( offset ) + 2] << 16 )           \
        | ( (uint32_t) (uint8_t) ( data )[( offset ) + 3] << 24 )           \
    )
#endif

/* Magic number written little-endian (bytes 02 21 4c 18) as the first 4 bytes of the kernel-format stream. */
#define ARCHIVE_MAGICNUMBER 0x184C2102

/**
 * Round-trip a short message through the block format with an 8-byte header.
 *
 * Layout: 4-byte little-endian magic (ARCHIVE_MAGICNUMBER), 4-byte
 * little-endian compressed length, then one raw LZ4 block.
 * The test builds that stream from `msg`, compares it with a known-good
 * stream, then parses the reference stream and decompresses its block.
 *
 * @return Number of decompressed bytes on success, -1 on any failure.
 */
int lz4_block_compress_kernel_format_test(void)
{
    enum { HEADER_SIZE = 8 }; /* 4-byte magic + 4-byte compressed length */

    const char msg[] = "hello david, hello lily, hello tom, hello lucy, hello bob";
    const int msg_len = (int)strlen(msg);

    /*
     * Reference stream (header + block). Stored as uint8_t so that bytes
     * above 0x7f are represented exactly and never sign-extend when read.
     */
    const uint8_t expected_compressed_data[] = {
        0x02, 0x21, 0x4c, 0x18, 0x29, 0x00, 0x00, 0x00, 0xd2, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x64,
        0x61, 0x76, 0x69, 0x64, 0x2c, 0x20, 0x0d, 0x00, 0x44, 0x6c, 0x69, 0x6c, 0x79, 0x0c, 0x00, 0x34,
        0x74, 0x6f, 0x6d, 0x0b, 0x00, 0x33, 0x6c, 0x75, 0x63, 0x17, 0x00, 0x50, 0x6f, 0x20, 0x62, 0x6f,
        0x62
    };
    const size_t expected_len = sizeof(expected_compressed_data);
    char buf[64] = {0};

    /* Write the magic number */
    PUT_UINT32_LE(ARCHIVE_MAGICNUMBER, buf, 0);
    /* Compress directly behind the header */
    int ret = LZ4_compress_default(msg, buf + HEADER_SIZE, msg_len,
                                   (int)sizeof(buf) - HEADER_SIZE);
    if (ret <= 0) {
        printf("Failed to compresss data, ret=%d\n", ret);
        return -1;
    }
    /* Write the compressed length */
    PUT_UINT32_LE(ret, buf, 4);
    if ((size_t)ret + HEADER_SIZE != expected_len ||
            memcmp(buf, expected_compressed_data, expected_len) != 0) {
        printf("Compresss data is not expected\n");
        return -1;
    }
    hexdump("Compress data", (const uint8_t *)buf, (size_t)ret + HEADER_SIZE);

    /* Read and verify the magic number */
    const uint32_t magic = GET_UINT32_LE(expected_compressed_data, 0);
    if (magic != ARCHIVE_MAGICNUMBER) {
        printf("Invalid header\n");
        return -1;
    }
    /* Read the compressed length */
    const uint32_t chunksize = GET_UINT32_LE(expected_compressed_data, 4);
    /* Never hand the decoder a length that runs past the available data. */
    if (chunksize > expected_len - HEADER_SIZE) {
        printf("Invalid header\n");
        return -1;
    }
    /* Decompress */
    ret = LZ4_decompress_safe((const char *)expected_compressed_data + HEADER_SIZE, buf,
                              (int)chunksize, (int)sizeof(buf));
    if (ret <= 0) {
        printf("Failed to decompresss data, ret=%d\n", ret);
        return -1;
    }
    if (ret != msg_len || memcmp(buf, msg, (size_t)msg_len) != 0) {
        printf("Decompresss data is not expected\n");
        return -1;
    }
    printf("Deompress data: %.*s\n", ret, buf);

    return ret;
}


/**
 * Round-trip a short message through the LZ4 frame API.
 *
 * Compresses `msg` with LZ4F_compressFrame() (content checksum enabled) and
 * compares the output with a known-good frame, then decompresses that
 * reference frame with LZ4F_decompress() and checks that the original text
 * comes back.
 *
 * @return Number of decompressed bytes on success, -1 on any failure.
 */
int lz4_frame_compress_test(void)
{
    const char msg[] = "hello david, hello lily, hello tom, hello lucy, hello bob";
    const size_t msg_len = strlen(msg);

    /* Reference output in frame format */
    const uint8_t expected_compressed_data[] = {
        0x04, 0x22, 0x4d, 0x18, 0x64, 0x40, 0xa7, 0x29, 0x00, 0x00, 0x00, 0xd2, 0x68, 0x65, 0x6c, 0x6c,
        0x6f, 0x20, 0x64, 0x61, 0x76, 0x69, 0x64, 0x2c, 0x20, 0x0d, 0x00, 0x44, 0x6c, 0x69, 0x6c, 0x79,
        0x0c, 0x00, 0x34, 0x74, 0x6f, 0x6d, 0x0b, 0x00, 0x33, 0x6c, 0x75, 0x63, 0x17, 0x00, 0x50, 0x6f,
        0x20, 0x62, 0x6f, 0x62, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x04, 0x5b, 0x61
    };
    /*
     * Scratch buffer for both phases. LZ4F_compressFrame() rejects a
     * destination smaller than LZ4F_compressFrameBound(), and that bound is
     * larger than the final frame, so the buffer is sized generously and the
     * bound is verified at run time below.
     */
    char buf[256] = {0};

    /* Compress */
    LZ4F_preferences_t pref = LZ4F_INIT_PREFERENCES;
    pref.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; /* enable the content checksum */
    const size_t bound_size = LZ4F_compressFrameBound(msg_len, &pref);
    if (bound_size > sizeof(buf)) {
        printf("Compress buffer too small: need %zu bytes, have %zu\n", bound_size, sizeof(buf));
        return -1;
    }
    /* Pass the real capacity of buf, never more than it can hold. */
    const size_t compressed_len = LZ4F_compressFrame(buf, sizeof(buf), msg, msg_len, &pref);
    if (LZ4F_isError(compressed_len)) {
        printf("Failed to compresss data, err=%s\n", LZ4F_getErrorName(compressed_len));
        return -1;
    }
    if (compressed_len != sizeof(expected_compressed_data) ||
            memcmp(buf, expected_compressed_data, sizeof(expected_compressed_data)) != 0) {
        printf("Compresss data is not expected\n");
        return -1;
    }
    hexdump("Compress data", (const uint8_t *)buf, compressed_len);

    /* Decompress */
    LZ4F_dctx *dctx = NULL;
    size_t status = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION);
    if (LZ4F_isError(status)) {
        printf("Decompress context creation err=%s\n", LZ4F_getErrorName(status));
        return -1;
    }

    int result = -1;
    size_t in_len = sizeof(expected_compressed_data);
    size_t out_len = sizeof(buf);
    status = LZ4F_decompress(dctx, buf, &out_len, expected_compressed_data, &in_len, NULL);
    if (LZ4F_isError(status)) {
        printf("Failed to decompresss data, err=%s\n", LZ4F_getErrorName(status));
        goto exit;
    }
    /* A non-zero status is a hint that the frame is not fully decoded yet. */
    if (status != 0 || out_len != msg_len || memcmp(buf, msg, msg_len) != 0) {
        printf("Decompresss data is not expected\n");
        goto exit;
    }
    printf("Deompress data: %.*s\n", (int)out_len, buf);
    result = (int)out_len;

exit:
    LZ4F_freeDecompressionContext(dctx);
    return result;
}

/**
 * Run the three LZ4 round-trip tests in sequence.
 *
 * Every test is executed even if an earlier one fails. Each test reports
 * failure with a negative return value.
 *
 * @param argc Unused.
 * @param argv Unused.
 * @return 0 when all tests pass, 1 when at least one fails.
 */
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    int failures = 0;

    /* Block format compress/decompress test */
    if (lz4_block_compress_test() < 0) {
        failures++;
    }
    /* Block format with the Linux-kernel style header */
    if (lz4_block_compress_kernel_format_test() < 0) {
        failures++;
    }
    /* Frame format compress/decompress test */
    if (lz4_frame_compress_test() < 0) {
        failures++;
    }

    /* Expose failures through the exit status so scripts can detect them. */
    return failures == 0 ? 0 : 1;
}

编译,并运行测试:

$ gcc -g main.c lz4.c lz4frame.c lz4hc.c xxhash.c -o main
$ ./main

各种压缩算法对比

以kernel采用的各种解压算法为例,原始镜像Image大小为6M左右,在开发板上,CPU速率为1GHz,实际测试的数据如下表。

| 算法 | 压缩率 | 解压时间 |
| ---- | ------ | -------- |
| lz4  | 64.68% | 28ms     |
| gzip | 55.73% | 155ms    |
| lzma | 44.33% | 1722ms   |
| xz   | 44.16% | 1034ms   |
| lzo  | 60.98% | 48ms     |

可以看出,lz4算法在压缩率和解压时间综合表现较好。

欢迎关注“安全有理”微信公众号。

安全有理

  • 4
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值