LZ4下载编译
LZ4 是无损压缩算法,提供每个核 大于 500 MB/s 的压缩速度,可通过多核 CPU 进行扩展。LZ4算法解压速度极快,单核解压速度达到GB/s,通常达到多核系统的 RAM 速度极限。
$ git clone https://github.com/lz4/lz4.git
$ cd lz4
$ make
$ make install
lz4算法的实现位于lib目录下:lz4.c实现最基本的块格式压缩和解压,lz4hc.c实现高压缩率的块格式压缩和解压,lz4frame.c实现帧格式压缩和解压,lz4file.c为方便使用封装了文件的压缩和解压操作,xxhash.c是LZ4采用的哈希算法。
$ tree lib
lib
├── lz4.c
├── lz4.h
├── lz4file.c
├── lz4file.h
├── lz4frame.c
├── lz4frame.h
├── lz4frame_static.h
├── lz4hc.c
├── lz4hc.h
├── xxhash.c
├── xxhash.h
lz4命令行使用如下:
$ lz4 -h
*** LZ4 command line interface 64-bits v1.9.2, by Yann Collet ***
Usage :
lz4 [arg] [input] [output]
input : a filename
with no FILE, or when FILE is - or stdin, read standard input
Arguments :
-1 : Fast compression (default)
-9 : High compression
-d : decompression (default for .lz4 extension)
-z : force compression
-D FILE: use FILE as dictionary
-f : overwrite output without prompting
-k : preserve source files(s) (default)
--rm : remove source file(s) after successful de/compression
-h/-H : display help/long help and exit
Advanced arguments :
-V : display Version number and exit
-v : verbose mode
-q : suppress warnings; specify twice to suppress errors too
-c : force write to standard output, even if it is the console
-t : test compressed file integrity
-m : multiple input files (implies automatic output filenames)
-r : operate recursively on directories (sets also -m)
-l : compress using Legacy format (Linux kernel compression)
-B# : cut file into blocks of size # bytes [32+]
or predefined block size [4-7] (default: 7)
-BI : Block Independence (default)
-BD : Block dependency (improves compression ratio)
-BX : enable block checksum (default:disabled)
--no-frame-crc : disable stream checksum (default:enabled)
--content-size : compressed frame includes original size (default:not present)
--list FILE : lists information about .lz4 files (useful for files compressed with --content-size flag)
--[no-]sparse : sparse mode (default:enabled on file, disabled on stdout)
--favor-decSpeed: compressed files decompress faster, but are less compressed
--fast[=#]: switch to ultra fast compression level (default: 1)
--best : same as -12
Benchmark arguments :
-b# : benchmark file(s), using # compression level (default : 1)
-e# : test all compression levels from -bX to # (default : 1)
-i# : minimum evaluation time in seconds (default : 3s)
使用示例
块格式
LZ4块格式压缩和解压的函数分别为LZ4_compress_default和LZ4_decompress_safe。测试代码如下:
/*
 * Round-trip test for the raw LZ4 block format.
 *
 * Compresses a fixed message with LZ4_compress_default() and compares the
 * output with a known-good block, then decompresses that block with
 * LZ4_decompress_safe() and checks that the original message comes back.
 *
 * Returns the decompressed length (> 0) on success, -1 on any failure.
 */
int lz4_block_compress_test(void)
{
    const char msg[] = "hello david, hello lily, hello tom, hello lucy, hello bob";
    const int msg_len = (int)strlen(msg);
    /*
     * Reference data in block format. Kept as uint8_t because bytes such as
     * 0xd2 do not fit in a signed char.
     */
    const uint8_t expected_compressed_data[] = {
        0xd2, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x64, 0x61, 0x76, 0x69, 0x64, 0x2c, 0x20, 0x0d, 0x00,
        0x44, 0x6c, 0x69, 0x6c, 0x79, 0x0c, 0x00, 0x34, 0x74, 0x6f, 0x6d, 0x0b, 0x00, 0x33, 0x6c, 0x75,
        0x63, 0x17, 0x00, 0x50, 0x6f, 0x20, 0x62, 0x6f, 0x62
    };
    const int expected_len = (int)sizeof(expected_compressed_data);
    char buf[64] = {0};
    int ret;

    /* Compress */
    ret = LZ4_compress_default(msg, buf, msg_len, (int)sizeof(buf));
    if (ret <= 0) {
        printf("Failed to compresss data, ret=%d\n", ret);
        return -1;
    }
    if (ret != expected_len ||
        memcmp(buf, expected_compressed_data, sizeof(expected_compressed_data)) != 0) {
        printf("Compresss data is not expected\n");
        return -1;
    }
    hexdump("Compress data", (const uint8_t *)buf, (size_t)ret);

    /* Decompress the reference block; buf is reused as the output buffer */
    ret = LZ4_decompress_safe((const char *)expected_compressed_data, buf,
                              expected_len, (int)sizeof(buf));
    if (ret <= 0) {
        printf("Failed to decompresss data, ret=%d\n", ret);
        return -1;
    }
    if (ret != msg_len || memcmp(buf, msg, (size_t)msg_len) != 0) {
        printf("Decompresss data is not expected\n");
        return -1;
    }
    printf("Deompress data: %.*s\n", ret, buf);
    return ret;
}
Linux kernel块格式
由于下一节介绍的帧格式解压需要依赖动态内存分配,因此linux kernel的自解压代码采用的是块格式,只是在压缩数据最前面添加了4字节的魔法数0x184C2102和4字节的压缩后数据长度(均以小端方式存储)。
$ echo -n "hello david, hello lily, hello tom, hello lucy, hello bob" > hello
# 注意使用-l参数,Linux kernel compression
$ lz4 -l -f -v hello
$ hexdump -C hello.lz4
00000000 02 21 4c 18 29 00 00 00 d2 68 65 6c 6c 6f 20 64 |.!L.)....hello d|
00000010 61 76 69 64 2c 20 0d 00 44 6c 69 6c 79 0c 00 34 |avid, ..Dlily..4|
00000020 74 6f 6d 0b 00 33 6c 75 63 17 00 50 6f 20 62 6f |tom..3luc..Po bo|
00000030 62 |b|
00000031
测试代码如下:
/*
 * Round-trip test for the block format with the Linux-kernel style header:
 * a 4-byte little-endian magic number followed by a 4-byte little-endian
 * compressed length, then the raw LZ4 block.
 *
 * Builds header + block for a fixed message and compares it with known-good
 * data, then parses the header of that data and decompresses the block.
 *
 * Returns the decompressed length (> 0) on success, -1 on any failure.
 */
int lz4_block_compress_kernel_format_test(void)
{
    enum { HEADER_SIZE = 8 };  /* magic (4 bytes) + compressed length (4 bytes) */
    const char msg[] = "hello david, hello lily, hello tom, hello lucy, hello bob";
    const int msg_len = (int)strlen(msg);
    /*
     * Reference data in block format (linux kernel format). Kept as uint8_t
     * because bytes such as 0xd2 do not fit in a signed char.
     */
    const uint8_t expected_compressed_data[] = {
        0x02, 0x21, 0x4c, 0x18, 0x29, 0x00, 0x00, 0x00, 0xd2, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x64,
        0x61, 0x76, 0x69, 0x64, 0x2c, 0x20, 0x0d, 0x00, 0x44, 0x6c, 0x69, 0x6c, 0x79, 0x0c, 0x00, 0x34,
        0x74, 0x6f, 0x6d, 0x0b, 0x00, 0x33, 0x6c, 0x75, 0x63, 0x17, 0x00, 0x50, 0x6f, 0x20, 0x62, 0x6f,
        0x62
    };
    char buf[64] = {0};
    uint32_t magic;
    uint32_t chunksize;
    int ret;

    /* Write the magic number */
    PUT_UINT32_LE(ARCHIVE_MAGICNUMBER, buf, 0);

    /* Compress right behind the header */
    ret = LZ4_compress_default(msg, buf + HEADER_SIZE, msg_len,
                               (int)sizeof(buf) - HEADER_SIZE);
    if (ret <= 0) {
        printf("Failed to compresss data, ret=%d\n", ret);
        return -1;
    }

    /* Write the compressed length */
    PUT_UINT32_LE(ret, buf, 4);
    if ((size_t)ret + HEADER_SIZE != sizeof(expected_compressed_data) ||
        memcmp(buf, expected_compressed_data, sizeof(expected_compressed_data)) != 0) {
        printf("Compresss data is not expected\n");
        return -1;
    }
    hexdump("Compress data", (const uint8_t *)buf, (size_t)ret + HEADER_SIZE);

    /* Read and verify the magic number */
    magic = GET_UINT32_LE(expected_compressed_data, 0);
    if (magic != ARCHIVE_MAGICNUMBER) {
        printf("Invalid header\n");
        return -1;
    }

    /* Read the compressed length and make sure it stays inside the input */
    chunksize = GET_UINT32_LE(expected_compressed_data, 4);
    if (chunksize > sizeof(expected_compressed_data) - HEADER_SIZE) {
        printf("Invalid chunk size %u\n", (unsigned)chunksize);
        return -1;
    }

    /* Decompress; buf is reused as the output buffer */
    ret = LZ4_decompress_safe((const char *)expected_compressed_data + HEADER_SIZE, buf,
                              (int)chunksize, (int)sizeof(buf));
    if (ret <= 0) {
        printf("Failed to decompresss data, ret=%d\n", ret);
        return -1;
    }
    if (ret != msg_len || memcmp(buf, msg, (size_t)msg_len) != 0) {
        printf("Decompresss data is not expected\n");
        return -1;
    }
    printf("Deompress data: %.*s\n", ret, buf);
    return ret;
}
帧格式
LZ4帧格式压缩和解压的函数分别为LZ4F_compressFrame和LZ4F_decompress。测试代码如下:
/*
 * Round-trip test for the LZ4 frame format.
 *
 * Compresses a fixed message with LZ4F_compressFrame() (content checksum
 * enabled) and compares the output with a known-good frame, then decodes
 * that frame with LZ4F_decompress() and checks that the original message
 * comes back.
 *
 * Returns 0 on success, -1 on any failure.
 */
int lz4_frame_compress_test(void)
{
    const char msg[] = "hello david, hello lily, hello tom, hello lucy, hello bob";
    const size_t msg_len = strlen(msg);
    /*
     * Reference data in frame format. Kept as uint8_t because bytes such as
     * 0xa7 and 0xd2 do not fit in a signed char.
     */
    const uint8_t expected_compressed_data[] = {
        0x04, 0x22, 0x4d, 0x18, 0x64, 0x40, 0xa7, 0x29, 0x00, 0x00, 0x00, 0xd2, 0x68, 0x65, 0x6c, 0x6c,
        0x6f, 0x20, 0x64, 0x61, 0x76, 0x69, 0x64, 0x2c, 0x20, 0x0d, 0x00, 0x44, 0x6c, 0x69, 0x6c, 0x79,
        0x0c, 0x00, 0x34, 0x74, 0x6f, 0x6d, 0x0b, 0x00, 0x33, 0x6c, 0x75, 0x63, 0x17, 0x00, 0x50, 0x6f,
        0x20, 0x62, 0x6f, 0x62, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x04, 0x5b, 0x61
    };
    /*
     * LZ4F_compressFrame() requires a destination of at least
     * LZ4F_compressFrameBound() bytes, which is larger than the frame that
     * is actually produced, so the compression buffer is sized generously.
     */
    char compressed[128] = {0};
    char decompressed[64] = {0};
    LZ4F_dctx *dctx = NULL;
    size_t code;
    int result = -1;

    /* Compress */
    LZ4F_preferences_t pref = LZ4F_INIT_PREFERENCES;
    pref.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; /* enable the content checksum (C.Checksum) */
    size_t bound_size = LZ4F_compressFrameBound(msg_len, &pref);
    if (bound_size > sizeof(compressed)) {
        printf("Compress buffer too small, need %zu bytes\n", bound_size);
        return -1;
    }
    code = LZ4F_compressFrame(compressed, sizeof(compressed), msg, msg_len, &pref);
    if (LZ4F_isError(code)) {
        printf("Failed to compresss data, err=%s\n", LZ4F_getErrorName(code));
        return -1;
    }
    if (code != sizeof(expected_compressed_data) ||
        memcmp(compressed, expected_compressed_data, sizeof(expected_compressed_data)) != 0) {
        printf("Compresss data is not expected\n");
        return -1;
    }
    hexdump("Compress data", (const uint8_t *)compressed, code);

    /* Decompress */
    size_t in_len = sizeof(expected_compressed_data);
    size_t out_len = sizeof(decompressed);
    code = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION);
    if (LZ4F_isError(code)) {
        printf("Decompress context creation err=%s\n", LZ4F_getErrorName(code));
        return -1;
    }
    code = LZ4F_decompress(dctx, decompressed, &out_len,
                           expected_compressed_data, &in_len, NULL);
    if (LZ4F_isError(code)) {
        printf("Failed to decompresss data, err=%s\n", LZ4F_getErrorName(code));
        goto out;
    }
    /* A non-zero return is a hint that more input is expected: frame not finished */
    if (code != 0) {
        printf("Failed to decompresss data, frame is incomplete\n");
        goto out;
    }
    if (out_len != msg_len || memcmp(decompressed, msg, msg_len) != 0) {
        printf("Decompresss data is not expected\n");
        goto out;
    }
    printf("Deompress data: %.*s\n", (int)out_len, decompressed);
    result = 0;

out:
    LZ4F_freeDecompressionContext(dctx);
    return result;
}
完整代码
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "lz4.h"
#include "lz4frame.h"
/*
 * Print a titled hex dump of a memory region to stdout.
 *
 * name:   title printed in the banner line
 * buffer: start of the region to dump (any object pointer)
 * len:    number of bytes to dump
 *
 * Bytes are printed as two lowercase hex digits followed by a space,
 * 16 bytes per line.
 */
static void hexdump(const char *name, const void *buffer, size_t len)
{
    const uint8_t *bytes = buffer;

    printf("****************%s****************\n", name);
    /* The index must be size_t: a 16-bit index wraps and never reaches len > 65535 */
    for (size_t i = 0; i < len; i++) {
        printf("%02x ", bytes[i]);
        if ((i + 1) % 16 == 0) {
            printf("\n");
        }
    }
    /* Terminate the last line when it is only partially filled */
    if (len % 16) {
        printf("\n");
    }
}
/*
 * Round-trip test for the raw LZ4 block format.
 *
 * Compresses a fixed message with LZ4_compress_default() and compares the
 * output with a known-good block, then decompresses that block with
 * LZ4_decompress_safe() and checks that the original message comes back.
 *
 * Returns the decompressed length (> 0) on success, -1 on any failure.
 */
int lz4_block_compress_test(void)
{
    const char msg[] = "hello david, hello lily, hello tom, hello lucy, hello bob";
    const int msg_len = (int)strlen(msg);
    /*
     * Reference data in block format. Kept as uint8_t because bytes such as
     * 0xd2 do not fit in a signed char.
     */
    const uint8_t expected_compressed_data[] = {
        0xd2, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x64, 0x61, 0x76, 0x69, 0x64, 0x2c, 0x20, 0x0d, 0x00,
        0x44, 0x6c, 0x69, 0x6c, 0x79, 0x0c, 0x00, 0x34, 0x74, 0x6f, 0x6d, 0x0b, 0x00, 0x33, 0x6c, 0x75,
        0x63, 0x17, 0x00, 0x50, 0x6f, 0x20, 0x62, 0x6f, 0x62
    };
    const int expected_len = (int)sizeof(expected_compressed_data);
    char buf[64] = {0};
    int ret;

    /* Compress */
    ret = LZ4_compress_default(msg, buf, msg_len, (int)sizeof(buf));
    if (ret <= 0) {
        printf("Failed to compresss data, ret=%d\n", ret);
        return -1;
    }
    if (ret != expected_len ||
        memcmp(buf, expected_compressed_data, sizeof(expected_compressed_data)) != 0) {
        printf("Compresss data is not expected\n");
        return -1;
    }
    hexdump("Compress data", (const uint8_t *)buf, (size_t)ret);

    /* Decompress the reference block; buf is reused as the output buffer */
    ret = LZ4_decompress_safe((const char *)expected_compressed_data, buf,
                              expected_len, (int)sizeof(buf));
    if (ret <= 0) {
        printf("Failed to decompresss data, ret=%d\n", ret);
        return -1;
    }
    if (ret != msg_len || memcmp(buf, msg, (size_t)msg_len) != 0) {
        printf("Decompresss data is not expected\n");
        return -1;
    }
    printf("Deompress data: %.*s\n", ret, buf);
    return ret;
}
#ifndef PUT_UINT32_LE
/*
 * Store the 32-bit value n into data[offset .. offset + 3] in little-endian
 * byte order (least significant byte first).
 *
 * n is evaluated exactly once; data and offset are evaluated several times,
 * so they must not have side effects. The do/while(0) wrapper makes the
 * macro behave as a single statement, e.g. in an unbraced if/else.
 */
#define PUT_UINT32_LE( n, data, offset )                                         \
    do {                                                                         \
        const uint32_t put_uint32_le_value = (uint32_t) ( n );                   \
        ( data )[( offset )    ] = (uint8_t) ( put_uint32_le_value       );      \
        ( data )[( offset ) + 1] = (uint8_t) ( put_uint32_le_value >>  8 );      \
        ( data )[( offset ) + 2] = (uint8_t) ( put_uint32_le_value >> 16 );      \
        ( data )[( offset ) + 3] = (uint8_t) ( put_uint32_le_value >> 24 );      \
    } while ( 0 )
#endif
#ifndef GET_UINT32_LE
/*
 * Read a 32-bit little-endian value from data[offset .. offset + 3].
 *
 * Each byte is narrowed to uint8_t before widening, so bytes >= 0x80 read
 * through a (possibly signed) char pointer are not sign-extended into the
 * upper bits of the result. data and offset are evaluated several times
 * and must not have side effects.
 */
#define GET_UINT32_LE( data , offset )                                     \
    (                                                                      \
          ( (uint32_t) (uint8_t) ( data )[( offset )    ]       )          \
        | ( (uint32_t) (uint8_t) ( data )[( offset ) + 1] <<  8 )          \
        | ( (uint32_t) (uint8_t) ( data )[( offset ) + 2] << 16 )          \
        | ( (uint32_t) (uint8_t) ( data )[( offset ) + 3] << 24 )          \
    )
#endif
/*
 * Magic number written (little-endian, i.e. bytes 02 21 4c 18) in front of
 * the compressed data in the Linux-kernel style LZ4 format.
 */
#define ARCHIVE_MAGICNUMBER 0x184C2102
/*
 * Round-trip test for the block format with the Linux-kernel style header:
 * a 4-byte little-endian magic number followed by a 4-byte little-endian
 * compressed length, then the raw LZ4 block.
 *
 * Builds header + block for a fixed message and compares it with known-good
 * data, then parses the header of that data and decompresses the block.
 *
 * Returns the decompressed length (> 0) on success, -1 on any failure.
 */
int lz4_block_compress_kernel_format_test(void)
{
    enum { HEADER_SIZE = 8 };  /* magic (4 bytes) + compressed length (4 bytes) */
    const char msg[] = "hello david, hello lily, hello tom, hello lucy, hello bob";
    const int msg_len = (int)strlen(msg);
    /*
     * Reference data in block format (linux kernel format). Kept as uint8_t
     * because bytes such as 0xd2 do not fit in a signed char.
     */
    const uint8_t expected_compressed_data[] = {
        0x02, 0x21, 0x4c, 0x18, 0x29, 0x00, 0x00, 0x00, 0xd2, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x64,
        0x61, 0x76, 0x69, 0x64, 0x2c, 0x20, 0x0d, 0x00, 0x44, 0x6c, 0x69, 0x6c, 0x79, 0x0c, 0x00, 0x34,
        0x74, 0x6f, 0x6d, 0x0b, 0x00, 0x33, 0x6c, 0x75, 0x63, 0x17, 0x00, 0x50, 0x6f, 0x20, 0x62, 0x6f,
        0x62
    };
    char buf[64] = {0};
    uint32_t magic;
    uint32_t chunksize;
    int ret;

    /* Write the magic number */
    PUT_UINT32_LE(ARCHIVE_MAGICNUMBER, buf, 0);

    /* Compress right behind the header */
    ret = LZ4_compress_default(msg, buf + HEADER_SIZE, msg_len,
                               (int)sizeof(buf) - HEADER_SIZE);
    if (ret <= 0) {
        printf("Failed to compresss data, ret=%d\n", ret);
        return -1;
    }

    /* Write the compressed length */
    PUT_UINT32_LE(ret, buf, 4);
    if ((size_t)ret + HEADER_SIZE != sizeof(expected_compressed_data) ||
        memcmp(buf, expected_compressed_data, sizeof(expected_compressed_data)) != 0) {
        printf("Compresss data is not expected\n");
        return -1;
    }
    hexdump("Compress data", (const uint8_t *)buf, (size_t)ret + HEADER_SIZE);

    /* Read and verify the magic number */
    magic = GET_UINT32_LE(expected_compressed_data, 0);
    if (magic != ARCHIVE_MAGICNUMBER) {
        printf("Invalid header\n");
        return -1;
    }

    /* Read the compressed length and make sure it stays inside the input */
    chunksize = GET_UINT32_LE(expected_compressed_data, 4);
    if (chunksize > sizeof(expected_compressed_data) - HEADER_SIZE) {
        printf("Invalid chunk size %u\n", (unsigned)chunksize);
        return -1;
    }

    /* Decompress; buf is reused as the output buffer */
    ret = LZ4_decompress_safe((const char *)expected_compressed_data + HEADER_SIZE, buf,
                              (int)chunksize, (int)sizeof(buf));
    if (ret <= 0) {
        printf("Failed to decompresss data, ret=%d\n", ret);
        return -1;
    }
    if (ret != msg_len || memcmp(buf, msg, (size_t)msg_len) != 0) {
        printf("Decompresss data is not expected\n");
        return -1;
    }
    printf("Deompress data: %.*s\n", ret, buf);
    return ret;
}
/*
 * Round-trip test for the LZ4 frame format.
 *
 * Compresses a fixed message with LZ4F_compressFrame() (content checksum
 * enabled) and compares the output with a known-good frame, then decodes
 * that frame with LZ4F_decompress() and checks that the original message
 * comes back.
 *
 * Returns 0 on success, -1 on any failure.
 */
int lz4_frame_compress_test(void)
{
    const char msg[] = "hello david, hello lily, hello tom, hello lucy, hello bob";
    const size_t msg_len = strlen(msg);
    /*
     * Reference data in frame format. Kept as uint8_t because bytes such as
     * 0xa7 and 0xd2 do not fit in a signed char.
     */
    const uint8_t expected_compressed_data[] = {
        0x04, 0x22, 0x4d, 0x18, 0x64, 0x40, 0xa7, 0x29, 0x00, 0x00, 0x00, 0xd2, 0x68, 0x65, 0x6c, 0x6c,
        0x6f, 0x20, 0x64, 0x61, 0x76, 0x69, 0x64, 0x2c, 0x20, 0x0d, 0x00, 0x44, 0x6c, 0x69, 0x6c, 0x79,
        0x0c, 0x00, 0x34, 0x74, 0x6f, 0x6d, 0x0b, 0x00, 0x33, 0x6c, 0x75, 0x63, 0x17, 0x00, 0x50, 0x6f,
        0x20, 0x62, 0x6f, 0x62, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x04, 0x5b, 0x61
    };
    /*
     * LZ4F_compressFrame() requires a destination of at least
     * LZ4F_compressFrameBound() bytes, which is larger than the frame that
     * is actually produced, so the compression buffer is sized generously.
     */
    char compressed[128] = {0};
    char decompressed[64] = {0};
    LZ4F_dctx *dctx = NULL;
    size_t code;
    int result = -1;

    /* Compress */
    LZ4F_preferences_t pref = LZ4F_INIT_PREFERENCES;
    pref.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; /* enable the content checksum */
    size_t bound_size = LZ4F_compressFrameBound(msg_len, &pref);
    if (bound_size > sizeof(compressed)) {
        printf("Compress buffer too small, need %zu bytes\n", bound_size);
        return -1;
    }
    code = LZ4F_compressFrame(compressed, sizeof(compressed), msg, msg_len, &pref);
    if (LZ4F_isError(code)) {
        printf("Failed to compresss data, err=%s\n", LZ4F_getErrorName(code));
        return -1;
    }
    if (code != sizeof(expected_compressed_data) ||
        memcmp(compressed, expected_compressed_data, sizeof(expected_compressed_data)) != 0) {
        printf("Compresss data is not expected\n");
        return -1;
    }
    hexdump("Compress data", (const uint8_t *)compressed, code);

    /* Decompress */
    size_t in_len = sizeof(expected_compressed_data);
    size_t out_len = sizeof(decompressed);
    code = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION);
    if (LZ4F_isError(code)) {
        printf("Decompress context creation err=%s\n", LZ4F_getErrorName(code));
        return -1;
    }
    code = LZ4F_decompress(dctx, decompressed, &out_len,
                           expected_compressed_data, &in_len, NULL);
    if (LZ4F_isError(code)) {
        printf("Failed to decompresss data, err=%s\n", LZ4F_getErrorName(code));
        goto out;
    }
    /* A non-zero return is a hint that more input is expected: frame not finished */
    if (code != 0) {
        printf("Failed to decompresss data, frame is incomplete\n");
        goto out;
    }
    if (out_len != msg_len || memcmp(decompressed, msg, msg_len) != 0) {
        printf("Decompresss data is not expected\n");
        goto out;
    }
    printf("Deompress data: %.*s\n", (int)out_len, decompressed);
    result = 0;

out:
    LZ4F_freeDecompressionContext(dctx);
    return result;
}
/*
 * Run every LZ4 test in turn and report the overall result through the
 * process exit status: 0 when all tests pass, 1 when at least one fails.
 * Each test signals failure with a negative return value.
 */
int main(int argc, char* argv[])
{
    int failures = 0;

    (void)argc;
    (void)argv;

    /* Block format compression/decompression test */
    if (lz4_block_compress_test() < 0) {
        failures++;
    }
    /* Block compression/decompression test in linux kernel form */
    if (lz4_block_compress_kernel_format_test() < 0) {
        failures++;
    }
    /* Frame format compression/decompression test */
    if (lz4_frame_compress_test() < 0) {
        failures++;
    }

    return failures ? 1 : 0;
}
编译,并运行测试:
$ gcc -g main.c lz4.c lz4frame.c lz4hc.c xxhash.c -o main
$ ./main
各种压缩算法对比
以kernel采用的各种解压算法为例,原始镜像Image大小为6M左右,在开发板上,CPU速率为1GHz,实际测试的数据如下表。
算法 | 压缩率 | 解压时间 |
---|---|---|
lz4 | 64.68% | 28ms |
gzip | 55.73% | 155ms |
lzma | 44.33% | 1722ms |
xz | 44.16% | 1034ms |
lzo | 60.98% | 48ms |
可以看出,lz4算法在压缩率和解压时间综合表现较好。