zlib源码分析—compress函数

本篇博客的目的是分析compress.c中的函数,主要目的是学习deflate函数的使用。先从compress函数说起,compress函数其实就是以压缩级别Z_DEFAULT_COMPRESSION调用函数compress2。从zlib.h中第190行的宏定义可以看出zlib支持的压缩级别分为Z_NO_COMPRESSION(不压缩模式)、Z_BEST_SPEED(最快速度压缩模式)、Z_BEST_COMPRESSION(最高压缩率压缩模式)、Z_DEFAULT_COMPRESSION(默认压缩模式)。

/*
 * Convenience wrapper: forwards every argument unchanged to compress2() with
 * the compression level fixed to Z_DEFAULT_COMPRESSION, and returns whatever
 * compress2() returns.
 */
int ZEXPORT compress (Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen)
{
    return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION);
}

// zlib.h, line 190
#define Z_NO_COMPRESSION         0
#define Z_BEST_SPEED             1
#define Z_BEST_COMPRESSION       9
#define Z_DEFAULT_COMPRESSION  (-1)
/* compression levels */

compress2函数将source缓冲区中的数据压缩存储到dest缓冲区,sourceLen是源缓冲区的长度(以字节为单位),destLen是目的缓冲区的大小(注意这里是指针,也就是调用时需要使用地址输入),需要比sourceLen大至少0.1%加上12字节,以应对压缩最坏情况的发生。level参数和deflateInit函数的level参数相同,用于表示压缩级别。
如果函数调用成功,返回Z_OK;如果没有足够内存,返回Z_MEM_ERROR;如果输出缓冲区中没有足够空间,返回Z_BUF_ERROR;如果level参数非法,返回Z_STREAM_ERROR。

/*
 * Compress the sourceLen bytes at `source` into `dest` using compression
 * level `level`.
 *
 * On entry *destLen is the capacity of `dest` in bytes; on return it holds
 * the number of bytes produced (stream.total_out).
 *
 * Returns the deflateInit() error code if initialisation fails, Z_OK when
 * deflate() finishes with Z_STREAM_END, and otherwise the error code
 * returned by the last deflate() call.
 */
int ZEXPORT compress2 (Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen, int level)
{
    z_stream stream;
    int err;
    const uInt max = (uInt)-1;  /* largest chunk that fits avail_in/avail_out */
    uLong left;

    left = *destLen;  /* remember the capacity supplied by the caller */
    *destLen = 0;     /* will report the compressed size on return */

    /* Zero zalloc/zfree/opaque: deflateInit() then installs its default
     * allocator callbacks. */
    stream.zalloc = (alloc_func)0;
    stream.zfree = (free_func)0;
    stream.opaque = (voidpf)0;

    err = deflateInit(&stream, level);
    if (err != Z_OK) return err;

    stream.next_out = dest;
    stream.avail_out = 0;
    stream.next_in = (z_const Bytef *)source;
    stream.avail_in = 0;

    /* uLong lengths may exceed what a uInt can hold, so input and output
     * are handed to deflate() in chunks of at most `max` bytes. */
    do {
        if (stream.avail_out == 0) {  /* output chunk used up: offer the next one */
            stream.avail_out = left > (uLong)max ? max : (uInt)left;
            left -= stream.avail_out;
        }
        if (stream.avail_in == 0) {   /* input chunk consumed: offer the next one */
            stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen;
            sourceLen -= stream.avail_in;
        }
        /* Once no input remains outside the current chunk, ask for Z_FINISH. */
        err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH);
    } while (err == Z_OK);

    *destLen = stream.total_out;
    deflateEnd(&stream);
    return err == Z_STREAM_END ? Z_OK : err;
}

以一个不恰当的例子来形容这段程序的逻辑,next_in就像一罐奶粉,next_out就像马桶。stream.avail_in==0这段的逻辑代表奶瓶是否为空,整个do…while循环形象地说就像一只四脚吞金兽(泛指幼儿),在每次迭代(喂食时)不能一下子就把一罐奶粉喂给小朋友,而是一瓶一瓶地泡着喂,avail_out同样的道理,是指示拉了多少。当deflate宝宝吃完(进行完一次迭代),并且吃完一瓶(stream.avail_in==0),就要冲奶粉(stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen),奶粉罐里面会少相应奶粉(sourceLen -= stream.avail_in)。同样道理,stream.avail_out就不用举例说明了。这里的sourceLen > (uLong)max ? max : (uInt)sourceLen就是防止宝宝奶粉冲太厚了。而stream中的total_in和total_out则记录了宝宝吃了多少奶粉以及拉了多少臭臭。

下面分析一下deflateInit函数,deflateInit其实是一个宏定义。实际上就是带有ZLIB_VERSION,sizeof(z_stream)实参的deflateInit_函数。
在这里插入图片描述
从下面的代码可以看出,deflateInit_函数调用了deflateInit2_函数,传入的参数为Z_DEFLATED(deflate压缩方法,目前只支持这种方式)、MAX_WBITS(#define MAX_WBITS 15)、DEF_MEM_LEVEL(#define DEF_MEM_LEVEL 8)和Z_DEFAULT_STRATEGY(#define Z_DEFAULT_STRATEGY 0,压缩策略)。

/*
 * Initialise a deflate stream with default settings by delegating to
 * deflateInit2_() with method Z_DEFLATED, window bits MAX_WBITS, memory
 * level DEF_MEM_LEVEL and strategy Z_DEFAULT_STRATEGY. `version` and
 * `stream_size` are passed through unchanged; the return value is that of
 * deflateInit2_().
 */
int ZEXPORT deflateInit_(z_streamp strm, int level, const char *version, int stream_size)
{
    return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, version, stream_size);
    /* To do: ignore strm->next_in if we use it as window */
}
#define Z_DEFLATED   8
/* The deflate compression method (the only one supported in this version) */

/* Values for the `strategy` argument; deflateInit2_() accepts the range
 * 0 (Z_DEFAULT_STRATEGY) through Z_FIXED. */
#define Z_FILTERED            1
#define Z_HUFFMAN_ONLY        2
#define Z_RLE                 3
#define Z_FIXED               4
#define Z_DEFAULT_STRATEGY    0

MAX_WBITS和LZ77窗口大小有关,比如LZ77窗口大小配为32K($2^{15}$),则MAX_WBITS为15。

/*
 * Validate the parameters, allocate the deflate_state plus its working
 * buffers (window, prev, head, pending_buf) and reset the stream.
 *
 * Returns Z_VERSION_ERROR when `version` is NULL, its first character
 * differs from ZLIB_VERSION's, or `stream_size` is not sizeof(z_stream);
 * Z_STREAM_ERROR when `strm` is NULL or a parameter is out of range;
 * Z_MEM_ERROR when an allocation fails; otherwise the result of
 * deflateReset().
 */
int ZEXPORT deflateInit2_(z_streamp strm, int level, int method, int windowBits, int memLevel, int strategy, const char *version, int stream_size)
{
    deflate_state *s;
    int wrap = 1;   /* 1 by default, 0 for negative windowBits, 2 for windowBits > 15 under GZIP */
    static const char my_version[] = ZLIB_VERSION;

    ushf *overlay;
    /* We overlay pending_buf and d_buf+l_buf. This works since the average
     * output size for (length,distance) codes is <= 24 bits.
     */
    /* NOTE(review): upstream zlib 1.2.12 is reported to have replaced this
     * overlay layout as the fix for CVE-2018-25032 -- verify against the
     * zlib ChangeLog before relying on it.
     */

    /* Only the first character of the version string is compared. */
    if (version == Z_NULL || version[0] != my_version[0] ||
        stream_size != sizeof(z_stream)) {
        return Z_VERSION_ERROR;
    }
    if (strm == Z_NULL) return Z_STREAM_ERROR;

    strm->msg = Z_NULL;
    // No custom allocation/free functions supplied: fall back to the defaults
    if (strm->zalloc == (alloc_func)0) {
        strm->zalloc = zcalloc;
        strm->opaque = (voidpf)0;
    }
    if (strm->zfree == (free_func)0)
        strm->zfree = zcfree;

    /* Normalise the level: FASTEST builds map every non-zero level to 1,
     * other builds map Z_DEFAULT_COMPRESSION to 6. */
#ifdef FASTEST
    if (level != 0) level = 1;
#else
    if (level == Z_DEFAULT_COMPRESSION) level = 6;
#endif

    if (windowBits < 0) { /* suppress zlib wrapper */
        wrap = 0;
        windowBits = -windowBits;
    }
#ifdef GZIP
    else if (windowBits > 15) {
        wrap = 2;       /* write gzip wrapper instead */
        windowBits -= 16;
    }
#endif
    /* Range checks; windowBits == 8 is accepted only with wrap == 1. */
    if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED ||
        windowBits < 8 || windowBits > 15 || level < 0 || level > 9 ||
        strategy < 0 || strategy > Z_FIXED || (windowBits == 8 && wrap != 1)) {
        return Z_STREAM_ERROR;
    }
    if (windowBits == 8) windowBits = 9;  /* until 256-byte window bug fixed */
    
    s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state));
    if (s == Z_NULL) return Z_MEM_ERROR;
    strm->state = (struct internal_state FAR *)s;
    s->strm = strm;
    s->status = INIT_STATE;     /* to pass state test in deflateReset() */

    s->wrap = wrap;
    s->gzhead = Z_NULL;
    /* Window size is 2^w_bits; w_mask is the matching all-ones bit mask. */
    s->w_bits = (uInt)windowBits;
    s->w_size = 1 << s->w_bits;
    s->w_mask = s->w_size - 1;

    /* Hash table size is 2^(memLevel + 7) entries. */
    s->hash_bits = (uInt)memLevel + 7;
    s->hash_size = 1 << s->hash_bits;
    s->hash_mask = s->hash_size - 1;
    s->hash_shift =  ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);

    /* window gets w_size items of 2*sizeof(Byte) each; the NULL checks for
     * these allocations are deferred to the combined test below. */
    s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte));
    s->prev   = (Posf *)  ZALLOC(strm, s->w_size, sizeof(Pos));
    s->head   = (Posf *)  ZALLOC(strm, s->hash_size, sizeof(Pos));

    s->high_water = 0;      /* nothing written to s->window yet */

    s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
    overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
    s->pending_buf = (uchf *) overlay;
    s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);

    /* Any failed allocation: mark the state finished, record the message,
     * and let deflateEnd() release whatever was allocated. */
    if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
        s->pending_buf == Z_NULL) {
        s->status = FINISH_STATE;
        strm->msg = ERR_MSG(Z_MEM_ERROR);
        deflateEnd (strm);
        return Z_MEM_ERROR;
    }
    /* d_buf and l_buf point into the pending_buf allocation. The d_buf
     * offset is counted in ush elements, the l_buf offset in bytes. */
    s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
    s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;

    s->level = level;
    s->strategy = strategy;
    s->method = (Byte)method;

    return deflateReset(strm);
}

下面先看看对压缩级别level的处理,分两种情况:
定义了FASTEST

| deflateInit/deflateInit_/deflateInit2_函数输入level | deflate_state结构体中的level |
| --- | --- |
| Z_NO_COMPRESSION(0) | 0 |
| Z_BEST_SPEED(1) | 1 |
| 2 | 1 |
| 3 | 1 |
| 4 | 1 |
| 5 | 1 |
| 6 | 1 |
| 7 | 1 |
| 8 | 1 |
| Z_BEST_COMPRESSION(9) | 1 |
| Z_DEFAULT_COMPRESSION(-1) | 1 |

未定义FASTEST

| deflateInit/deflateInit_/deflateInit2_函数输入level | deflate_state结构体中的level |
| --- | --- |
| Z_NO_COMPRESSION(0) | 0 |
| Z_BEST_SPEED(1) | 1 |
| 2 | 2 |
| 3 | 3 |
| 4 | 4 |
| 5 | 5 |
| 6 | 6 |
| 7 | 7 |
| 8 | 8 |
| Z_BEST_COMPRESSION(9) | 9 |
| Z_DEFAULT_COMPRESSION(-1) | 6 |

现在看看对windowBits和wrap的处理:未定义GZIP时,windowBits处于闭区间[8, 15]时,wrap等于1;windowBits处于闭区间[-15, -8]时,wrap等于0,windowBits变为相反数,处于闭区间[8, 15],但是要剔除windowBits=8,且wrap=0的情况,也就是windowBits的相反数只能处于[9, 15]之间。

    /* A negative windowBits selects wrap = 0; its magnitude is then used
     * as the real window size exponent. */
    if (windowBits < 0) { /* suppress zlib wrapper */
        wrap = 0;
        windowBits = -windowBits;
    }
    /* "..." stands for the other range checks omitted from this excerpt. */
    if (... || (windowBits == 8 && wrap != 1)) {
        return Z_STREAM_ERROR;
    }

当定义了GZIP时(gzip模式),zlib wrapper的处理逻辑相同。但是当windowBits大于15时,wrap=2,windowBits需要减去16。注意此时输入的windowBits不能等于24:减去16后windowBits为8而wrap为2,会命中(windowBits == 8 && wrap != 1)的判断,函数返回Z_STREAM_ERROR。

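| 输入windowBits | 处理后的windowBits | wrap | 模式 |
| --- | --- | --- | --- |
| [8, 15] | [8, 15] | 1 | ZLIB |
| [-15, -9] | [9, 15] | 0 | 原始deflate(不写zlib wrapper) |
| [25, 31](即x>15且x!=24) | x-16,即[9, 15] | 2 | GZIP |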

注意:由于256字节的滑动窗的实现有bug,所以windowBits等于8时需要修改为9。
比如LZ77窗口大小是32K($2^{15}$),w_bits则是15,w_size就是1<<15,w_mask就是二进制111 1111 1111 1111。从下面代码中可以看出LZ77窗口申请的大小实际是64KB。

    /* Window size is 2^w_bits; w_mask is the matching all-ones bit mask. */
    s->w_bits = (uInt)windowBits;
    s->w_size = 1 << s->w_bits;
    s->w_mask = s->w_size - 1;
    s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte));  // note: each of the w_size items is 2*sizeof(Byte), i.e. twice the window size

下面这段代码和LZ77的hash链相关,后面再详细讲解,这里只需要知道memLevel的大小在区间[1, 9]中。

    /* Hash table geometry: 2^(memLevel + 7) entries, with the matching mask. */
    s->hash_bits = (uInt)memLevel + 7;
    s->hash_size = 1 << s->hash_bits;
    s->hash_mask = s->hash_size - 1;
    /* hash_bits / MIN_MATCH, rounded up */
    s->hash_shift =  ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
    /* prev has one Pos per window position, head one Pos per hash bucket. */
    s->prev   = (Posf *)  ZALLOC(strm, s->w_size, sizeof(Pos));
    s->head   = (Posf *)  ZALLOC(strm, s->hash_size, sizeof(Pos));

按照上述的调用,memLevel被设置为8,memLevel加6也就是14,所以lit_bufsize等于1<<14,也就是16K个元素。如果这里ush类型的大小为2,申请的内存大小就是s->lit_bufsize * (sizeof(ush)+2L)=16K*4=64K字节,也就是pending_buf的大小。d_buf和l_buf借用pending_buf的内存:overlay是ush指针,所以d_buf从第s->lit_bufsize/sizeof(ush)=16K/2=8K个ush元素(即16K字节偏移)的位置开始;pending_buf是字节指针,所以l_buf从(1+sizeof(ush))*s->lit_bufsize=(1+2)*16K=48K字节偏移的地方开始。

ushf *overlay;
s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
/* One allocation of lit_bufsize items of (sizeof(ush)+2) bytes each. */
overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
s->pending_buf = (uchf *) overlay;
s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
/* overlay is a ush pointer, so this offset is counted in ush elements. */
s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
/* pending_buf is a byte pointer, so this offset is counted in bytes. */
s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;

下面来看deflateReset函数,它调用了deflateResetKeep函数和lm_init函数。先从deflateResetKeep看起,其最重要的操作就是修改status的状态。在调用该函数前,status为INIT_STATE(s->status = INIT_STATE)。如果未定义GZIP,当wrap不为0时,status仍为INIT_STATE,否则切换为BUSY_STATE;如果定义了GZIP,则当wrap等于2时,status为GZIP_STATE。同样地,adler校验值用什么算法初始化也由wrap决定:wrap等于2时使用crc32,否则使用adler32。

| wrap | 模式 | 经过deflateResetKeep后status的状态 | 经过deflateResetKeep后adler的状态 |
| --- | --- | --- | --- |
| 1 | ZLIB | INIT_STATE | adler32(0L, Z_NULL, 0) |
| 0 | 原始deflate(不写zlib wrapper) | BUSY_STATE | adler32(0L, Z_NULL, 0) |
| 2 | GZIP | GZIP_STATE | crc32(0L, Z_NULL, 0) |

/*
 * Reset a deflate stream: run deflateResetKeep() and, only if it returned
 * Z_OK, re-initialise the longest-match state with lm_init(). Returns the
 * result of deflateResetKeep().
 */
int ZEXPORT deflateReset (z_streamp strm)
{
    int ret;
    ret = deflateResetKeep(strm);
    if (ret == Z_OK)  lm_init(strm->state);
    return ret;
}

/*
 * Reset the stream counters, pending-output bookkeeping, status and check
 * value of a deflate stream, then re-initialise the tree structures.
 *
 * Returns Z_STREAM_ERROR when deflateStateCheck() rejects the stream,
 * Z_OK otherwise.
 */
int ZEXPORT deflateResetKeep (z_streamp strm)
{
    deflate_state *state;

    if (deflateStateCheck(strm))
        return Z_STREAM_ERROR;

    strm->total_out = 0;
    strm->total_in = 0;
    strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */
    strm->data_type = Z_UNKNOWN;

    state = (deflate_state *)strm->state;

    /* Nothing is pending: the output cursor goes back to the buffer start. */
    state->pending = 0;
    state->pending_out = state->pending_buf;

    /* wrap was made negative by deflate(..., Z_FINISH); restore its sign. */
    if (state->wrap < 0)
        state->wrap = -state->wrap;

    /* Pick the initial status and check value from the wrapper type. */
#ifdef GZIP
    if (state->wrap == 2) {
        state->status = GZIP_STATE;
        strm->adler = crc32(0L, Z_NULL, 0);
    } else
#endif
    {
        state->status = state->wrap ? INIT_STATE : BUSY_STATE;
        strm->adler = adler32(0L, Z_NULL, 0);
    }

    state->last_flush = Z_NO_FLUSH;
    _tr_init(state);
    return Z_OK;
}

下面接着分析_tr_init函数。_tr_init用于初始化树数据结构,tr_static_init用于初始化静态树,后续分析。s->bi_buf用于缓冲比特数据,最终刷写到pending_buf中;bi_valid指的是bi_buf中有效比特的数量。其余代码就是将动态哈夫曼树dyn_ltree连接到l_desc的dyn_tree中,将静态哈夫曼树描述符static_l_desc连接到l_desc的stat_desc中。距离树(d_desc)和比特长度树(bl_desc)同理。

/*
 * Initialise the tree data structures of a deflate state: run the one-time
 * static table setup, bind each of the three tree descriptors (l_desc,
 * d_desc, bl_desc) to its dynamic tree array and static descriptor, clear
 * the bit buffer, and start the first block.
 */
void _tr_init(deflate_state *s)
{
    tr_static_init();
    s->l_desc.dyn_tree = s->dyn_ltree;
    s->l_desc.stat_desc = &static_l_desc;
    s->d_desc.dyn_tree = s->dyn_dtree;
    s->d_desc.stat_desc = &static_d_desc;
    s->bl_desc.dyn_tree = s->bl_tree;
    s->bl_desc.stat_desc = &static_bl_desc;
    
    /* Empty bit buffer: no bits held, none valid. */
    s->bi_buf = 0;
    s->bi_valid = 0;
    /* Initialize the first block of the first file: */
    init_block(s);
}

init_block函数,将初始化字符长度哈夫曼树、距离树和比特长度树,并将字符长度树第END_BLOCK(256)个元素的频次置为1。最后将opt_len、static_len、last_lit和matches清零。

/* ===========================================================================
 * Initialize a new block.
 *
 * Zeroes the frequency counts of the three dynamic trees, sets the
 * END_BLOCK frequency to 1, and clears the block statistics (opt_len,
 * static_len, last_lit, matches).
 */
local void init_block(deflate_state *s)
{
    int n; /* iterates over tree elements */
    /* Initialize the trees. */
    for (n = 0; n < L_CODES;  n++) s->dyn_ltree[n].Freq = 0;
    for (n = 0; n < D_CODES;  n++) s->dyn_dtree[n].Freq = 0;
    for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
    /* The END_BLOCK symbol always starts with a count of one. */
    s->dyn_ltree[END_BLOCK].Freq = 1;
    s->opt_len = s->static_len = 0L;
    s->last_lit = s->matches = 0;
}

tr_static_init函数,后续分析静态哈夫曼树时分析

/* ===========================================================================
 * Initialize the various 'constant' tables.
 *
 * The body is compiled only when GEN_TREES_H is defined or STDC is not
 * defined; otherwise the function is empty. A static flag makes the table
 * construction run at most once.
 */
local void tr_static_init()
{
#if defined(GEN_TREES_H) || !defined(STDC)
    static int static_init_done = 0;
    int n;        /* iterates over tree elements */
    int bits;     /* bit counter */
    int length;   /* length value */
    int code;     /* code value */
    int dist;     /* distance index */
    ush bl_count[MAX_BITS+1];
    /* number of codes at each bit length for an optimal tree */

    /* Tables were already built by an earlier call. */
    if (static_init_done) return;

    /* For some embedded targets, global variables are not initialized: */
#ifdef NO_INIT_GLOBAL_POINTERS
    static_l_desc.static_tree = static_ltree;
    static_l_desc.extra_bits = extra_lbits;
    static_d_desc.static_tree = static_dtree;
    static_d_desc.extra_bits = extra_dbits;
    static_bl_desc.extra_bits = extra_blbits;
#endif

    /* Initialize the mapping length (0..255) -> length code (0..28) */
    length = 0;
    for (code = 0; code < LENGTH_CODES-1; code++) {
        base_length[code] = length;
        for (n = 0; n < (1<<extra_lbits[code]); n++) {
            _length_code[length++] = (uch)code;
        }
    }
    Assert (length == 256, "tr_static_init: length != 256");
    /* Note that the length 255 (match length 258) can be represented
     * in two different ways: code 284 + 5 bits or code 285, so we
     * overwrite length_code[255] to use the best encoding:
     */
    _length_code[length-1] = (uch)code;

    /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
    dist = 0;
    for (code = 0 ; code < 16; code++) {
        base_dist[code] = dist;
        for (n = 0; n < (1<<extra_dbits[code]); n++) {
            _dist_code[dist++] = (uch)code;
        }
    }
    Assert (dist == 256, "tr_static_init: dist != 256");
    dist >>= 7; /* from now on, all distances are divided by 128 */
    for ( ; code < D_CODES; code++) {
        base_dist[code] = dist << 7;
        for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
            _dist_code[256 + dist++] = (uch)code;
        }
    }
    Assert (dist == 256, "tr_static_init: 256+dist != 512");

    /* Construct the codes of the static literal tree */
    for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
    n = 0;
    /* Code lengths by symbol range: 0..143 -> 8 bits, 144..255 -> 9 bits,
     * 256..279 -> 7 bits, 280..287 -> 8 bits. */
    while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
    while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
    while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
    while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
    /* Codes 286 and 287 do not exist, but we must include them in the
     * tree construction to get a canonical Huffman tree (longest code
     * all ones)
     */
    gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);

    /* The static distance tree is trivial: */
    for (n = 0; n < D_CODES; n++) {
        static_dtree[n].Len = 5;
        static_dtree[n].Code = bi_reverse((unsigned)n, 5);
    }
    static_init_done = 1;

#  ifdef GEN_TREES_H
    gen_trees_header();
#  endif
#endif /* defined(GEN_TREES_H) || !defined(STDC) */
}

分析lm_init函数,主要逻辑是通过配置表配置longest match算法。首先将window_size初始化为2倍的w_size(w_size为32K时即64K),然后清除Hash链,再根据level从配置表中取出max_lazy、good_length、nice_length和max_chain参数。这里s->match_length和s->prev_length初始化为MIN_MATCH-1,也就是2。

/* ===========================================================================
 * Initialize the "longest match" routines for a new zlib stream
 *
 * Sets window_size to twice w_size, clears the hash table, loads the four
 * tuning parameters for s->level from configuration_table, and resets the
 * match-search position and state fields.
 */
local void lm_init (deflate_state *s)
{
    s->window_size = (ulg)2L*s->w_size;

    CLEAR_HASH(s);

    /* Set the default configuration parameters:
     */
    s->max_lazy_match   = configuration_table[s->level].max_lazy;
    s->good_match       = configuration_table[s->level].good_length;
    s->nice_match       = configuration_table[s->level].nice_length;
    s->max_chain_length = configuration_table[s->level].max_chain;

    s->strstart = 0;
    s->block_start = 0L;
    s->lookahead = 0;
    s->insert = 0;
    /* Both lengths start one below the minimum match length. */
    s->match_length = s->prev_length = MIN_MATCH-1;
    s->match_available = 0;
    s->ins_h = 0;
#ifndef FASTEST
#ifdef ASMV
    match_init(); /* initialize the asm code */
#endif
#endif
}
/* Per-level tuning table, indexed by s->level (0..9) in lm_init(). The
 * first four columns are read there as good_length, max_lazy, nice_length
 * and max_chain. The fifth column names a deflate_* routine -- presumably
 * the compression function used for that level; confirm against the config
 * struct definition.
 */
local const config configuration_table[10] = {
/*      good lazy nice chain */
/* 0 */ {0,    0,  0,    0, deflate_stored},  /* store only */
/* 1 */ {4,    4,  8,    4, deflate_fast}, /* max speed, no lazy matches */
/* 2 */ {4,    5, 16,    8, deflate_fast},
/* 3 */ {4,    6, 32,   32, deflate_fast},
/* 4 */ {4,    4, 16,   16, deflate_slow},  /* lazy matches */
/* 5 */ {8,   16, 32,   32, deflate_slow},
/* 6 */ {8,   16, 128, 128, deflate_slow},
/* 7 */ {8,   32, 128, 256, deflate_slow},
/* 8 */ {32, 128, 258, 1024, deflate_slow},
/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */

最后列一下status成员的取值和last_flush成员的取值

/* Stream status */
/* Values stored in deflate_state.status; the arrow in each comment names
 * the state(s) that follow it. */
#define INIT_STATE    42    /* zlib header -> BUSY_STATE */
#ifdef GZIP
#  define GZIP_STATE  57    /* gzip header -> BUSY_STATE | EXTRA_STATE */
#endif
#define EXTRA_STATE   69    /* gzip extra block -> NAME_STATE */
#define NAME_STATE    73    /* gzip file name -> COMMENT_STATE */
#define COMMENT_STATE 91    /* gzip comment -> HCRC_STATE */
#define HCRC_STATE   103    /* gzip header CRC -> BUSY_STATE */
#define BUSY_STATE   113    /* deflate -> FINISH_STATE */
#define FINISH_STATE 666    /* stream complete */
/* Allowed flush values; see deflate() and inflate() below for details */
/* deflateResetKeep() initialises last_flush to Z_NO_FLUSH. */
#define Z_NO_FLUSH      0
#define Z_PARTIAL_FLUSH 1
#define Z_SYNC_FLUSH    2
#define Z_FULL_FLUSH    3
#define Z_FINISH        4
#define Z_BLOCK         5
#define Z_TREES         6

上述代码所涉及的数据结构之间的关系如下图所示:
在这里插入图片描述

©️2020 CSDN 皮肤主题: 酷酷鲨 设计师:CSDN官方博客 返回首页