23、LZ77压缩和解压

1、LZ77是基于字典的算法,和霍夫曼编码不同,其处理的符号不一定是文本字符,可以是任何大小的符号。
2、LZ77使用前向缓冲区(待编码区的小段)和一个滑动窗口(搜索区)实现。滑动窗口是个历史缓冲区,它被用来存放输入流中已经处理过的前n个字节的有关信息。前向缓冲区是与滑动窗口相对应的,它被用来存放输入流中紧随其后、尚未编码的n个字节。常用滑动窗口4KB,前向缓冲区32B
   算法主要思想就是在前向缓冲区中不断寻找能够与字典(滑动窗口)中短语匹配的最长短语。如果匹配的数据长度大于最小匹配长度,那么就输出一对〈长度,距离(滑动窗口中对应的位置)〉数值。长度(length)是匹配的数据长度,而距离(distance)说明了在输入流中向后(即朝已处理数据的方向)多少字节这个匹配数据可以被找到。
   LZ77算法中代价最高的是滑动窗口中扫描匹配短语。一个更高效的方法是用某种高效搜索性能的数据结构代替滑动窗口。
   LZ77比霍夫曼编码有更好的压缩比,但是压缩过程中LZ77要消耗相当长的时间。

   如果前向缓冲区包含A、B、D,那么缓冲区就包含了短语{(A),(A,B),(A,B,D)}
   如果滑动窗口中包含A、B、C,那么窗口和字典中短语为{(A),(A,B),(A,B,C),(B),(B,C),(C)}

3、基本流程
(1) 、从当前压缩位置开始,考察未编码的数据,并试图在滑动窗口中找出最长的匹配字符串,如果找到,则进行步骤 2 ,否则进行步骤 3.
(2) 、输出三元符号组( off,len,c )。其中 off 为窗口中匹配字符串相对窗口边界的偏移,len为可匹配的长度,c 为下一个字符,即不匹配的第一个字符。然后将窗口向后滑动 len+1 个字符,继续步骤 1.
(3) 、输出三元符号组( 0,0,c )。其中 c 为下一个字符。然后将窗口向后滑动一个字符,继续步骤 1.

这里写图片描述

3、最长字符串匹配

/*
 * Find the longest prefix of the look-ahead buffer that also occurs in the
 * sliding window.
 *
 * window - sliding window; indices 0 .. LZ77_WINDOW_SIZE - 1 are read
 * buffer - look-ahead buffer; indices 0 .. LZ77_BUFFER_SIZE - 1 are read
 * offset - out: window index where the best match starts (0 if no match)
 * next   - out: the buffer symbol right after the matched prefix
 *          (buffer[0] if no match)
 *
 * Returns the length of the longest match, or 0 when nothing matches.
 * When several window positions give the same length, the lowest index wins.
 */
static int compare_win(const unsigned char *window, const unsigned char *buffer, int *offset, unsigned char *next)
{
    int match, longest, i, j, k;

    /* Defaults for the "no match" case. */
    *offset = 0;
    longest = 0;
    *next = buffer[0];

    /* Try every possible starting position k inside the window. */
    for(k = 0; k < LZ77_WINDOW_SIZE; k++)
    {
        i = k;
        j = 0;
        match = 0;

        /*
         * Extend the match from window[k] against the start of the buffer.
         * The match is capped at LZ77_BUFFER_SIZE - 1 so that buffer[j]
         * below is always a valid "next" symbol inside the buffer.
         */
        while(i < LZ77_WINDOW_SIZE && j < LZ77_BUFFER_SIZE - 1)
        {
            if(window[i] != buffer[j])
                break;

            match++;
            i++;
            j++;
        }

        /* Remember the best candidate seen so far. */
        if(match > longest)
        {
            *offset = k;
            longest = match;
            *next = buffer[j];
        }
    }

    return longest;
}

4、LZ77压缩

/*
 * Compress `size` bytes from `original` with LZ77.
 *
 * Output layout: a header holding `size` (sizeof(int) bytes, copied with
 * memcpy, i.e. in host byte order), followed by a bit stream of tokens.
 * Each token is either
 *   - a phrase token of LZ77_PHRASE_BITS bits:
 *         type bit 1 | window offset | match length | next symbol
 *   - a symbol token of LZ77_SYMBOL_BITS bits:
 *         type bit 0 | next symbol
 *
 * original   - data to compress
 * compressed - out: receives a malloc'ed buffer the caller must free();
 *              set to NULL on failure
 * size       - number of bytes in `original`
 *
 * Returns the number of bytes in *compressed, or -1 if memory allocation
 * fails.
 *
 * NOTE(review): bit_get/bit_set are defined outside this file; the token
 * packing below assumes they index bits MSB-first within each byte --
 * confirm against their definition.
 */
int lz77_compress(const unsigned char *original, unsigned char **compressed, int size)
{
    unsigned char window[LZ77_WINDOW_SIZE], buffer[LZ77_BUFFER_SIZE], *comp, *temp, next;
    int offset, length, remaining, hsize, ipos, opos, tpos, i;
    unsigned int token;
    int tbits;

    *compressed = NULL;
    memset(window, 0, LZ77_WINDOW_SIZE);
    memset(buffer, 0, LZ77_BUFFER_SIZE);

    /* Header: the original byte count. */
    hsize = sizeof(int);
    comp = malloc(hsize);
    if(comp == NULL)
        return -1;
    memcpy(comp, &size, sizeof(int));

    /* ipos indexes the next unread byte of the source data. */
    ipos = 0;

    /* Prime the look-ahead buffer. */
    for(i = 0; i < LZ77_BUFFER_SIZE && ipos < size; i++)
    {
        buffer[i] = original[ipos];
        ipos++;
    }

    /* opos is the BIT position of the next output bit. */
    opos = hsize * 8;
    remaining = size;

    while(remaining > 0)
    {
        if((length = compare_win(window, buffer, &offset, &next)) != 0)
        {
            /* Phrase token: type bit 1, then offset, length and next. */
            token = 0x00000001u << (LZ77_PHRASE_BITS - 1);
            token = token | ((unsigned int)offset << (LZ77_PHRASE_BITS - LZ77_TYPE_BITS - LZ77_WINOFF_BITS));
            token = token | ((unsigned int)length << (LZ77_PHRASE_BITS - LZ77_TYPE_BITS - LZ77_WINOFF_BITS - LZ77_BUFLEN_BITS));
            token = token | next;

            tbits = LZ77_PHRASE_BITS;
        }
        else
        {
            /* Symbol token: type bit 0 followed by the unmatched symbol. */
            token = 0x00000000u;
            token = token | next;

            tbits = LZ77_SYMBOL_BITS;
        }

        /* Big-endian byte order, so the low tbits bits are the LAST tbits
         * bits of the token's byte representation. */
        token = htonl(token);

        /* Append the token to the output one bit at a time. */
        for(i = 0; i < tbits; i++)
        {
            /* Grow the output by one byte each time a byte boundary is hit. */
            if(opos % 8 == 0)
            {
                temp = realloc(comp, (opos / 8) + 1);
                if(temp == NULL)
                {
                    free(comp);
                    return -1;
                }
                comp = temp;
            }

            /* Bit index inside token, sized by token itself so the read
             * never leaves the object. */
            tpos = (int)(sizeof(token) * 8) - tbits + i;
            bit_set(comp, opos, bit_get((unsigned char *)&token, tpos));
            opos++;
        }

        /* The token consumed the match plus the one "next" symbol. */
        length++;

        /* Slide the window left and append the symbols just encoded. */
        memmove(&window[0], &window[length], LZ77_WINDOW_SIZE - length);
        memmove(&window[LZ77_WINDOW_SIZE - length], &buffer[0], length);

        /* Drop the encoded symbols from the buffer and refill from source. */
        memmove(&buffer[0], &buffer[length], LZ77_BUFFER_SIZE - length);
        for(i = LZ77_BUFFER_SIZE - length; (i < LZ77_BUFFER_SIZE) && (ipos < size); i++)
        {
            buffer[i] = original[ipos];
            ipos++;
        }

        remaining = remaining - length;
    }

    *compressed = comp;

    /* Round the bit count up to whole bytes. */
    return ((opos - 1) / 8) + 1;
}

5、LZ77解压
这里写图片描述

/*
 * Decompress a buffer produced by lz77_compress.
 *
 * The input starts with the original byte count (sizeof(int) bytes, read
 * with memcpy), followed by a bit stream of tokens. Each token starts with
 * a type bit: 1 = phrase token (window offset, match length, next symbol),
 * 0 = symbol token (next symbol only).
 *
 * compressed - compressed data
 * original   - out: receives a malloc'ed buffer the caller must free();
 *              NULL on failure or when the stored size is not positive
 *
 * Returns the number of bytes written to *original, or -1 if memory
 * allocation fails or a phrase token refers outside the window/buffer.
 *
 * NOTE(review): bit_get/bit_set are defined outside this file; the field
 * unpacking below assumes they index bits MSB-first within each byte --
 * confirm against their definition.
 */
int lz77_uncompress(const unsigned char *compressed, unsigned char **original)
{
    unsigned char window[LZ77_WINDOW_SIZE], buffer[LZ77_BUFFER_SIZE];
    unsigned char *orig, *temp, next;
    int offset, length, remaining, hsize, size, ipos, opos, tpos, state, i;

    *original = orig = NULL;

    /* Header: the original byte count. */
    hsize = sizeof(int);
    memcpy(&size, compressed, sizeof(int));

    memset(window, 0, LZ77_WINDOW_SIZE);
    memset(buffer, 0, LZ77_BUFFER_SIZE);

    ipos = hsize * 8;   /* BIT position in the compressed input */
    opos = 0;           /* BYTE position in the decompressed output */
    remaining = size;

    while(remaining > 0)
    {
        /* Type bit: 1 = phrase token, 0 = symbol token. */
        state = bit_get(compressed, ipos);
        ipos++;

        if(state == 1)
        {
            /* Window offset, collected into the trailing bits of the int's
             * bytes (big-endian layout, converted by ntohl below). */
            memset(&offset, 0, sizeof(int));
            for(i = 0; i < LZ77_WINOFF_BITS; i++)
            {
                tpos = (sizeof(int) * 8) - LZ77_WINOFF_BITS + i;
                bit_set((unsigned char *)&offset, tpos, bit_get(compressed, ipos));
                ipos++;
            }

            /* Match length, same scheme -- written into `length`. */
            memset(&length, 0, sizeof(int));
            for(i = 0; i < LZ77_BUFLEN_BITS; i++)
            {
                tpos = (sizeof(int) * 8) - LZ77_BUFLEN_BITS + i;
                bit_set((unsigned char *)&length, tpos, bit_get(compressed, ipos));
                ipos++;
            }

            /* Next symbol; `next` is one byte, so positions are computed
             * relative to a single unsigned char. */
            next = 0x00;
            for(i = 0; i < LZ77_NEXT_BITS; i++)
            {
                tpos = (sizeof(unsigned char) * 8) - LZ77_NEXT_BITS + i;
                bit_set((unsigned char *)&next, tpos, bit_get(compressed, ipos));
                ipos++;
            }

            offset = ntohl(offset);
            length = ntohl(length);

            /* Reject tokens that would index outside window[] or buffer[]. */
            if(offset < 0 || length < 0 || length >= LZ77_BUFFER_SIZE ||
               offset + length > LZ77_WINDOW_SIZE)
            {
                free(orig);
                return -1;
            }

            /* Room for the matched phrase plus the next symbol.
             * realloc(NULL, n) behaves like malloc(n) on the first token. */
            temp = realloc(orig, opos + length + 1);
            if(temp == NULL)
            {
                free(orig);
                return -1;
            }
            orig = temp;

            /* Copy the phrase out of the window; buffer[] keeps a copy of
             * what was just decoded so the window can be updated below. */
            i = 0;
            while(i < length && remaining > 0)
            {
                orig[opos] = window[offset + i];
                opos++;
                buffer[i] = window[offset + i];
                i++;

                remaining--;
            }

            /* Append the unmatched next symbol, if output is still owed. */
            if(remaining > 0)
            {
                orig[opos] = next;
                opos++;

                buffer[i] = next;
                remaining--;
            }

            /* The token produced the phrase plus one next symbol. */
            length++;
        }
        else
        {
            /* Symbol token: read the literal symbol. */
            next = 0x00;
            for(i = 0; i < LZ77_NEXT_BITS; i++)
            {
                tpos = (sizeof(unsigned char) * 8) - LZ77_NEXT_BITS + i;
                bit_set((unsigned char *)&next, tpos, bit_get(compressed, ipos));
                ipos++;
            }

            temp = realloc(orig, opos + 1);
            if(temp == NULL)
            {
                free(orig);
                return -1;
            }
            orig = temp;

            orig[opos] = next;
            opos++;

            buffer[0] = next;
            remaining--;
            length = 1;
        }

        /* Slide the window left by the decoded count and append the decoded
         * symbols, mirroring what the compressor did after this token. */
        memmove(&window[0], &window[length], LZ77_WINDOW_SIZE - length);
        memmove(&window[LZ77_WINDOW_SIZE - length], &buffer[0], length);
    }

    *original = orig;
    return opos;
}
  • 12
    点赞
  • 53
    收藏
    觉得还不错? 一键收藏
  • 8
    评论
以下是 C 语言实现 LZ77 压缩与解的简单示例代码,仅供参考: ```c #include <stdio.h> #include <stdlib.h> #include <string.h> #define MAX_WINDOW_SIZE 4096 #define MAX_LOOKAHEAD_SIZE 18 #define MIN_MATCH_LENGTH 3 typedef struct { int position; int length; char next; } Match; void compress(unsigned char* input, int input_size, unsigned char* output, int* output_size) { int i, j, k, pos, len, lookahead_size, window_size; unsigned char* window, *lookahead; Match match, best_match; window_size = MAX_WINDOW_SIZE; lookahead_size = MAX_LOOKAHEAD_SIZE; window = (unsigned char*)malloc(window_size); lookahead = (unsigned char*)malloc(lookahead_size); memset(window, ' ', window_size); memcpy(lookahead, input, lookahead_size); *output_size = 0; pos = 0; while (pos < input_size) { best_match.position = 0; best_match.length = 0; for (i = 0; i < lookahead_size; i++) { match.position = 0; match.length = 0; for (j = 0, k = i; j < window_size && k < lookahead_size; j++, k++) { if (window[j] != lookahead[k]) { break; } match.length++; match.position = j; } if (match.length > best_match.length) { best_match = match; } } if (best_match.length >= MIN_MATCH_LENGTH) { output[(*output_size)++] = best_match.position; output[(*output_size)++] = (best_match.position >> 4) | ((best_match.length - MIN_MATCH_LENGTH) << 4); lookahead_size = best_match.length; } else { output[(*output_size)++] = lookahead[0]; lookahead_size = 1; } memmove(window, window + lookahead_size, window_size - lookahead_size); memcpy(window + window_size - lookahead_size, lookahead, lookahead_size); memmove(lookahead, lookahead + lookahead_size, MAX_LOOKAHEAD_SIZE - lookahead_size); if (pos + MAX_LOOKAHEAD_SIZE < input_size) { memcpy(lookahead + lookahead_size, input + pos + lookahead_size, MAX_LOOKAHEAD_SIZE - lookahead_size); } else { memcpy(lookahead + lookahead_size, input + pos + lookahead_size, input_size - pos - lookahead_size); lookahead_size = input_size - pos - lookahead_size; } pos += lookahead_size; } free(window); free(lookahead); } void 
decompress(unsigned char* input, int input_size, unsigned char* output, int* output_size) { int i, j, pos, len; unsigned char* window; Match match; window = (unsigned char*)malloc(MAX_WINDOW_SIZE); memset(window, ' ', MAX_WINDOW_SIZE); *output_size = 0; pos = 0; while (pos < input_size) { if (input[pos] == 0 && pos + 1 < input_size) { output[(*output_size)++] = input[++pos]; } else if (input[pos] > 0 && input[pos] <= MAX_WINDOW_SIZE && pos + 1 < input_size) { match.position = input[pos] - 1; match.length = (input[pos + 1] >> 4) + MIN_MATCH_LENGTH; len = match.length; for (i = 0, j = match.position; i < len; i++, j++) { if (j >= *output_size) { break; } match.next = output[j]; output[(*output_size)++] = match.next; window[i] = match.next; } pos += 2; } } free(window); } int main() { unsigned char input[] = "Hello, world! This is a test string."; int input_size = sizeof(input) - 1; unsigned char compressed[1024], decompressed[1024]; int compressed_size, decompressed_size; compress(input, input_size, compressed, &compressed_size); decompress(compressed, compressed_size, decompressed, &decompressed_size); printf("Original: %s\n", input); printf("Compressed: "); for (int i = 0; i < compressed_size; i++) { printf("%02X ", compressed[i]); } printf("\n"); printf("Decompressed: %s\n", decompressed); return 0; } ``` 该示例代码实现了 LZ77 压缩和解,其中 `compress` 函数接受输入数据和输入数据大小,并输出压缩后的数据和压缩后的数据大小;`decompress` 函数接受压缩后的数据和压缩后的数据大小,并输出解后的数据和解后的数据大小。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 8
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值