LZW编码实验

最新推荐文章于 2022-07-06 00:48:01 发布

m0_52105369

最新推荐文章于 2022-07-06 00:48:01 发布

阅读量150

点赞数

文章标签： c语言

本文链接：https://blog.csdn.net/m0_52105369/article/details/125450661

版权

LZW的编码原理

LZW编码原理

LZW编码的思想是从读入文件的字符流中不断提取新的字符串（看作词条），同时将“词条”与“代号”（即码字）相对应，便可以通过码字流代替字符流，达到压缩的目的。在编码的同时，会生成“词典”，词典中存储每个码字与字符串之间的对应关系，编码器通过管理词典来完成输入与输出之间的转换。LZW编码器的输入是字符流，输出是码字流。
LZW编码步骤

步骤1：将词典初始化为包含所有可能的单字符，当前前缀P初始化为空。
步骤2：当前字符C=字符流中的下一个字符。
步骤3：判断P＋C是否在词典中
- （1）如果“是”，则用C扩展P，即让P=P＋C，返回到步骤2。
- （2）如果“否”，则输出与当前前缀P相对应的码字W；将P＋C添加到词典中；令P=C，并返回到步骤2

LZW解码步骤

步骤1：在开始译码时词典包含所有可能的前缀根
步骤2：令CW：=码字流中的第一个码字。
步骤3：输出当前缀-符串string.CW到字符流。
步骤4：先前码字PW：=当前码字CW。
步骤5：当前码字CW：=码字流的下一个码字。
步骤6：判断当前缀-符串string.CW 是否在词典中。
- （1）如果”是”，则把当前缀-符串string.CW输出到字符流。当前前缀P：=先前缀-符串string.PW。当前字符C：=当前前缀-符串string.CW的第一个字符。把缀-符串P+C添加到词典。
- （2）如果”否”，则当前前缀P：=先前缀-符串string.PW。当前字符C：=先前缀-符串string.PW的第一个字符（此时string.CW尚不在词典中，它必然等于string.PW加上string.PW的第一个字符）。输出缀-符串P+C到字符流，然后把它添加到词典中。
步骤7：判断码字流中是否还有码字要译。
- （1）如果”是”，就返回步骤4。
- （2）如果”否”，结束。

LZW实验

编写解码器

解码器代码

/*
 * Decode an LZW code stream read from bf and write the decoded bytes to fp.
 *
 * The stream starts with a 32-bit count of decoded bytes (read with
 * BitsInput), followed by the codes obtained from input().  Decoding stops
 * once that many bytes have been written.
 *
 * A code equal to next_code is the "CSCSC" case: the phrase is not in the
 * dictionary yet, so it is rebuilt as the previous phrase followed by the
 * previous phrase's first byte.
 *
 * NOTE(review): input() is defined elsewhere; it is assumed to return the
 * next code as an int. Negative or out-of-range codes are treated as a
 * corrupted stream and end decoding early.
 */
void LZWDecode(BITFILE* bf, FILE* fp) {
    int character = 0;  /* first byte of the most recently decoded phrase */
    int new_code, last_code;
    int phrase_length;
    unsigned long file_length;

    file_length = BitsInput(bf, 4 * 8);
    if ((unsigned long)-1 == file_length) file_length = 0;
    InitDictionary();
    last_code = -1;
    while (0 < file_length) {
        new_code = input(bf);
        /*
         * Reject codes the encoder can never produce: negative codes, codes
         * beyond the next free slot, and a "not yet in dictionary" code
         * before any phrase has been decoded (there is no previous phrase to
         * extend, and `character` would be meaningless).
         */
        if (0 > new_code || new_code > next_code ||
            (new_code == next_code && 0 > last_code)) {
            fprintf(stderr, "Corrupted LZW stream: unexpected code %d\n", new_code);
            return;
        }
        if (new_code >= next_code) { /* CSCSC case: code not in dictionary */
            /* Bottom of the stack is emitted last: previous phrase's first byte. */
            d_stack[0] = character;
            phrase_length = DecodeString(1, last_code);
        }
        else {
            phrase_length = DecodeString(0, new_code);
        }
        /* The phrase is stored reversed, so its first byte is on top. */
        character = d_stack[phrase_length - 1];
        /* Pop the stack to write the phrase in order; never write more than
         * the header promised, so file_length cannot wrap below zero. */
        while (0 < phrase_length && 0 < file_length) {
            phrase_length--;
            fputc(d_stack[phrase_length], fp);
            file_length--;
        }
        if (MAX_CODE > next_code) { /* room left: add previous phrase + first byte */
            AddToDictionary(character, last_code);
        }
        last_code = new_code;
    }
}

DecodeString

/*
 * Follow the parent links from `code` up to the root and store each entry's
 * suffix into d_stack, starting at index `start`.  The walk runs from the
 * last character of the phrase to the first, so the phrase is stored
 * reversed ("abc" is stored as c, b, a).
 *
 * Returns the index one past the last slot written, i.e. `start` plus the
 * number of characters in the phrase.
 */
int DecodeString(int start, int code) {
    int top;

    for (top = start; 0 <= code; top++) {
        d_stack[top] = dictionary[code].suffix;
        code = dictionary[code].parent;
    }
    return top;
}

解码时遇到字典中不存在的码字时

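当解码时遇到词典中尚不存在的码字时（即 new_code >= next_code 的 CSCSC 情况），先将上一个字符串的第一个字符 character 存入栈底 d_stack[0]，再沿上一个码字 last_code 的 parent 链向上遍历，把上一个码字对应字符串的全部字符倒序存入 d_stack[1] 开始的位置。此时栈中共有“上一个字符串长度+1”个字符，从栈顶到栈底依次取出并写入输出文件，得到的就是“上一个字符串+其首字符”。最后将该新词条（前缀为 last_code，后缀为 character）写入词典。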

LZW编码对不同格式文件类型的压缩效率

文件格式	压缩前文件大小	压缩后文件大小	压缩率
.docx	25	39	156%
.mp4	891	1104	124%
.pdf	289	265	91.7%
.txt	20	15	75%
.yuv	733	116	15.8%
.jpg	25	41	164%
.png	33	50	152%
.wav	305	113	37%
.ts	1045	1259	120%
.html	301	158	52.5%

可以看到在本次测试的文件中，.yuv、.wav、.txt、.html文件压缩的效果比较好，.pdf文件也能够被一定程度的压缩，但其他的文件格式在压缩后都出现了不降反增的现象，说明LZW对于这些文件格式并不太适用。

实验完整代码

数据结构

/*
 * LZW dictionary stored as a tree in a flat array indexed by code.
 * Each entry represents the phrase "phrase of parent" + suffix.
 * A link value of -1 means "none".
 */
struct {
    /* Last character of this entry's phrase. */
    int suffix;
    /* Tree links: prefix entry, first entry that extends this phrase, and
     * the next entry sharing the same parent. */
    int parent, firstchild, nextsibling;
} dictionary[MAX_CODE + 1];

工具函数
- 初始化字典

/*
 * Reset the dictionary to its initial state: codes 0..255 are the
 * single-byte phrases (no parent, no children), chained together through
 * nextsibling with the chain ending at code 255.  The next free code is 256.
 * Called by both LZWEncode and LZWDecode before processing.
 */
void InitDictionary(void) {
    int code = 0;

    while (code < 256) {
        dictionary[code].suffix = code;
        dictionary[code].parent = -1;
        dictionary[code].firstchild = -1;
        /* The last root entry terminates the sibling chain. */
        dictionary[code].nextsibling = (code < 255) ? code + 1 : -1;
        code++;
    }
    next_code = 256;
}
/*
 * Encoder-side lookup: find the code of the phrase formed by appending
 * `character` to the phrase identified by `string_code`.
 *
 * A negative `string_code` means an empty prefix; the result is then
 * `character` itself, which is the code of that single-byte phrase.
 * Returns the matching child's code, or -1 when no child of `string_code`
 * carries `character` as its suffix.
 */
int InDictionary(int character, int string_code) {
    int child;

    if (string_code < 0) {
        return character;
    }
    for (child = dictionary[string_code].firstchild; child >= 0;
         child = dictionary[child].nextsibling) {
        if (dictionary[child].suffix == character) {
            return child;
        }
    }
    return -1;
}

添加词条

/*
 * Append a new entry (phrase of `string_code` followed by `character`) at
 * index next_code and link it as the last child of `string_code`, then
 * advance next_code.  Does nothing when `string_code` is negative (there is
 * no prefix to extend).
 *
 * The caller is responsible for checking that next_code is still below
 * MAX_CODE before calling.
 */
void AddToDictionary(int character, int string_code) {
    int tail;

    if (string_code < 0) {
        return;
    }

    dictionary[next_code].suffix = character;
    dictionary[next_code].parent = string_code;
    dictionary[next_code].firstchild = -1;
    dictionary[next_code].nextsibling = -1;

    tail = dictionary[string_code].firstchild;
    if (tail < 0) {
        /* Parent had no children: the new entry becomes the first one. */
        dictionary[string_code].firstchild = next_code;
    }
    else {
        /* Walk to the end of the sibling chain and append there. */
        while (dictionary[tail].nextsibling >= 0) {
            tail = dictionary[tail].nextsibling;
        }
        dictionary[tail].nextsibling = next_code;
    }
    next_code++;
}

字符串解码

/*
 * Follow the parent links from `code` up to the root and store each entry's
 * suffix into d_stack, starting at index `start`.  The walk runs from the
 * last character of the phrase to the first, so the phrase is stored
 * reversed ("abc" is stored as c, b, a).
 *
 * Returns the index one past the last slot written, i.e. `start` plus the
 * number of characters in the phrase.
 */
int DecodeString(int start, int code) {
    int top;

    for (top = start; 0 <= code; top++) {
        d_stack[top] = dictionary[code].suffix;
        code = dictionary[code].parent;
    }
    return top;
}

编解码器

/*
 * LZW-encode the whole content of fp into the bit stream bf.
 *
 * Output layout: a 32-bit byte count of the input (written with BitsOutput),
 * followed by one code per emitted phrase (written with output()).
 * fp must be seekable, because the length is found by seeking to the end;
 * if that fails an error is reported on stderr and nothing is written.
 *
 * NOTE(review): only the low 32 bits of the length are stored, so inputs of
 * 4 GiB or more cannot be represented by this header.
 */
void LZWEncode(FILE* fp, BITFILE* bf) {
    int character;
    int string_code;
    int index;
    long end_pos;
    unsigned long file_length;

    /* ftell() returns -1L on failure; never write that as a length. */
    if (0 != fseek(fp, 0, SEEK_END) || 0 > (end_pos = ftell(fp)) ||
        0 != fseek(fp, 0, SEEK_SET)) {
        fprintf(stderr, "Cannot determine the input file length\n");
        return;
    }
    file_length = (unsigned long)end_pos;
    BitsOutput(bf, file_length, 4 * 8);
    InitDictionary();
    string_code = -1;   /* -1: no current prefix yet */
    while (EOF != (character = fgetc(fp))) {
        index = InDictionary(character, string_code);
        if (0 <= index) {   /* string+character in dictionary: extend prefix */
            string_code = index;
        }
        else {  /* string+character not in dictionary */
            output(bf, string_code);
            if (MAX_CODE > next_code) { /* free space in dictionary */
                /* add string+character to dictionary */
                AddToDictionary(character, string_code);
            }
            string_code = character;
        }
    }
    /* Flush the pending prefix; for an empty input there is none, and
     * emitting the -1 placeholder would write a bogus code. */
    if (0 <= string_code) {
        output(bf, string_code);
    }
}

/*
 * Decode an LZW code stream read from bf and write the decoded bytes to fp.
 *
 * The stream starts with a 32-bit count of decoded bytes (read with
 * BitsInput), followed by the codes obtained from input().  Decoding stops
 * once that many bytes have been written.
 *
 * A code equal to next_code is the "CSCSC" case: the phrase is not in the
 * dictionary yet, so it is rebuilt as the previous phrase followed by the
 * previous phrase's first byte.
 *
 * NOTE(review): input() is defined elsewhere; it is assumed to return the
 * next code as an int. Negative or out-of-range codes are treated as a
 * corrupted stream and end decoding early.
 */
void LZWDecode(BITFILE* bf, FILE* fp) {
    int character = 0;  /* first byte of the most recently decoded phrase */
    int new_code, last_code;
    int phrase_length;
    unsigned long file_length;

    file_length = BitsInput(bf, 4 * 8);
    if ((unsigned long)-1 == file_length) file_length = 0;
    InitDictionary();
    last_code = -1;
    while (0 < file_length) {
        new_code = input(bf);
        /*
         * Reject codes the encoder can never produce: negative codes, codes
         * beyond the next free slot, and a "not yet in dictionary" code
         * before any phrase has been decoded (there is no previous phrase to
         * extend, and `character` would be meaningless).
         */
        if (0 > new_code || new_code > next_code ||
            (new_code == next_code && 0 > last_code)) {
            fprintf(stderr, "Corrupted LZW stream: unexpected code %d\n", new_code);
            return;
        }
        if (new_code >= next_code) { /* CSCSC case: code not in dictionary */
            /* Bottom of the stack is emitted last: previous phrase's first byte. */
            d_stack[0] = character;
            phrase_length = DecodeString(1, last_code);
        }
        else {
            phrase_length = DecodeString(0, new_code);
        }
        /* The phrase is stored reversed, so its first byte is on top. */
        character = d_stack[phrase_length - 1];
        /* Pop the stack to write the phrase in order; never write more than
         * the header promised, so file_length cannot wrap below zero. */
        while (0 < phrase_length && 0 < file_length) {
            phrase_length--;
            fputc(d_stack[phrase_length], fp);
            file_length--;
        }
        if (MAX_CODE > next_code) { /* room left: add previous phrase + first byte */
            AddToDictionary(character, last_code);
        }
        last_code = new_code;
    }
}

bitio.c


#pragma warning(disable:4996)
/*
 * Definitions for bitwise IO
 *
 * vim: ts=4 sw=4 cindent
 */

#include <stdlib.h>
#include <stdio.h>
#include "bitio.h"
/*
 * Open a bit stream for reading.
 *
 * filename: path opened in binary mode, or NULL to read from stdin.
 * Returns a heap-allocated BITFILE to be released with CloseBitFileInput(),
 * or NULL if allocation or opening the file fails.
 */
BITFILE* OpenBitFileInput(char* filename) {
	BITFILE* bf;

	bf = (BITFILE*)malloc(sizeof *bf);
	if (NULL == bf) return NULL;
	if (NULL == filename)	bf->fp = stdin;
	else bf->fp = fopen(filename, "rb");
	if (NULL == bf->fp) {
		/* Do not leak the descriptor when the file cannot be opened. */
		free(bf);
		return NULL;
	}
	bf->mask = 0x80;	/* 0x80 means "rack is empty, fetch a new byte" */
	bf->rack = 0;
	return bf;
}

/*
 * Open a bit stream for writing.
 *
 * filename: path opened (and truncated) in binary mode, or NULL to write to
 * stdout.
 * Returns a heap-allocated BITFILE to be released with CloseBitFileOutput(),
 * or NULL if allocation or opening the file fails.
 */
BITFILE* OpenBitFileOutput(char* filename) {
	BITFILE* bf;

	bf = (BITFILE*)malloc(sizeof *bf);
	if (NULL == bf) return NULL;
	if (NULL == filename)	bf->fp = stdout;
	else bf->fp = fopen(filename, "wb");
	if (NULL == bf->fp) {
		/* Do not leak the descriptor when the file cannot be opened. */
		free(bf);
		return NULL;
	}
	bf->mask = 0x80;	/* next bit goes into the most significant position */
	bf->rack = 0;
	return bf;
}

/*
 * Close the underlying stream of an input bit file and release the BITFILE
 * itself.  bf must not be used afterwards.
 */
void CloseBitFileInput(BITFILE* bf) {
	FILE* stream = bf->fp;

	fclose(stream);
	free(bf);
}

/*
 * Finish an output bit file: write out a partially filled byte (if any),
 * close the underlying stream and release the BITFILE.
 * bf must not be used afterwards.
 */
void CloseBitFileOutput(BITFILE* bf) {
	FILE* stream = bf->fp;
	/* mask is back at 0x80 only when no bits are waiting in rack. */
	int has_pending_bits = (bf->mask != 0x80);

	if (has_pending_bits) {
		fputc(bf->rack, stream);
	}
	fclose(stream);
	free(bf);
}

/*
 * Read a single bit, most significant bit of each byte first.
 * A new byte is fetched from the stream whenever the mask is at 0x80.
 * Returns 0 or 1.  If the stream is exhausted, prints a message to stderr
 * and terminates the process with exit(-1).
 */
int BitInput(BITFILE* bf) {
	int bit;

	if (bf->mask == 0x80) {
		bf->rack = fgetc(bf->fp);
		if (bf->rack == EOF) {
			fprintf(stderr, "Read after the end of file reached\n");
			exit(-1);
		}
	}
	bit = (bf->rack & bf->mask) != 0;
	bf->mask >>= 1;
	if (bf->mask == 0) {
		/* All eight bits consumed: restart at the top bit of the next byte. */
		bf->mask = 0x80;
	}
	return bit;
}

/*
 * Read `count` bits, most significant first, and return them as an
 * unsigned value.
 *
 * count must not exceed the width of unsigned long; a count of zero or less
 * reads nothing and returns 0.
 */
unsigned long BitsInput(BITFILE* bf, int count) {
	unsigned long mask;
	unsigned long value = 0UL;

	/* Shifting by a negative amount is undefined behaviour. */
	if (count <= 0) return 0UL;
	/* Use an unsigned constant: 1L << 31 overflows where long is 32 bits. */
	for (mask = 1UL << (count - 1); 0 != mask; mask >>= 1) {
		if (1 == BitInput(bf))
			value |= mask;
	}
	return value;
}

/*
 * Append a single bit (any non-zero `bit` counts as 1) to the output,
 * filling each byte from its most significant bit downwards.  The byte is
 * written to the stream once eight bits have been collected.
 */
void BitOutput(BITFILE* bf, int bit) {
	if (bit) {
		bf->rack |= bf->mask;
	}
	bf->mask >>= 1;
	if (bf->mask != 0) {
		return;		/* byte not complete yet */
	}
	/* Eight bits collected: emit the byte and start a fresh one. */
	fputc(bf->rack, bf->fp);
	bf->rack = 0;
	bf->mask = 0x80;
}

/*
 * Write the low `count` bits of `code`, most significant of those bits
 * first.
 *
 * count must not exceed the width of unsigned long; a count of zero or less
 * writes nothing.
 */
void BitsOutput(BITFILE* bf, unsigned long code, int count) {
	unsigned long mask;

	/* Shifting by a negative amount is undefined behaviour. */
	if (count <= 0) return;
	/* Use an unsigned constant: 1L << 31 overflows where long is 32 bits. */
	for (mask = 1UL << (count - 1); 0 != mask; mask >>= 1) {
		BitOutput(bf, (0 != (code & mask)) ? 1 : 0);
	}
}
#if 0
/*
 * Disabled manual test: copies the input bit by bit to the output while
 * echoing the bits to stderr in groups of eight.
 *
 * argv[1] is the input file (stdin when absent), argv[2] the output file
 * (stdout when absent).  The copy loop has no exit condition of its own; it
 * ends when BitInput() reaches end of input and calls exit(-1).
 */
int main(int argc, char** argv) {
	BITFILE* bfi, * bfo;
	int bit;
	int count = 0;

	/* OpenBitFile*() take only the filename and return the handle. */
	if (1 < argc) {
		bfi = OpenBitFileInput(argv[1]);
		if (NULL == bfi) {
			fprintf(stderr, "fail open the file\n");
			return -1;
		}
	}
	else {
		bfi = OpenBitFileInput(NULL);
		if (NULL == bfi) {
			fprintf(stderr, "fail open stdin\n");
			return -2;
		}
	}
	if (2 < argc) {
		bfo = OpenBitFileOutput(argv[2]);
		if (NULL == bfo) {
			fprintf(stderr, "fail open file for output\n");
			CloseBitFileInput(bfi);
			return -3;
		}
	}
	else {
		bfo = OpenBitFileOutput(NULL);
		if (NULL == bfo) {
			fprintf(stderr, "fail open stdout\n");
			CloseBitFileInput(bfi);
			return -4;
		}
	}
	while (1) {
		bit = BitInput(bfi);
		fprintf(stderr, "%d", bit);
		count++;
		if (0 == (count & 7))fprintf(stderr, " ");
		BitOutput(bfo, bit);
	}
	return 0;
}
#endif

bitio.h


/*
 * Declaration for bitwise IO
 *
 * vim: ts=4 sw=4 cindent
 */
#ifndef __BITIO__
#define __BITIO__

#include <stdio.h>

/* State for reading or writing a stdio stream one bit at a time. */
typedef struct {
	/* Underlying byte stream. */
	FILE* fp;
	/* Single-bit mask selecting the current bit in rack; starts at 0x80. */
	unsigned char mask;
	/* Byte being consumed (input) or assembled (output); int so that it can
	 * hold the EOF value returned by fgetc(). */
	int rack;
}BITFILE;

/* Open `filename` for bit-wise reading (stdin when NULL); NULL on failure. */
BITFILE* OpenBitFileInput(char* filename);
/* Open `filename` for bit-wise writing (stdout when NULL); NULL on failure. */
BITFILE* OpenBitFileOutput(char* filename);
/* Close the stream of an input bit file and free bf. */
void CloseBitFileInput(BITFILE* bf);
/* Write any partially filled byte, close the stream and free bf. */
void CloseBitFileOutput(BITFILE* bf);
/* Read one bit (0 or 1), most significant bit of each byte first. */
int BitInput(BITFILE* bf);
/* Read `count` bits, most significant first, and return them as a value. */
unsigned long BitsInput(BITFILE* bf, int count);
/* Write one bit; any non-zero `bit` is written as 1. */
void BitOutput(BITFILE* bf, int bit);
/* Write the low `count` bits of `code`, most significant first. */
void BitsOutput(BITFILE* bf, unsigned long code, int count);
#endif	// __BITIO__