编程实现LZW编解码

最新推荐文章于 2022-07-10 23:09:56 发布

cxrabc

最新推荐文章于 2022-07-10 23:09:56 发布

阅读量420

点赞数

分类专栏：数据压缩课程实验

本文链接：https://blog.csdn.net/cxrabc/article/details/115576885

版权

数据压缩课程实验专栏收录该内容

12 篇文章 0 订阅

订阅专栏

目录

一、实验目的
二、实验思路
三、代码部分
四、结果分析
- LZW编码的应用

一、实验目的

首先调试LZW的编码程序，以一个文本文件作为输入，得到输出的LZW编码文件。
以实验步骤一得到的编码文件作为输入，编写LZW的解码程序。在写解码程序时需要对关键语句加上注释，并说明进行何操作。重点说明当前码字在词典中不存在时应如何处理并解释原因。
选择至少十种不同格式类型的文件，使用LZW编码器进行压缩得到输出的压缩比特流文件。对各种不同格式的文件进行压缩效率的分析。

二、实验思路

1. 编码

建立一个字符串表，把每一个第一次出现的字符串放入串表中，并用一个数字来表示。如果这个字符串再次出现时，即可用表示它的数字来代替。

在这里插入图片描述

2. 解码

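解码器建立词典的进度比编码器晚一步：编码器在输出一个码字的同时就把“当前字符串+下一个字符”加入了词典，而解码器要等读到下一个码字、知道其首字符之后才能补上同一个词条。所以在解码的过程中，可能会读到一个尚未存入词典的码字。这种情况只会在该码字恰好是编码器刚刚新建的词条时出现（输入形如 cScSc），此时它对应的字符串一定是“上一个码字的字符串 + 该字符串的首字符”，解码器据此即可还原并把它加入词典。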

解决方法如图所示：

在这里插入图片描述

3. 数据结构分析

在这里插入图片描述

尾缀字符（suffix）
母节点（parent）
第一个孩子节点( firstchild )
下一个兄弟节点（nextsibling）
树用数组dict[ ]表示，数组下标用pointer表示，所以dict[pointer]表示一个节点
dict[pointer].suffix
dict[pointer].parent
dict[pointer].firstchild
dict[pointer].nextsibling

三、代码部分

1. bitio.h

/*
 * Declaration for bitwise IO
 *
 * vim: ts=4 sw=4 cindent
 */
#ifndef __BITIO__
#define __BITIO__

#include <stdio.h>

/*
 * State of a bit-level stream layered on top of a stdio FILE.
 */
typedef struct {
	FILE* fp;	/* underlying byte stream */
	unsigned char mask;	/* selects the next bit inside rack; starts at 0x80 and moves right */
	int rack;	/* byte currently being consumed (input) or assembled (output) */
}BITFILE;

/* Open filename in "rb" mode for bit-wise reading; NULL selects stdin. Returns NULL on failure. */
BITFILE* OpenBitFileInput(char* filename);
/* Open filename in "wb" mode for bit-wise writing; NULL selects stdout. Returns NULL on failure. */
BITFILE* OpenBitFileOutput(char* filename);
/* Close the stream of an input BITFILE and free the handle. */
void CloseBitFileInput(BITFILE* bf);
/* Write out any partially filled byte, then close the stream and free the handle. */
void CloseBitFileOutput(BITFILE* bf);
/* Read one bit; returns 0 or 1. Terminates the program if the stream is exhausted. */
int BitInput(BITFILE* bf);
/* Read count bits, most significant bit first, and return them as one value. */
unsigned long BitsInput(BITFILE* bf, int count);
/* Write one bit; any non-zero value of bit is written as 1. */
void BitOutput(BITFILE* bf, int bit);
/* Write the low count bits of code, most significant bit first. */
void BitsOutput(BITFILE* bf, unsigned long code, int count);
#endif	// __BITIO__

2. bitio.c

/*
 * Definitions for bitwise IO
 *
 * vim: ts=4 sw=4 cindent
 */

#include <stdlib.h>
#include <stdio.h>
#include "bitio.h"
/*
 * Open a stream for bit-wise reading.
 *
 * filename: path opened in "rb" mode, or NULL to read from stdin.
 * Returns a heap-allocated BITFILE (release it with CloseBitFileInput),
 * or NULL when the allocation or the fopen fails.
 */
BITFILE* OpenBitFileInput(char* filename) {
	BITFILE* bf;

	bf = (BITFILE*)malloc(sizeof *bf);
	if (NULL == bf) return NULL;
	bf->fp = (NULL == filename) ? stdin : fopen(filename, "rb");
	if (NULL == bf->fp) {
		free(bf);	/* do not leak the handle when the file cannot be opened */
		return NULL;
	}
	bf->mask = 0x80;	/* first bit delivered is the most significant one */
	bf->rack = 0;
	return bf;
}

/*
 * Open a stream for bit-wise writing.
 *
 * filename: path opened in "wb" mode, or NULL to write to stdout.
 * Returns a heap-allocated BITFILE (release it with CloseBitFileOutput),
 * or NULL when the allocation or the fopen fails.
 */
BITFILE* OpenBitFileOutput(char* filename) {
	BITFILE* bf;

	bf = (BITFILE*)malloc(sizeof *bf);
	if (NULL == bf) return NULL;
	bf->fp = (NULL == filename) ? stdout : fopen(filename, "wb");
	if (NULL == bf->fp) {
		free(bf);	/* do not leak the handle when the file cannot be opened */
		return NULL;
	}
	bf->mask = 0x80;	/* first bit written lands in the most significant position */
	bf->rack = 0;
	return bf;
}

/*
 * Close an input bit stream and release its handle.
 * A NULL handle (e.g. the result of a failed open) is ignored.
 */
void CloseBitFileInput(BITFILE* bf) {
	if (NULL == bf) return;
	fclose(bf->fp);
	free(bf);
}

/*
 * Finish an output bit stream: write the partially filled byte, if any,
 * close the underlying stream and release the handle.
 * A NULL handle (e.g. the result of a failed open) is ignored.
 */
void CloseBitFileOutput(BITFILE* bf) {
	if (NULL == bf) return;
	/* mask has left its start position, so rack holds bits not yet written */
	if (0x80 != bf->mask) fputc(bf->rack, bf->fp);
	fclose(bf->fp);
	free(bf);
}

/*
 * Read the next single bit from bf and return it as 0 or 1.
 * A fresh byte is fetched whenever the mask sits on the top bit; hitting
 * end of file at that point prints a message and exits with status -1.
 */
int BitInput(BITFILE* bf) {
	int bit;

	if (bf->mask == 0x80) {
		/* previous byte fully consumed: load the next one */
		bf->rack = fgetc(bf->fp);
		if (bf->rack == EOF) {
			fprintf(stderr, "Read after the end of file reached\n");
			exit(-1);
		}
	}
	bit = (bf->rack & bf->mask) ? 1 : 0;
	bf->mask >>= 1;
	if (bf->mask == 0) {
		/* all eight bits handed out: rewind to the top bit */
		bf->mask = 0x80;
	}
	return bit;
}

/*
 * Read count bits from bf, most significant bit first.
 *
 * Returns the bits packed into the low end of an unsigned long.
 * count <= 0 reads nothing and returns 0.  The value is built with
 * unsigned shifts, so reading as many bits as unsigned long holds (e.g. 32
 * on platforms where long is 32 bits) is well defined, unlike the signed
 * "1L << (count - 1)" form.
 */
unsigned long BitsInput(BITFILE* bf, int count) {
	unsigned long value = 0UL;

	while (0 < count--) {
		/* earlier bits move up, the new bit enters at the bottom */
		value = (value << 1) | (unsigned long)BitInput(bf);
	}
	return value;
}

/*
 * Append one bit to bf; any non-zero value of bit is stored as 1.
 * Bits fill the rack from the top bit downwards, and the rack is written
 * to the stream as soon as its eighth bit has been placed.
 */
void BitOutput(BITFILE* bf, int bit) {
	if (bit) bf->rack |= bf->mask;
	bf->mask >>= 1;
	if (bf->mask) return;	/* rack not full yet */

	fputc(bf->rack, bf->fp);
	bf->rack = 0;
	bf->mask = 0x80;
}

/*
 * Write the low count bits of code to bf, most significant bit first.
 *
 * count <= 0 writes nothing.  count must not exceed the width of
 * unsigned long.  The mask is built from an unsigned constant so that
 * selecting the top bit (count equal to the width of long) is well
 * defined, unlike the signed "1L << (count - 1)" form.
 */
void BitsOutput(BITFILE* bf, unsigned long code, int count) {
	unsigned long mask;

	if (0 >= count) return;
	for (mask = 1UL << (count - 1); 0 != mask; mask >>= 1)
		BitOutput(bf, (0 == (code & mask)) ? 0 : 1);
}
#if 0
/*
 * Disabled self-test for the bit I/O layer: copies argv[1] (or stdin) to
 * argv[2] (or stdout) one bit at a time and echoes every bit to stderr in
 * groups of eight.  The copy loop has no exit condition of its own; the
 * program ends when BitInput calls exit() at end of input.
 */
int main(int argc, char** argv) {
	BITFILE* bfi, * bfo;
	int bit;
	int count = 0;

	/* The open functions take only a file name and return the handle. */
	if (1 < argc) {
		if (NULL == (bfi = OpenBitFileInput(argv[1]))) {
			fprintf(stderr, "fail open the file\n");
			return -1;
		}
	}
	else {
		if (NULL == (bfi = OpenBitFileInput(NULL))) {
			fprintf(stderr, "fail open stdin\n");
			return -2;
		}
	}
	if (2 < argc) {
		if (NULL == (bfo = OpenBitFileOutput(argv[2]))) {
			fprintf(stderr, "fail open file for output\n");
			CloseBitFileInput(bfi);
			return -3;
		}
	}
	else {
		if (NULL == (bfo = OpenBitFileOutput(NULL))) {
			fprintf(stderr, "fail open stdout\n");
			CloseBitFileInput(bfi);
			return -4;
		}
	}
	while (1) {
		bit = BitInput(bfi);
		fprintf(stderr, "%d", bit);
		count++;
		if (0 == (count & 7))fprintf(stderr, " ");	/* space after every eighth bit */
		BitOutput(bfo, bit);
	}
	return 0;
}
#endif

3. main.c

/*
 * Definition for LZW coding
 *
 * vim: ts=4 sw=4 cindent nowrap
 */
#include <stdlib.h>
#include <stdio.h>
#include "bitio.h"
#define MAX_CODE 65535

/*
 * LZW dictionary: a tree of phrases stored in an array and linked by
 * indices.  A node's phrase is its parent's phrase followed by suffix;
 * a link value of -1 means "none".
 */
struct {
	int suffix;	/* last character of the phrase this node stands for */
	int parent, firstchild, nextsibling;	/* indices into dictionary[], -1 when absent */
} dictionary[MAX_CODE + 1];
int next_code;	/* next free dictionary index; InitDictionary sets it to 256 */
int d_stack[MAX_CODE]; // stack for decoding a phrase

/* Codes are stored in the bit stream as fixed-width 16-bit values. */
#define input(f) ((int)BitsInput( f, 16))
#define output(f, x) BitsOutput( f, (unsigned long)(x), 16)

int DecodeString(int start, int code);
void InitDictionary(void);
/*
 * Print every dictionary entry from code 256 up to next_code - 1 to
 * stdout, one per line, as "<code>-><phrase>".
 */
void PrintDictionary(void) {
	int code;
	int pos;

	for (code = 256; code < next_code; code++) {
		/* DecodeString leaves the phrase in d_stack last character first */
		pos = DecodeString(0, code);
		printf("%4d->", code);
		for (pos = pos - 1; pos >= 0; pos--)
			printf("%c", (char)(d_stack[pos]));
		printf("\n");
	}
}

/*
 * Expand code into d_stack, beginning at index start.
 * The tree is walked from the node up to its root, so the characters are
 * stored last character first.  Returns the index one past the final
 * character written (start itself when code is negative).
 */
int DecodeString(int start, int code) {
	int top;

	for (top = start; code >= 0; code = dictionary[code].parent)
		d_stack[top++] = dictionary[code].suffix;
	return top;
}
/*
 * Reset the dictionary to the 256 single-character root entries and set
 * next_code to 256, the first code available for a new phrase.
 */
void InitDictionary(void) {
	int ch;

	for (ch = 0; ch < 256; ch++) {
		dictionary[ch].suffix = ch;	/* a root stands for its own character */
		dictionary[ch].parent = -1;	/* roots have no parent */
		dictionary[ch].firstchild = -1;	/* no longer phrases yet */
		/* roots are chained in order; the last one ends the chain */
		dictionary[ch].nextsibling = (ch < 255) ? ch + 1 : -1;
	}
	next_code = 256;
}
/*
 * Look up the phrase formed by the string string_code followed by
 * character.
 *
 * Returns the dictionary index of that phrase, or -1 if it is not present.
 * A negative string_code denotes the empty string, in which case the
 * result is character itself (the index of its single-character entry).
 */
int InDictionary(int character, int string_code) {
	int node;

	if (string_code < 0) return character;
	/* scan the children of string_code for one ending in character */
	for (node = dictionary[string_code].firstchild; node >= 0; node = dictionary[node].nextsibling) {
		if (dictionary[node].suffix == character) return node;
	}
	return -1;
}

/*
 * Add the phrase "string_code followed by character" to the dictionary
 * at index next_code, then advance next_code.
 * Does nothing when string_code is negative.  The caller is responsible
 * for making sure a free slot is available.
 */
void AddToDictionary(int character, int string_code) {
	int tail;

	if (string_code < 0) return;

	dictionary[next_code].suffix = character;
	dictionary[next_code].parent = string_code;
	dictionary[next_code].firstchild = -1;
	dictionary[next_code].nextsibling = -1;

	tail = dictionary[string_code].firstchild;
	if (tail < 0) {
		/* parent had no children: the new node becomes its first child */
		dictionary[string_code].firstchild = next_code;
	}
	else {
		/* otherwise append the new node to the end of the sibling chain */
		while (dictionary[tail].nextsibling >= 0)
			tail = dictionary[tail].nextsibling;
		dictionary[tail].nextsibling = next_code;
	}
	next_code++;
}

/*
 * LZW-encode the whole of fp into bf.
 *
 * Output layout: a 32-bit length of the input in bytes, followed by one
 * 16-bit code per emitted phrase.
 *
 * fp must be seekable, because the length is measured with fseek/ftell.
 * If the length cannot be determined, or does not fit the 32-bit header,
 * a message is printed to stderr and nothing is written.
 * An empty input produces only the header.
 */
void LZWEncode(FILE* fp, BITFILE* bf) {
	int character;
	int string_code;
	int index;
	long file_length;

	if (0 != fseek(fp, 0, SEEK_END)
		|| 0 > (file_length = ftell(fp))
		|| 0 != fseek(fp, 0, SEEK_SET)) {
		fprintf(stderr, "cannot determine the length of the input file\n");
		return;
	}
	/* the decoder treats an all-ones header as "no data", so that value is excluded too */
	if ((unsigned long)file_length >= 0xFFFFFFFFUL) {
		fprintf(stderr, "input file is too large for the 32-bit length header\n");
		return;
	}
	BitsOutput(bf, (unsigned long)file_length, 4 * 8);
	InitDictionary();
	string_code = -1;	/* empty current string */
	while (EOF != (character = fgetc(fp))) {
		index = InDictionary(character, string_code);
		if (0 <= index) {	// string+character in dictionary
			string_code = index;
		}
		else {	// string+character not in dictionary
			output(bf, string_code);
			if (MAX_CODE > next_code) {	// free space in dictionary
				// add string+character to dictionary
				AddToDictionary(character, string_code);
			}
			string_code = character;	/* restart from the unmatched character */
		}
	}
	/* flush the last pending string; with an empty input there is none */
	if (0 <= string_code) output(bf, string_code);
}

/*
 * Decode an LZW stream from bf and write the recovered bytes to fp.
 *
 * The stream starts with a 32-bit count of original bytes, followed by
 * 16-bit codes.  Decoding stops once that many bytes have been written.
 * If a code appears that no valid stream can contain, a message is
 * printed to stderr and decoding stops.
 */
void LZWDecode(BITFILE* bf, FILE* fp)
{
	int character = 0;	/* first character of the most recently decoded phrase */
	int new_code, last_code;
	int phrase_length;
	unsigned long file_length;

	file_length = BitsInput(bf, 4 * 8);
	/* An all-ones header is the 32-bit image of -1 (e.g. a failed ftell
	 * stored by an encoder); treat it as "no data".  Comparing against the
	 * 32-bit pattern works whatever the width of long is. */
	if (0xFFFFFFFFUL == file_length) file_length = 0;
	InitDictionary();
	last_code = -1;	/* no previous phrase yet */
	while (0 < file_length)
	{
		new_code = input(bf);
		if (new_code >= next_code)
		{ // this is the case CSCSC( not in dict)
			/* The encoder can only be one entry ahead of us, and never on
			 * the very first code; anything else is a damaged stream. */
			if (0 > last_code || new_code > next_code) {
				fprintf(stderr, "corrupt LZW stream: unexpected code %d\n", new_code);
				return;
			}
			/* The unknown phrase is the previous phrase followed by its own
			 * first character.  d_stack holds phrases last character first,
			 * so that extra character goes into slot 0. */
			d_stack[0] = character;
			phrase_length = DecodeString(1, last_code);
		}
		else
		{	/* known code: expand it directly */
			phrase_length = DecodeString(0, new_code);
		}
		character = d_stack[phrase_length - 1];	/* first character of the phrase just decoded */
		/* write the phrase in forward order, never beyond the declared length */
		while (0 < phrase_length && 0 < file_length)
		{
			phrase_length--;
			fputc(d_stack[phrase_length], fp);
			file_length--;
		}
		if (MAX_CODE > next_code)	/* dictionary still has room */
		{	// add the new phrase to dictionary
			AddToDictionary(character, last_code);
		}
		last_code = new_code;
	}
}



/*
 * Command-line driver: "<prog> E <ifile> <ofile>" encodes ifile into
 * ofile, "<prog> D <ifile> <ofile>" decodes it.
 *
 * Returns 0 on success and a negative value when the arguments are
 * missing, the operation is unknown, or a file cannot be opened.
 */
int main(int argc, char** argv)
{
	FILE* fp;
	BITFILE* bf;

	if (4 > argc) {
		fprintf(stdout, "usage: \n%s <o> <ifile> <ofile>\n", argv[0]);
		fprintf(stdout, "\t<o>: E or D reffers encode or decode\n");
		fprintf(stdout, "\t<ifile>: input file name\n");
		fprintf(stdout, "\t<ofile>: output file name\n");
		return -1;
	}
	if ('E' == argv[1][0]) { // do encoding
		/* open the input first so a bad input name does not create an output file */
		fp = fopen(argv[2], "rb");
		if (NULL == fp) {
			fprintf(stderr, "cannot open input file %s\n", argv[2]);
			return -2;
		}
		bf = OpenBitFileOutput(argv[3]);
		if (NULL == bf) {
			fprintf(stderr, "cannot open output file %s\n", argv[3]);
			fclose(fp);
			return -3;
		}
		LZWEncode(fp, bf);
		fclose(fp);
		CloseBitFileOutput(bf);
		fprintf(stdout, "encoding done\n");
	}
	else if ('D' == argv[1][0]) {	// do decoding
		bf = OpenBitFileInput(argv[2]);
		if (NULL == bf) {
			fprintf(stderr, "cannot open input file %s\n", argv[2]);
			return -2;
		}
		fp = fopen(argv[3], "wb");
		if (NULL == fp) {
			fprintf(stderr, "cannot open output file %s\n", argv[3]);
			CloseBitFileInput(bf);
			return -3;
		}
		LZWDecode(bf, fp);
		fclose(fp);
		CloseBitFileInput(bf);
		fprintf(stdout, "decoding done\n");
	}
	else {	// otherwise
		fprintf(stderr, "not supported operation\n");
		return -4;
	}
	return 0;
}

四、结果分析

尾号为0的是原文件，尾号为1的是编码后的文件，尾号为2的是解码后的文件。
在这里插入图片描述

文件格式	原文件/KB	编码后/KB	压缩效率（编码后大小÷原文件大小）/ %
txt	29字节	60字节	206.9
bmp	762	198	26.0
jpg	29	43	148.3
gif	131	166	126.7
mp3	1881	2293	121.9
mp4	11932	8435	70.7
mkv	1248	1427	114.3
avi	3136	3806	121.4
mpg	6176	7465	120.9
yuv	33750	15446	45.8

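在使用LZW编码后，除少部分文件（bmp、yuv、mp4）有明显的压缩效果外，许多文件的大小反而增大了。原因在于本程序的每个码字固定占16比特，而原文件中一个字符只占8比特：只有当一个码字平均能代表两个以上的字符时，文件才会变小。如果数据中重复的词组不多（例如jpg、gif、mp3等本身已经过压缩的格式，或者像本实验中只有29字节的txt文件，词典还来不及建立），大多数码字只代表一两个字符，那么经过LZW编码后的文件就会增大。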

LZW编码的应用

通用文件压缩（WinZip）、动画图像压缩（GIF，TIFF）、电子邮件压缩、PDF文档压缩、雷达数据压缩等。

cxrabc

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
编程实现LZW编解码

这里写目录标题一、实验目的二、实验思路1. 编码2. 解码3. 数据结构分析三、代码部分1. bitio.h2. bitio.c3. main.c四、结果分析LZW编码的应用一、实验目的首先调试LZW的编码程序，以一个文本文件作为输入，得到输出的LZW编码文件。以实验步骤一得到的编码文件作为输入，编写LZW的解码程序。在写解码程序时需要对关键语句加上注释，并说明进行何操作。重点说明当前码字在词典中不存在时应如何处理并解释原因。选择至少十种不同格式类型的文件，使用LZW编码器进行压缩得到输出的压缩比
复制链接

扫一扫